The following code is a modification of the soupselect demo example. It basically fetches some html and prints a list of links and stores them in a variable:
crawl = function(host)
    var select = require('soupselect').select,
        htmlparser = require("htmlparser"),
        http = require('http'),
        sys = require('sys');
    // fetch some HTML...
    var http = require('http');
    var client = http.createClient(80, host);
    var request = client.request('GET', '/',{'host': host});
    var newPages = []
    request.on('response', function (response) {
        response.setEncoding('utf8');
        var body = "";
        response.on('data', function (chunk) {
            body = body + chunk;
        });
        response.on('end', function() {
            // now we have the whole body, parse it and select the nodes we want...
            var handler = new htmlparser.DefaultHandler(function(err, dom) {
                if (err) {
                    sys.debug("Error: " + err);
                } else {
                    // soupselect happening here...
                    var titles = select(dom, 'a.title');
                    sys.puts("Top stories from reddit");
                    titles.forEach(function(title) {
                        sys.puts("- " + title.children[0].raw + " [" + title.attribs.href + "]\n");
                        newPages.push(title.attribs.href);
                    })
                }
            });
            var parser = new htmlparser.Parser(handler);
            parser.parseComplete(body);
        });
    });
    request.end();
}
what i really want is for this function to return newPages
i want to be able to say newPages = crawl(host); Trouble is im not sure if this makes sense or where to put the return statement. I see that newPages exists before the request is ended but is empty after the request is over.
How do i make that function have a return value that is newPages?
 
     
    