There are sites whose DOM and contents are generated dynamically when the page loads. (AngularJS-based sites are notorious for this.)
What approach do you use? I tried both phantomjs and jsdom but it seems I am unable to get the page to execute its JavaScript before I scrape.
Here's a simple jsdom example (not angularjs-based but still dynamically generated)
var env = require('jsdom').env;
exports.scrape = function(link, callback) {
  var config = {
    url: link,
    headers: { 
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36'
      },
    done: jsdomDone
  };
  env(config);
}
// Completion handler for jsdom's env(): on success, loads jQuery against
// the jsdom window and logs the profile picture's src; on failure, logs
// the error.
function jsdomDone(err, window) {
  var info = null; // (unused in the original; retained as-is)
  if (err) {
    console.error(err);
    return;
  }
  var $ = require('jquery')(window);
  console.log($('.profilePic').attr('src'));
}
// Sample invocation. The original supplied no callback even though the
// API declares one, leaving failures invisible; log the outcome instead.
exports.scrape('https://www.facebook.com/elcompanies', function(err) {
  if (err) {
    console.error(err);
  }
});
I tried phantomjs with moderate success.
// PhantomJS script: load the page, wait 10 seconds for client-side
// rendering, then capture a screenshot and dump the serialized DOM.
var page = new WebPage();
var fs = require('fs');

page.onLoadFinished = function() {
  console.log("page load finished");
  // Give the page's own JavaScript time to build the dynamic DOM before
  // snapshotting. A fixed delay is crude; polling until
  // document.querySelector('.profilePic') exists would be more reliable.
  window.setTimeout(function() {
    page.render('export.png');
    // NOTE(review): page.content is the DOM serialized at this instant;
    // on BigPipe-style sites the markup may still sit inside inline
    // script payloads (the onPageletArrive blocks) rather than the
    // rendered tree, which matches the behavior described above.
    fs.write('1.html', page.content, 'w');
    phantom.exit();
  }, 10000);
};

// The original passed a callback that only invoked an empty
// page.evaluate(function() {}) — dead code with no effect — so it has
// been removed.
page.open("https://www.facebook.com/elcompanies");
Here I wait for the onLoadFinished event and even put a 10-second timer. The interesting thing is that while my export.png image capture of the page shows a fully rendered page, my 1.html doesn't show the .profilePic class element in its rightful place. It seems to be sitting in some javascript code, surrounded by some kind of "require("TimeSlice").guard(function() {bigPipe.onPageletArrive({..." block
If you can provide me a working example that scrapes the image off this page, that'd be helpful.
 
     
     
    