I'm writing a web scraper that uses regex to extract information in a paragraph and store it in an object. Then I add the object to an array. Here's my full code:
function scrapeCourseData(htmlString) {
// scrapes a specific department's course list
var tempArr = [];
console.log(tempArr); // outputs '[]'
$ = cheerio.load(htmlString);
// #coursestextcontainer contains the actual information for every single course listed in a department
$('#coursestextcontainer').find('.courseblock').each(function(i, elem) {
// finds all divs of type courseblock, iterates though each of them,
// extracting course information from children.
console.log('courseblock ' + (i + 1));
var courseText = $('strong', '.courseblocktitle', elem).text(); // Gets the text that will be parsed
var regex = /([A-Z]{4}\s[A-Z]{1,2}\d{4})\s(.*?)(?:\.*)(\d{1,2}(?:\.?|-?)\d{0,2}\spoints?)/g;
var regexGroups = Object.freeze({
NUMBER: 1,
NAME: 2,
CREDITS: 3
});
var match, course;
while ((match = regex.exec(courseText)) !== null) { // when regex.exec returns null, no more matches, and loop stops.
course = {
number: match[regexGroups.NUMBER],
name: match[regexGroups.NAME],
credits: match[regexGroups.CREDITS]
};
tempArr.push(course); // doesn't work-- result is array full of 'null'
console.log(course); // but this outputs as a valid object, e.g. { number: 'AFAS W3030'... }
}
});
console.log("Complete tempArr: " + tempArr); // outputs [object Object],[object Object],[object Object], etc.
for (var j of tempArr) {
dataJSONObject.push(tempArr[j]);
console.log('\ntempArray at ' + j + ': ' + tempArr[j]); // outputs [object Object]: undefined
}
console.log('\n');
}
When I first define tempArr as [] and output it to the console, I get the expected result [].
The objects I form from regex matches are also valid as expected at runtime.
However, when I try to push those objects to tempArr, and then print tempArr, it outputs as undefined.
I've been poking around other stackoverflow questions and I'm pretty sure my problem is that when I'm pushing to tempArr, I'm doing so outside of its scope. I've tried moving around where I declare tempArr (e.g. by putting it outside its function to make it global), but I still get undefined after pushing. What am I missing?