skip classes with no students before scraping process

average
Matt Huntington 2 years ago
parent 7858d9c63f
commit 88c8a9d383

@ -97,14 +97,25 @@ const scrapeInstance = async (link, page) => {
const instanceLinks = await page.$$('tr td:nth-child(2) a'); const instanceLinks = await page.$$('tr td:nth-child(2) a');
for(instance of instanceLinks){ for(instance of instanceLinks){
const td = await instance.getProperty('parentNode');
const linkHTML = await instance.getProperty('href'); const tr = await td.getProperty('parentNode');
const href = await linkHTML.jsonValue() const sibling = await tr.$('.numeric');
const instanceID = href.match(/[0-9]*$/)[0]; const siblingHTML = await sibling.getProperty('innerHTML');
const res = await client.query(`SELECT * FROM instances WHERE instance_id = ${instanceID}`); const numStudents = parseInt(await siblingHTML.jsonValue());
if(res.rowCount === 0){ if(numStudents > 0){
await scrapeInstance(instance, page); const linkHTML = await instance.getProperty('href');
const href = await linkHTML.jsonValue()
const instanceID = href.match(/[0-9]*$/)[0];
const res = await client.query(`SELECT * FROM instances WHERE instance_id = ${instanceID}`);
if(res.rowCount === 0){
await scrapeInstance(instance, page);
} else {
console.log(instanceID, 'found');
}
} else {
console.log('no students, skipping');
} }
} }
console.log('done'); console.log('done');

Loading…
Cancel
Save