|
|
|
@ -97,14 +97,25 @@ const scrapeInstance = async (link, page) => {
|
|
|
|
const instanceLinks = await page.$$('tr td:nth-child(2) a');
|
|
|
|
const instanceLinks = await page.$$('tr td:nth-child(2) a');
|
|
|
|
|
|
|
|
|
|
|
|
for(instance of instanceLinks){
|
|
|
|
for(instance of instanceLinks){
|
|
|
|
|
|
|
|
const td = await instance.getProperty('parentNode');
|
|
|
|
|
|
|
|
const tr = await td.getProperty('parentNode');
|
|
|
|
|
|
|
|
const sibling = await tr.$('.numeric');
|
|
|
|
|
|
|
|
const siblingHTML = await sibling.getProperty('innerHTML');
|
|
|
|
|
|
|
|
const numStudents = parseInt(await siblingHTML.jsonValue());
|
|
|
|
|
|
|
|
if(numStudents > 0){
|
|
|
|
const linkHTML = await instance.getProperty('href');
|
|
|
|
const linkHTML = await instance.getProperty('href');
|
|
|
|
const href = await linkHTML.jsonValue()
|
|
|
|
const href = await linkHTML.jsonValue()
|
|
|
|
const instanceID = href.match(/[0-9]*$/)[0];
|
|
|
|
const instanceID = href.match(/[0-9]*$/)[0];
|
|
|
|
const res = await client.query(`SELECT * FROM instances WHERE instance_id = ${instanceID}`);
|
|
|
|
const res = await client.query(`SELECT * FROM instances WHERE instance_id = ${instanceID}`);
|
|
|
|
if(res.rowCount === 0){
|
|
|
|
if(res.rowCount === 0){
|
|
|
|
await scrapeInstance(instance, page);
|
|
|
|
await scrapeInstance(instance, page);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
console.log(instanceID, 'found');
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
console.log('no students, skipping');
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
console.log('done');
|
|
|
|
console.log('done');
|
|
|
|
|