dealing with broken instances

average
Matthew Huntington 2 years ago
parent c702fa9b2d
commit 5e0f21ff8f

@ -1,5 +1,6 @@
const puppeteer = require('puppeteer'); const puppeteer = require('puppeteer');
const pg = require('pg'); const pg = require('pg');
const brokenInstances = [8302, 8294];
const client = new pg.Client({ const client = new pg.Client({
database:"outcomes_tracker" database:"outcomes_tracker"
@ -16,10 +17,9 @@ const scrapeInstance = async (link, page) => {
await page.waitForNavigation(); await page.waitForNavigation();
const totalStudentsElem = await page.$('.instance-student-detail .students'); const totalStudentsElem = await page.$('.instance-student-detail .students');
const totalStudentsHTML = await totalStudentsElem.getProperty('innerHTML'); if(totalStudentsElem){
const totalStudents = parseInt(await totalStudentsHTML.jsonValue()); const totalStudentsHTML = await totalStudentsElem.getProperty('innerHTML');
const totalStudents = parseInt(await totalStudentsHTML.jsonValue());
if(totalStudents > 0){
const h3elem = await page.$('h3'); const h3elem = await page.$('h3');
const h3HTML = await h3elem.getProperty('innerHTML'); const h3HTML = await h3elem.getProperty('innerHTML');
@ -53,7 +53,8 @@ const scrapeInstance = async (link, page) => {
console.log({ instanceID, course, graduationDate, totalStudents, dropped, graduates, seeking, outcomes90Numeric, outcomes90Percent }); console.log({ instanceID, course, graduationDate, totalStudents, dropped, graduates, seeking, outcomes90Numeric, outcomes90Percent });
const res = await client.query(`INSERT INTO instances (instance_id, course, graduation_date, total_students, dropped, graduates, seekers, ninety_day_outcomes) VALUES (${instanceID}, '${course}', '${graduationDate}', ${totalStudents}, ${dropped}, ${graduates}, ${seeking}, ${outcomes90Numeric})`); const res = await client.query(`INSERT INTO instances (instance_id, course, graduation_date, total_students, dropped, graduates, seekers, ninety_day_outcomes) VALUES (${instanceID}, '${course}', '${graduationDate}', ${totalStudents}, ${dropped}, ${graduates}, ${seeking}, ${outcomes90Numeric})`);
} else {
await page.screenshot({ path: 'outcomes.png' })
} }
return page.goBack(); return page.goBack();
@ -108,7 +109,9 @@ const scrapeInstance = async (link, page) => {
const instanceID = href.match(/[0-9]*$/)[0]; const instanceID = href.match(/[0-9]*$/)[0];
const res = await client.query(`SELECT * FROM instances WHERE instance_id = ${instanceID}`); const res = await client.query(`SELECT * FROM instances WHERE instance_id = ${instanceID}`);
if(res.rowCount === 0){ if(res.rowCount === 0){
await scrapeInstance(instance, page); if(!brokenInstances.includes(parseInt(instanceID))){
await scrapeInstance(instance, page);
}
} else { } else {
console.log(instanceID, 'found'); console.log(instanceID, 'found');
} }

Loading…
Cancel
Save