|
|
|
|
@ -1,5 +1,6 @@
|
|
|
|
|
const puppeteer = require('puppeteer');
|
|
|
|
|
const pg = require('pg');
|
|
|
|
|
// Numeric instance IDs that are excluded from scraping: the instance loop
// checks `brokenInstances.includes(parseInt(instanceID))` and only calls
// scrapeInstance when the ID is NOT in this list. Entries must be numbers
// (not strings) because the lookup compares against a parseInt result.
// NOTE(review): why these particular instances are considered broken is not
// visible here — confirm before adding or removing IDs.
const brokenInstances = [8302, 8294];
|
|
|
|
|
|
|
|
|
|
const client = new pg.Client({
|
|
|
|
|
database:"outcomes_tracker"
|
|
|
|
|
@ -16,10 +17,9 @@ const scrapeInstance = async (link, page) => {
|
|
|
|
|
await page.waitForNavigation();
|
|
|
|
|
|
|
|
|
|
const totalStudentsElem = await page.$('.instance-student-detail .students');
|
|
|
|
|
const totalStudentsHTML = await totalStudentsElem.getProperty('innerHTML');
|
|
|
|
|
const totalStudents = parseInt(await totalStudentsHTML.jsonValue());
|
|
|
|
|
|
|
|
|
|
if(totalStudents > 0){
|
|
|
|
|
if(totalStudentsElem){
|
|
|
|
|
const totalStudentsHTML = await totalStudentsElem.getProperty('innerHTML');
|
|
|
|
|
const totalStudents = parseInt(await totalStudentsHTML.jsonValue());
|
|
|
|
|
|
|
|
|
|
const h3elem = await page.$('h3');
|
|
|
|
|
const h3HTML = await h3elem.getProperty('innerHTML');
|
|
|
|
|
@ -53,7 +53,8 @@ const scrapeInstance = async (link, page) => {
|
|
|
|
|
|
|
|
|
|
console.log({ instanceID, course, graduationDate, totalStudents, dropped, graduates, seeking, outcomes90Numeric, outcomes90Percent });
|
|
|
|
|
const res = await client.query(`INSERT INTO instances (instance_id, course, graduation_date, total_students, dropped, graduates, seekers, ninety_day_outcomes) VALUES (${instanceID}, '${course}', '${graduationDate}', ${totalStudents}, ${dropped}, ${graduates}, ${seeking}, ${outcomes90Numeric})`);
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
await page.screenshot({ path: 'outcomes.png' })
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return page.goBack();
|
|
|
|
|
@ -108,7 +109,9 @@ const scrapeInstance = async (link, page) => {
|
|
|
|
|
const instanceID = href.match(/[0-9]*$/)[0];
|
|
|
|
|
const res = await client.query(`SELECT * FROM instances WHERE instance_id = ${instanceID}`);
|
|
|
|
|
if(res.rowCount === 0){
|
|
|
|
|
await scrapeInstance(instance, page);
|
|
|
|
|
if(!brokenInstances.includes(parseInt(instanceID))){
|
|
|
|
|
await scrapeInstance(instance, page);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
console.log(instanceID, 'found');
|
|
|
|
|
}
|
|
|
|
|
|