You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
129 lines
4.6 KiB
129 lines
4.6 KiB
const puppeteer = require('puppeteer');
|
|
const pg = require('pg');
|
|
// Instance IDs that the main loop never hands to scrapeInstance, even when they
// have no row in the `instances` table yet.
// NOTE(review): presumably the detail pages for these IDs cannot be scraped —
// confirm why these two are excluded.
const brokenInstances = [8302, 8294];
|
|
|
|
// Shared Postgres client for the "outcomes_tracker" database. Only the database
// name is configured here; it is connected and ended by the script's main routine.
const client = new pg.Client({ database: "outcomes_tracker" });
|
|
|
|
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Milliseconds to wait; passed straight to setTimeout.
 * @returns {Promise<void>} Promise that fulfils (with no value) once the timer fires.
 */
const delay = (ms) => new Promise((done) => setTimeout(done, ms));
|
|
|
|
/**
 * Read the innerHTML of the first element matching `selector` on `page`.
 *
 * @param {object} page - Puppeteer page (anything exposing an async `$` method).
 * @param {string} selector - CSS selector of the element to read.
 * @returns {Promise<string>} The element's innerHTML.
 * @throws {Error} When no element matches the selector.
 */
const readInnerHTML = async (page, selector) => {
  const element = await page.$(selector);
  if (!element) {
    throw new Error(`Expected element not found: ${selector}`);
  }
  const property = await element.getProperty('innerHTML');
  return property.jsonValue();
};

/**
 * Open one instance's detail page, scrape its figures and store them in the
 * `instances` table, then navigate back.
 *
 * If the page has no `.instance-student-detail .students` element, nothing is
 * stored and a screenshot is written to `outcomes.png` instead.
 *
 * @param {object} link - Element handle for the instance link; only `click()` is used.
 * @param {object} page - Puppeteer page the link lives on.
 * @returns {Promise<*>} Whatever `page.goBack()` resolves to.
 * @throws {Error} When an expected element is missing or its text is not in the
 *   expected format, or when the database insert fails.
 */
const scrapeInstance = async (link, page) => {
  // Start waiting for the navigation before clicking so a fast navigation
  // cannot finish before the wait is registered.
  await Promise.all([page.waitForNavigation(), link.click()]);

  const totalStudentsElem = await page.$('.instance-student-detail .students');
  if (totalStudentsElem) {
    const totalStudentsHTML = await totalStudentsElem.getProperty('innerHTML');
    const totalStudents = Number.parseInt(await totalStudentsHTML.jsonValue(), 10);

    // The course code is the text inside the first pair of parentheses in the <h3>.
    const h3Value = await readInnerHTML(page, 'h3');
    const courseMatch = h3Value.match(/(?<=\()[ 0-9A-Za-z\-]*(?=\))/);
    if (!courseMatch) {
      throw new Error(`Could not find a course code in heading: ${h3Value}`);
    }
    const course = courseMatch[0];

    const dropped = Number.parseInt(await readInnerHTML(page, '.instance-student-detail .dropped'), 10);
    const graduates = Number.parseInt(await readInnerHTML(page, '.graduates'), 10);
    const seeking = Number.parseInt(await readInnerHTML(page, '.job-seeking'), 10);

    // The cell text is split on a space: first part is the count, second part
    // is a parenthesised value whose parentheses are stripped.
    const outcomes90 = await readInnerHTML(page, '.full-time-90-days-actuals');
    const outcomes90Parts = outcomes90.split(' ');
    const outcomes90Numeric = Number.parseInt(outcomes90Parts[0], 10);
    const outcomes90Percent = outcomes90Parts[1].replace(/[()]/g, '');

    // The instance id is the run of digits at the end of the detail page URL.
    const instanceID = Number.parseInt(page.url().match(/[0-9]*$/)[0], 10);

    // The header text is split on ' - '; the second part is stored as the graduation date.
    const instanceHeaderText = await readInnerHTML(page, '.course-header__detail');
    const graduationDate = instanceHeaderText.split(' - ')[1].trim();

    console.log({ instanceID, course, graduationDate, totalStudents, dropped, graduates, seeking, outcomes90Numeric, outcomes90Percent });

    // Scraped text is passed as bound parameters, never spliced into the SQL,
    // so quotes or other special characters on the page cannot alter the query.
    await client.query(
      'INSERT INTO instances (instance_id, course, graduation_date, total_students, dropped, graduates, seekers, ninety_day_outcomes) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)',
      [instanceID, course, graduationDate, totalStudents, dropped, graduates, seeking, outcomes90Numeric],
    );
  } else {
    // No student summary on this page: keep a screenshot for inspection.
    await page.screenshot({ path: 'outcomes.png' });
  }

  return page.goBack();
};
|
|
|
|
/**
 * Script entry point.
 *
 * Connects to Postgres, logs in to the outcomes site through the Okta
 * push-notification flow, filters the instance list from 01/01/2013, and calls
 * scrapeInstance for every listed instance that has students, is not already
 * in the `instances` table and is not listed in `brokenInstances`.
 *
 * The browser and the Postgres client are always closed, even when a step
 * fails; a failure is logged and reported through a non-zero exit code.
 */
(async () => {
  await client.connect();
  let browser;
  try {
    const res = await client.query('SELECT $1::text as message', ['Connected to Postgres']);
    console.log(res.rows[0].message);

    browser = await puppeteer.launch({ headless: 'new' });
    const page = await browser.newPage();
    await page.goto('https://outcomes.generalassemb.ly/');

    // FIXME: credentials are hard-coded in source. Move them to environment
    // variables or a secret store and rotate the password.
    await page.type('input[type="email"]', 'matt.huntington@generalassemb.ly');
    // Register each navigation wait before the click that triggers it, so a
    // fast navigation cannot complete before the wait starts.
    await Promise.all([page.waitForNavigation(), page.click('button[type="submit"]')]);
    console.log('entered email');

    // NOTE(review): fixed pause kept from the original (marked "wtf" there);
    // presumably the Okta form is not ready right after navigation — confirm.
    await delay(500);

    await page.type('input[type="text"]', 'matt.huntington@generalassemb.ly');
    await page.type('input[type="password"]', 'Hunt!ngt0n80!');
    await page.click('input[type="submit"]');
    console.log('entered okta creds');

    await page.waitForSelector('div[data-se="okta_verify-push"] a');
    await Promise.all([page.waitForNavigation(), page.click('div[data-se="okta_verify-push"] a')]);
    console.log('selected push notification');

    await page.waitForSelector('#from');
    console.log('logged in');

    await page.type('#from', '01/01/2013');
    await Promise.all([page.waitForNavigation(), page.click('input[value="Filter"]')]);
    console.log('filtering instances');

    const instanceLinks = await page.$$('tr td:nth-child(2) a');

    // NOTE(review): scrapeInstance navigates away and back; confirm the
    // remaining handles in instanceLinks stay usable after page.goBack().
    for (const instance of instanceLinks) {
      // Walk up from the link to its table row and read the row's first
      // `.numeric` cell, which is used as the student count.
      const td = await instance.getProperty('parentNode');
      const tr = await td.getProperty('parentNode');
      const sibling = await tr.$('.numeric');
      const siblingHTML = await sibling.getProperty('innerHTML');
      const numStudents = Number.parseInt(await siblingHTML.jsonValue(), 10);

      if (!(numStudents > 0)) {
        console.log('no students, skipping');
        continue;
      }

      // The instance id is the run of digits at the end of the link's href.
      const linkHTML = await instance.getProperty('href');
      const href = await linkHTML.jsonValue();
      const instanceID = href.match(/[0-9]*$/)[0];

      // Bound parameter instead of string interpolation.
      const existing = await client.query('SELECT * FROM instances WHERE instance_id = $1', [instanceID]);
      if (existing.rowCount !== 0) {
        console.log(instanceID, 'found');
        continue;
      }

      if (!brokenInstances.includes(Number.parseInt(instanceID, 10))) {
        await scrapeInstance(instance, page);
      }
    }

    console.log('done');
  } finally {
    // Release both resources even if closing the browser itself fails.
    try {
      if (browser) {
        await browser.close();
      }
    } finally {
      await client.end();
    }
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|