scraped one instance

average
Matthew Huntington 2 years ago
parent 765e1b044f
commit 7cd701931b

2
.gitignore vendored

@ -1,3 +1,3 @@
node_modules node_modules
*.swp *.swp
outcomes.png *.png

3794
package-lock.json generated

File diff suppressed because it is too large Load Diff

@ -6,6 +6,39 @@ const delay = (ms) => {
}); });
} }
/**
 * Read the innerHTML of the first element matching `selector` on `page`.
 *
 * @param {object} page - Puppeteer page (anything exposing an async `$(selector)`).
 * @param {string} selector - CSS selector of the element to read.
 * @returns {Promise<string>} The element's innerHTML.
 * @throws {Error} When no element matches, so the caller sees which selector failed.
 */
const readInnerHTML = async (page, selector) => {
    const elem = await page.$(selector);
    if (elem === null || elem === undefined) {
        throw new Error(`No element matches selector "${selector}"`);
    }
    const prop = await elem.getProperty('innerHTML');
    return prop.jsonValue();
};

/**
 * Open one instance page by clicking `link`, scrape its stats, log them and
 * return them.
 *
 * @param {object} link - Element handle of the anchor that leads to the instance page.
 * @param {object} page - Puppeteer page the link lives on.
 * @returns {Promise<{instanceID: number, graduationDate: string, totalStudents: number,
 *   dropped: number, outcomes90Numeric: number, outcomes90Percent: string}>}
 *   The scraped record (the same object that is logged).
 * @throws {Error} When an expected element is missing or its text does not have the
 *   expected shape; the returned promise rejects instead of hanging.
 */
const scrapeInstance = async (link, page) => {
    console.log('scraping instance');

    // Register the navigation wait BEFORE clicking: if the navigation finishes
    // before waitForNavigation() is called, the wait would stall until timeout.
    await Promise.all([page.waitForNavigation(), link.click()]);

    const totalStudents = Number.parseInt(
        await readInnerHTML(page, '.instance-student-detail .students'),
        10
    );
    const dropped = Number.parseInt(
        await readInnerHTML(page, '.instance-student-detail .dropped'),
        10
    );

    // The outcomes text is split on a space into "<count>" and "(<percent>)".
    const outcomes90 = await readInnerHTML(page, '.full-time-90-days-actuals');
    const outcomes90Parts = outcomes90.split(' ');
    const outcomes90Numeric = Number.parseInt(outcomes90Parts[0], 10);
    const outcomes90Percent = outcomes90Parts[1].replace(/[()]/g, '');

    // The instance id is the run of digits at the very end of the URL (NaN if absent).
    const idMatch = page.url().match(/[0-9]+$/);
    const instanceID = idMatch ? Number.parseInt(idMatch[0], 10) : NaN;

    // The header text is split on " - "; the second piece is taken as the graduation date.
    const instanceHeaderText = await readInnerHTML(page, '.course-header__detail');
    const graduationDate = instanceHeaderText.split(' - ')[1].trim();

    const record = {
        instanceID,
        graduationDate,
        totalStudents,
        dropped,
        outcomes90Numeric,
        outcomes90Percent
    };
    console.log(record);
    return record;
};
(async () => { (async () => {
const browser = await puppeteer.launch(); const browser = await puppeteer.launch();
const page = await browser.newPage(); const page = await browser.newPage();
@ -14,7 +47,8 @@ const delay = (ms) => {
await page.click('button[type="submit"]'); await page.click('button[type="submit"]');
console.log('entered email'); console.log('entered email');
await page.waitForNavigation(); await page.waitForNavigation()
await delay(500) //wtf
await page.type('input[type="text"]', 'matt.huntington@generalassemb.ly'); await page.type('input[type="text"]', 'matt.huntington@generalassemb.ly');
await page.type('input[type="password"]', 'Hunt!ngt0n80!'); await page.type('input[type="password"]', 'Hunt!ngt0n80!');
@ -36,14 +70,16 @@ const delay = (ms) => {
console.log('filtering instances'); console.log('filtering instances');
await page.waitForNavigation(); await page.waitForNavigation();
const elems = await page.$$('tr td:nth-child(2) a'); const instanceLinks = await page.$$('tr td:nth-child(2) a');
for(elem of elems){
const prop = await elem.getProperty('innerHTML'); await scrapeInstance(instanceLinks[0], page);
const value = await prop.jsonValue(); //for(elem of elems){
console.log(value); //const prop = await elem.getProperty('innerHTML');
} //const value = await prop.jsonValue();
//console.log(value);
//}
console.log('done'); console.log('done');
await page.screenshot({ path: 'outcomes.png' }) //await page.screenshot({ path: 'outcomes.png' })
await browser.close(); await browser.close();
})(); })();

Loading…
Cancel
Save