From 67c79fd7f03388791f554adea569af958c41be22 Mon Sep 17 00:00:00 2001
From: Matt Huntington
Date: Wed, 20 Sep 2023 09:22:24 -0400
Subject: [PATCH] cleaning up

---
Review notes (free text between the "---" marker and the diff is commentary;
it is not part of the commit content):

* What the change does: `scrapeInstance` is declared `async`, yet its body was
  wrapped in `new Promise(async (resolve) => { ... })` and ended with
  `resolve()`. This patch removes that wrapper and the `resolve()` call and
  moves the body up one nesting level. The statements themselves are
  otherwise unchanged.

* Effect on callers: the function still returns a promise that fulfils with
  `undefined` once the scraped values have been logged. The difference is on
  failure: an exception thrown inside the old async executor never called
  `resolve`, so the outer promise stayed pending; after this patch the same
  exception rejects the promise returned by `scrapeInstance`, so an awaiting
  caller sees the error.

* Observations about the code this patch carries over unchanged (not
  addressed here):
  - `parseInt` is called without a radix argument.
  - Every `page.$(...)` result is dereferenced immediately. Presumably this
    is Puppeteer's `page.$`, which resolves to `null` when nothing matches;
    TODO confirm, and guard if so.
  - `page.url().match(/[0-9]*$/g)[0]` and the `split(...)[1]` lookups assume
    the URL and the element text always have the expected shape.
  - The collected values are only logged; nothing is returned to the caller.

* NOTE(review): the copy of this patch under review had its line breaks and
  indentation collapsed. Line structure below is restored from the hunk
  header counts (-7,36 +7,33); indentation is reconstructed at 4 spaces per
  level and must be confirmed against scrape.js before applying (or apply
  with `git apply --ignore-whitespace`).

 scrape.js | 57 ++++++++++++++++++++++++++-----------------------------
 1 file changed, 27 insertions(+), 30 deletions(-)

diff --git a/scrape.js b/scrape.js
index 6dbea86..13e1afb 100644
--- a/scrape.js
+++ b/scrape.js
@@ -7,36 +7,33 @@ const delay = (ms) => {
 }
 
 const scrapeInstance = async (link, page) => {
-    return new Promise(async (resolve)=>{
-        console.log('scraping instance');
-        await link.click();
-        await page.waitForNavigation();
-
-        const totalStudentsElem = await page.$('.instance-student-detail .students');
-        const totalStudentsHTML = await totalStudentsElem.getProperty('innerHTML');
-        const totalStudents = parseInt(await totalStudentsHTML.jsonValue());
-
-        const droppedElem = await page.$('.instance-student-detail .dropped');
-        const droppedHTML = await droppedElem.getProperty('innerHTML');
-        const dropped = parseInt(await droppedHTML.jsonValue())
-
-        const outcomes90elem = await page.$('.full-time-90-days-actuals')
-        const outcomes90HTML = await outcomes90elem.getProperty('innerHTML');
-        const outcomes90 = await outcomes90HTML.jsonValue();
-        const outcomes90Numeric = parseInt(outcomes90.split(' ')[0])
-        const outcomes90Percent = outcomes90.split(' ')[1].replace(/[()]/g, '')
-
-        const instanceID = parseInt(page.url().match(/[0-9]*$/g)[0])
-
-        const graduationElem = await page.$('.course-header__detail')
-        const graduationHTML = await graduationElem.getProperty('innerHTML');
-        const instanceHeaderText = await graduationHTML.jsonValue()
-        const graduationDate = instanceHeaderText.split(' - ')[1].trim();
-
-        console.log({ instanceID, graduationDate, totalStudents, dropped, outcomes90Numeric, outcomes90Percent });
-
-        resolve()
-    })
+    console.log('scraping instance');
+    await link.click();
+    await page.waitForNavigation();
+
+    const totalStudentsElem = await page.$('.instance-student-detail .students');
+    const totalStudentsHTML = await totalStudentsElem.getProperty('innerHTML');
+    const totalStudents = parseInt(await totalStudentsHTML.jsonValue());
+
+    const droppedElem = await page.$('.instance-student-detail .dropped');
+    const droppedHTML = await droppedElem.getProperty('innerHTML');
+    const dropped = parseInt(await droppedHTML.jsonValue())
+
+    const outcomes90elem = await page.$('.full-time-90-days-actuals')
+    const outcomes90HTML = await outcomes90elem.getProperty('innerHTML');
+    const outcomes90 = await outcomes90HTML.jsonValue();
+    const outcomes90Numeric = parseInt(outcomes90.split(' ')[0])
+    const outcomes90Percent = outcomes90.split(' ')[1].replace(/[()]/g, '')
+
+    const instanceID = parseInt(page.url().match(/[0-9]*$/g)[0])
+
+    const graduationElem = await page.$('.course-header__detail')
+    const graduationHTML = await graduationElem.getProperty('innerHTML');
+    const instanceHeaderText = await graduationHTML.jsonValue()
+    const graduationDate = instanceHeaderText.split(' - ')[1].trim();
+
+    console.log({ instanceID, graduationDate, totalStudents, dropped, outcomes90Numeric, outcomes90Percent });
+
 }
 
 (async () => {