-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrape.js
47 lines (40 loc) · 2.09 KB
/
scrape.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
const fs = require('fs');
const path = require('path');
const isEqual = require('lodash/isEqual');
const axios = require('axios');
const cheerio = require('cheerio');
// Scrapes the AP Central course index, follows each course to its "/exam"
// page, extracts the "Exam Format" sections, and writes the results to
// src/data/tests.json (relative to this file's directory).

const BASE_URL = 'https://apcentral.collegeboard.org';
const TITLE_SUFFIX = ': The Exam';

/**
 * Returns `href` with everything from the first '?' onwards removed.
 * An href without a query string is returned unchanged; slicing with the
 * -1 that indexOf reports for "not found" would drop its last character.
 *
 * @param {string} href - Link target taken from the course index.
 * @returns {string} The href without its query string.
 */
function stripQuery(href) {
  const queryStart = href.indexOf('?');
  return queryStart === -1 ? href : href.slice(0, queryStart);
}

/**
 * Extracts the exam name and its format sections from an exam page.
 *
 * The name is the page title up to ': The Exam' (the whole title when that
 * suffix is absent). Sections are read from the rich-list node that follows
 * the node whose trimmed text is exactly 'Exam Format': every <h3> child
 * becomes a key and the text of the element right after it becomes the value.
 *
 * @param {string} html - Markup of the exam page.
 * @returns {{name: string, sections: Object<string, string>}} Parsed data;
 *   `sections` is empty when the page has no 'Exam Format' heading.
 */
function parseExamPage(html) {
  const $ = cheerio.load(html);
  const title = $('title').text();
  const suffixStart = title.indexOf(TITLE_SUFFIX);
  const testData = {
    name: suffixStart === -1 ? title : title.slice(0, suffixStart),
    sections: {},
  };

  const nodes = $('.node.node-free-form-text.view-mode-rich_list').toArray();
  const headingIdx = nodes.findIndex(node => $(node).text().trim() === 'Exam Format');
  if (headingIdx === -1) {
    // Without this guard, -1 + 1 would select nodes[0], an unrelated block.
    return testData;
  }

  const children = $(nodes[headingIdx + 1]).find('.field-item.even').children().toArray();
  children.forEach((child, idx) => {
    if (child.tagName === 'h3') {
      testData.sections[$(child).text()] = $(children[idx + 1]).text();
    }
  });
  return testData;
}

/**
 * Downloads and parses the exam page for one course link.
 *
 * Best-effort: a missing link, a failed request, or a parsing error yields
 * an empty object (dropped before writing) and a warning, so one bad course
 * does not abort the whole run.
 *
 * @param {string|undefined} href - Course link from the index page.
 * @returns {Promise<Object>} Resolves to the parsed test data or `{}`.
 */
function fetchExam(href) {
  if (typeof href !== 'string') {
    console.warn('Skipping course entry without a link');
    return Promise.resolve({});
  }
  const examUrl = `${BASE_URL}${stripQuery(href)}/exam`;
  return axios.get(examUrl)
    .then(({ data }) => parseExamPage(data))
    .catch((err) => {
      console.warn(`Skipping ${examUrl}: ${err.message}`);
      return {};
    });
}

axios.get(`${BASE_URL}/courses`)
  .then(({ data }) => {
    const $ = cheerio.load(data);
    const testNodes = $('#block-course-index-course-index').find('li').toArray();
    return Promise.all(testNodes.map(testNode => fetchExam($(testNode).find('a').attr('href'))));
  })
  .then((testData) => {
    // Keep only entries for which at least one section was found.
    const withSections = testData.filter(data => data.sections !== undefined && !isEqual(data.sections, {}));
    fs.writeFileSync(path.join(__dirname, 'src/data/tests.json'), JSON.stringify(withSections));
    // Report success only once the file is on disk.
    console.log('Done!');
  })
  .catch((err) => {
    // Index download or file write failed: report it instead of leaving an
    // unhandled rejection, and signal failure through the exit code.
    console.error(`Scrape failed: ${err.message}`);
    process.exitCode = 1;
  });