| @@ -21,21 +21,29 @@ agenda.define('scrape', async function (job, done) { | |||
| let collection = dbo.collection('scrapes'); | |||
| let scrape = await collection.findOne({ _id: _id }); | |||
| const filterPage = await axios(scrape.sourceUrl); | |||
| const html = filterPage.data; | |||
| const $ = cheerio.load(html); | |||
| const propertyLins = $('#placardContainer .property-link').map(function () { | |||
| return $(this).attr('href'); | |||
| }).get(); | |||
| var properties = []; | |||
| for (const link of propertyLins){ | |||
| var response = await axios(link); | |||
| var property = apartments.apartment(cheerio.load(response.data)); | |||
| properties.push(property); | |||
| console.log(`${link} scraped.`); | |||
| for (var page = 1; page <= scrape.pageCount; page++) { | |||
| console.log("scrapping page " + page) | |||
| const filterPage = await axios(scrape.sourceUrl + `/${page}`); | |||
| const html = filterPage.data; | |||
| const $ = cheerio.load(html); | |||
| const propertyLinks = $('#placardContainer .property-link').map(function () { | |||
| return $(this).attr('href'); | |||
| }).get(); | |||
| var properties = []; | |||
| for (const link of propertyLinks) { | |||
| try { | |||
| var response = await axios(link); | |||
| var property = apartments.apartment(cheerio.load(response.data)); | |||
| properties.push(property); | |||
| console.log(`${link} scraped.`); | |||
| }catch(err){ | |||
| console.error(`${link} scrape failed.`); | |||
| } | |||
| } | |||
| } | |||
| await collection.updateOne({ _id: _id }, { $set: { status: "done", result: properties } }); | |||
| @@ -76,70 +84,6 @@ MongoClient.connect(mongoUrl, function (err, db) { | |||
| }); | |||
| }); | |||
| // app.set('json spaces', 2); | |||
| // const axios = require('axios'); | |||
| // const cheerio = require('cheerio'); | |||
| // const url = 'https://www.apartments.com/two-west-chicago-il/jqn1nf6/'; | |||
| // app.get('/', (req, res) => { | |||
| // axios(url).then(response => { | |||
| // const html = response.data; | |||
| // const $ = cheerio.load(html); | |||
| // var data = apartments.apartment($); | |||
| // res.json(data); | |||
| // }); | |||
| // }); | |||
| // app.get('/houses/*', (req, res) => { | |||
| // var url = req.params[0]; | |||
| // axios(url).then(response => { | |||
| // const html = response.data; | |||
| // const $ = cheerio.load(html); | |||
| // var data = houses.house($); | |||
| // res.json(data); | |||
| // }); | |||
| // }); | |||
| // app.get('/filters/*', async (req, res) => { | |||
| // var url = req.params[0]; | |||
| // const filterPage = await axios(url); | |||
| // const html = filterPage.data; | |||
| // const $ = cheerio.load(html); | |||
| // const propertyLins = $('#placardContainer .property-link').map(function () { | |||
| // return $(this).attr('href'); | |||
| // }).get(); | |||
| // var properties = []; | |||
| // for (const link of propertyLins){ | |||
| // var response = await axios(link); | |||
| // var property = apartments.apartment(cheerio.load(response.data)); | |||
| // properties.push(property); | |||
| // } | |||
| // res.json(properties); | |||
| // }); | |||
| // app.get('/apartments/*', (req, res) => { | |||
| // var url = req.params[0]; | |||
| // axios(url).then(response => { | |||
| // const html = response.data; | |||
| // const $ = cheerio.load(html); | |||
| // var data = apartments.apartment($); | |||
| // createListing(client, data); | |||
| // res.json(data); | |||
| // }); | |||
| // }); | |||
| app.get("/scrapes", async (req, res) => { | |||
| try { | |||
| const dbo = client.db(database); | |||
| @@ -174,37 +118,59 @@ app.post("/scrapes/", async (req, res) => { | |||
| // query builder | |||
| var query = `https://www.apartments.com`; | |||
| if(type){ | |||
| if (type) { | |||
| query += `/${type}`; | |||
| } | |||
| if(location){ | |||
| if (location) { | |||
| var locationQuery = location.replace(", ", "-").replace(" ", "-").toLowerCase(); | |||
| query += `/${locationQuery}`; | |||
| } | |||
| if(beds){ | |||
| if (beds) { | |||
| query += `/${beds}-bedrooms`; | |||
| } | |||
| if(price){ | |||
| if(beds){ | |||
| if (price) { | |||
| if (beds) { | |||
| query += `-over-${price}`; | |||
| }else{ | |||
| } else { | |||
| query += `/over-${price}`; | |||
| } | |||
| } | |||
| if(lifestyle){ | |||
| if (lifestyle) { | |||
| query += `/${lifestyle}`; | |||
| } | |||
| console.log(query); | |||
| //todo: save data into the database | |||
| const filterPage = await axios(query); | |||
| const html = filterPage.data; | |||
| const $ = cheerio.load(html); | |||
| var $pageRange = $(".pageRange"); | |||
| var pagesCount = 0; | |||
| var resultCount = 0; | |||
| if (!$pageRange.length) { | |||
| let propertyLinks = $('#placardContainer .property-link').map(function () { | |||
| return $(this).attr('href'); | |||
| }).get(); | |||
| if (!propertyLinks.length) { | |||
| console.error("No results"); | |||
| return res.status(404).json(); | |||
| } | |||
| resultCount = propertyLinks.length; | |||
| } else { | |||
| pagesCount = $pageRange.text().slice($pageRange.text().lastIndexOf("of ") + 3); | |||
| resultCount = pagesCount * 25; | |||
| } | |||
| try { | |||
| const dbo = client.db(database); | |||
| let collection = dbo.collection('scrapes'); | |||
| const dt = new Date(); | |||
| dt.setSeconds( dt.getSeconds() + resultCount ); | |||
| let res = await collection.insertOne({ | |||
| count: 21, | |||
| estimate: Date.now(), | |||
| count: resultCount, | |||
| pageCount: pagesCount, | |||
| estimate: dt, | |||
| sourceUrl: query, | |||
| location: location, | |||
| filters: [ | |||