| const config = require('config'); | const config = require('config'); | ||||
| const axios = require("axios"); | |||||
| const cheerio = require("cheerio"); | |||||
| const express = require('express'); | const express = require('express'); | ||||
| const cors = require('cors'); | const cors = require('cors'); | ||||
| const MongoClient = require('mongodb').MongoClient; | const MongoClient = require('mongodb').MongoClient; | ||||
| const dbo = client.db(database); | const dbo = client.db(database); | ||||
| let collection = dbo.collection('scrapes'); | let collection = dbo.collection('scrapes'); | ||||
| let scrape = await collection.findOne({ _id: _id }); | let scrape = await collection.findOne({ _id: _id }); | ||||
| scrape.sourceUrl = "https://www.apartments.com/new-york-ny/"; | |||||
| // let response = await axios(scrape.sourceUrl); | |||||
| // const html = response.data; | |||||
| // const $ = cheerio.load(html); | |||||
| const filterPage = await axios(scrape.sourceUrl); | |||||
| const html = filterPage.data; | |||||
| const $ = cheerio.load(html); | |||||
| // var data = apartments.apartment($); | |||||
| // createListing(client, data); | |||||
| const propertyLins = $('#placardContainer .property-link').map(function () { | |||||
| return $(this).attr('href'); | |||||
| }).get(); | |||||
| await collection.updateOne({ _id: _id }, { $set: { status: "done" } }); | |||||
| var properties = []; | |||||
| for (const link of propertyLins){ | |||||
| var response = await axios(link); | |||||
| var property = apartments.apartment(cheerio.load(response.data)); | |||||
| properties.push(property); | |||||
| console.log(`${link} scraped.`); | |||||
| } | |||||
| await collection.updateOne({ _id: _id }, { $set: { status: "done", result: properties } }); | |||||
| console.log(`${_id} scraped.`); | |||||
| return done(); | return done(); | ||||
| } catch (err) { | } catch (err) { | ||||
| console.log(err); | console.log(err); |