const config = require('config');
const axios = require("axios");
const cheerio = require("cheerio");
const express = require('express');
const cors = require('cors');
const MongoClient = require('mongodb').MongoClient;
const ObjectID = require('mongodb').ObjectID;

const apartments = require('./apartments.js');
const houses = require('./houses.js');
const condos = require('./condos.js');

// configuration (read once, up front, so every section below shares the same values)
const mongoUrl = config.get("mongo");
const database = config.get("database");
const agendaDb = config.get("agenda");

// Connected MongoClient; assigned once MongoClient.connect() succeeds further down.
let client;

/**
 * Returns the `scrapes` collection of the configured database.
 *
 * @returns {object} The MongoDB collection handle for `scrapes`.
 * @throws {Error} If the MongoDB connection has not been established yet.
 */
function getScrapesCollection() {
  if (!client) {
    throw new Error('Database connection is not ready yet.');
  }
  return client.db(database).collection('scrapes');
}

// jobs
const Agenda = require('agenda').Agenda;
const agenda = new Agenda({ db: { address: agendaDb } });

/**
 * `scrape` job: loads the scrape document referenced by the job data, fetches
 * the listing page, scrapes every linked property page and stores the results
 * on the scrape document with status "done".
 *
 * The handler keeps the two-argument (job, done) form, so `done` must be
 * called on every path; failures are forwarded through `done(err)`.
 */
agenda.define('scrape', async function (job, done) {
  const { _id } = job.attrs.data;
  let failure;

  try {
    const collection = getScrapesCollection();
    const scrape = await collection.findOne({ _id: _id });
    if (!scrape) {
      throw new Error(`Scrape ${_id} not found.`);
    }

    // NOTE(review): this overrides whatever sourceUrl was stored on the
    // document with a fixed listing URL - looks like a development leftover;
    // confirm before relying on the stored value.
    scrape.sourceUrl = "https://www.apartments.com/condos/chicago-il/";

    const filterPage = await axios(scrape.sourceUrl);
    const $ = cheerio.load(filterPage.data);
    const propertyLinks = $('#placardContainer .property-link')
      .map(function () {
        return $(this).attr('href');
      })
      .get();

    // Property pages are fetched one at a time, in listing order.
    const properties = [];
    for (const link of propertyLinks) {
      const response = await axios(link);
      //apartments
      //var property = apartments.apartment(cheerio.load(response.data));
      //houses
      //var property = houses.house(cheerio.load(response.data));
      //condos
      const property = condos.condo(cheerio.load(response.data));
      properties.push(property);
      console.log(`${link} scraped.`);
    }

    await collection.updateOne(
      { _id: _id },
      { $set: { status: "done", result: properties } }
    );
    console.log(`${_id} scraped.`);
  } catch (err) {
    console.log(err);
    failure = err;
  }

  // Always signal completion; previously a failure left the job unfinished.
  return done(failure);
});

(async function () {
  await agenda.start();
})();

// express application
const app = express();
app.use(express.json());
app.use(cors());

// database setup
MongoClient.connect(mongoUrl, function (err, db) {
  if (err) throw err;
  console.log("Database created!");
  // NOTE(review): the connection string may embed credentials - confirm that
  // logging it is acceptable.
  console.log(mongoUrl);
  client = db;
  const dbo = db.db(database);
  dbo.createCollection("scrapes", function (err, res) {
    if (err) {
      console.log("Collection already created!");
      return;
    }
    console.log("Collection created!");
  });
});

// app.set('json spaces', 2);
// const axios = require('axios');
// const cheerio = require('cheerio');
// const url = 'https://www.apartments.com/two-west-chicago-il/jqn1nf6/';
// app.get('/', (req, res) => {
//   axios(url).then(response => {
//     const html = response.data;
//     const $ = cheerio.load(html);
//     var data = apartments.apartment($);
//     res.json(data);
//   });
// });
// app.get('/houses/*', (req, res) => {
//   var url = req.params[0];
//   axios(url).then(response => {
//     const html = response.data;
//     const $ = cheerio.load(html);
//     var data = houses.house($);
//     res.json(data);
//   });
// });
// app.get('/filters/*', async (req, res) => {
//   var url = req.params[0];
//   const filterPage = await axios(url);
//   const html = filterPage.data;
//   const $ = cheerio.load(html);
//   const propertyLins = $('#placardContainer .property-link').map(function () {
//     return $(this).attr('href');
//   }).get();
//   var properties = [];
//   for (const link of propertyLins){
//     var response = await axios(link);
//     var property = apartments.apartment(cheerio.load(response.data));
//     properties.push(property);
//   }
//   res.json(properties);
// });
// app.get('/apartments/*', (req, res) => {
//   var url = req.params[0];
//   axios(url).then(response => {
//     const html = response.data;
//     const $ = cheerio.load(html);
//     var data = apartments.apartment($);
//     createListing(client, data);
//     res.json(data);
//   });
// });

/** GET /scrapes - responds with every scrape document as a JSON array. */
app.get("/scrapes", async (req, res) => {
  try {
    const data = await getScrapesCollection().find({}).toArray();
    return res.json(data);
  } catch (err) {
    console.log(err);
    return res.status(500).json();
  }
});

/**
 * GET /scrapes/:id - responds with the scrape document for `id`
 * (JSON `null` when no document matches). Responds 400 when `id` is not a
 * valid ObjectID instead of failing with a 500.
 */
app.get("/scrapes/:id", async (req, res) => {
  const id = req.params.id;
  if (!ObjectID.isValid(id)) {
    return res.status(400).json({ error: 'Invalid scrape id.' });
  }

  try {
    const data = await getScrapesCollection().findOne({ _id: new ObjectID(id) });
    return res.json(data);
  } catch (err) {
    console.log(err);
    return res.status(500).json();
  }
});

/**
 * POST /scrapes/ - stores a new scrape request built from the body fields
 * `location`, `price`, `beds`, `type` and `lifestyle`, with status "requested".
 * Responds with an empty JSON body on success.
 */
app.post("/scrapes/", async (req, res) => {
  const { location, price, beds, type, lifestyle } = req.body;

  // query builder
  // TODO: save data into the database
  try {
    // Named `result` so it no longer shadows the Express response object.
    const result = await getScrapesCollection().insertOne({
      count: 21,
      estimate: Date.now(),
      sourceUrl: "https://www.apartments.com",
      location: location,
      filters: [
        { name: 'price', value: price },
        { name: 'beds', value: beds },
        { name: 'type', value: type },
        { name: 'lifestyle', value: lifestyle },
      ],
      status: "requested"
    });
    console.log(result);
  } catch (err) {
    console.log(err);
    return res.status(500).json();
  }

  return res.json();
});

/**
 * PATCH /scrapes/:id/execute - marks the scrape as "pending" and schedules the
 * `scrape` job to run immediately. Responds 204 on success and 400 when `id`
 * is not a valid ObjectID.
 */
app.patch("/scrapes/:id/execute", async (req, res) => {
  const id = req.params.id;
  if (!ObjectID.isValid(id)) {
    return res.status(400).json({ error: 'Invalid scrape id.' });
  }

  try {
    const scrapeId = new ObjectID(id);
    await getScrapesCollection().updateOne(
      { _id: scrapeId },
      { $set: { status: "pending" } }
    );
    // Awaited so a scheduling failure reaches the catch below instead of
    // becoming an unhandled rejection after a 204 was already sent.
    await agenda.now('scrape', { _id: scrapeId });
    return res.status(204).json();
  } catch (err) {
    console.log(err);
    return res.status(500).json();
  }
});

const port = 3333;
app.listen(port, () => {
  console.log(`Example app listening at http://localhost:${port}`)
});

/**
 * Handles graceful stopping of jobs: stops the Agenda job processor, closes
 * the MongoDB connection if one was opened, then exits the process.
 * Shutdown errors are logged and do not prevent the exit.
 */
async function graceful() {
  try {
    // Promise form, consistent with `await agenda.start()` above.
    await agenda.stop();
    if (client) {
      await client.close();
    }
  } catch (e) {
    console.error(e);
  }
  process.exit(0);
}

process.on('SIGTERM', graceful);
process.on('SIGINT', graceful);