Browse Source

All pages scraping, scrape additional info

master
diligent 4 years ago
parent
commit
32170888a2
1 changed files with 56 additions and 90 deletions
  1. 56
    90
      app.js

+ 56
- 90
app.js View File

@@ -21,21 +21,29 @@ agenda.define('scrape', async function (job, done) {
let collection = dbo.collection('scrapes');
let scrape = await collection.findOne({ _id: _id });

const filterPage = await axios(scrape.sourceUrl);
const html = filterPage.data;
const $ = cheerio.load(html);

const propertyLins = $('#placardContainer .property-link').map(function () {
return $(this).attr('href');
}).get();

var properties = [];
for (const link of propertyLins){
var response = await axios(link);

var property = apartments.apartment(cheerio.load(response.data));
properties.push(property);
console.log(`${link} scraped.`);
// Scrape every result page of the stored search and collect all properties.
// `properties` is declared once, before the page loop, so listings from every
// page accumulate instead of being reset on each iteration; it is read after
// the loop when the scrape document is updated with the result.
var properties = [];

// A pageCount of 0 (search without a pager) or a missing/non-numeric value
// must still produce one pass, otherwise nothing would be scraped at all.
const totalPages = Math.max(1, Number(scrape.pageCount) || 0);

for (let page = 1; page <= totalPages; page++) {
    console.log("scrapping page " + page)

    // Each result page is addressed by appending its number to the search URL.
    const filterPage = await axios(`${scrape.sourceUrl}/${page}`);
    const html = filterPage.data;
    const $ = cheerio.load(html);

    // Collect the href of every property link listed on this result page.
    const propertyLinks = $('#placardContainer .property-link').map(function () {
        return $(this).attr('href');
    }).get();

    for (const link of propertyLinks) {
        try {
            const response = await axios(link);

            const property = apartments.apartment(cheerio.load(response.data));
            properties.push(property);
            console.log(`${link} scraped.`);
        } catch (err) {
            // Best effort: a single failing listing must not abort the whole job,
            // but keep the reason visible in the log.
            console.error(`${link} scrape failed.`, err && err.message);
        }
    }
}

await collection.updateOne({ _id: _id }, { $set: { status: "done", result: properties } });
@@ -76,70 +84,6 @@ MongoClient.connect(mongoUrl, function (err, db) {
});
});




// app.set('json spaces', 2);

// const axios = require('axios');
// const cheerio = require('cheerio');


// const url = 'https://www.apartments.com/two-west-chicago-il/jqn1nf6/';
// app.get('/', (req, res) => {
// axios(url).then(response => {
// const html = response.data;
// const $ = cheerio.load(html);

// var data = apartments.apartment($);
// res.json(data);
// });
// });
// app.get('/houses/*', (req, res) => {
// var url = req.params[0];
// axios(url).then(response => {
// const html = response.data;
// const $ = cheerio.load(html);

// var data = houses.house($);
// res.json(data);
// });
// });
// app.get('/filters/*', async (req, res) => {
// var url = req.params[0];
// const filterPage = await axios(url);
// const html = filterPage.data;
// const $ = cheerio.load(html);

// const propertyLins = $('#placardContainer .property-link').map(function () {
// return $(this).attr('href');
// }).get();

// var properties = [];
// for (const link of propertyLins){
// var response = await axios(link);

// var property = apartments.apartment(cheerio.load(response.data));
// properties.push(property);
// }

// res.json(properties);
// });
// app.get('/apartments/*', (req, res) => {
// var url = req.params[0];
// axios(url).then(response => {
// const html = response.data;
// const $ = cheerio.load(html);

// var data = apartments.apartment($);

// createListing(client, data);
// res.json(data);


// });
// });

app.get("/scrapes", async (req, res) => {
try {
const dbo = client.db(database);
@@ -174,37 +118,59 @@ app.post("/scrapes/", async (req, res) => {

// query builder
var query = `https://www.apartments.com`;
if(type){
if (type) {
query += `/${type}`;
}
if(location){
if (location) {
var locationQuery = location.replace(", ", "-").replace(" ", "-").toLowerCase();
query += `/${locationQuery}`;
}
if(beds){
if (beds) {
query += `/${beds}-bedrooms`;
}

if(price){
if(beds){
if (price) {
if (beds) {
query += `-over-${price}`;
}else{
} else {
query += `/over-${price}`;
}
}
if(lifestyle){
if (lifestyle) {
query += `/${lifestyle}`;
}

console.log(query);
//todo: save data into the database

const filterPage = await axios(query);
const html = filterPage.data;
const $ = cheerio.load(html);
var $pageRange = $(".pageRange");
var pagesCount = 0;
var resultCount = 0;
if (!$pageRange.length) {
let propertyLinks = $('#placardContainer .property-link').map(function () {
return $(this).attr('href');
}).get();
if (!propertyLinks.length) {
console.error("No results");
return res.status(404).json();
}
resultCount = propertyLinks.length;
} else {
pagesCount = $pageRange.text().slice($pageRange.text().lastIndexOf("of ") + 3);
resultCount = pagesCount * 25;
}

try {
const dbo = client.db(database);
let collection = dbo.collection('scrapes');

const dt = new Date();
dt.setSeconds( dt.getSeconds() + resultCount );
let res = await collection.insertOne({
count: 21,
estimate: Date.now(),
count: resultCount,
pageCount: pagesCount,
estimate: dt,
sourceUrl: query,
location: location,
filters: [

Loading…
Cancel
Save