Browse Source

Scrape job set up on demand

newChanges
diligent 4 years ago
parent
commit
ee4c559616
5 changed files with 187 additions and 28 deletions
  1. 0
    2
      apartments.js
  2. 52
    25
      app.js
  3. 2
    1
      config/default.json
  4. 132
    0
      package-lock.json
  5. 1
    0
      package.json

+ 0
- 2
apartments.js View File

var request = require('request').defaults({ encoding: null }); var request = require('request').defaults({ encoding: null });
module.exports.apartment = function($) { module.exports.apartment = function($) {


var result = { var result = {
// images: request.get($('.aspectRatioImage').find('img')[0].attribs.src, function (error, response, body) { // images: request.get($('.aspectRatioImage').find('img')[0].attribs.src, function (error, response, body) {
// if (!error && response.statusCode == 200) { // if (!error && response.statusCode == 200) {

+ 52
- 25
app.js View File

var config = require('config');
const config = require('config');
const express = require('express'); const express = require('express');
const cors = require('cors'); const cors = require('cors');
const MongoClient = require('mongodb').MongoClient; const MongoClient = require('mongodb').MongoClient;
const ObjectID = require('mongodb').ObjectID; const ObjectID = require('mongodb').ObjectID;


var apartments = require('./apartments.js');
var houses = require('./houses.js');

// jobs
var mongoUrl = config.get("mongo");
var agendaDb = config.get("agenda");
const Agenda = require('agenda').Agenda;
const agenda = new Agenda({ db: { address: agendaDb } });
// Agenda job 'scrape': looks up the scrape document whose _id was passed in
// the job data and marks it as done.
//
// The handler is declared with a `done` callback, so it must call `done` on
// every path. On failure the error is logged and handed to `done(err)` so the
// job is reported as failed instead of never completing.
agenda.define('scrape', async function (job, done) {
  const { _id } = job.attrs.data;
  try {
    const dbo = client.db(database);
    const collection = dbo.collection('scrapes');
    // Only consumed by the disabled scraping steps below.
    let scrape = await collection.findOne({ _id: _id });

    // let response = await axios(scrape.sourceUrl);
    // const html = response.data;
    // const $ = cheerio.load(html);

    // var data = apartments.apartment($);
    // createListing(client, data);

    await collection.updateOne({ _id: _id }, { $set: { status: "done" } });
    return done();
  } catch (err) {
    console.log(err);
    // Signal the failure; without this the job would never be completed.
    return done(err);
  }
});
// Kick off Agenda job processing as soon as the module is loaded.
(async () => {
  await agenda.start();
})();

// express application // express application
const app = express(); const app = express();


}); });
}); });


// const cron = require('node-cron');


// var apartments = require('./apartments.js');
// var houses = require('./houses.js');



// app.set('json spaces', 2); // app.set('json spaces', 2);


let collection = dbo.collection('scrapes'); let collection = dbo.collection('scrapes');


var o_id = new ObjectID(id); var o_id = new ObjectID(id);
let data = await collection.findOne({ _id: o_id});
let data = await collection.findOne({ _id: o_id });
return res.json(data); return res.json(data);
} catch (err) { } catch (err) {
console.log(err); console.log(err);
let collection = dbo.collection('scrapes'); let collection = dbo.collection('scrapes');


var o_id = new ObjectID(id); var o_id = new ObjectID(id);
var newvalues = { $set: {status: "pending"} };
let data = await collection.updateOne({ _id: o_id}, newvalues);
var newvalues = { $set: { status: "pending" } };
await collection.updateOne({ _id: o_id }, newvalues);
agenda.now('scrape', { _id: o_id });
return res.status(204).json(); return res.status(204).json();
} catch (err) { } catch (err) {
console.log(err); console.log(err);
}); });


const port = 3333; const port = 3333;
// var task = cron.schedule('* * * * *', function() {
// console.log(`Runned job...`)
// });
// var options = {
// host: 'http://localhost',
// port:port,
// path: '/apartments/https://www.apartments.com/essex-on-the-park-chicago-il/begd58b/',
// method: 'GET'
// };


// task.start()

// task.stop();

app.listen(port, () => { app.listen(port, () => {
console.log(`Example app listening at http://localhost:${port}`) console.log(`Example app listening at http://localhost:${port}`)
}); });
// Close the Mongo client when the process exits.
// NOTE(review): client.close() is not awaited here; if it is asynchronous it
// may not finish inside an 'exit' listener — confirm whether this handler is
// still needed alongside the SIGTERM/SIGINT handlers.
process.on('exit', function () {
client.close();
});

// Handles graceful stopping of jobs.
//
// Stops Agenda first, then closes the Mongo client, then exits with code 0.
// agenda.stop() is awaited rather than given a callback: the promise-based
// Agenda API does not invoke a callback passed to stop(), which would leave
// the process hanging on SIGTERM/SIGINT.
// Each step is guarded separately so a failure while stopping Agenda does not
// prevent the client from being closed or the process from exiting. Errors
// are logged, not rethrown.
async function graceful() {
  try {
    await agenda.stop();
  } catch (e) {
    console.error(e);
  }

  try {
    await client.close();
  } catch (e) {
    console.error(e);
  }

  process.exit(0);
}

process.on('SIGTERM', graceful);
process.on('SIGINT', graceful);

+ 2
- 1
config/default.json View File

{ {
"mongo":"mongodb://localhost:27017/", "mongo":"mongodb://localhost:27017/",
"database":"scraper"
"database":"scraper",
"agenda":"mongodb://localhost:27017/scraper-agenda"
} }

+ 132
- 0
package-lock.json View File

"negotiator": "0.6.2" "negotiator": "0.6.2"
} }
}, },
"agenda": {
"version": "4.1.3",
"resolved": "https://registry.npmjs.org/agenda/-/agenda-4.1.3.tgz",
"integrity": "sha512-QT89CzmO67dwM3Ku7j4qLemm4VEBSMu/bLMbgbQCuE9utJEF0+ZTCCY0Cd/OkoqsMq7d92x02FWnLe7LoIUKAQ==",
"requires": {
"cron-parser": "^3.0.0",
"date.js": "~0.3.3",
"debug": "~4.3.0",
"human-interval": "~2.0.0",
"moment-timezone": "~0.5.27",
"mongodb": "~3.6.2"
},
"dependencies": {
"debug": {
"version": "4.3.2",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.2.tgz",
"integrity": "sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw==",
"requires": {
"ms": "2.1.2"
}
},
"ms": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
}
}
},
"ajv": { "ajv": {
"version": "6.12.6", "version": "6.12.6",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
} }
} }
}, },
"call-bind": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.2.tgz",
"integrity": "sha512-7O+FbCihrB5WGbFYesctwmTKae6rOiIzmz1icreWJ+0aA7LJfuqhEso2T9ncpcFtzMQtzXf2QGGueWJGTYsqrA==",
"requires": {
"function-bind": "^1.1.1",
"get-intrinsic": "^1.0.2"
}
},
"camelcase": { "camelcase": {
"version": "5.3.1", "version": "5.3.1",
"resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz", "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz",
"vary": "^1" "vary": "^1"
} }
}, },
"cron-parser": {
"version": "3.5.0",
"resolved": "https://registry.npmjs.org/cron-parser/-/cron-parser-3.5.0.tgz",
"integrity": "sha512-wyVZtbRs6qDfFd8ap457w3XVntdvqcwBGxBoTvJQH9KGVKL/fB+h2k3C8AqiVxvUQKN1Ps/Ns46CNViOpVDhfQ==",
"requires": {
"is-nan": "^1.3.2",
"luxon": "^1.26.0"
}
},
"crypto-random-string": { "crypto-random-string": {
"version": "2.0.0", "version": "2.0.0",
"resolved": "https://registry.npmjs.org/crypto-random-string/-/crypto-random-string-2.0.0.tgz", "resolved": "https://registry.npmjs.org/crypto-random-string/-/crypto-random-string-2.0.0.tgz",
"assert-plus": "^1.0.0" "assert-plus": "^1.0.0"
} }
}, },
"date.js": {
"version": "0.3.3",
"resolved": "https://registry.npmjs.org/date.js/-/date.js-0.3.3.tgz",
"integrity": "sha512-HgigOS3h3k6HnW011nAb43c5xx5rBXk8P2v/WIT9Zv4koIaVXiH2BURguI78VVp+5Qc076T7OR378JViCnZtBw==",
"requires": {
"debug": "~3.1.0"
},
"dependencies": {
"debug": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz",
"integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==",
"requires": {
"ms": "2.0.0"
}
}
}
},
"debug": { "debug": {
"version": "2.6.9", "version": "2.6.9",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
"integrity": "sha512-0ISdNousHvZT2EiFlZeZAHBUvSxmKswVCEf8hW7KWgG4a8MVEu/3Vb6uWYozkjylyCxe0JBIiRB1jV45S70WVQ==", "integrity": "sha512-0ISdNousHvZT2EiFlZeZAHBUvSxmKswVCEf8hW7KWgG4a8MVEu/3Vb6uWYozkjylyCxe0JBIiRB1jV45S70WVQ==",
"dev": true "dev": true
}, },
"define-properties": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.1.3.tgz",
"integrity": "sha512-3MqfYKj2lLzdMSf8ZIZE/V+Zuy+BgD6f164e8K2w7dgnpKArBDerGYpM46IYYcjnkdPNMjPk9A6VFB8+3SKlXQ==",
"requires": {
"object-keys": "^1.0.12"
}
},
"delayed-stream": { "delayed-stream": {
"version": "1.0.0", "version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"dev": true, "dev": true,
"optional": true "optional": true
}, },
"function-bind": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz",
"integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A=="
},
"get-intrinsic": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.1.1.tgz",
"integrity": "sha512-kWZrnVM42QCiEA2Ig1bG8zjoIMOgxWwYCEeNdwY6Tv/cOSeGpcoX4pXHfKUxNKVoArnrEr2e9srnAxxGIraS9Q==",
"requires": {
"function-bind": "^1.1.1",
"has": "^1.0.3",
"has-symbols": "^1.0.1"
}
},
"get-stream": { "get-stream": {
"version": "4.1.0", "version": "4.1.0",
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz",
"har-schema": "^2.0.0" "har-schema": "^2.0.0"
} }
}, },
"has": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz",
"integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==",
"requires": {
"function-bind": "^1.1.1"
}
},
"has-flag": { "has-flag": {
"version": "3.0.0", "version": "3.0.0",
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
"integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=", "integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=",
"dev": true "dev": true
}, },
"has-symbols": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.2.tgz",
"integrity": "sha512-chXa79rL/UC2KlX17jo3vRGz0azaWEx5tGqZg5pO3NUyEJVB17dMruQlzCCOfUvElghKcm5194+BCRvi2Rv/Gw=="
},
"has-yarn": { "has-yarn": {
"version": "2.1.0", "version": "2.1.0",
"resolved": "https://registry.npmjs.org/has-yarn/-/has-yarn-2.1.0.tgz", "resolved": "https://registry.npmjs.org/has-yarn/-/has-yarn-2.1.0.tgz",
"sshpk": "^1.7.0" "sshpk": "^1.7.0"
} }
}, },
"human-interval": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/human-interval/-/human-interval-2.0.1.tgz",
"integrity": "sha512-r4Aotzf+OtKIGQCB3odUowy4GfUDTy3aTWTfLd7ZF2gBCy3XW3v/dJLRefZnOFFnjqs5B1TypvS8WarpBkYUNQ==",
"requires": {
"numbered": "^1.1.0"
}
},
"iconv-lite": { "iconv-lite": {
"version": "0.4.24", "version": "0.4.24",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
"is-path-inside": "^3.0.1" "is-path-inside": "^3.0.1"
} }
}, },
"is-nan": {
"version": "1.3.2",
"resolved": "https://registry.npmjs.org/is-nan/-/is-nan-1.3.2.tgz",
"integrity": "sha512-E+zBKpQ2t6MEo1VsonYmluk9NxGrbzpeeLC2xIViuO2EjU2xsXsBPwTr3Ykv9l08UYEVEdWeRZNouaZqF6RN0w==",
"requires": {
"call-bind": "^1.0.0",
"define-properties": "^1.1.3"
}
},
"is-npm": { "is-npm": {
"version": "4.0.0", "version": "4.0.0",
"resolved": "https://registry.npmjs.org/is-npm/-/is-npm-4.0.0.tgz", "resolved": "https://registry.npmjs.org/is-npm/-/is-npm-4.0.0.tgz",
"integrity": "sha512-G2Lj61tXDnVFFOi8VZds+SoQjtQC3dgokKdDG2mTm1tx4m50NUHBOZSBwQQHyy0V12A0JTG4icfZQH+xPyh8VA==", "integrity": "sha512-G2Lj61tXDnVFFOi8VZds+SoQjtQC3dgokKdDG2mTm1tx4m50NUHBOZSBwQQHyy0V12A0JTG4icfZQH+xPyh8VA==",
"dev": true "dev": true
}, },
"luxon": {
"version": "1.27.0",
"resolved": "https://registry.npmjs.org/luxon/-/luxon-1.27.0.tgz",
"integrity": "sha512-VKsFsPggTA0DvnxtJdiExAucKdAnwbCCNlMM5ENvHlxubqWd0xhZcdb4XgZ7QFNhaRhilXCFxHuoObP5BNA4PA=="
},
"make-dir": { "make-dir": {
"version": "3.1.0", "version": "3.1.0",
"resolved": "https://registry.npmjs.org/make-dir/-/make-dir-3.1.0.tgz", "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-3.1.0.tgz",
"boolbase": "^1.0.0" "boolbase": "^1.0.0"
} }
}, },
"numbered": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/numbered/-/numbered-1.1.0.tgz",
"integrity": "sha512-pv/ue2Odr7IfYOO0byC1KgBI10wo5YDauLhxY6/saNzAdAs0r1SotGCPzzCLNPL0xtrAwWRialLu23AAu9xO1g=="
},
"oauth-sign": { "oauth-sign": {
"version": "0.9.0", "version": "0.9.0",
"resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz", "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz",
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
"integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=" "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM="
}, },
"object-keys": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz",
"integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA=="
},
"on-finished": { "on-finished": {
"version": "2.3.0", "version": "2.3.0",
"resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz", "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz",

+ 1
- 0
package.json View File

"author": "", "author": "",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"agenda": "^4.1.3",
"axios": "^0.21.1", "axios": "^0.21.1",
"body-parser": "^1.19.0", "body-parser": "^1.19.0",
"cheerio": "^1.0.0-rc.10", "cheerio": "^1.0.0-rc.10",

Loading…
Cancel
Save