Kaynağa Gözat

Scrape job set up on demand

newChanges
diligent 4 yıl önce
ebeveyn
işleme
ee4c559616
5 değiştirilmiş dosya ile 187 ekleme ve 28 silme
  1. 0
    2
      apartments.js
  2. 52
    25
      app.js
  3. 2
    1
      config/default.json
  4. 132
    0
      package-lock.json
  5. 1
    0
      package.json

+ 0
- 2
apartments.js Dosyayı Görüntüle

@@ -1,7 +1,5 @@
var request = require('request').defaults({ encoding: null });
module.exports.apartment = function($) {


var result = {
// images: request.get($('.aspectRatioImage').find('img')[0].attribs.src, function (error, response, body) {
// if (!error && response.statusCode == 200) {

+ 52
- 25
app.js Dosyayı Görüntüle

@@ -1,9 +1,42 @@
var config = require('config');
const config = require('config');
const express = require('express');
const cors = require('cors');
const MongoClient = require('mongodb').MongoClient;
const ObjectID = require('mongodb').ObjectID;

var apartments = require('./apartments.js');
var houses = require('./houses.js');

// jobs
var mongoUrl = config.get("mongo");
var agendaDb = config.get("agenda");
const Agenda = require('agenda').Agenda;
const agenda = new Agenda({ db: { address: agendaDb } });
/**
 * Agenda job "scrape": loads the scrape document referenced by the job data
 * and marks it as done.
 *
 * The handler is promise-based (no `done` parameter): Agenda treats a
 * resolved promise as success and a rejected promise as a job failure.
 * The previous version accepted `done` but never called it on the error
 * path, which left failed jobs locked instead of marking them failed.
 *
 * NOTE(review): `client` and `database` are not defined in the visible part
 * of this file — confirm they are in scope before the first job runs.
 *
 * @param {object} job - Agenda job; `job.attrs.data._id` is the id of the
 *   document in the `scrapes` collection.
 * @returns {Promise<void>}
 */
agenda.define('scrape', async function (job) {
  const { _id } = job.attrs.data;
  try {
    const dbo = client.db(database);
    const collection = dbo.collection('scrapes');
    // Looked up for the (currently disabled) scraping step below.
    const scrape = await collection.findOne({ _id: _id });

    // let response = await axios(scrape.sourceUrl);
    // const html = response.data;
    // const $ = cheerio.load(html);

    // var data = apartments.apartment($);
    // createListing(client, data);

    await collection.updateOne({ _id: _id }, { $set: { status: "done" } });
  } catch (err) {
    console.log(err);
    // Rethrow so Agenda records the failure instead of leaving the job hanging.
    throw err;
  }
});
// Start the Agenda job processor. A failure is logged explicitly rather than
// surfacing as an unhandled promise rejection.
(async function () {
  try {
    await agenda.start();
  } catch (err) {
    console.error('Failed to start agenda:', err);
  }
})();

// express application
const app = express();

@@ -31,10 +64,8 @@ MongoClient.connect(mongoUrl, function (err, db) {
});
});

// const cron = require('node-cron');

// var apartments = require('./apartments.js');
// var houses = require('./houses.js');


// app.set('json spaces', 2);

@@ -115,7 +146,7 @@ app.get("/scrapes/:id", async (req, res) => {
let collection = dbo.collection('scrapes');

var o_id = new ObjectID(id);
let data = await collection.findOne({ _id: o_id});
let data = await collection.findOne({ _id: o_id });
return res.json(data);
} catch (err) {
console.log(err);
@@ -164,8 +195,9 @@ app.patch("/scrapes/:id/execute", async (req, res) => {
let collection = dbo.collection('scrapes');

var o_id = new ObjectID(id);
var newvalues = { $set: {status: "pending"} };
let data = await collection.updateOne({ _id: o_id}, newvalues);
var newvalues = { $set: { status: "pending" } };
await collection.updateOne({ _id: o_id }, newvalues);
agenda.now('scrape', { _id: o_id });
return res.status(204).json();
} catch (err) {
console.log(err);
@@ -174,24 +206,19 @@ app.patch("/scrapes/:id/execute", async (req, res) => {
});

const port = 3333;
// var task = cron.schedule('* * * * *', function() {
// console.log(`Runned job...`)
// });
// var options = {
// host: 'http://localhost',
// port:port,
// path: '/apartments/https://www.apartments.com/essex-on-the-park-chicago-il/begd58b/',
// method: 'GET'
// };


// task.start()

// task.stop();

// Start the HTTP server on the configured port and log the local URL.
app.listen(port, () => {
console.log(`Example app listening at http://localhost:${port}`)
});
// Close the database client when the process exits.
// NOTE(review): Node.js 'exit' listeners may only perform synchronous work;
// if client.close() is asynchronous it is not guaranteed to complete here.
// NOTE(review): `client` is not defined in the visible part of this file —
// confirm it is in scope.
process.on('exit', function () {
client.close();
});

// Handles graceful stopping of jobs
/**
 * Gracefully shuts the process down: stops Agenda so running jobs are
 * unlocked, closes the database client, then exits.
 *
 * Agenda 4.x `stop()` returns a promise and does not accept a callback, so
 * the earlier callback-based version never reached `process.exit`. Errors
 * are logged with `console.error` because no `logger` is defined in the
 * visible part of this file.
 *
 * NOTE(review): `client` is not defined in the visible part of this file —
 * confirm it is in scope.
 *
 * @returns {Promise<void>}
 */
async function graceful() {
  try {
    await agenda.stop();
    await client.close();
  } catch (e) {
    console.error(e);
  } finally {
    // Exit code kept at 0 in every case, as in the original handler.
    process.exit(0);
  }
}

process.on('SIGTERM', graceful);
process.on('SIGINT', graceful);

+ 2
- 1
config/default.json Dosyayı Görüntüle

@@ -1,4 +1,5 @@
{
"mongo":"mongodb://localhost:27017/",
"database":"scraper"
"database":"scraper",
"agenda":"mongodb://localhost:27017/scraper-agenda"
}

+ 132
- 0
package-lock.json Dosyayı Görüntüle

@@ -34,6 +34,34 @@
"negotiator": "0.6.2"
}
},
"agenda": {
"version": "4.1.3",
"resolved": "https://registry.npmjs.org/agenda/-/agenda-4.1.3.tgz",
"integrity": "sha512-QT89CzmO67dwM3Ku7j4qLemm4VEBSMu/bLMbgbQCuE9utJEF0+ZTCCY0Cd/OkoqsMq7d92x02FWnLe7LoIUKAQ==",
"requires": {
"cron-parser": "^3.0.0",
"date.js": "~0.3.3",
"debug": "~4.3.0",
"human-interval": "~2.0.0",
"moment-timezone": "~0.5.27",
"mongodb": "~3.6.2"
},
"dependencies": {
"debug": {
"version": "4.3.2",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.2.tgz",
"integrity": "sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw==",
"requires": {
"ms": "2.1.2"
}
},
"ms": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
}
}
},
"ajv": {
"version": "6.12.6",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
@@ -261,6 +289,15 @@
}
}
},
"call-bind": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.2.tgz",
"integrity": "sha512-7O+FbCihrB5WGbFYesctwmTKae6rOiIzmz1icreWJ+0aA7LJfuqhEso2T9ncpcFtzMQtzXf2QGGueWJGTYsqrA==",
"requires": {
"function-bind": "^1.1.1",
"get-intrinsic": "^1.0.2"
}
},
"camelcase": {
"version": "5.3.1",
"resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz",
@@ -450,6 +487,15 @@
"vary": "^1"
}
},
"cron-parser": {
"version": "3.5.0",
"resolved": "https://registry.npmjs.org/cron-parser/-/cron-parser-3.5.0.tgz",
"integrity": "sha512-wyVZtbRs6qDfFd8ap457w3XVntdvqcwBGxBoTvJQH9KGVKL/fB+h2k3C8AqiVxvUQKN1Ps/Ns46CNViOpVDhfQ==",
"requires": {
"is-nan": "^1.3.2",
"luxon": "^1.26.0"
}
},
"crypto-random-string": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/crypto-random-string/-/crypto-random-string-2.0.0.tgz",
@@ -481,6 +527,24 @@
"assert-plus": "^1.0.0"
}
},
"date.js": {
"version": "0.3.3",
"resolved": "https://registry.npmjs.org/date.js/-/date.js-0.3.3.tgz",
"integrity": "sha512-HgigOS3h3k6HnW011nAb43c5xx5rBXk8P2v/WIT9Zv4koIaVXiH2BURguI78VVp+5Qc076T7OR378JViCnZtBw==",
"requires": {
"debug": "~3.1.0"
},
"dependencies": {
"debug": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz",
"integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==",
"requires": {
"ms": "2.0.0"
}
}
}
},
"debug": {
"version": "2.6.9",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
@@ -510,6 +574,14 @@
"integrity": "sha512-0ISdNousHvZT2EiFlZeZAHBUvSxmKswVCEf8hW7KWgG4a8MVEu/3Vb6uWYozkjylyCxe0JBIiRB1jV45S70WVQ==",
"dev": true
},
"define-properties": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.1.3.tgz",
"integrity": "sha512-3MqfYKj2lLzdMSf8ZIZE/V+Zuy+BgD6f164e8K2w7dgnpKArBDerGYpM46IYYcjnkdPNMjPk9A6VFB8+3SKlXQ==",
"requires": {
"object-keys": "^1.0.12"
}
},
"delayed-stream": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
@@ -750,6 +822,21 @@
"dev": true,
"optional": true
},
"function-bind": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz",
"integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A=="
},
"get-intrinsic": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.1.1.tgz",
"integrity": "sha512-kWZrnVM42QCiEA2Ig1bG8zjoIMOgxWwYCEeNdwY6Tv/cOSeGpcoX4pXHfKUxNKVoArnrEr2e9srnAxxGIraS9Q==",
"requires": {
"function-bind": "^1.1.1",
"has": "^1.0.3",
"has-symbols": "^1.0.1"
}
},
"get-stream": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz",
@@ -824,12 +911,25 @@
"har-schema": "^2.0.0"
}
},
"has": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz",
"integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==",
"requires": {
"function-bind": "^1.1.1"
}
},
"has-flag": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
"integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=",
"dev": true
},
"has-symbols": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.2.tgz",
"integrity": "sha512-chXa79rL/UC2KlX17jo3vRGz0azaWEx5tGqZg5pO3NUyEJVB17dMruQlzCCOfUvElghKcm5194+BCRvi2Rv/Gw=="
},
"has-yarn": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/has-yarn/-/has-yarn-2.1.0.tgz",
@@ -880,6 +980,14 @@
"sshpk": "^1.7.0"
}
},
"human-interval": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/human-interval/-/human-interval-2.0.1.tgz",
"integrity": "sha512-r4Aotzf+OtKIGQCB3odUowy4GfUDTy3aTWTfLd7ZF2gBCy3XW3v/dJLRefZnOFFnjqs5B1TypvS8WarpBkYUNQ==",
"requires": {
"numbered": "^1.1.0"
}
},
"iconv-lite": {
"version": "0.4.24",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
@@ -971,6 +1079,15 @@
"is-path-inside": "^3.0.1"
}
},
"is-nan": {
"version": "1.3.2",
"resolved": "https://registry.npmjs.org/is-nan/-/is-nan-1.3.2.tgz",
"integrity": "sha512-E+zBKpQ2t6MEo1VsonYmluk9NxGrbzpeeLC2xIViuO2EjU2xsXsBPwTr3Ykv9l08UYEVEdWeRZNouaZqF6RN0w==",
"requires": {
"call-bind": "^1.0.0",
"define-properties": "^1.1.3"
}
},
"is-npm": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/is-npm/-/is-npm-4.0.0.tgz",
@@ -1085,6 +1202,11 @@
"integrity": "sha512-G2Lj61tXDnVFFOi8VZds+SoQjtQC3dgokKdDG2mTm1tx4m50NUHBOZSBwQQHyy0V12A0JTG4icfZQH+xPyh8VA==",
"dev": true
},
"luxon": {
"version": "1.27.0",
"resolved": "https://registry.npmjs.org/luxon/-/luxon-1.27.0.tgz",
"integrity": "sha512-VKsFsPggTA0DvnxtJdiExAucKdAnwbCCNlMM5ENvHlxubqWd0xhZcdb4XgZ7QFNhaRhilXCFxHuoObP5BNA4PA=="
},
"make-dir": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/make-dir/-/make-dir-3.1.0.tgz",
@@ -1269,6 +1391,11 @@
"boolbase": "^1.0.0"
}
},
"numbered": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/numbered/-/numbered-1.1.0.tgz",
"integrity": "sha512-pv/ue2Odr7IfYOO0byC1KgBI10wo5YDauLhxY6/saNzAdAs0r1SotGCPzzCLNPL0xtrAwWRialLu23AAu9xO1g=="
},
"oauth-sign": {
"version": "0.9.0",
"resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz",
@@ -1279,6 +1406,11 @@
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
"integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM="
},
"object-keys": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz",
"integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA=="
},
"on-finished": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz",

+ 1
- 0
package.json Dosyayı Görüntüle

@@ -9,6 +9,7 @@
"author": "",
"license": "ISC",
"dependencies": {
"agenda": "^4.1.3",
"axios": "^0.21.1",
"body-parser": "^1.19.0",
"cheerio": "^1.0.0-rc.10",

Loading…
İptal
Kaydet