While working with the SDK, I found it inconvenient for scheduling and deploying multiple spiders, so I wondered whether it could support a project layout like the one below, driven by a single root-level `crawlab.json` (an example follows the directory tree):
.
├── packages
│   ├── js_spiders
│   │   ├── js_spider_1
│   │   │   └── index.js
│   │   ├── js_spider_2
│   │   │   └── index.js
│   │   ├── package.json
│   │   └── .....
│   └── py_spiders
│       ├── py_spider_1
│       │   └── main.py
│       ├── py_spider_2
│       │   └── main.py
│       ├── setup.py
│       └── .....
├── crawlab.json
└── makefile
{
"spiders": [
{
"path": "packages/js_spiders",
"exclude_path": "node_modules",
"name": "js spiders",
"description": "js spiders",
"cmd": "node",
"schedules": [
{
"name": "js spider 1 cron",
"cron": "* 1 * * *",
"command": "node js_spider_1/index.js",
"param": "",
"mode": "random",
"description": "js spider 1 cron",
"enabled": true
},
{
"name": "js spider 2 cron",
"cron": "* 2 * * *",
"command": "node js_spider_2/index.js",
"param": "",
"mode": "random",
"description": "js spider 2 cron",
"enabled": true
}
]
},
{
"path": "packages/py_spiders",
"exclude_path": ".venv",
"name": "py spiders",
"description": "py spiders",
"cmd": "python",
"schedules": [
{
"name": "py spider 1 cron",
"cron": "* 1 * * *",
"command": "python py_spider_1/main.py",
"param": "",
"mode": "random",
"description": "py spider 1 cron",
"enabled": true
},
{
"name": "py spider 2 cron",
"cron": "* 2 * * *",
"command": "python py_spider_2/main.py",
"param": "",
"mode": "random",
"description": "py spider 2 cron",
"enabled": true
}
]
}
]
}