Example #1

# Build the database:
from SpiderKeeper.app.spider.model import *


def init_database():
    db.init_app(app)
    db.create_all()


# register the spider service proxy
from SpiderKeeper.app.proxy.spiderctrl import SpiderAgent
from SpiderKeeper.app.proxy.contrib.scrapy import ScrapydProxy

agent = SpiderAgent()


def regist_server():
    if app.config.get('SERVER_TYPE') == 'scrapyd':
        for server in app.config.get("SERVERS"):
            agent.regist(ScrapydProxy(server))
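

# Hedged usage sketch (the call site is not shown in this example): at startup
# these helpers run once, with db.create_all() inside the Flask app context:
with app.app_context():
    init_database()  # bind SQLAlchemy to the app and create all tables
regist_server()      # attach one ScrapydProxy per configured server (when SERVER_TYPE is 'scrapyd')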


from SpiderKeeper.app.spider.controller import api_spider_bp

# Register blueprint(s)
app.register_blueprint(api_spider_bp)

# start the job-status sync scheduler
from SpiderKeeper.app.schedulers.common import sync_job_execution_status_job, sync_spiders, \
    reload_runnable_spider_job_execution
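
# The snippet for Example #1 ends at this import. A hedged sketch of how these
# jobs are typically registered (assumes a flask_apscheduler APScheduler named
# `scheduler` created earlier in the module; the job ids and intervals are
# illustrative, not taken from the snippet above):
scheduler.add_job('sys_sync_status', sync_job_execution_status_job,
                  trigger='interval', seconds=5)   # poll Scrapyd for job status
scheduler.add_job('sys_sync_spiders', sync_spiders,
                  trigger='interval', seconds=10)  # pick up newly deployed spiders
scheduler.add_job('sys_reload_runnable', reload_runnable_spider_job_execution,
                  trigger='interval', seconds=30)  # re-dispatch runnable periodic jobs
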
Example #2
from SpiderKeeper.app.schedulers.model import *


def init_database():
    db.init_app(app)
    db.create_all()


# register the spider service proxy
# SpiderProxy: a single spider-service class; it inherits from the
# spider-service base class SpiderServiceProxy and implements its functionality
# SpiderAgent: the spider agent class; in effect it just iterates over all
# registered spider-service proxy instances and applies the same operation to each
from SpiderKeeper.app.proxy.spiderctrl import SpiderAgent
from SpiderKeeper.app.proxy.contrib.scrapy import ScrapydProxy
from SpiderKeeper.app.param_config.model import Serversmachine

agent = SpiderAgent()  # instantiate the spider agent


def regist_server():
    # fetch the spider servers from the database and register each one, flagging the master
    machines = Serversmachine.query.all()
    for machine in machines:
        machine_dict = machine.to_dict()
        agent.regist(ScrapydProxy(machine_dict['server_ip']), machine_dict.get("is_master"))
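

# For reference, a hedged sketch of the row dict regist_server() expects from
# Serversmachine.to_dict() (keys inferred from the usage above; the value
# formats are assumptions):
#   {'server_ip': 'http://localhost:6800', 'is_master': '1'}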


# ----------------- Register each module's blueprint -----------------#
from SpiderKeeper.app.spider.controller import ctrl_spider_bp
from SpiderKeeper.app.user.api import api_user_bp
from SpiderKeeper.app.projects.api import api_project_bp
from SpiderKeeper.app.spider.api import api_spider_bp
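
# Example #2 cuts off after the blueprint imports. A hedged sketch of the
# registration step that presumably follows, mirroring Example #1:
app.register_blueprint(ctrl_spider_bp)
app.register_blueprint(api_user_bp)
app.register_blueprint(api_project_bp)
app.register_blueprint(api_spider_bp)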