Example #1
# Imports assumed by this and the following examples; scrapyd_utils is a
# project-local helper module, so its import path is a guess.
from flask import jsonify, request
from scrapyd_api import ScrapydAPI

import scrapyd_utils


def list_jobs():
    """
    List the jobs of a project on a Scrapyd server.
    """
    server_host = request.args.get("server_host")
    server_name = request.args.get("server_name")
    project_name = request.args.get("project_name")

    scrapyd = ScrapydAPI(server_host)
    jobs = scrapyd.list_jobs(project_name)
    lst = []
    for job_status, job_list in jobs.items():
        for job in job_list:
            item = {
                "status": job_status,
                "spider": job["spider"],
                "start_time": scrapyd_utils.format_time(job.get("start_time", "")),
                "end_time": scrapyd_utils.format_time(job.get("end_time", "")),
                "timestamp": scrapyd_utils.get_timestamp(job.get("end_time"), job.get("start_time")),
                "job_id": job["id"]
            }
            lst.append(item)

    data = {
        "server_host": server_host,
        "server_name": server_name,
        "project_name": project_name,
        "jobs": lst,
    }

    return jsonify(data)
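For reference, a sketch of what scrapyd.list_jobs(project_name) returns and the loop above flattens. The shape follows Scrapyd's listjobs.json grouping by state; the ids and timestamps below are invented:

# Hypothetical list_jobs payload; ids and times are made up.
jobs = {
    "pending": [
        {"id": "78391cc0fcaf11e1b0090800272a6d06", "spider": "spider1"},
    ],
    "running": [
        {"id": "422e608f9f28cef127b3d5ef93fe9399", "spider": "spider2",
         "start_time": "2012-09-12 10:14:03.594664"},
    ],
    "finished": [
        {"id": "2f16646cfcaf11e1b0090800272a6d06", "spider": "spider3",
         "start_time": "2012-09-12 10:14:03.594664",
         "end_time": "2012-09-12 10:24:03.594664"},
    ],
}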
Example #2
def list_projects():
    """
    List the projects on a Scrapyd server, with their versions.
    """
    server_host = request.args.get("server_host")
    server_name = request.args.get("server_name")

    scrapyd = ScrapydAPI(server_host)
    projects = scrapyd.list_projects()

    lst = []
    for project in projects:
        versions = scrapyd.list_versions(project)
        for version in versions:
            item = {
                "project_name": project,
                "human_version": scrapyd_utils.format_version(version),
                "version": version
            }
            lst.append(item)

    data = {
        "server_name": server_name,
        "server_host": server_host,
        "projects": lst
    }

    return jsonify(data)
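scrapyd_utils.format_version is a project-local helper that is not shown here. A plausible sketch, assuming versions are epoch seconds (scrapyd-client's default version when deploying):

from datetime import datetime

def format_version(version):
    # Turn an epoch-seconds version such as "1536738724" into a readable
    # time; fall back to the raw value for non-numeric versions.
    try:
        return datetime.fromtimestamp(int(version)).strftime("%Y-%m-%d %H:%M:%S")
    except (TypeError, ValueError):
        return version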
Example #3
def delete_version():
    """
    Delete a specific version of a project.
    """
    server_host = request.args.get("server_host")
    server_name = request.args.get("server_name")
    project_name = request.args.get("project_name")
    version = request.args.get("version")
    scrapyd = ScrapydAPI(server_host)
    result = scrapyd.delete_version(project_name, version)
    return jsonify({"message": result})
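None of these view functions carry route decorators. A minimal sketch of wiring them into a Flask app; the URL rules here are assumptions, not the original project's routes:

from flask import Flask

app = Flask(__name__)

# Hypothetical URL rules for the view functions above.
app.add_url_rule("/api/list_jobs", view_func=list_jobs)
app.add_url_rule("/api/list_projects", view_func=list_projects)
app.add_url_rule("/api/delete_version", view_func=delete_version)

if __name__ == "__main__":
    app.run(port=5000)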
Example #4
from collections import defaultdict


def get_server_status(server_list):
    """
    Get the status of each Scrapyd server.

    Scrapyd versions are inconsistent: this targets scrapyd==1.2.0,
    but servers still on scrapyd==1.1.0 have no daemon_status endpoint,
    so there is a fallback below.

    :param server_list: list of dicts with "server_name" and "server_host" keys
    :return: list of per-server status dicts
    """
    servers = []

    for index, item in enumerate(server_list, start=1):

        server_name = item["server_name"]
        server_host = item["server_host"]

        scrapyd = ScrapydAPI(server_host)
        server_status = scrapyd.daemon_status()

        # Fall back for older Scrapyd versions without daemon_status
        if server_status.get("status") == "error":

            projects = scrapyd.list_projects()
            print("{}: {}".format(server_host, projects))

            if len(projects) == 0:
                status = "error"
            else:
                status = "ok"

            server_status = {
                "status": status,
            }

            # Rebuild the per-state job counts that daemon_status would report
            job_counts = defaultdict(int)
            for project in set(projects):
                jobs = scrapyd.list_jobs(project)

                for key, value in jobs.items():
                    job_counts[key] += len(value)

            server_status.update(job_counts)

        servers.append({
            "index": index,
            "server_name": server_name,
            "server_host": server_host,
            "server_status": server_status,
        })
    return servers
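For comparison, the daemonstatus.json payload that scrapyd>=1.2 exposes and that the fallback above reconstructs; the counts here are illustrative and node_name depends on the host:

# Illustrative daemon_status payload on scrapyd>=1.2.
server_status = {
    "status": "ok",
    "pending": 0,
    "running": 2,
    "finished": 14,
    "node_name": "node-name",
}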
Example #5
def cancel():
    """
    Cancel a running spider job.
    """
    server_host = request.args.get("server_host")
    server_name = request.args.get("server_name")
    project_name = request.args.get("project_name")
    job_id = request.args.get("job_id")

    scrapyd = ScrapydAPI(server_host)
    result = scrapyd.cancel(project_name, job_id)

    return jsonify({"message": result})
Example #6
def schedule():
    """
    Schedule a spider to run.
    """
    server_host = request.args.get("server_host")
    server_name = request.args.get("server_name")
    project_name = request.args.get("project_name")
    spider_name = request.args.get("spider_name")

    scrapyd = ScrapydAPI(server_host)
    result = scrapyd.schedule(project_name, spider_name)

    return jsonify({"message": result})
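schedule() returns the Scrapyd job id. python-scrapyd-api also accepts per-run Scrapy settings and spider arguments; a sketch, with a made-up server URL and spider argument:

scrapyd = ScrapydAPI("http://localhost:6800")  # hypothetical server
job_id = scrapyd.schedule(
    "myproject",                     # project name
    "myspider",                      # spider name
    settings={"DOWNLOAD_DELAY": 2},  # per-run Scrapy settings
    category="books",                # hypothetical spider argument
)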
Example #7
def list_spiders():
    """
    List the spiders in a project.
    """
    server_host = request.args.get("server_host")
    server_name = request.args.get("server_name")
    project_name = request.args.get("project_name")

    scrapyd = ScrapydAPI(server_host)
    spiders = scrapyd.list_spiders(project_name)

    data = {
        "server_name": server_name,
        "server_host": server_host,
        "project_name": project_name,
        "spiders": [{"spider_name": spider} for spider in spiders]
    }
    return jsonify(data)
Example #8
from datetime import datetime


def run_spider(**kwargs):
    """
    Run a spider and record the run in the scheduler history.
    :param kwargs:
    :return:
    """
    server_host = kwargs["server_host"]
    server_name = kwargs["server_name"]
    project_name = kwargs["project_name"]
    spider_name = kwargs["spider_name"]
    job_id = kwargs["job_id"]
    times = kwargs.get("times", 0) + 1  # default to 0 so a missing key cannot crash

    scheduler_logging.info("Running spider: [{}][{}] {}-{} => {}".format(
        times, server_host, server_name, project_name, spider_name))

    scrapyd = ScrapydAPI(server_host)
    result = scrapyd.schedule(project_name, spider_name)

    # Record this run in the schedule history
    with scheduler_history.lock:
        history.insert(job_id=job_id,
                       server_host=server_host,
                       server_name=server_name,
                       project_name=project_name,
                       spider_name=spider_name,
                       spider_job_id=result)

    scheduler_logging.info("Finished spider: [{}] {}-{} => {} {}".format(
        server_host, server_name, project_name, spider_name, result))

    kwargs["times"] = times
    kwargs["spider_job_id"] = result
    kwargs["last_run_time"] = datetime.now().strftime(DATE_TIME_FORMAT)

    set_schedule(kwargs)
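A hypothetical invocation of run_spider; every value below is made up, and "times" may be omitted since it now defaults to 0 on the first run:

run_spider(
    server_host="http://localhost:6800",
    server_name="local",
    project_name="myproject",
    spider_name="myspider",
    job_id="scheduler-job-1",
)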
Example #9
def cancel_all_spider(server):
    """
    Cancel all spider jobs on a server.
    :param server:
    :return:
    """
    scrapyd = ScrapydAPI(server)
    projects = scrapyd.list_projects()
    for project in projects:
        jobs = scrapyd.list_jobs(project)
        # jobs maps a state ("pending", "running", "finished") to a job list
        for job_status, job_list in jobs.items():
            print(job_status, job_list)
            for job in job_list:
                uid = job.get("id")
                print("{}: {}".format(project, uid))

                scrapyd.cancel(project, uid)
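Example usage, with a hypothetical Scrapyd URL. Note that Scrapyd's cancel removes a pending job outright but only sends a termination signal to a running one, so a running job may take a moment to stop:

cancel_all_spider("http://localhost:6800")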