from collections import defaultdict
from datetime import datetime

from flask import jsonify, request
from scrapyd_api import ScrapydAPI

# Project-local helpers referenced below (scrapyd_utils, scheduler_logging,
# scheduler_history, history, set_schedule, DATE_TIME_FORMAT) are imported
# from the surrounding package and are not shown in this section.


def list_jobs():
    """List the jobs of a project."""
    server_host = request.args.get("server_host")
    server_name = request.args.get("server_name")
    project_name = request.args.get("project_name")

    scrapyd = ScrapydAPI(server_host)
    jobs = scrapyd.list_jobs(project_name)

    lst = []
    for job_status, job_list in jobs.items():
        for job in job_list:
            item = {
                "status": job_status,
                "spider": job["spider"],
                "start_time": scrapyd_utils.format_time(job.get("start_time", "")),
                "end_time": scrapyd_utils.format_time(job.get("end_time", "")),
                "timestamp": scrapyd_utils.get_timestamp(job.get("end_time"), job.get("start_time")),
                "job_id": job["id"],
            }
            lst.append(item)

    data = {
        "server_host": server_host,
        "server_name": server_name,
        "project_name": project_name,
        "jobs": lst,
    }
    return jsonify(data)
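# The scrapyd_utils helpers used above are not shown in this section. Below is
# a minimal sketch of what they might look like, assuming scrapyd's
# "%Y-%m-%d %H:%M:%S.%f" time strings; the format constant and the fallback
# behavior of get_timestamp are guesses read off the call sites, not the
# project's actual implementation.
from datetime import datetime

SCRAPYD_TIME_FORMAT = "%Y-%m-%d %H:%M:%S.%f"  # assumed scrapyd time layout


def format_time(value):
    """Trim microseconds for display; pass empty values through unchanged."""
    if not value:
        return ""
    return datetime.strptime(value, SCRAPYD_TIME_FORMAT).strftime("%Y-%m-%d %H:%M:%S")


def get_timestamp(end_time, start_time):
    """Epoch seconds of end_time if set, else start_time; handy as a sort key."""
    value = end_time or start_time
    if not value:
        return 0
    return int(datetime.strptime(value, SCRAPYD_TIME_FORMAT).timestamp())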
def list_projects():
    """List the projects on a server, along with their versions."""
    server_host = request.args.get("server_host")
    server_name = request.args.get("server_name")

    scrapyd = ScrapydAPI(server_host)
    projects = scrapyd.list_projects()

    lst = []
    for project in projects:
        versions = scrapyd.list_versions(project)
        for version in versions:
            item = {
                "project_name": project,
                "human_version": scrapyd_utils.format_version(version),
                "version": version,
            }
            lst.append(item)

    data = {
        "server_name": server_name,
        "server_host": server_host,
        "projects": lst,
    }
    return jsonify(data)
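# scrapyd-deploy uses the current epoch seconds as the default package version,
# so format_version presumably renders that number as a readable datetime. A
# hypothetical sketch, assuming numeric versions with a raw-string fallback:
from datetime import datetime


def format_version(version):
    """Render an epoch-seconds version as a datetime; keep non-numeric ones as-is."""
    try:
        return datetime.fromtimestamp(int(version)).strftime("%Y-%m-%d %H:%M:%S")
    except (TypeError, ValueError):
        return version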
def delete_version():
    """Delete a project version."""
    server_host = request.args.get("server_host")
    server_name = request.args.get("server_name")
    project_name = request.args.get("project_name")
    version = request.args.get("version")

    scrapyd = ScrapydAPI(server_host)
    result = scrapyd.delete_version(project_name, version)
    return jsonify({"message": result})
def get_server_status(server_list):
    """
    Get the status of every scrapyd server.

    Version mismatch: this client targets scrapyd==1.2.0, but some servers
    still run scrapyd==1.1.0, which lacks the daemon_status endpoint.
    :param server_list:
    :return:
    """
    servers = []
    count = 0
    for server in server_list:
        server_name = server["server_name"]
        server_host = server["server_host"]
        count += 1

        scrapyd = ScrapydAPI(server_host)
        server_status = scrapyd.daemon_status()

        # Fallback for old servers without daemon_status: probe list_projects
        # and aggregate the job counts per status instead.
        if server_status.get("status") == "error":
            projects = scrapyd.list_projects()
            print("{}: {}".format(server_host, projects))

            server_status = {
                "status": "error" if len(projects) == 0 else "ok",
            }

            job_counts = defaultdict(int)
            for project in set(projects):
                jobs = scrapyd.list_jobs(project)
                for job_status, job_list in jobs.items():
                    job_counts[job_status] += len(job_list)

            server_status.update(job_counts)

        item = {
            "index": count,
            "server_name": server_name,
            "server_host": server_host,
            "server_status": server_status,
        }
        servers.append(item)
    return servers
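# A quick illustration of the shapes get_server_status consumes and produces;
# the hosts are placeholders and the counts are invented for the example.
if __name__ == "__main__":
    servers = get_server_status([
        {"server_name": "node-1", "server_host": "http://127.0.0.1:6800"},
        {"server_name": "node-2", "server_host": "http://10.0.0.2:6800"},
    ])
    # Each entry carries the daemon_status payload (or the fallback built above):
    # {"index": 1, "server_name": "node-1", "server_host": "http://127.0.0.1:6800",
    #  "server_status": {"status": "ok", "pending": 0, "running": 1, "finished": 3}}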
def cancel():
    """Cancel a running spider job."""
    server_host = request.args.get("server_host")
    server_name = request.args.get("server_name")
    project_name = request.args.get("project_name")
    job_id = request.args.get("job_id")

    scrapyd = ScrapydAPI(server_host)
    result = scrapyd.cancel(project_name, job_id)
    return jsonify({"message": result})
def schedule():
    """Schedule a spider to run."""
    server_host = request.args.get("server_host")
    server_name = request.args.get("server_name")
    project_name = request.args.get("project_name")
    spider_name = request.args.get("spider_name")

    scrapyd = ScrapydAPI(server_host)
    result = scrapyd.schedule(project_name, spider_name)
    return jsonify({"message": result})
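# ScrapydAPI.schedule() returns the job id that scrapyd assigns to the run.
# python-scrapyd-api also accepts a settings dict of per-run Scrapy setting
# overrides and forwards extra keyword arguments to the spider as arguments;
# the host, project, spider, and values below are illustrative only.
def schedule_with_arguments_demo():
    """Sketch of a direct schedule() call with overrides and spider arguments."""
    scrapyd = ScrapydAPI("http://127.0.0.1:6800")  # placeholder host
    job_id = scrapyd.schedule(
        "demo_project", "demo_spider",
        settings={"DOWNLOAD_DELAY": 2},  # per-run setting override
        category="books",                # passed to the spider as an argument
    )
    return job_id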
def list_spiders():
    """List the spiders of a project."""
    server_host = request.args.get("server_host")
    server_name = request.args.get("server_name")
    project_name = request.args.get("project_name")

    scrapyd = ScrapydAPI(server_host)
    spiders = scrapyd.list_spiders(project_name)

    data = {
        "server_name": server_name,
        "server_host": server_host,
        "project_name": project_name,
        "spiders": [{"spider_name": spider} for spider in spiders],
    }
    return jsonify(data)
def run_spider(**kwargs):
    """
    Run a spider; invoked by the scheduler.
    :param kwargs:
    :return:
    """
    server_host = kwargs["server_host"]
    server_name = kwargs["server_name"]
    project_name = kwargs["project_name"]
    spider_name = kwargs["spider_name"]
    job_id = kwargs["job_id"]

    # Default to 0 so a missing counter doesn't raise a TypeError on first run.
    times = kwargs.get("times", 0)
    times += 1

    scheduler_logging.info("Running spider: [{}][{}] {}-{} => {}".format(
        times, server_host, server_name, project_name, spider_name))

    scrapyd = ScrapydAPI(server_host)
    result = scrapyd.schedule(project_name, spider_name)

    # Record the scheduling history.
    with scheduler_history.lock:
        history.insert(job_id=job_id, server_host=server_host, server_name=server_name,
                       project_name=project_name, spider_name=spider_name,
                       spider_job_id=result)

    scheduler_logging.info("Finished spider: [{}] {}-{} => {} {}".format(
        server_host, server_name, project_name, spider_name, result))

    kwargs["times"] = times
    kwargs["spider_job_id"] = result
    kwargs["last_run_time"] = datetime.now().strftime(DATE_TIME_FORMAT)
    set_schedule(kwargs)
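# set_schedule and the scheduler driving run_spider are not shown in this
# section. One plausible wiring, assuming APScheduler (an assumption, not
# necessarily the project's actual scheduler); every name and value below is
# illustrative only.
if __name__ == "__main__":
    from apscheduler.schedulers.background import BackgroundScheduler

    scheduler = BackgroundScheduler()
    scheduler.add_job(
        run_spider, "cron", minute="*/30", id="demo-job-1",
        kwargs={
            "server_host": "http://127.0.0.1:6800",
            "server_name": "node-1",
            "project_name": "demo_project",
            "spider_name": "demo_spider",
            "job_id": "demo-job-1",
            "times": 0,
        },
    )
    scheduler.start()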
def cancel_all_spider(server):
    """
    Cancel all spider jobs on a server.
    :param server:
    :return:
    """
    scrapyd = ScrapydAPI(server)
    projects = scrapyd.list_projects()
    for project in projects:
        jobs = scrapyd.list_jobs(project)
        for job_status, job_list in jobs.items():
            print(job_status, job_list)
            for job in job_list:
                uid = job.get("id")
                print("{}: {}".format(project, uid))
                scrapyd.cancel(project, uid)