def _load_job_status(job):
    """Read the identifying fields and stored status hash of one job entry.

    ``job`` is a dict with ``"spider"`` and ``"id"`` keys and an optional
    ``"pid"`` key.

    Returns a ``(spider_name, job_id, pid, status)`` tuple, where ``pid`` is
    ``""`` when the job entry has none and ``status`` is the dict stored in
    the Redis hash ``"<spider>:status:<job_id>:latest"`` (empty when the hash
    does not exist).
    """
    spider_name = job["spider"]
    job_id = job["id"]
    # dict.get replaces the Python-2-only dict.has_key() lookup.
    pid = job.get("pid", "")
    status = rserver.hgetall("%s:status:%s:latest" % (spider_name, job_id))
    return spider_name, job_id, pid, status


def list_spiders(request):
    """Return a JSON map of every spider to the status of each of its jobs.

    The response body has the shape ``{spider_name: {job_id: status}}``.
    Each ``status`` is the job's ``latest`` hash from Redis, extended with a
    ``"status"`` field (``"pending"``, ``"running"``, ``"stopping"`` or
    ``"finished"``) and a ``"pid"`` field (``""`` when unknown).

    Raises ``KeyError`` if a job refers to a spider that
    ``endpoint.list_spider`` did not report.
    """
    spiders = {}
    for spider_name in endpoint.list_spider(OPTS)["spiders"]:
        spiders[spider_name] = {}

    jobs = endpoint.list_jobs(OPTS)
    running_jobs = jobs["running"]
    pending_jobs = jobs["pending"]
    finished_jobs = jobs["finished"]

    for job in running_jobs:
        spider_name, job_id, pid, status = _load_job_status(job)
        redis_status = rserver.get(
            "%s:status:%s:status" % (spider_name, job_id))
        if not status:
            # No hash written yet: the job has not reported anything.
            status["status"] = "pending"
        elif redis_status == "stopping":
            status["status"] = "stopping"
        elif redis_status == "finished":
            # Was `status["status"] == "finished"`, a comparison whose result
            # was discarded, so the finished state was never recorded.
            status["status"] = "finished"
        else:
            status["status"] = "running"
        status["pid"] = pid
        spiders[spider_name][job_id] = status

    for job in pending_jobs:
        spider_name, job_id, pid, status = _load_job_status(job)
        status["status"] = "pending"
        status["pid"] = pid
        spiders[spider_name][job_id] = status

    for job in finished_jobs:
        spider_name, job_id, pid, status = _load_job_status(job)
        # The "<spider>:status:<job_id>:status" key used to be fetched here
        # but its value was never used, so that Redis round-trip is dropped.
        # NOTE(review): a finished job with an empty hash is reported as
        # "pending", as before -- confirm this is intended.
        status["status"] = "finished" if status else "pending"
        status["pid"] = pid
        spiders[spider_name][job_id] = status

    return HttpResponse(json.dumps(spiders))
def list_spiders(request):
    """Build a JSON report of all spiders and the state of their jobs.

    The response body maps each spider name to a dict of
    ``job_id -> status``. A ``status`` dict is whatever is stored in the
    Redis hash ``"<spider>:status:<job_id>:latest"`` plus two fields set
    here: ``"status"`` (one of ``"pending"``, ``"running"``, ``"stopping"``,
    ``"finished"``) and ``"pid"`` (the job's pid, or ``""`` if the job entry
    has none).

    A ``KeyError`` propagates if a job names a spider missing from the
    ``endpoint.list_spider`` result.
    """
    spiders = {name: {} for name in endpoint.list_spider(OPTS)["spiders"]}

    jobs = endpoint.list_jobs(OPTS)
    running_jobs = jobs["running"]
    pending_jobs = jobs["pending"]
    finished_jobs = jobs["finished"]

    for job in running_jobs:
        spider = job["spider"]
        job_id = job["id"]
        status = rserver.hgetall("%s:status:%s:latest" % (spider, job_id))
        redis_status = rserver.get("%s:status:%s:status" % (spider, job_id))
        if not status:
            # Nothing stored for this job yet.
            state = "pending"
        elif redis_status == "stopping":
            state = "stopping"
        elif redis_status == "finished":
            # The original wrote `status["status"] == "finished"` here: a
            # no-op comparison instead of an assignment.
            state = "finished"
        else:
            state = "running"
        status["status"] = state
        # job.get() stands in for the removed-in-Python-3 job.has_key().
        status["pid"] = job.get("pid", "")
        spiders[spider][job_id] = status

    for job in pending_jobs:
        spider = job["spider"]
        job_id = job["id"]
        status = rserver.hgetall("%s:status:%s:latest" % (spider, job_id))
        status["status"] = "pending"
        status["pid"] = job.get("pid", "")
        spiders[spider][job_id] = status

    for job in finished_jobs:
        spider = job["spider"]
        job_id = job["id"]
        # The separate ":status" key was previously read here without being
        # used; that unused Redis lookup has been removed.
        status = rserver.hgetall("%s:status:%s:latest" % (spider, job_id))
        # NOTE(review): an empty hash on a finished job still yields
        # "pending" (unchanged behavior) -- verify this is what is wanted.
        if not status:
            status["status"] = "pending"
        else:
            status["status"] = "finished"
        status["pid"] = job.get("pid", "")
        spiders[spider][job_id] = status

    return HttpResponse(json.dumps(spiders))
def new_crawl(request):
    """Start a crawl for the spider named by the ``name`` query parameter.

    Responds with a JSON body in every case:

    * ``{"status": "Error", "message": "invalid name"}`` when ``name`` is
      missing or empty;
    * ``{"status": "Error", "message": "maximum instances! ..."}`` when the
      number of running jobs for that spider has reached ``MAX_INST``;
    * otherwise the JSON-encoded result of ``endpoint.new_crawl``.
    """
    name = request.GET.get("name")
    if not name:
        status = {"status": "Error", "message": "invalid name"}
        # The dict used to be handed to HttpResponse unserialized, so this
        # branch alone did not return JSON.
        return HttpResponse(json.dumps(status))

    running_jobs = endpoint.list_jobs(OPTS)["running"]
    spider_counter = sum(1 for job in running_jobs if job["spider"] == name)
    if spider_counter >= MAX_INST:
        status = {
            "status": "Error",
            "message": "maximum instances! can not fork any more!",
        }
        return HttpResponse(json.dumps(status))

    result = endpoint.new_crawl(name, OPTS)
    return HttpResponse(json.dumps(result))
def new_crawl(request):
    """Launch a new crawl of the spider given in ``request.GET["name"]``.

    The reply is always JSON. An error object (``"status": "Error"`` plus a
    ``"message"``) is returned when no usable name is supplied, or when
    ``MAX_INST`` or more jobs of that spider are already running. On success
    the reply is whatever ``endpoint.new_crawl(name, OPTS)`` returned,
    JSON-encoded.
    """
    name = request.GET.get("name")
    # Reject an empty ``?name=`` as well as a missing parameter.
    if not name:
        error = {"status": "Error", "message": "invalid name"}
        # Serialize explicitly: HttpResponse(error) with the raw dict (as the
        # original did) does not emit a JSON document.
        return HttpResponse(json.dumps(error))

    jobs = endpoint.list_jobs(OPTS)
    already_running = 0
    for job in jobs["running"]:
        if job["spider"] == name:
            already_running += 1

    if already_running >= MAX_INST:
        error = {
            "status": "Error",
            "message": "maximum instances! can not fork any more!",
        }
        return HttpResponse(json.dumps(error))

    return HttpResponse(json.dumps(endpoint.new_crawl(name, OPTS)))