Пример #1
0
def list_spiders(request):
    """Return a JSON map of every spider to the status of each of its jobs.

    The spider names and the job lists come from ``endpoint`` (called with
    ``OPTS``). Per-job details are read from ``rserver`` under two keys:
    ``<spider>:status:<job_id>:latest`` (a hash) and, for running jobs,
    ``<spider>:status:<job_id>:status`` (a plain value).

    Args:
        request: The incoming request; it is not inspected.

    Returns:
        An ``HttpResponse`` whose body is the JSON encoding of
        ``{spider_name: {job_id: status_dict}}``. Each ``status_dict`` holds
        the fields of the ``latest`` hash plus a ``"status"`` and a ``"pid"``
        entry (``""`` when the job reports no pid).
    """
    spiders = {}
    for spider_name in endpoint.list_spider(OPTS)["spiders"]:
        spiders[spider_name] = {}

    jobs = endpoint.list_jobs(OPTS)
    running_jobs = jobs["running"]
    pending_jobs = jobs["pending"]
    finished_jobs = jobs["finished"]

    for job in running_jobs:
        spider, job_id = job["spider"], job["id"]
        status = rserver.hgetall("%s:status:%s:latest" % (spider, job_id))
        redis_status = rserver.get("%s:status:%s:status" % (spider, job_id))
        if not status:
            # Nothing has been recorded for the job yet: report it as pending.
            status["status"] = "pending"
        elif redis_status in ("stopping", "finished"):
            # The explicit status value overrides "running" for these two
            # states (the "finished" case must assign, not compare).
            status["status"] = redis_status
        else:
            status["status"] = "running"
        status["pid"] = job.get("pid", "")
        # setdefault keeps a job whose spider is missing from the spider list
        # from raising KeyError.
        spiders.setdefault(spider, {})[job_id] = status

    for job in pending_jobs:
        spider, job_id = job["spider"], job["id"]
        status = rserver.hgetall("%s:status:%s:latest" % (spider, job_id))
        status["status"] = "pending"
        status["pid"] = job.get("pid", "")
        spiders.setdefault(spider, {})[job_id] = status

    for job in finished_jobs:
        spider, job_id = job["spider"], job["id"]
        status = rserver.hgetall("%s:status:%s:latest" % (spider, job_id))
        # A finished job without any recorded details is reported as pending.
        status["status"] = "finished" if status else "pending"
        status["pid"] = job.get("pid", "")
        spiders.setdefault(spider, {})[job_id] = status

    return HttpResponse(json.dumps(spiders))
Пример #2
0
def list_spiders(request):
    """Return a JSON map of every spider to the status of each of its jobs.

    The spider names and the job lists come from ``endpoint`` (called with
    ``OPTS``). Per-job details are read from ``rserver`` under two keys:
    ``<spider>:status:<job_id>:latest`` (a hash) and, for running jobs,
    ``<spider>:status:<job_id>:status`` (a plain value).

    Args:
        request: The incoming request; it is not inspected.

    Returns:
        An ``HttpResponse`` whose body is the JSON encoding of
        ``{spider_name: {job_id: status_dict}}``. Each ``status_dict`` holds
        the fields of the ``latest`` hash plus a ``"status"`` and a ``"pid"``
        entry (``""`` when the job reports no pid).
    """
    spiders = {}
    for spider_name in endpoint.list_spider(OPTS)["spiders"]:
        spiders[spider_name] = {}

    jobs = endpoint.list_jobs(OPTS)
    running_jobs = jobs["running"]
    pending_jobs = jobs["pending"]
    finished_jobs = jobs["finished"]

    for job in running_jobs:
        spider, job_id = job["spider"], job["id"]
        status = rserver.hgetall("%s:status:%s:latest" % (spider, job_id))
        redis_status = rserver.get("%s:status:%s:status" % (spider, job_id))
        if not status:
            # Nothing has been recorded for the job yet: report it as pending.
            status["status"] = "pending"
        elif redis_status in ("stopping", "finished"):
            # The explicit status value overrides "running" for these two
            # states (the "finished" case must assign, not compare).
            status["status"] = redis_status
        else:
            status["status"] = "running"
        status["pid"] = job.get("pid", "")
        # setdefault keeps a job whose spider is missing from the spider list
        # from raising KeyError.
        spiders.setdefault(spider, {})[job_id] = status

    for job in pending_jobs:
        spider, job_id = job["spider"], job["id"]
        status = rserver.hgetall("%s:status:%s:latest" % (spider, job_id))
        status["status"] = "pending"
        status["pid"] = job.get("pid", "")
        spiders.setdefault(spider, {})[job_id] = status

    for job in finished_jobs:
        spider, job_id = job["spider"], job["id"]
        status = rserver.hgetall("%s:status:%s:latest" % (spider, job_id))
        # A finished job without any recorded details is reported as pending.
        status["status"] = "finished" if status else "pending"
        status["pid"] = job.get("pid", "")
        spiders.setdefault(spider, {})[job_id] = status

    return HttpResponse(json.dumps(spiders))
Пример #3
0
def new_crawl(request):
    """Start a new crawl for the spider given in the ``name`` query parameter.

    Args:
        request: The incoming request; only ``request.GET["name"]`` is read.

    Returns:
        An ``HttpResponse`` with a JSON body. The body is an error object
        (``{"status": "Error", "message": ...}``) when the name is missing or
        empty, or when the spider already has ``MAX_INST`` or more running
        jobs. Otherwise it is the JSON-encoded result of
        ``endpoint.new_crawl(name, OPTS)``.
    """
    name = request.GET.get("name")
    if not name:
        # Encode the error as JSON like every other response of this view;
        # handing the bare dict to HttpResponse would not yield a JSON body.
        status = {"status": "Error", "message": "invalid name"}
        return HttpResponse(json.dumps(status))

    # Count the jobs of this spider that are currently running.
    running_jobs = endpoint.list_jobs(OPTS)["running"]
    spider_counter = sum(1 for job in running_jobs if job["spider"] == name)
    if spider_counter >= MAX_INST:
        status = {
            "status": "Error",
            "message": "maximum instances! can not fork any more!",
        }
        return HttpResponse(json.dumps(status))

    result = endpoint.new_crawl(name, OPTS)
    return HttpResponse(json.dumps(result))
Пример #4
0
def new_crawl(request):
    """Start a new crawl for the spider given in the ``name`` query parameter.

    Args:
        request: The incoming request; only ``request.GET["name"]`` is read.

    Returns:
        An ``HttpResponse`` with a JSON body. The body is an error object
        (``{"status": "Error", "message": ...}``) when the name is missing or
        empty, or when the spider already has ``MAX_INST`` or more running
        jobs. Otherwise it is the JSON-encoded result of
        ``endpoint.new_crawl(name, OPTS)``.
    """
    name = request.GET.get("name")
    if not name:
        # Encode the error as JSON like every other response of this view;
        # handing the bare dict to HttpResponse would not yield a JSON body.
        status = {"status": "Error", "message": "invalid name"}
        return HttpResponse(json.dumps(status))

    # Count the jobs of this spider that are currently running.
    running_jobs = endpoint.list_jobs(OPTS)["running"]
    spider_counter = sum(1 for job in running_jobs if job["spider"] == name)
    if spider_counter >= MAX_INST:
        status = {
            "status": "Error",
            "message": "maximum instances! can not fork any more!",
        }
        return HttpResponse(json.dumps(status))

    result = endpoint.new_crawl(name, OPTS)
    return HttpResponse(json.dumps(result))