示例#1
0
def project_create():
    project_name = request.form['project_name']
    project = Project()
    project.project_name = project_name
    db.session.add(project)
    db.session.commit()
    return redirect("/project/%s/spider/deploy" % project.id, code=302)
示例#2
0
 def post(self):
     project_name = request.form['project_name']
     project = Project()
     project.project_name = project_name
     db.session.add(project)
     db.session.commit()
     return project.to_dict()
示例#3
0
 def get_project_list(self):
     data = request("get", self._scrapyd_url() + "/listprojects.json", return_type="json")
     result = []
     if data:
         for project_name in data['projects']:
             project = Project()
             project.project_name = project_name
             result.append(project)
     return result
示例#4
0
def job_add(project_id):
    project = Project.find_project_by_id(project_id)
    job_instance = JobInstance()
    job_instance.spider_name = request.form['spider_name']
    job_instance.project_id = project_id
    job_instance.spider_arguments = request.form['spider_arguments']
    job_instance.priority = request.form.get('priority', 0)
    job_instance.run_type = request.form['run_type']
    # chose daemon manually
    if request.form['daemon'] != 'auto':
        spider_args = []
        if request.form['spider_arguments']:
            spider_args = request.form['spider_arguments'].split(",")
        spider_args.append("daemon={}".format(request.form['daemon']))
        job_instance.spider_arguments = ','.join(spider_args)
    if job_instance.run_type == JobRunType.ONETIME:
        job_instance.enabled = -1
        db.session.add(job_instance)
        db.session.commit()
        agent.start_spider(job_instance)
    if job_instance.run_type == JobRunType.PERIODIC:
        job_instance.cron_minutes = request.form.get('cron_minutes') or '0'
        job_instance.cron_hour = request.form.get('cron_hour') or '*'
        job_instance.cron_day_of_month = request.form.get(
            'cron_day_of_month') or '*'
        job_instance.cron_day_of_week = request.form.get(
            'cron_day_of_week') or '*'
        job_instance.cron_month = request.form.get('cron_month') or '*'
        # set cron exp manually
        if request.form.get('cron_exp'):
            job_instance.cron_minutes, job_instance.cron_hour, job_instance.cron_day_of_month, job_instance.cron_day_of_week, job_instance.cron_month = \
                request.form['cron_exp'].split(' ')
        db.session.add(job_instance)
        db.session.commit()
    return redirect(request.referrer, code=302)
示例#5
0
 def get(self, project_id):
     project = Project.find_project_by_id(project_id)
     return [
         spider_instance.to_dict()
         for spider_instance in SpiderInstance.query.filter_by(
             project_id=project_id).all()
     ]
示例#6
0
def job_periodic(project_id):
    project = Project.find_project_by_id(project_id)
    job_instance_list = [
        job_instance.to_dict() for job_instance in JobInstance.query.filter_by(
            run_type="periodic", project_id=project_id).all()
    ]
    return render_template("job_periodic.html",
                           job_instance_list=job_instance_list)
示例#7
0
 def log_url(self, job_execution):
     job_instance = JobInstance.find_job_instance_by_id(
         job_execution.job_instance_id)
     project = Project.find_project_by_id(job_instance.project_id)
     for spider_service_instance in self.spider_service_instances:
         if spider_service_instance.server == job_execution.running_on:
             return spider_service_instance.log_url(
                 project.project_name, job_instance.spider_name,
                 job_execution.service_job_execution_id)
示例#8
0
 def cancel_spider(self, job_execution):
     job_instance = JobInstance.find_job_instance_by_id(
         job_execution.job_instance_id)
     project = Project.find_project_by_id(job_instance.project_id)
     for spider_service_instance in self.spider_service_instances:
         if spider_service_instance.server == job_execution.running_on:
             if spider_service_instance.cancel_spider(
                     project.project_name,
                     job_execution.service_job_execution_id):
                 job_execution.end_time = datetime.datetime.now()
                 job_execution.running_status = SpiderStatus.CANCELED
                 db.session.commit()
             break
示例#9
0
def inject_project():
    project_context = {}
    project_context['project_list'] = Project.query.all()
    if project_context['project_list'] and (not session.get('project_id')):
        project = Project.query.first()
        session['project_id'] = project.id
    if session.get('project_id'):
        project_context['project'] = Project.find_project_by_id(
            session['project_id'])
        project_context['spider_list'] = [
            spider_instance.to_dict()
            for spider_instance in SpiderInstance.query.filter_by(
                project_id=session['project_id']).all()
        ]
    else:
        project_context['project'] = {}
    return project_context
示例#10
0
def spider_egg_upload(project_id):
    project = Project.find_project_by_id(project_id)
    if 'file' not in request.files:
        flash('No file part')
        return redirect(request.referrer)
    file = request.files['file']
    # if user does not select file, browser also
    # submit a empty part without filename
    if file.filename == '':
        flash('No selected file')
        return redirect(request.referrer)
    if file:
        filename = secure_filename(file.filename)
        dst = os.path.join(tempfile.gettempdir(), filename)
        file.save(dst)
        agent.deploy(project, dst)
        flash('deploy success!')
    return redirect(request.referrer)
示例#11
0
 def start_spider(self, job_instance):
     project = Project.find_project_by_id(job_instance.project_id)
     spider_name = job_instance.spider_name
     #arguments = {}
     #if job_instance.spider_arguments:
     #    arguments = dict(map(lambda x: x.split("="), job_instance.spider_arguments.split(",")))
     from collections import defaultdict
     arguments = defaultdict(list)
     if job_instance.spider_arguments:
         for k, v in list(
                 map(lambda x: x.split('=', 1),
                     job_instance.spider_arguments.split(','))):
             arguments[k].append(v)
     threshold = 0
     daemon_size = len(self.spider_service_instances)
     if job_instance.priority == JobPriority.HIGH:
         threshold = int(daemon_size / 2)
     if job_instance.priority == JobPriority.HIGHEST:
         threshold = int(daemon_size)
     threshold = 1 if threshold == 0 else threshold
     candidates = self.spider_service_instances
     leaders = []
     if 'daemon' in arguments:
         for candidate in candidates:
             if candidate.server == arguments['daemon']:
                 leaders = [candidate]
     else:
         # TODO optimize some better func to vote the leader
         for i in range(threshold):
             leaders.append(random.choice(candidates))
     for leader in leaders:
         serviec_job_id = leader.start_spider(project.project_name,
                                              spider_name, arguments)
         job_execution = JobExecution()
         job_execution.project_id = job_instance.project_id
         job_execution.service_job_execution_id = serviec_job_id
         job_execution.job_instance_id = job_instance.id
         job_execution.create_time = datetime.datetime.now()
         job_execution.running_on = leader.server
         db.session.add(job_execution)
         db.session.commit()
示例#12
0
def service_stats(project_id):
    project = Project.find_project_by_id(project_id)
    run_stats = JobExecution.list_run_stats_by_hours(project_id)
    return render_template("server_stats.html", run_stats=run_stats)
示例#13
0
def spider_deploy(project_id):
    project = Project.find_project_by_id(project_id)
    return render_template("spider_deploy.html")
示例#14
0
def project_delete(project_id):
    project = Project.find_project_by_id(project_id)
    agent.delete_project(project)
    db.session.delete(project)
    db.session.commit()
    return redirect("/project/manage", code=302)
示例#15
0
 def get_project_list(self):
     project_list = self.spider_service_instances[0].get_project_list()
     Project.load_project(project_list)
     return [project.to_dict() for project in Project.query.all()]