def job_add(project_id):
    """Create a job for a project from the submitted form, then redirect back.

    Required form fields: ``spider_name``, ``spider_arguments``, ``run_type``
    and ``daemon``. Optional form fields: ``priority``, ``cron_minutes``,
    ``cron_hour``, ``cron_day_of_month``, ``cron_day_of_week``, ``cron_month``
    and ``cron_exp``.

    A one-time job is stored with ``enabled = -1`` and passed to
    ``agent.start_spider``. A periodic job is stored together with its cron
    fields. Any other ``run_type`` stores nothing.

    :param project_id: id of the project the new job belongs to
    :return: a 302 redirect to ``request.referrer``
    """
    # NOTE(review): the result was never used in this function; the lookup is
    # kept in case it validates the project or has side effects -- confirm.
    Project.find_project_by_id(project_id)

    job_instance = JobInstance()
    job_instance.spider_name = request.form['spider_name']
    job_instance.project_id = project_id
    job_instance.spider_arguments = request.form['spider_arguments']
    job_instance.priority = request.form.get('priority', 0)
    job_instance.run_type = request.form['run_type']

    # A daemon was chosen manually: pass it on as an extra spider argument.
    if request.form['daemon'] != 'auto':
        spider_args = []
        if request.form['spider_arguments']:
            spider_args = request.form['spider_arguments'].split(",")
        spider_args.append("daemon={}".format(request.form['daemon']))
        job_instance.spider_arguments = ','.join(spider_args)

    if job_instance.run_type == JobRunType.ONETIME:
        job_instance.enabled = -1
        db.session.add(job_instance)
        try:
            db.session.commit()
        except Exception:
            # Leave the session usable for later requests, then re-raise.
            db.session.rollback()
            raise
        agent.start_spider(job_instance)

    if job_instance.run_type == JobRunType.PERIODIC:
        job_instance.cron_minutes = request.form.get('cron_minutes') or '0'
        job_instance.cron_hour = request.form.get('cron_hour') or '*'
        job_instance.cron_day_of_month = (
            request.form.get('cron_day_of_month') or '*')
        job_instance.cron_day_of_week = (
            request.form.get('cron_day_of_week') or '*')
        job_instance.cron_month = request.form.get('cron_month') or '*'

        # A full cron expression, when supplied, overrides the single fields.
        # Field order: minutes, hour, day of month, month, day of week.
        if request.form.get('cron_exp'):
            # split() without an argument tolerates repeated or surrounding
            # whitespace; a wrong field count still raises ValueError.
            (job_instance.cron_minutes,
             job_instance.cron_hour,
             job_instance.cron_day_of_month,
             job_instance.cron_month,
             job_instance.cron_day_of_week) = request.form['cron_exp'].split()

        db.session.add(job_instance)
        try:
            db.session.commit()
        except Exception:
            db.session.rollback()
            raise

    return redirect(request.referrer, code=302)
def post(self, project_id):
    """Create a job instance for a project from the submitted form data.

    Required form fields: ``spider_name`` and ``run_type``. Optional form
    fields: ``spider_arguments``, ``desc``, ``tags``, ``priority`` and, for
    periodic jobs, the ``cron_*`` fields plus ``start_tasks`` and
    ``max_start_tasks``.

    :param project_id: id of the project the new job belongs to
    :return: ``True`` once the job is stored, ``None`` when the form is empty
    """
    post_data = request.form
    if not post_data:
        return None

    job_instance = JobInstance()
    job_instance.spider_name = post_data['spider_name']
    job_instance.project_id = project_id
    job_instance.spider_arguments = post_data.get('spider_arguments')
    job_instance.desc = post_data.get('desc')
    job_instance.tags = post_data.get('tags')
    job_instance.run_type = post_data['run_type']
    job_instance.priority = post_data.get('priority', 0)

    if job_instance.run_type == "periodic":
        # Missing or empty cron fields fall back to minute 0 / "every".
        job_instance.cron_minutes = post_data.get('cron_minutes') or '0'
        job_instance.cron_hour = post_data.get('cron_hour') or '*'
        job_instance.cron_day_of_month = (
            post_data.get('cron_day_of_month') or '*')
        job_instance.cron_day_of_week = (
            post_data.get('cron_day_of_week') or '*')
        job_instance.cron_month = post_data.get('cron_month') or '*'
        # NOTE(review): the original was whitespace-collapsed; these two
        # fields are assumed to belong to the periodic branch -- confirm.
        job_instance.start_tasks = post_data.get('start_tasks', 1)
        job_instance.max_start_tasks = post_data.get('max_start_tasks', 1)

    db.session.add(job_instance)
    try:
        db.session.commit()
    except Exception:
        # Leave the session usable for later requests, then re-raise.
        db.session.rollback()
        raise
    return True
def job_back_in_time(project_id):
    """Create and launch one one-time "back in time" job per selected spider.

    Does nothing but redirect when ``config.BACK_IN_TIME_ENABLED`` is falsy.
    Otherwise, for every ``spider_name`` value in the form, a job is stored
    with ``enabled = -1`` and ``overlapping = True`` and passed to
    ``agent.run_back_in_time``.

    Form fields read per spider: ``spider_arguments``, ``callback`` and
    ``daemon`` (required); ``priority`` (optional).

    :param project_id: id of the project the jobs belong to
    :return: a 302 redirect to ``request.referrer``
    """
    if not config.BACK_IN_TIME_ENABLED:
        return redirect(request.referrer, code=302)

    for spider in request.form.getlist('spider_name'):
        job_instance = JobInstance()
        job_instance.project_id = project_id
        job_instance.spider_name = spider

        # An empty form value must not yield a leading empty argument.
        raw_arguments = request.form['spider_arguments']
        spider_args = raw_arguments.split(",") if raw_arguments else []
        spider_args.append("--callback={}".format(request.form['callback']))
        spider_args.append("SCRAPY_PROJECT=SCRAPY_PROJECT")

        job_instance.priority = request.form.get('priority', 0)
        job_instance.run_type = JobRunType.ONETIME
        job_instance.overlapping = True

        # A daemon was chosen manually: add it to the arguments built above
        # rather than rebuilding the list, which would drop the callback and
        # SCRAPY_PROJECT entries.
        if request.form['daemon'] != 'auto':
            spider_args.append("daemon={}".format(request.form['daemon']))
        job_instance.spider_arguments = ','.join(spider_args)

        job_instance.enabled = -1
        db.session.add(job_instance)
        try:
            db.session.commit()
        except Exception:
            # Leave the session usable for later requests, then re-raise.
            db.session.rollback()
            raise
        agent.run_back_in_time(job_instance)

    return redirect(request.referrer, code=302)
def _run_spider(spider_name, project_id):
    """
    Store a one-time job for a spider and hand it to the agent.

    The job gets normal priority, ``overlapping = True`` and ``enabled = -1``.
    The value returned by ``_get_throttle_value`` is recorded on the job and
    passed to the spider as the ``AUTOTHROTTLE_TARGET_CONCURRENCY`` setting.

    :param spider_name: name of the spider to run
    :param project_id: id of the project the spider belongs to
    :return: None
    """
    job = JobInstance()
    job.spider_name = spider_name
    job.project_id = project_id
    job.run_type = JobRunType.ONETIME
    job.priority = JobPriority.NORMAL
    job.enabled = -1
    job.overlapping = True

    # Throttle setting used to temper the spider's request rate.
    concurrency = _get_throttle_value(spider_name, project_id)
    job.throttle_concurrency = concurrency
    job.spider_arguments = (
        "setting=AUTOTHROTTLE_TARGET_CONCURRENCY={}".format(concurrency))

    db.session.add(job)
    try:
        db.session.commit()
    except Exception as error:
        # Undo the failed transaction before propagating the error.
        db.session.rollback()
        raise error

    agent.start_spider(job)
def put(self, project_id, spider_id):
    """Store and start a one-time job for an existing spider.

    Aborts with 404 when no ``SpiderInstance`` matches both ``project_id``
    and ``spider_id``. Optional form fields: ``spider_arguments``, ``desc``,
    ``tags`` and ``priority``.

    :param project_id: id of the project the spider belongs to
    :param spider_id: id of the spider instance to run
    :return: ``True`` after the job is stored and passed to the agent
    """
    spider_instance = SpiderInstance.query.filter_by(
        project_id=project_id, id=spider_id).first()
    if not spider_instance:
        abort(404)

    job_instance = JobInstance()
    job_instance.spider_name = spider_instance.spider_name
    job_instance.project_id = project_id
    job_instance.spider_arguments = request.form.get('spider_arguments')
    job_instance.desc = request.form.get('desc')
    job_instance.tags = request.form.get('tags')
    job_instance.run_type = JobRunType.ONETIME
    job_instance.priority = request.form.get('priority', 0)
    job_instance.enabled = -1

    db.session.add(job_instance)
    try:
        db.session.commit()
    except Exception:
        # Leave the session usable for later requests, then re-raise.
        db.session.rollback()
        raise

    agent.start_spider(job_instance)
    return True
def post(self, project_id):
    """Store a new job instance built from the submitted form data.

    Required form fields: ``spider_name`` and ``run_type``. Optional form
    fields: ``spider_arguments``, ``desc``, ``tags``, ``priority`` and, for
    periodic jobs, the five ``cron_*`` fields.

    :param project_id: id of the project the new job belongs to
    :return: ``True`` once the job is stored, ``None`` when the form is empty
    """
    form = request.form
    if not form:
        return None

    job = JobInstance()
    job.spider_name = form['spider_name']
    job.project_id = project_id
    job.spider_arguments = form.get('spider_arguments')
    job.desc = form.get('desc')
    job.tags = form.get('tags')
    job.run_type = form['run_type']
    job.priority = form.get('priority', 0)

    if job.run_type == "periodic":
        # Each cron field falls back to its default when missing or empty.
        cron_defaults = (
            ('cron_minutes', '0'),
            ('cron_hour', '*'),
            ('cron_day_of_month', '*'),
            ('cron_day_of_week', '*'),
            ('cron_month', '*'),
        )
        for field, fallback in cron_defaults:
            setattr(job, field, form.get(field) or fallback)

    db.session.add(job)
    try:
        db.session.commit()
    except Exception as error:
        # Undo the failed transaction before propagating the error.
        db.session.rollback()
        raise error
    return True