def load_project(_projectname, spider_path, models_path):
    """
    Load Project File to Database

    Reads the spider and models scripts from disk (decoded as UTF-8) and
    stores them on the ``Project`` document named ``_projectname``. An
    existing project has both scripts replaced in one update call; otherwise
    a new project is created with the default intervals below and saved.

    :param _projectname: name of the project to update or create
    :param spider_path: project spider path
    :param models_path: project models path
    :return: None
    :raises CommandError: if reading the files or writing the project fails;
        the message carries the formatted traceback
    """
    try:
        with open(spider_path, 'rb') as fp:
            spider_script = fp.read().decode('utf8')
        with open(models_path, 'rb') as fp:
            models_script = fp.read().decode('utf8')

        project = Project.objects(name=_projectname).first()
        if project:
            # One update call: script and models are written together, so a
            # failure cannot leave the project with only one of them changed.
            project.update(script=spider_script, models=models_script)
        else:
            project = Project(name=_projectname,
                              info="",
                              script=spider_script,
                              models=models_script,
                              generator_interval="60",
                              downloader_interval="60",
                              downloader_dispatch=1)
            project.save()
    except Exception:
        # Any failure is reported as a CommandError with the full traceback.
        reason = traceback.format_exc()
        raise CommandError('Failed to load project %s !, Reason: %s' %
                           (spider_path, reason))
def _load_project(self, project_name, spider_path, models_path):
    """
    Load the spider and models scripts of a project into the database.

    Both files are read as bytes and decoded as UTF-8. An existing
    ``Project`` named ``project_name`` has both scripts replaced in one
    update call; otherwise a new project is created with the default
    intervals below and saved. Failures are reported in the returned dict
    rather than raised.

    :param project_name: name of the project to update or create
    :param spider_path: path of the spider script file
    :param models_path: path of the models script file
    :return: dict with ``status`` (bool) and ``message`` (str); on failure
        the message contains the formatted traceback
    """
    try:
        with open(spider_path, 'rb') as fp:
            spider_script = fp.read().decode('utf8')
        with open(models_path, 'rb') as fp:
            models_script = fp.read().decode('utf8')

        project = Project.objects(name=project_name).first()
        if project:
            # One update call: script and models are written together, so a
            # failure cannot leave the project with only one of them changed.
            project.update(script=spider_script, models=models_script)
        else:
            project = Project(name=project_name,
                              info="",
                              script=spider_script,
                              models=models_script,
                              generator_interval="60",
                              downloader_interval="60",
                              downloader_dispatch=1)
            project.save()

        message = 'Successfully load project %s !' % (project_name)
        return {"status": True, "message": message}
    except Exception:
        reason = traceback.format_exc()
        message = 'Failed to load project %s !, Reason: %s' % (spider_path,
                                                               reason)
        return {"status": False, "message": message}
def query_projects_by_name(name):
    """
    Look up projects by name.

    The name is first converted with ``smart_unicode``. The special value
    ``"--all"`` selects every project; any other non-empty value filters on
    an exact ``name`` match.

    :param name: project name, or ``"--all"`` for all projects
    :return: the ``Project.objects`` query result, or None when the
        converted name is empty
    """
    project_name = smart_unicode(name)
    if not project_name:
        return None

    # No filter at all for "--all", otherwise filter on the exact name.
    criteria = {} if project_name == "--all" else {"name": project_name}
    return Project.objects(**criteria)
def handle(*args, **options):
    """
    Dump the results of a project as JSON or CSV.

    Every entry of ``options["type"]`` must be ``"json"`` or ``"csv"``; the
    last entry is the one used. The first name in ``options["projectname"]``
    whose project exists is dumped with ``ResultDump`` and the dump is
    returned immediately.

    :param args: unused positional arguments
    :param options: must contain the ``type`` and ``projectname`` lists
    :return: result of ``dump_as_json()`` / ``dump_as_csv()``, or None when
        no dump type or no project name was supplied
    :raises CommandError: on an unsupported type, or when a named project
        does not exist
    """
    dump_type = None
    for requested_type in options["type"]:
        if requested_type not in ("json", "csv"):
            raise CommandError(
                "Bad Type Parameter: {0}".format(requested_type))
        dump_type = requested_type

    for _projectname in options['projectname']:
        project = Project.objects(name=_projectname).first()
        if not project:
            # Report the requested name: there is no project object to read
            # a name from, and the error has to be raised to reach the
            # caller.
            raise CommandError(
                "Project does not exist!:{0}".format(_projectname))
        if dump_type == "json":
            return ResultDump(project).dump_as_json()
        if dump_type == "csv":
            return ResultDump(project).dump_as_csv()
def run_processor(self, project_name, task):
    """
    Run the processor for one task of a project.

    The project must exist and have status ``2`` (the status the error
    message refers to as Debug). Failures are reported in the returned dict
    rather than raised.

    :param project_name: name of the project; surrounding whitespace is
        stripped
    :param task: task handed to ``Processor``
    :return: dict with ``status`` and ``message``; on success also ``task``
        holding the processor result
    """
    try:
        project_name = project_name.strip()
        project = Project.objects(name=project_name).first()

        if project is None:
            return {
                "status": False,
                "message": 'Project %s does not exist!' % (project_name),
            }

        if project.status != 2:
            return {
                "status": False,
                "message": 'Project %s status is not Debug, please set status to DEBUG.' % (
                    project_name),
            }

        # Imported here, at the point of use, as in the original code.
        from collector.utils import Processor
        outcome = Processor(task=task).run_processor()
        return {
            "status": True,
            "message": 'Successfully run processor %s !' % (project_name),
            "task": outcome,
        }
    except Exception:
        return {
            "status": False,
            "message": 'Failed to run project %s !, Reason: %s' % (
                project_name, traceback.format_exc()),
        }
def init_project(self, project_name):
    """
    Initialization Project to execute path

    Creates ``settings.EXECUTE_PATH`` if it is missing, then writes three
    files into it: an empty ``__init__.py``, ``<name>_spider.py`` and
    ``<name>_models.py``, the latter two holding the project's stored
    scripts encoded as UTF-8.

    :param project_name: name of the project to write out
    :return: dict with ``status`` (bool) and ``message`` (str); a missing
        project or a failure while writing yields ``status`` False
    """
    if not os.path.exists(settings.EXECUTE_PATH):
        os.mkdir(settings.EXECUTE_PATH)

    project = Project.objects(name=project_name).first()
    if project is None:
        # Say so directly instead of surfacing an AttributeError traceback.
        message = 'Project %s does not exist!' % (project_name)
        return {"status": False, "message": message}

    try:
        project_name = project.name
        spider_script = project.script
        models_script = project.models
        _spider_path = os.path.join(settings.EXECUTE_PATH,
                                    "%s_spider.py" % (project_name))
        _models_path = os.path.join(settings.EXECUTE_PATH,
                                    "%s_models.py" % (project_name))
        execute_init = os.path.join(settings.EXECUTE_PATH, "__init__.py")

        with open(execute_init, 'w') as fp:
            fp.write("")
        # The scripts are written as encoded bytes, so the files must be
        # opened in binary mode: text mode rejects bytes on Python 3 and
        # would translate newlines on Windows.
        with open(_spider_path, 'wb') as fp:
            fp.write(spider_script.encode('utf8'))
        with open(_models_path, 'wb') as fp:
            fp.write(models_script.encode('utf8'))

        message = 'Successfully init project %s !' % (project_name)
        return {"status": True, "message": message}
    except Exception:
        reason = traceback.format_exc()
        message = 'Failed to Init project %s !, Reason: %s' % (
            project_name, reason)
        return {"status": False, "message": message}
def __init__(self, project_id):
    """
    Generator Module Initialization

    Looks up the project by id, loads its spider through ``InitSpider`` and
    creates the ``Storage`` bound to that project.

    :param project_id: id of the project to load
    """
    project = Project.objects(id=project_id).first()
    self.project = project
    InitSpider().load_spider(project)
    self.storage = Storage(project)
def __init__(self, task=None, _id=None, project_id=None):
    """
    Processor Module Initialization

    Two call styles are accepted:

    * Debug: ``task`` is a dict; the project id is read from its
      ``"project"`` key and the dict itself is packaged as the task.
    * Run: ``_id`` and ``project_id`` are both given; the task is packaged
      from ``_id``.

    :param task: task dict (debug mode)
    :param _id: task id (run mode)
    :param project_id: project id (run mode)
    :raises TypeError: when neither call style is satisfied
    """
    debug_mode = isinstance(task, dict)
    if debug_mode:
        project_id = task.get("project")
    elif not (_id and project_id):
        raise TypeError("Bad Parameters.")

    # Shared setup for both modes.
    self.project = Project.objects(id=project_id).first()
    self.storage = Storage(self.project)

    if debug_mode:
        self.task = self.storage.package_task(task=task)
    else:
        self.task = self.storage.package_task(_id=_id)
def _filter_processor_projects():
    """
    Projects Filter

    :return: list of the projects whose status is ``Project.STATUS_ON``,
        ordered by ``+priority``
    """
    active = Project.objects(status=Project.STATUS_ON).order_by('+priority')
    return [project for project in active]
def dump_task_as_json_by_task_id(self, name, task_id):
    """
    Fetch one task of a project by its task id.

    :param name: project name (converted with ``smart_unicode``)
    :param task_id: id of the task to look up
    :return: result of ``query_task_by_id``, or None when no project with
        that name exists
    """
    project_name = smart_unicode(name)
    if not Project.objects(name=project_name).first():
        return None
    return self.query_task_by_id(project_name, task_id)
def dump_result_as_json_by_name(self, name, page, rows):
    """
    Fetch a page of results for a project.

    :param name: project name (converted with ``smart_unicode``)
    :param page: passed through to ``query_result_by_name``
    :param rows: passed through to ``query_result_by_name``
    :return: result of ``query_result_by_name``, or None when no project
        with that name exists
    """
    project_name = smart_unicode(name)
    found = Project.objects(name=project_name).first()
    return self.query_result_by_name(project_name, page, rows) if found else None
def _filter_generator_projects():
    """
    Projects Filter

    Walks the projects whose status is ``Project.STATUS_ON`` in ``+priority``
    order and selects those that have never been generated, or whose
    ``generator_interval`` (seconds) has elapsed since
    ``last_generator_time``. Each selected project gets its
    ``last_generator_time`` set to the current time.

    :return: list of the selected projects
    """
    due_projects = []
    active = Project.objects(status=Project.STATUS_ON).order_by('+priority')
    for project in active:
        current_time = datetime.datetime.now()
        previous_run = project.last_generator_time
        interval_seconds = int(project.generator_interval)

        # Skip only when a previous run exists and its interval is still open.
        if previous_run and (
                previous_run + datetime.timedelta(seconds=interval_seconds)
                > current_time):
            continue

        due_projects.append(project)
        project.update(last_generator_time=current_time)
    return due_projects
def edit_project_settings(self, data):
    """
    Edit Project Settings

    Applies the settings present (and truthy) in ``data`` to the project
    named by ``data["project"]``. All values are converted first and then
    written together with ``update_datetime`` in one update call, so an
    invalid value leaves the project untouched.

    Recognised keys: ``group``, ``timeout``, ``status``, ``priority``,
    ``info``, ``script``, ``interval`` (stored as ``generator_interval``),
    ``ip_limit`` (stored as ``downloader_interval``) and ``number`` (stored
    as ``downloader_dispatch``).

    :param data: mapping with the project name and the settings to change
    :return: dict with ``status``, ``project``, ``message`` and ``code``:
        2001 on success, 4002 when the project name is missing or unknown,
        4003 (plus ``reason``) when a value cannot be converted, 5001 on any
        other error
    """
    def _clean(value):
        # Strip surrounding whitespace from text; leave other types as-is so
        # that numeric values are accepted as well as strings.
        return value.strip() if hasattr(value, "strip") else value

    def _text(value):
        return str(_clean(value))

    def _number(value):
        return int(_clean(value))

    def _number_text(value):
        # Validated as an integer but stored as a string.
        return str(_number(value))

    def _script(value):
        # Script content is deliberately not stripped.
        # NOTE(review): str() of encoded bytes only round-trips on Python 2;
        # confirm before running this under Python 3.
        return str(value.encode('utf8'))

    name = _clean(data.get("project"))
    # A missing or blank name cannot match a project, so skip the query.
    project = Project.objects(name=name).first() if name else None
    if project is None:
        return {
            "status": False,
            "project": name,
            "message": "Bad Parameters",
            "code": 4002,
        }

    # (request key, document field, converter), in the original order.
    field_specs = (
        ("group", "group", _text),
        ("timeout", "timeout", _number),
        ("status", "status", _number),
        ("priority", "priority", _number),
        ("info", "info", _text),
        ("script", "script", _script),
        ("interval", "generator_interval", _number_text),
        ("ip_limit", "downloader_interval", _number_text),
        ("number", "downloader_dispatch", _number),
    )

    try:
        changes = {}
        for key, field, convert in field_specs:
            raw_value = data.get(key, False)
            if raw_value:
                changes[field] = convert(raw_value)
        changes["update_datetime"] = datetime.datetime.now()
        project.update(**changes)
    except ValueError:
        return {
            "status": False,
            "project": name,
            "message": "Bad Parameters",
            "reason": traceback.format_exc(),
            "code": 4003,
        }
    except Exception:
        return {
            "status": False,
            "project": name,
            "message": "Internal Server Error",
            "code": 5001
        }

    return {
        "status": True,
        "project": name,
        "message": "Operation Succeeded",
        "code": 2001
    }