def init_task_service(url, url_hash, domain, partner_id, request_id):
    """Build a ``TaskService`` record and hand it to ``pg_add_wrapper``.

    All five arguments are forwarded unchanged as same-named keyword
    arguments to ``TaskService``. Nothing is returned.
    """
    fields = {
        "url": url,
        "url_hash": url_hash,
        "domain": domain,
        "partner_id": partner_id,
        "request_id": request_id,
    }
    pg_add_wrapper(TaskService(**fields))
def init_task_service_with_xpath(url_hash, domain, status_ai, status_xpath, retry_xpath):
    """Build a ``TaskService`` record carrying AI/XPath status fields.

    Unlike ``init_task_service`` this variant passes no ``url``,
    ``partner_id`` or ``request_id``; it forwards ``url_hash``, ``domain``,
    ``status_ai``, ``status_xpath`` and ``retry_xpath`` to the model and
    then hands the new object to ``pg_add_wrapper``. Nothing is returned.
    """
    task_service = TaskService(
        url_hash=url_hash,
        domain=domain,
        status_ai=status_ai,
        status_xpath=status_xpath,
        retry_xpath=retry_xpath,
    )
    pg_add_wrapper(task_service)
def create_xpath_parsing_rules(task_main_id, url_hash, create_time=None):
    """Build an ``XpathParsingRules`` record and hand it to ``pg_add_wrapper``.

    ``task_main_id`` and ``url_hash`` are always forwarded. ``create_time``
    is passed to the model as ``_ctime`` only when it is truthy; otherwise
    the keyword is omitted entirely so the model is constructed without it.
    Nothing is returned.
    """
    fields = {"task_main_id": task_main_id, "url_hash": url_hash}
    if create_time:
        # Only override the creation timestamp when the caller supplied one.
        fields["_ctime"] = create_time
    pg_add_wrapper(XpathParsingRules(**fields))
def init_url_to_content(url, url_hash, content_hash, request_id, replaced=False):
    """Build a ``UrlToContent`` record and hand it to ``pg_add_wrapper``.

    Every argument is forwarded unchanged as a same-named keyword argument
    to ``UrlToContent``; ``replaced`` defaults to ``False``. Nothing is
    returned.
    """
    pg_add_wrapper(
        UrlToContent(
            url=url,
            url_hash=url_hash,
            content_hash=content_hash,
            request_id=request_id,
            replaced=replaced,
        )
    )
def init_task_main(url, url_hash, partner_id, domain, request_id, priority, generator=None):
    """Build a ``TaskMain`` record and hand it to ``pg_add_wrapper``.

    All arguments are forwarded unchanged as same-named keyword arguments
    to ``TaskMain``; ``generator`` defaults to ``None`` and is passed along
    even when unset. Nothing is returned.
    """
    fields = {
        "url": url,
        "url_hash": url_hash,
        "partner_id": partner_id,
        "domain": domain,
        "request_id": request_id,
        "priority": priority,
        "generator": generator,
    }
    pg_add_wrapper(TaskMain(**fields))
def create_webpages_xpath_with_data(url, url_hash, domain, title, content, content_hash, author=None,
                                    publish_date=None, cover=None, meta_description=None, content_p=None,
                                    len_p=None, len_char=None):
    """Build a ``WebpagesPartnerXpath`` record with page data and hand it to ``pg_add_wrapper``.

    Every argument except ``publish_date`` is forwarded unchanged as a
    same-named keyword argument to the model.

    ``publish_date`` is forwarded only when it is a ``datetime.date`` or
    ``datetime.datetime`` instance (subclasses included). Any other value,
    including ``None``, is left out so the model is constructed without
    that keyword. Nothing is returned.
    """
    fields = {
        "url": url,
        "url_hash": url_hash,
        "domain": domain,
        "title": title,
        "content": content,
        "content_hash": content_hash,
        "author": author,
        "cover": cover,
        "meta_description": meta_description,
        "content_p": content_p,
        "len_p": len_p,
        "len_char": len_char,
    }
    # isinstance() instead of an exact type() match: a subclass of
    # date/datetime is still a valid date and must not be silently dropped.
    # None fails the isinstance check, so no separate None guard is needed.
    if isinstance(publish_date, (datetime.date, datetime.datetime)):
        fields["publish_date"] = publish_date
    pg_add_wrapper(WebpagesPartnerXpath(**fields))
def init_task_no_service(url, url_hash, domain, request_id):
    """Build a ``TaskNoService`` record and hand it to ``pg_add_wrapper``.

    The four arguments are forwarded unchanged as same-named keyword
    arguments to ``TaskNoService``. Nothing is returned.
    """
    pg_add_wrapper(
        TaskNoService(
            url=url,
            url_hash=url_hash,
            domain=domain,
            request_id=request_id,
        )
    )
def init_partner_domain_rules(partner_id, domain, rules):
    """Build a ``DomainInfo`` record and hand it to ``pg_add_wrapper``.

    ``partner_id``, ``domain`` and ``rules`` are forwarded unchanged as
    same-named keyword arguments to ``DomainInfo``. Nothing is returned.
    """
    domain_info = DomainInfo(
        partner_id=partner_id,
        domain=domain,
        rules=rules,
    )
    pg_add_wrapper(domain_info)
def create_webpages_no_service_without_data(url, url_hash, domain):
    """Build a ``WebpagesNoService`` record with identifying fields only.

    Only ``url``, ``url_hash`` and ``domain`` are passed to the model; the
    new object is then handed to ``pg_add_wrapper``. Nothing is returned.
    """
    fields = {"url": url, "url_hash": url_hash, "domain": domain}
    pg_add_wrapper(WebpagesNoService(**fields))
def create_webpages_ai_without_data(url, url_hash, domain):
    """Build a ``WebpagesPartnerAi`` record with identifying fields only.

    Only ``url``, ``url_hash`` and ``domain`` are passed to the model; the
    new object is then handed to ``pg_add_wrapper``. Nothing is returned.
    """
    pg_add_wrapper(
        WebpagesPartnerAi(
            url=url,
            url_hash=url_hash,
            domain=domain,
        )
    )