# Module imports are inferred from the names used in this excerpt; the logger
# setup is assumed. Helpers such as update_column, delete_old_data,
# set_processStatus_zero, set_funding_processStatus and clean_test_tables are
# defined elsewhere in this module.
import logging

import db
import beian_links
import icp_chinaz
import screenshot_website
import company_info_expand
import company_aggregator
import company_aggregator_dev

logger = logging.getLogger(__name__)


# NOTE: an earlier variant of decompose(); it is shadowed by the redefinition
# below, which adds the reserve-source-company logic.
def decompose(company_id, hard=True):
    conn = db.connect_torndb()
    company = conn.get("select * from company where id=%s", company_id)
    scs = list(conn.query(
        "select * from source_company "
        "where (active is null or active='Y') "
        "and (source is not null and source != 13002 and (source < 13100 or source >= 13110)) "
        "and companyStatus != 2020 and companyId=%s "
        "order by source",
        company_id))
    conn.close()

    if len(scs) < 2:
        logger.info("Company: %s has one active source company, no need to decompose",
                    company_id)
        return True

    fullName = company["fullName"]
    name = company["name"]
    description = company["description"]

    # init crawlers
    beian_links_crawler = beian_links.BeianLinksCrawler()
    icp_chinaz_crawler = icp_chinaz.IcpchinazCrawler()
    screenshot_crawler = screenshot_website.phantomjsScreenshot()

    for sc in scs:
        company_info_expand.expand_source_company(sc["id"], beian_links_crawler,
                                                  icp_chinaz_crawler, screenshot_crawler)
        company_aggregator_dev.aggregator(sc)
    return True
def decompose(company_id, hard=True):
    conn = db.connect_torndb()
    company = conn.get("select * from company where id=%s", company_id)
    scs = list(conn.query(
        "select * from source_company "
        "where (active is null or active='Y') "
        "and (source is not null and source != 13002 and (source < 13100 or source >= 13110)) "
        "and companyStatus != 2020 and companyId=%s "
        "order by source",
        company_id))
    conn.close()

    if len(scs) < 2:
        logger.info("Company: %s has one active source company, no need to decompose",
                    company_id)
        return True

    fullName = company["fullName"]
    name = company["name"]
    description = company["description"]

    # init crawlers
    beian_links_crawler = beian_links.BeianLinksCrawler()
    icp_chinaz_crawler = icp_chinaz.IcpchinazCrawler()
    screenshot_crawler = screenshot_website.phantomjsScreenshot()

    # Prefer the source company whose non-empty name exactly matches the
    # company name; fall back to the first source company otherwise.
    reserve_sc = None
    for sc in scs:
        logger.info("source company: %s, source: %s, sourceId: %s",
                    sc["id"], sc["source"], sc["sourceId"])
        if sc["name"].strip() != "" and sc["name"] == name:
            reserve_sc = sc
            break
    if reserve_sc is None:
        reserve_sc = scs[0]
    logger.info("Reserve source company: %s, %s for company: %s, %s",
                reserve_sc["id"], reserve_sc["name"], company["id"], company["name"])

    # Rebuild the company from the reserved source company, then reset the
    # process status so the remaining source companies get re-aggregated.
    update_column(company, reserve_sc)
    delete_old_data(company_id)
    company_info_expand.expand_source_company(reserve_sc["id"], beian_links_crawler,
                                              icp_chinaz_crawler, screenshot_crawler)
    set_processStatus_zero(company_id, reserve_sc["id"], hard)
    for sc in scs:
        set_funding_processStatus(sc["id"])
    company_aggregator.aggregator(reserve_sc)
    return True
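# A minimal batch-driver sketch for decompose(). This is not part of the
# original module: the helper name decompose_batch, the id-range scan of the
# company table and the hard=True choice are all assumptions for illustration.
def decompose_batch(start_id, end_id):
    conn = db.connect_torndb()
    rows = list(conn.query("select id from company where id >= %s and id < %s",
                           start_id, end_id))
    conn.close()
    for row in rows:
        try:
            decompose(row["id"], hard=True)
        except Exception:
            # keep the batch moving; one bad company should not stop the scan
            logger.exception("decompose failed for company: %s", row["id"])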
def expand(sourceCompanyId):
    # Test helper: re-expand a single source company. The crawlers are
    # instantiated locally here because this excerpt defines no module-level
    # crawler instances for them to reference.
    beian_links_crawler = beian_links.BeianLinksCrawler()
    icp_chinaz_crawler = icp_chinaz.IcpchinazCrawler()
    screenshot_crawler = screenshot_website.phantomjsScreenshot()
    company_info_expand.expand_source_company(sourceCompanyId, beian_links_crawler,
                                              icp_chinaz_crawler, screenshot_crawler,
                                              test=True)
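# Usage sketch for the test helper above; the source_company id is
# hypothetical:
#
#   expand(42)  # re-expands source_company 42 with test=True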
company_id = company["id"] conn = db.connect_torndb() scs = list( conn.query( "select * from source_company where (active is null or active='Y') and (source is not null and source != 13002 and (source < 13100 or source >= 13110)) and companyStatus!=2020 and companyId=%s order by id", company_id)) if len(scs) > 1: conn = db.connect_torndb() #delete from test tables; clean_test_tables(test) #re-do aggregator for each source_company: ids = [] for sc in scs: company_info_expand.expand_source_company(sc["id"], beian_links_crawler, icp_chinaz_crawler, screenshot_crawler, test=True) ids.append(str(sc["id"])) logger.info("Company: %s has %s source companies: %s", company_id, len(scs), ",".join(ids)) round_max = len(scs) round = 0 sc0 = scs.pop(0) logger.info("Insert New company with source company: %s", sc0["id"]) company_aggregator.aggregator(sc0, test=True) while True: