def run():
    """Remove wechat data for the selected companies and record their ids.

    Fetches the company ids via ``getCompany()``, passes them to
    ``remove_wechat`` and ``transfer``, re-patches each company's website,
    and finally writes the ids (one per line) to a timestamped
    ``remove_wechat_<timestamp>.txt`` file in the current directory.
    """
    ids = getCompany()
    # ids = [143748]
    remove_wechat(ids)
    transfer(ids)
    for company_id in ids:
        company_aggregator_baseinfo.patch_website(company_id)

    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Use a context manager so the file handle is released even when a
    # write fails part-way through.
    # NOTE(review): the timestamp contains spaces and colons, which are not
    # valid in file names on every platform; kept as-is for compatibility.
    with open('remove_wechat_%s.txt' % now, 'w') as file_object:
        file_object.writelines(str(i) + '\n' for i in ids)
def aggregator(source_company, test=False):
    """Merge one source-company record into a company (and its corporate).

    Steps, in order:
      1. Resolve ``company_id``: reuse ``sc["companyId"]`` when set, otherwise
         pick a ``find_company`` matcher based on ``aggregateGrade``/``source``.
      2. If nothing matched, create a new company (skipped for source 13120)
         and queue a message task; if there is still no company, mark the
         source company as processed and stop.
      3. If a company matched, link the source company to it and aggregate
         the base info (source 13120 is only linked, then the function stops).
      4. Load the company and corporate rows, merge artifacts/members
         (plus footprint and aliases for rows not logged as "modified").
      5. Patch derived company fields and update corporate data.
      6. Mark the source company as processed; for source 13050 also reset
         ``mapChecked`` on the matching mongo ``job.company`` document.

    Args:
        source_company: mapping for the source company row. Keys read here:
            "id", "companyId", "aggregateGrade", "source", "sourceId".
        test: when truthy the function returns immediately (the test matcher
            is commented out), so nothing is read or written.

    Returns:
        None.

    NOTE(review): this block was recovered from a whitespace-collapsed source;
    the nesting below is a reconstruction. Please confirm in particular the
    placement of ``add_company_alias`` (inside the "not modified" branch) and
    of the "double add company fullName" section (after the corporate branch).
    """
    sc = source_company
    source_company_id = sc["id"]

    #find company_id
    if not test:
        company_id = sc["companyId"]
        if company_id is not None:
            logger.info("sourceCompanyId=%s has been merged before.", source_company_id)
        else:
            # Choose the matcher from the aggregate grade / source code.
            if sc["aggregateGrade"] == 1:
                company_id = find_company.find_company_grade2(sc)
            elif sc["source"] in [13120]:
                company_id = find_company.find_reference(sc)
            elif sc["source"] in [13400, 13401, 13402]:
                company_id = find_company.find_company_grade2(sc)
            else:
                company_id = find_company.find_company_new(sc)
    else:
        #test
        # company_id = find_company.find_company(sc, test)
        return
    logger.info("matched company_id=%s", company_id)

    #merge company base info
    if company_id is None:
        if source_company['source'] == 13120:
            # Source 13120 never creates a company; it only logs.
            logger.info("reference")
        else:
            logger.info("sourceCompanyId=%s is a new company", source_company_id)
            company_id = company_aggregator_baseinfo.create_company(sc, test)
            logger.info("new company_id %s", company_id)
        if company_id is None:
            # Nothing matched and nothing was created: mark as processed and stop.
            if not test:
                set_sourcecompany_processstatus(sc["id"])
            return
        else:
            # A new company was created: queue a message task whose type
            # depends on the source (13050 queues nothing).
            if source_company["source"] not in [13099, 13050]:
                send_message_task(company_id, "company_newcover", source_company["source"])
            elif source_company["source"] in [13099]:
                send_message_task(company_id, "company_create", source_company["source"])
    else:
        if source_company['source'] == 13120:
            # Source 13120 is only linked to the matched company, not merged.
            aggregator_db_util.update_source_company_found(
                company_id, source_company_id)
            set_sourcecompany_processstatus(sc["id"])
            return
        logger.info("sourceCompanyId=%s was found, companyId=%s", sc["id"], company_id)
        if not test:
            aggregator_db_util.update_source_company_found(
                company_id, source_company_id)
            company_aggregator_baseinfo.aggregate(
                company_id,
                source_company_id,
            )

    # Load the company row and, when it has one, its corporate row.
    conn = db.connect_torndb()
    company = conn.get("select * from company where id=%s", company_id)
    if company["corporateId"] is not None:
        corporate = conn.get("select * from corporate where id=%s", company["corporateId"])
    else:
        corporate = None
    conn.close()

    if company["modifyUser"] is not None and company[
            "active"] is not None and company["active"] != 'P':
        # Company logged as "modified": only artifacts and members are merged.
        logger.info("company %s modified", company["id"])
        company_aggregator_artifact.aggregate_artifact(company_id, source_company_id, test)
        company_aggregator_member.aggregate_member(company_id, source_company_id, test)
        # set_sourcecompany_processstatus(sc["id"])
        # return
    else:
        #merge others
        company_aggregator_artifact.aggregate_artifact(company_id, source_company_id, test)
        company_aggregator_member.aggregate_member(company_id, source_company_id, test)
        # company_aggregator_funding.aggregate_funding(company_id, sc, test)
        if not test:
            company_aggregator_footprint.aggregate_footprint(
                company_id, source_company_id)
        #company_aggregator_job.aggregate_job(company_id, source_company_id)
        # news: the ITjuzi news parser aggregates directly; toutian is aggregated separately
        company_aggregator_baseinfo.add_company_alias(company_id, source_company_id, test)

    # `test` is always falsy here because the test path returned above.
    if not test:
        # conn = db.connect_torndb()
        # company = conn.get("select * from company where id=%s", company_id)
        # conn.close()
        # Skip patching for verified companies and for "modified" companies.
        if company["verify"] == "Y" or (company["modifyUser"] is not None
                                        and company["active"] is not None
                                        and company["active"] != 'P'):
            pass
        else:
            company_aggregator_baseinfo.patch_company_establish_date(
                company_id)
            company_aggregator_baseinfo.patch_company_location(company_id)
            company_aggregator_baseinfo.patch_company_fullname(company_id)
            company_aggregator_baseinfo.patch_company_status(company_id)
            company_aggregator_baseinfo.patch_website(company_id)
            company_aggregator_baseinfo.patch_logo(company_id)
            company_aggregator_baseinfo.patch_should_index(company_id)

        if corporate is not None and corporate["modifyUser"] is not None and \
                corporate["active"] is not None and corporate["active"] != 'P':
            # Corporate logged as "modified": only add an alias, and only for
            # the listed sources.
            logger.info("corporate %s modified", corporate["id"])
            if source_company["source"] in [13400, 13401, 13402, 13030, 13022]:
                # add corporate_alias
                corporate_aggregator.add_corporate_alias(
                    source_company_id, company_id, corporate["id"])
        else:
            # insert company_corporate or update company_corporate
            corporate_id = corporate_aggregator.update_corporate(company_id)
            # add corporate_alias
            corporate_aggregator.add_corporate_alias(source_company_id, company_id, corporate_id)
            # add funding corporateId
            corporate_aggregator.add_funding_corporateId(
                company_id, corporate_id)
            # set corporateId
            corporate_aggregator.set_corporateId(company_id, corporate_id)
            company_aggregator_baseinfo.patch_corporate_fullname(corporate_id)

        #double add company fullName
        if company["verify"] == "Y" or (company["modifyUser"] is not None
                                        and company["active"] is not None
                                        and company["active"] != 'P'):
            pass
        else:
            company_aggregator_baseinfo.patch_company_fullname(company_id)
            # company_aggregator_baseinfo.patch_corporate_fullname(corporate_id)

    set_sourcecompany_processstatus(sc["id"])

    if sc["source"] in [13050]:
        # Reset mapChecked on the mongo job.company document for this source
        # record; sourceId is matched both as a string and as an int.
        mongo = db.connect_mongo()
        collection_company = mongo.job.company
        collection_company.update_one(
            {
                "source": sc["source"],
                "sourceId": {
                    "$in": [str(sc["sourceId"]),
                            int(sc["sourceId"])]
                }
            }, {"$set": {
                "mapChecked": None
            }})
        mongo.close()
def aggregator(source_company, test=False):
    """Merge one source-company record into a company (legacy variant).

    Steps, in order:
      1. Resolve ``company_id``: reuse ``sc["companyId"]`` when set, otherwise
         run a ``find_company`` matcher (grade-1 matcher when
         ``aggregateGrade`` is 1). In test mode ``find_company.find_company``
         is called with the ``test`` flag.
      2. Create a new company when nothing matched; stop if creation yields
         no id. Otherwise link the source company and aggregate base info.
      3. Load the company/corporate rows and merge artifacts and members.
      4. If the company has a ``modifyUser``, mark the source company as
         processed and stop.
      5. Merge footprint and aliases, patch derived company fields, update
         corporate data, mark as processed and send a "create" message.

    Args:
        source_company: mapping for the source company row. Keys read here:
            "id", "companyId", "aggregateGrade".
        test: forwarded to the helpers that accept it; several write steps
            are skipped when it is truthy.

    Returns:
        None.

    NOTE(review): this block was recovered from a whitespace-collapsed source;
    the nesting below is a reconstruction. Please confirm in particular that
    the corporate section belongs inside the final ``if not test:`` block.
    """
    sc = source_company
    source_company_id = sc["id"]

    #find company_id
    if not test:
        company_id = sc["companyId"]
        if company_id is not None:
            logger.info("sourceCompanyId=%s has been merged before.", source_company_id)
        else:
            if sc["aggregateGrade"] == 1:
                company_id = find_company.find_company_grade1(sc)
            else:
                company_id = find_company.find_company(sc)
    else:
        #test
        company_id = find_company.find_company(sc, test)
    logger.info("matched company_id=%s", company_id)

    #merge company base info
    if company_id is None:
        logger.info("sourceCompanyId=%s is a new company", source_company_id)
        company_id = company_aggregator_baseinfo.create_company(sc, test)
        logger.info("new company_id %s", company_id)
        if company_id is None:
            # Creation produced no id: mark as processed (non-test only) and stop.
            if not test:
                set_sourcecompany_processstatus(sc["id"])
            return
    else:
        logger.info("sourceCompanyId=%s was found, companyId=%s", sc["id"], company_id)
        if not test:
            aggregator_db_util.update_source_company_found(company_id, source_company_id)
            company_aggregator_baseinfo.aggregate(company_id, source_company_id,)

    # Load the company row and, when it has one, its corporate row.
    conn = db.connect_torndb()
    company = conn.get("select * from company where id=%s", company_id)
    if company["corporateId"] is not None:
        corporate = conn.get("select * from corporate where id=%s", company["corporateId"])
    else:
        corporate = None
    conn.close()

    # merge others
    company_aggregator_artifact.aggregate_artifact(company_id, source_company_id, test)
    company_aggregator_member.aggregate_member(company_id, source_company_id, test)

    if company["modifyUser"] is not None:
        # Company has a modifyUser: stop after artifacts/members were merged.
        logger.info("company %s modified", company["id"])
        set_sourcecompany_processstatus(sc["id"])
        return

    # company_aggregator_funding.aggregate_funding(company_id, sc, test)
    if not test:
        company_aggregator_footprint.aggregate_footprint(company_id, source_company_id)
    #company_aggregator_job.aggregate_job(company_id, source_company_id)
    # news: the ITjuzi news parser aggregates directly; toutian is aggregated separately
    company_aggregator_baseinfo.add_company_alias(company_id, source_company_id, test)

    if not test:
        # conn = db.connect_torndb()
        # company = conn.get("select * from company where id=%s", company_id)
        # conn.close()
        # Verified companies are not patched.
        if company["verify"] == "Y":
            pass
        else:
            company_aggregator_baseinfo.patch_company_establish_date(company_id)
            company_aggregator_baseinfo.patch_company_location(company_id)
            company_aggregator_baseinfo.patch_company_fullname(company_id)
            company_aggregator_baseinfo.patch_company_status(company_id)
            company_aggregator_baseinfo.patch_website(company_id)
            company_aggregator_baseinfo.patch_logo(company_id)
            company_aggregator_baseinfo.patch_should_index(company_id)

        if corporate is not None and corporate["modifyUser"] is not None:
            # Corporate has a modifyUser: leave it untouched.
            logger.info("corporate %s modified", corporate["id"])
        else:
            # insert company_corporate or update company_corporate
            corporate_id = corporate_aggregator.update_corporate(company_id)
            # add corporate_alias
            corporate_aggregator.add_corporate_alias(company_id, corporate_id)
            # add funding corporateId
            corporate_aggregator.add_funding_corporateId(company_id, corporate_id)
            # set corporateId
            corporate_aggregator.set_corporateId(company_id,corporate_id)

    set_sourcecompany_processstatus(sc["id"])
    send_message(company_id,"create")
def aggregator(source_company, test=False, idmax=0):
    """Merge one source-company record into a company with id above ``idmax``.

    Steps, in order:
      1. Ask ``find_company.find_company_grade2`` for candidate ids and take
         the first one whose integer value is greater than ``idmax``.
      2. If none qualifies, create a company via ``create_company_dev``; the
         process exits if creation yields no id. Otherwise aggregate base
         info into the matched company (non-test only).
      3. Load the company/corporate rows; unless the company is logged as
         "modified", merge artifacts, members, footprint and aliases.
      4. Patch derived company fields and update corporate data.
      5. Mark the source company as processed.

    Args:
        source_company: mapping for the source company row. Keys read here:
            "id", "source".
        test: forwarded to the helpers that accept it; several write steps
            are skipped when it is truthy.
        idmax: only candidate company ids strictly greater than this value
            are accepted as a match.

    Returns:
        The id of the matched or newly created company.

    NOTE(review): this block was recovered from a whitespace-collapsed source;
    the nesting below is a reconstruction. Please confirm in particular the
    placement of ``add_company_alias`` (inside the "not modified" branch) and
    of the corporate / "double add" sections (inside ``if not test:``).
    """
    sc = source_company
    source_company_id = sc["id"]

    company_ids = find_company.find_company_grade2(sc, idmax)
    if len(company_ids) == 0:
        company_id = None
    else:
        # Take the first candidate whose id is above the idmax threshold.
        company_id = None
        for cid in company_ids:
            if int(cid)> idmax:
                company_id = cid
                break
    logger.info("matched company_id=%s", company_id)

    #merge company base info
    if company_id is None:
        logger.info("sourceCompanyId=%s is a new company", source_company_id)
        company_id = company_aggregator_baseinfo.create_company_dev(sc, test)
        logger.info("new company_id %s", company_id)
        if company_id is None:
            # NOTE(review): terminates the whole process, not just this call.
            exit()
        else:
            if source_company["source"] not in [13020,13030]:
                # send_message_task(company_id,"company_newcover",source_company["source"])
                pass
    else:
        logger.info("sourceCompanyId=%s was found, companyId=%s", sc["id"], company_id)
        if not test:
            # aggregator_db_util.update_source_company_found(company_id, source_company_id)
            company_aggregator_baseinfo.aggregate(company_id, source_company_id,)

    # Load the company row and, when it has one, its corporate row.
    conn = db.connect_torndb()
    company = conn.get("select * from company where id=%s", company_id)
    if company["corporateId"] is not None:
        corporate = conn.get("select * from corporate where id=%s", company["corporateId"])
    else:
        corporate = None
    conn.close()

    if company["modifyUser"] is not None and company["active"] is not None and company["active"] != 'P':
        # Company logged as "modified": nothing else is merged into it.
        logger.info("company %s modified", company["id"])
        # set_sourcecompany_processstatus(sc["id"])
        # return
    else:
        #merge others
        company_aggregator_artifact.aggregate_artifact(company_id, source_company_id, test)
        company_aggregator_member.aggregate_member(company_id, source_company_id, test)
        # company_aggregator_funding.aggregate_funding(company_id, sc, test)
        if not test:
            company_aggregator_footprint.aggregate_footprint(company_id, source_company_id)
        #company_aggregator_job.aggregate_job(company_id, source_company_id)
        # news: the ITjuzi news parser aggregates directly; toutian is aggregated separately
        company_aggregator_baseinfo.add_company_alias(company_id, source_company_id, test)

    if not test:
        # conn = db.connect_torndb()
        # company = conn.get("select * from company where id=%s", company_id)
        # conn.close()
        # Skip patching for verified companies and for "modified" companies.
        if company["verify"] == "Y" or (
                company["modifyUser"] is not None and company["active"] is not None and company["active"] != 'P'):
            pass
        else:
            company_aggregator_baseinfo.patch_company_establish_date(company_id)
            company_aggregator_baseinfo.patch_company_location(company_id)
            company_aggregator_baseinfo.patch_company_fullname(company_id)
            company_aggregator_baseinfo.patch_company_status(company_id)
            company_aggregator_baseinfo.patch_website(company_id)
            company_aggregator_baseinfo.patch_logo(company_id)
            company_aggregator_baseinfo.patch_should_index(company_id)

        if corporate is not None and corporate["modifyUser"] is not None and \
                corporate["active"] is not None and corporate["active"] != 'P':
            # Corporate logged as "modified": leave it untouched.
            logger.info("corporate %s modified", corporate["id"])
        else:
            # insert company_corporate or update company_corporate
            corporate_id = corporate_aggregator.update_corporate(company_id)
            # add corporate_alias
            corporate_aggregator.add_corporate_alias(source_company_id, company_id, corporate_id)
            # add funding corporateId
            # corporate_aggregator.add_funding_corporateId(company_id, corporate_id)
            # set corporateId
            corporate_aggregator.set_corporateId(company_id,corporate_id)
            company_aggregator_baseinfo.patch_corporate_fullname(corporate_id)

        #double add company fullName
        if company["verify"] == "Y" or (
                company["modifyUser"] is not None and company["active"] is not None and company["active"] != 'P'):
            pass
        else:
            company_aggregator_baseinfo.patch_company_fullname(company_id)
            # company_aggregator_baseinfo.patch_corporate_fullname(corporate_id)

    set_sourcecompany_processstatus(sc["id"])
    # send_message(company_id,"create")
    return company_id
def aggregator(source_company, test=False):
    """Merge one source-company record into a company (new-structure variant).

    Steps, in order:
      1. Resolve ``company_id``: reuse ``sc["companyId"]`` when set, otherwise
         pick a ``find_company`` matcher based on ``aggregateGrade``/``source``.
      2. If nothing matched, create a company via ``create_company_new``
         (skipped for source 13120) and queue a message task; if there is
         still no company, mark the source company as processed and stop.
      3. If a company matched: sources 13120/13130 are only linked and marked
         processed (13130 additionally flips an active='P' company to 'A' and
         queues a "company_funding" task), then the function stops. Other
         sources are linked and their base info aggregated.
      4. Load the company and corporate rows; the process exits if either is
         missing.
      5. Merge artifacts/members (plus footprint and aliases for companies
         not logged as "modified"), patch derived company fields and update
         corporate data.
      6. Mark the source company as processed; for source 13050 also reset
         ``mapChecked`` on the matching mongo ``job.company`` document; then
         send a "create" message.

    Args:
        source_company: mapping for the source company row. Keys read here:
            "id", "companyId", "aggregateGrade", "source", "sourceId".
        test: when truthy the function returns immediately (the test matcher
            is commented out), so nothing is read or written.

    Returns:
        None.

    NOTE(review): this block was recovered from a whitespace-collapsed source;
    the nesting below is a reconstruction. Please confirm in particular the
    placement of ``add_company_alias_new`` (inside the "not modified" branch),
    of the ``patch_corporate_*`` calls (inside the corporate ``else``) and of
    the "company_funding" task (sent for every matched 13130 record).
    """
    sc = source_company
    source_company_id = sc["id"]

    #find company_id
    if not test:
        company_id = sc["companyId"]
        if company_id is not None:
            logger.info("sourceCompanyId=%s has been merged before.", source_company_id)
        else:
            # Choose the matcher from the aggregate grade / source code.
            if sc["aggregateGrade"] == 1:
                company_id = find_company.find_company_grade2(sc)
            elif sc["source"] in [13120]:
                company_id = find_company.find_reference(sc)
            elif sc["source"] in [13400,13401,13402]:
                company_id = find_company.find_company_grade2(sc)
            else:
                company_id = find_company.find_company_new(sc)
    else:
        #test
        # company_id = find_company.find_company(sc, test)
        return
    logger.info("matched company_id=%s", company_id)

    #merge company base info
    if company_id is None:
        #qimingpian
        if source_company['source'] == 13120:
            # Source 13120 never creates a company; it only logs.
            logger.info("reference")
        else:
            logger.info("sourceCompanyId=%s is a new company", source_company_id)
            #create new company and new corporate here with new structure
            # company_id = company_aggregator_baseinfo.create_company(sc, test)
            company_id = company_aggregator_baseinfo.create_company_new(sc, test)
            logger.info("new company_id %s", company_id)
        if company_id is None:
            if not test:
                set_sourcecompany_processstatus(sc["id"])
            # An unmatched source 13120 record always ends up here, since no
            # company is created for it.
            return
        else:
            # A new company was created: queue a message task whose type
            # depends on the source (13050, 13055 and 13121 queue nothing).
            if source_company["source"] not in [13099, 13050, 13055, 13121, 13130]:
                send_message_task(company_id,"company_newcover",source_company["source"])
            elif source_company["source"] in [13130]:
                send_message_task(company_id, "company_create", source_company["source"])
            elif source_company["source"] in [13099]:
                send_message_task(company_id, "gongshang_create_online", source_company["source"])
    else:
        # 13130 crunchbase 13120 qimingpianI(only have short name)
        if source_company['source'] in [13120, 13130]:
            # These sources are only linked to the matched company, not merged.
            aggregator_db_util.update_source_company_found(company_id, source_company_id)
            set_sourcecompany_processstatus(sc["id"])
            # re check for operationTeam
            if source_company["source"] == 13130:
                # victor do not send task for 'P' company
                conn = db.connect_torndb()
                company = conn.get("select * from company where id=%s", company_id)
                if company["active"] in ["P"]:
                    # Flip the company from 'P' to 'A' before queuing the task.
                    conn.update("update company set active='A' where id=%s",company_id)
                conn.close()
                send_message_task(company_id, "company_funding", source_company["source"])
            return
        logger.info("sourceCompanyId=%s was found, companyId=%s", sc["id"], company_id)
        if not test:
            aggregator_db_util.update_source_company_found(company_id, source_company_id)
            company_aggregator_baseinfo.aggregate(company_id, source_company_id,)

    conn = db.connect_torndb()
    company = conn.get("select * from company where id=%s", company_id)

    ##check corporate
    # NOTE(review): each exit() below terminates the whole process and is
    # reached before conn.close().
    if company is None:
        logger.info("company: %s not existed", company_id)
        exit()
    corporate = None
    if company["corporateId"] is not None:
        corporate = conn.get("select * from corporate where id=%s", company["corporateId"])
        if corporate is None:
            logger.info("company:%s|%s has no corporate,please check", company["name"], company["id"])
            exit()
    else:
        logger.info("company:%s|%s has no corporate,please check", company["name"], company["id"])
        exit()
    conn.close()

    if company["modifyUser"] is not None and company["active"] is not None and company["active"] != 'P':
        # Company logged as "modified": only artifacts and members are merged.
        logger.info("company %s modified", company["id"])
        company_aggregator_artifact.aggregate_artifact(company_id, source_company_id, test)
        company_aggregator_member.aggregate_member(company_id, source_company_id, test)
    else:
        #merge others
        company_aggregator_artifact.aggregate_artifact(company_id, source_company_id, test)
        company_aggregator_member.aggregate_member(company_id, source_company_id, test)
        if not test:
            company_aggregator_footprint.aggregate_footprint(company_id, source_company_id)
        #new add_company_alias without type
        company_aggregator_baseinfo.add_company_alias_new(company_id, source_company_id, test)

    # `test` is always falsy here because the test path returned above.
    if not test:
        # conn = db.connect_torndb()
        # company = conn.get("select * from company where id=%s", company_id)
        # conn.close()
        # Skip patching for verified companies and for "modified" companies.
        if company["verify"] == "Y" or (
                company["modifyUser"] is not None and company["active"] is not None and company["active"] != 'P'):
            pass
        else:
            # company_aggregator_baseinfo.patch_company_establish_date(company_id)
            # company_aggregator_baseinfo.patch_company_location(company_id)
            # company_aggregator_baseinfo.patch_company_fullname(company_id)
            company_aggregator_baseinfo.patch_company_status(company_id)
            company_aggregator_baseinfo.patch_website(company_id)
            company_aggregator_baseinfo.patch_logo(company_id)
            # company_aggregator_baseinfo.patch_should_index(company_id)

        if corporate is not None and corporate["modifyUser"] is not None and \
                corporate["active"] is not None and corporate["active"] != 'P':
            # Corporate logged as "modified": only add an alias, and only for
            # the listed sources.
            logger.info("corporate %s modified", corporate["id"])
            if source_company["source"] in [13400, 13401, 13402, 13030, 13022]:
                # add corporate_alias
                # new add_corporate_alias without type
                corporate_aggregator.add_corporate_alias_new(source_company_id, company_id, corporate["id"])
        else:
            # add corporate_alias without type
            corporate_aggregator.add_corporate_alias_new(source_company_id, company_id, corporate["id"])
            # add funding corporateId
            corporate_aggregator.add_funding_corporateId(company_id, corporate["id"])
            # set corporateId
            # corporate_aggregator.set_corporateId(company_id,corporate_id)
            company_aggregator_baseinfo.patch_corporate_fullname_new(corporate["id"])
            company_aggregator_baseinfo.patch_corporate_establish_date(corporate["id"])
            company_aggregator_baseinfo.patch_corporate_location(corporate["id"])

        #double add company fullName
        # Both branches are currently no-ops; the patch call is commented out.
        if company["verify"] == "Y" or (
                company["modifyUser"] is not None and company["active"] is not None and company["active"] != 'P'):
            pass
        else:
            #no need
            pass
            # company_aggregator_baseinfo.patch_company_fullname(company_id)

    set_sourcecompany_processstatus(sc["id"])

    if sc["source"] in [13050]:
        # Reset mapChecked on the mongo job.company document for this source
        # record; sourceId is matched both as a string and as an int.
        mongo = db.connect_mongo()
        collection_company = mongo.job.company
        collection_company.update_one({"source": sc["source"], "sourceId":{"$in":[str(sc["sourceId"]),int(sc["sourceId"])]}},
                                      {"$set": {"mapChecked": None}})
        mongo.close()

    send_message(company_id,"create")