def process():
    """Run the itjuzi news parser over everything find_process() returns.

    Each item's "key_int" and "url" are logged, the item is handed to
    parser(), and it is marked processed only when parser() returns a
    truthy value.
    """
    logger.info("itjuzi_news_parser begin...")
    for entry in parser_db_util.find_process(SOURCE, TYPE):
        logger.info(entry["key_int"])
        logger.info(entry["url"])
        if not parser(entry):
            # Not marked as processed, so the item stays eligible for
            # whatever find_process() selects on.
            continue
        parser_db_util.update_processed(entry["_id"])
    logger.info("itjuzi_news_parser end.")
def process():
    """Run the itjuzi investor-firm parser over everything find_process() returns.

    For each item the "key" and "url" are logged and parser() is called.
    A non-None result is stored with save_investfirm() and the item is
    then marked processed; a None result leaves the item untouched.
    """
    logger.info("itjuzi_investorfirm_parser begin...")
    pending = parser_db_util.find_process(SOURCE, TYPE)
    for entry in pending:
        logger.info(entry["key"])
        logger.info(entry["url"])
        firm = parser(entry)
        if firm is not None:
            parser_db_util.save_investfirm(firm, SOURCE, download_crawler)
            parser_db_util.update_processed(entry["_id"])
    logger.info("itjuzi_investorfirm_parser end.")
def process():
    """Run the Demo8 company parser over everything find_process() returns.

    Each item is passed to parse_base(). When that yields a result, the
    company, its "score" and its "artifacts" are saved (in that order)
    and the item is marked processed. Items for which parse_base()
    returns None are skipped without being marked.
    """
    logger.info("Demo8_next_parser begin...")
    for entry in parser_db_util.find_process(SOURCE, TYPE):
        logger.info(entry["url"])
        company = parse_base(entry)
        if company is None:
            continue

        company_id = parser_db_util.save_company(company, SOURCE)
        logger.info("source_company_id=%s", company_id)
        # Score and artifacts are both keyed by the id save_company() returned.
        parser_db_util.save_company_score(company_id, company["score"])
        parser_db_util.save_artifacts(company_id, company["artifacts"])
        parser_db_util.update_processed(entry["_id"])
    logger.info("Demo8_next_parser end.")
def process():
    """Run the itjuzi company parser over everything find_process() returns.

    Each item is passed to parse_base(). For a non-None result the company
    and its "score" are saved, the parsed artifacts are filtered and tagged
    via url_helper.get_market(), the surviving artifacts are saved, and the
    item is marked processed. Items for which parse_base() returns None are
    skipped without being marked.
    """
    logger.info("itjuzi_next_parser begin...")
    items = parser_db_util.find_process(SOURCE, TYPE)
    for item in items:
        logger.info(item["url"])
        r = parse_base(item)
        if r is None:
            continue

        source_company_id = parser_db_util.save_company(
            r, SOURCE, download_crawler)
        logger.info("source_company_id=%s", source_company_id)
        parser_db_util.save_company_score(source_company_id, r["score"])

        artifacts = []
        for artifact in r["artifacts"]:
            # `market_type` replaces a local that was named `type` and
            # shadowed the builtin.
            market_type, _app_market, app_id = url_helper.get_market(
                artifact["link"])
            if market_type is None:
                # Link not recognised by get_market(): drop the artifact.
                continue
            if market_type in (4040, 4050) and app_id is None:
                # These two types are dropped when no app id was extracted.
                continue
            # NOTE(review): the original indentation was lost; this assumes
            # artifacts of every recognised type are kept, not only
            # 4040/4050 -- confirm against the original file.
            artifact["type"] = market_type
            artifact["domain"] = app_id
            artifacts.append(artifact)

        parser_db_util.save_artifacts(source_company_id, artifacts)
        parser_db_util.update_processed(item["_id"])
    logger.info("itjuzi_next_parser end.")
def process():
    """Run the 36kr company parser over everything find_process() returns.

    Each item is passed to parse_base(). For a non-None result the company,
    its "score" and its "artifacts" are saved and the item is marked
    processed. Any exception raised while saving is logged and the loop
    moves on to the next item, leaving the failed item unmarked.
    """
    logger.info("36kr_next_parser begin...")
    items = parser_db_util.find_process(SOURCE, TYPE)
    for item in items:
        logger.info(item["url"])
        r = parse_base(item)
        if r is None:
            continue

        try:
            source_company_id = parser_db_util.save_company(
                r, SOURCE, download_crawler)
            logger.info("source_company_id=%s", source_company_id)
            parser_db_util.save_company_score(source_company_id, r["score"])
            parser_db_util.save_artifacts(source_company_id, r["artifacts"])
            parser_db_util.update_processed(item["_id"])
        except Exception as ex:
            # Best-effort batch: one bad item must not stop the run, but the
            # traceback is kept so the failure can be diagnosed.
            # `as ex` (valid since Python 2.6) replaces the Python-2-only
            # `except Exception, ex` spelling.
            logger.exception(ex)
    logger.info("36kr_next_parser end.")
def process():
    """Run the itjuzi funding parser over everything find_process() returns.

    parse() has three outcomes per item:
      * None -- the item is skipped and left unmarked;
      * -1   -- nothing is saved, but the item is marked processed;
      * anything else -- passed to save_funding(); the item is marked
        processed only when the returned flag is truthy.

    After the loop the module-level `nokeys` value is logged.
    """
    logger.info("itjuzi_funding_parser2 begin...")
    # Debugging aid: to re-run a single record, swap the find_process() call
    # for [parser_db_util.find_process_one(SOURCE, TYPE, 9551657)].
    for entry in parser_db_util.find_process(SOURCE, TYPE):
        logger.info(entry["url"])
        funding = parse(entry)
        if funding is None:
            continue

        if funding == -1:
            done = True
        else:
            done, _source_funding_id = parser_db_util.save_funding(funding, 13030)

        if done:
            parser_db_util.update_processed(entry["_id"])
    logger.info("itjuzi_funding_parser2 end.")
    logger.info(nokeys)
def process():
    """Run the fellowPlus investor parser over everything find_process() returns.

    Each item is passed to parser(). The result is stamped with the current
    time, SOURCE, TYPE and the item's "url" and "key", then written to
    `collection_investor`. Any existing document with the same
    (source, type, key) is deleted first, so each key has at most one
    document written by this function.

    NOTE(review): update_processed() is not called here (the call was
    commented out in the original), so the same items appear to be picked
    up again on every run -- confirm this is intended.
    """
    logger.info("fellowPlus_investor_parser begin...")
    items = parser_db_util.find_process(SOURCE, TYPE)
    # Debugging aid: to re-run a single record use
    # items = [parser_db_util.find_process_one_key(SOURCE, TYPE, "126_9")]
    for item in items:
        key = item["key"]
        info = parser(item)
        if info is None:
            # Matches the sibling parsers, which skip items that produce no
            # result instead of failing on the first field assignment.
            logger.info("fellowPlus_investor_parser: no result for key %s", key)
            continue

        info["createTime"] = datetime.datetime.now()
        info["source"] = SOURCE
        info["type"] = TYPE
        info["url"] = item["url"]
        info["key"] = key

        # One filter shared by the lookup and the delete so they cannot drift.
        selector = {"source": SOURCE, "type": TYPE, "key": key}
        if collection_investor.find_one(selector) is not None:
            collection_investor.delete_one(selector)
        collection_investor.insert_one(info)
    logger.info("fellowPlus_investor_parser end.")