def comps():
    """Flask view: return comparable companies ("comps") for a company.

    Expects a JSON body of the form {'payload': {...}} with keys:
        company -- id of the company to find comps for
        tag     -- optional tag name used to filter comps (0/absent = none)
        start   -- pagination offset (default 0)
        size    -- page size (default 5)

    Returns a JSON response {'company': {'count', 'data'[, 'tags']}}.
    The 'tags' key (suggested tag filters) is only present when no tag
    filter was applied, matching the original behavior.
    """
    global logger_comps
    query = json.loads(request.data).get('payload')
    logger_comps.info('Comps Query, %s' % query)
    cid = query.get('company')
    tag = query.get('tag', 0)
    start = query.get('start', 0)
    size = query.get('size', 5)
    unfiltered = tag == 0
    if unfiltered:
        comps_candidates = dbutil.get_company_comps(g.db, cid)
        logger_comps.info(comps_candidates)
    else:
        # translate the tag name into its id before filtering
        tag = dbutil.get_tag_id(g.db, tag)[0]
        comps_candidates = dbutil.get_filtered_company_comps(g.db, cid, tag)
    # Build the shared part of the payload once; the original duplicated
    # the count/data construction in both branches.
    company = {
        'count': len(comps_candidates),
        'data': [{'id': dbutil.get_company_code(g.db, x)}
                 for x in comps_candidates][start:start + size],
    }
    if unfiltered:
        company['tags'] = dbutil.prompt_tag_filter(g.db, comps_candidates)
    return make_response(jsonify({'company': company}))
def check_apprank():
    """Find apps that newly entered the App Store top-500 rank list.

    Compares today's top-500 snapshot against yesterday's, records each
    new entry into mongo.temp.appstore (type 3017), and prints validator
    links for companies whose app appears to have charted for the first
    time. Output goes to stdout (ops/reporting script).
    """
    mongo = dbcon.connect_mongo()
    db = dbcon.connect_torndb()
    today = datetime.today()
    # today's top-500 entries: (today-1d, today]
    todays = list(
        mongo.trend.appstore_rank.find({
            'date': {
                '$gt': (today - timedelta(days=1)),
                '$lte': today
            },
            'rank': {
                '$lte': 500
            }
        }))
    # yesterday's top-500 entries: (today-2d, today-1d)
    yesterdays = list(
        mongo.trend.appstore_rank.find({
            'date': {
                '$gt': (today - timedelta(days=2)),
                '$lt': (today - timedelta(days=1))
            },
            'rank': {
                '$lte': 500
            }
        }))
    newin = {}   # artifact id -> rank document for newly charted apps
    first = set()  # company ids whose app charted for the first time
    # reduce yesterday's docs to a set of trackIds for fast membership tests
    yesterdays = set(item['trackId'] for item in yesterdays)
    day_thirday = today - timedelta(days=30)  # NOTE(review): unused; likely leftover
    # apps in today's chart that were absent yesterday
    for item in filter(lambda x: x['trackId'] not in yesterdays, todays):
        mongo.temp.appstore.insert_one({
            'type': 3017,
            'createTime': today,
            'item': item
        })
        for aid in dbutil.get_artifacts_from_iOS(db, item['trackId']):
            newin[aid] = item
            # total historic sub-500 appearances for this track/genre/type;
            # exactly 1 means this is the first time it charted
            # NOTE(review): nesting reconstructed from a collapsed line —
            # the query only depends on `item`, so it may originally have
            # sat outside the `aid` loop; confirm against VCS history.
            previous = mongo.trend.appstore_rank.find({
                'trackId': item['trackId'],
                'genre': item['genre'],
                'type': item['type'],
                'rank': {
                    '$lt': 500
                }
            }).count()
            if previous == 1:
                cid = dbutil.get_artifact_company(db, aid)
                print aid, dbutil.get_company_code(db, cid)
                first.add(cid)
    print len(newin), len(first)
    print '\n'.join([
        'http://pro.xiniudata.com/validator/#/company/%s/overview' %
        dbutil.get_company_code(db, cid) for cid in first
    ])
def create_incremental(index=None):
    """Run forever, consuming kafka messages and updating the universal index.

    index -- optional ES cluster selector: 1 or 2 picks a configured
             Elasticsearch host; anything else falls back to the
             UniversalIndexCreator default client.

    Message payloads are JSON with 'action' ('create'/'delete'),
    and a company id under 'id' or '_id'. A 'delete' carrying an
    aliasId/artifactId only re-creates the index (partial delete);
    otherwise the whole company document is removed.
    """
    global logger_universal_index
    if not index:
        client = UniversalIndexCreator()
    elif int(index) == 1:
        host, port = tsbconfig.get_es_config_1()
        client = UniversalIndexCreator(
            Elasticsearch([{
                'host': host,
                'port': port
            }]))
    elif int(index) == 2:
        host, port = tsbconfig.get_es_config_2()
        client = UniversalIndexCreator(
            Elasticsearch([{
                'host': host,
                'port': port
            }]))
    else:
        client = UniversalIndexCreator()
    db = dbcon.connect_torndb()
    consumer_search = init_kafka()
    # outer loop restarts consumption after any non-message-level failure
    while True:
        logger_universal_index.info('Incremental create search1 index starts')
        try:
            for message in consumer_search:
                try:
                    logger_universal_index.info(
                        "%s:%d:%d: key=%s value=%s" %
                        (message.topic, message.partition, message.offset,
                         message.key, message.value))
                    action = json.loads(message.value).get('action', 'create')
                    cid = json.loads(message.value).get('id') or json.loads(
                        message.value).get('_id')
                    if action == 'create':
                        client.create_single(db, cid)
                        logger_universal_index.info(
                            'incremental %s index created' % cid)
                    elif action == 'delete':
                        # alias/artifact deletion: rebuild the doc rather
                        # than dropping the whole company from the index
                        if json.loads(message.value).get('aliasId', False):
                            client.create_single(db, cid)
                            logger_universal_index.info(
                                'incremental %s alias deleted' % cid)
                        elif json.loads(message.value).get(
                                'artifactId', False):
                            client.create_single(db, cid)
                            logger_universal_index.info(
                                'incremental %s artifact deleted' % cid)
                        else:
                            client.delete_index(
                                'universal', dbutil.get_company_code(db, cid))
                            logger_universal_index.info(
                                'incremental %s index deleted' % cid)
                    # commit offsets only after the message was processed
                    consumer_search.commit()
                except Exception, e:
                    # keep consuming on per-message failures
                    logger_universal_index.exception(
                        'Incr exception# %s \n # %s' % (message, e))
        except Exception, e:
            logger_universal_index.exception('Incr outside exception # %s' % e)
def clear_items(self):
    """Triage today's saoanzi items, dump them to CSV and email a summary.

    Each item is marked:
        'P' -- pending: company needs verification (or is inactive); a
               verification task is also generated
        'N' -- rejected: the item's message fails __valid_message
        'Y' -- accepted
    A per-item CSV (name, url, verify-flag, createTime, source) is written
    next to this module under dumps/saoanzi.csv and mailed to the team.
    """
    global logger_sourcing
    file_path = os.path.join(
        os.path.split(os.path.realpath(__file__))[0], u'dumps/saoanzi.csv')
    data = []
    for anzi in dbutil.get_daily_saoanzi_sources(self.db, self.today):
        cactive = dbutil.get_company_active(self.db, anzi.companyId)
        need_verify = self.tcg.need_verify(anzi.companyId)
        if need_verify or (cactive != 'Y'):
            # queue a verification task for this company
            self.tcg.generate_tc(
                json.dumps({
                    'id': anzi.companyId,
                    'source': 'track_saoanzi'
                }))
            dbutil.update_saoanzi_item_status(self.db, anzi.saoanziItemId, 'P')
        elif not self.__valid_message(anzi):
            dbutil.update_saoanzi_item_status(self.db, anzi.saoanziItemId, 'N')
        else:
            dbutil.update_saoanzi_item_status(self.db, anzi.saoanziItemId, 'Y')
        url = "http://pro.xiniudata.com/validator/#/company/%s/overview" \
              % dbutil.get_company_code(self.db, anzi.companyId)
        # sources = ';'.join([s.name for s in dbutil.get_saoanzi_item_sources(self.db, anzi.id)])
        source = anzi.source
        # reuse need_verify as the human-readable CSV column value
        need_verify = u'需要检查' if (need_verify or (cactive != 'Y')) else u'不需要检查'
        data.append([
            dbutil.get_company_name(self.db, anzi.companyId), url, need_verify,
            anzi.createTime, source
        ])
    if not data:
        return
    # send email
    data = pandas.DataFrame(data)
    data.to_csv(file_path, encoding='utf_8_sig')
    # stat_verify = {title: len(set(detail[0])) for title, detail in data.groupby(3)}
    # group by positional column 2 (verify flag); count distinct names (column 0)
    stat_verify = '<br/>'.join([
        '%s\t%s' % (title, len(set(detail[0])))
        for title, detail in data.groupby(2)
    ])
    # stat_source = {title: len(detail) for title, detail in data.groupby(5)}
    # group by positional column 4 (source); count rows
    stat_source = '<br/>'.join([
        '%s\t%s' % (title, len(detail)) for title, detail in data.groupby(4)
    ])
    stat = u'去重公司数<br/>%s<br/>每个源下的公司数<br/>%s\n' % (stat_verify, stat_source)
    receivers = ['victor', 'erin', 'weiguangxiao', 'gewei']
    # NOTE(review): the address template looks redacted — original likely
    # contained a %s placeholder; as written the % would fail. Confirm.
    receivers = ';'.join(['*****@*****.**' % r for r in receivers])
    title = u'扫案子项目列表 %s' % self.current_check_time.strftime('%Y-%m-%d %H')
    content = u'%s检查,今天共有%s个扫案子条目<br/>%s' % \
              (self.current_check_time.strftime('%Y-%m-%d %H:%M'), len(data), stat)
    send_mail_file(u'烯牛扫案子后台',
                   u'烯牛扫案子后台', "*****@*****.**", receivers, title,
                   content, file_path)
def test():
    """Dump (company code, rounded score) pairs for tag id 40 (games)."""
    optimizer = Download_Optimization()
    out_path = "test_nice_download_output"
    with open(out_path, 'w') as out:
        # tid 40: games
        for company_id, download_score in optimizer.get_nice_download_cids(
                tids=[40]):
            code = dbutil.get_company_code(optimizer.db, company_id)
            out.write('%s %s\n' % (code, str(round(download_score, 4))))
def test():
    """Dump (company code, rounded score) pairs for sectors 22, 40 and 107."""
    optimizer = Download_Optimization()
    # sector 40 is games; 22 and 107 added for coverage
    with open('tmp/test_nice_download_output', 'w') as out:
        for company_id, download_score in optimizer.get_nice_download_cids(
                sectors=[22, 40, 107]):
            code = dbutil.get_company_code(optimizer.db, company_id)
            out.write('%s %s\n' % (code, str(round(download_score, 4))))
def __compare(c, fo, db, e):
    """Write one TSV row comparing company c's type-11012 tags before and
    after running extractor `e` on it: code, name, brief, old tags, new tags.
    """
    tag_sql = ('select tag.name name from tag, company_tag_rel rel where tagId=tag.id and tag.type=11012 '
               'and companyId=%s and (rel.active is null or rel.active="Y")')

    def current_tags():
        # comma-joined active type-11012 tag names for the company
        return ','.join([row.name for row in db.query(tag_sql, c.id)])

    before = current_tags()
    e.extract(c.id)  # re-run the extractor; it may rewrite company_tag_rel
    after = current_tags()
    fo.write('%s\t%s\t%s\t%s\t%s\n' % (dbutil.get_company_code(db, c.id),
                                       dbutil.get_company_name(db, c.id),
                                       dbutil.get_company_brief(db, c.id),
                                       before, after))
def dump():
    """Compare Qimingpian (QMP) tags with xiniu extracted tags and dump a TSV.

    For every processed QMP record that maps to a known company, writes
    a line to dumps/20180726:
        qmp_tags \t xiniu_tags \t 1|0 \t url \t brief
    where the third column is 1 when mapped QMP tags overlap with the
    tags our KeywordExtractor produces.
    """
    global mapping  # qmp tag name -> xiniu tag name
    mongo = dbcon.connect_mongo()
    db = dbcon.connect_torndb()
    ke = KeywordExtractor()
    raw = mongo.raw.qmp.find(
        {
            "url": "http://vip.api.qimingpian.com/d/c3",
            "processed": True
        }, {
            'postdata': 1,
            'data.basic': 1
        })
    # NOTE(review): results is never populated — the counter increments
    # below are commented out, so the final print loop emits nothing.
    results = {}
    fo = codecs.open('dumps/20180726', 'w', 'utf-8')
    for qmp in raw:
        basic = qmp.get('data', {}).get('basic')
        # collect QMP industry tags: two hangye fields plus '|'-separated matches
        tags = []
        tags.append(basic.get('hangye1', ''))
        tags.append(basic.get('hangye2', ''))
        tags.extend(basic.get('tags_match', '').split('|'))
        tags = [tag for tag in tags if tag.strip()]
        # map the QMP source id to our company id (source 13121 = QMP)
        sc = db.get(
            'select companyId from source_company where source=13121 and sourceId=%s;',
            qmp['postdata']['id'])
        tag_qmp = set(tags) & set(mapping.keys())
        if not tag_qmp:
            continue
        if not (sc and sc.companyId):
            continue
        orignal = copy(tag_qmp)  # keep the un-mapped QMP tag names for output
        tag_qmp = [mapping.get(tag) for tag in tag_qmp]
        tag_xiniu = [
            dbutil.get_tag_name(db, tid)
            for tid in ke.extract_vip(sc.companyId).keys()
        ]
        url = 'http://www.xiniudata.com/company/%s/overview' % dbutil.get_company_code(
            db, sc.companyId)
        desc = db.get('select brief from company where id=%s;',
                      sc.companyId).brief
        desc = desc.replace('\n', '') if desc else ''
        if set(tag_qmp) & set(tag_xiniu):
            # results[1] = results.get(1, 0) + 1
            fo.write('%s\t%s\t1\t%s\t%s\n' % (','.join(orignal),
                                              ','.join(tag_xiniu), url, desc))
        else:
            fo.write('%s\t%s\t0\t%s\t%s\n' % (','.join(orignal),
                                              ','.join(tag_xiniu), url, desc))
            # results[0] = results.get(0, 0) + 1
    for k, v in results.items():
        print k, v
def dump(colid):
    """Dump every active company of collection `colid` to files/<colid>,
    one 'name, overview-url' line per company.
    """
    db = dbcon.connect_torndb()
    rows = db.query(
        'select companyId from collection_company_rel where collectionId=%s '
        'and (active is null or active="Y");', colid)
    with codecs.open('files/%s' % colid, 'w', 'utf-8') as out:
        for row in rows:
            company_name = dbutil.get_company_name(db, row.companyId)
            overview_url = 'http://www.xiniudata.com/#/company/%s/overview' % dbutil.get_company_code(
                db, row.companyId)
            out.write('%s, %s\n' % (company_name, overview_url))
    db.close()
def __update_company_news(db,
                          mongo,
                          cids,
                          tpid,
                          content=u'发现一家公司',
                          fund_extract=-5,
                          detail_id=None,
                          comments=None):
    """Attach each company in `cids` to topic `tpid` and, on first link,
    publish a discovery news item plus the matching topic messages.
    """
    for cid in cids:
        already_linked = dbutil.exist_topic_company(db, tpid, cid)
        tpc = dbutil.update_topic_company(db, tpid, cid, 'P')
        # fan out news/messages only the first time the link is created
        if not tpc or already_linked:
            continue
        news_doc = {
            'date': datetime.utcnow(),
            'createTime': datetime.utcnow(),
            'modifyTime': datetime.utcnow(),
            'title': dbutil.get_company_name(db, cid),
            'contents': [{
                'content': content,
                'rank': 1
            }],
            'type': 61000,
            'createUser': 139,
            'fund_extract': fund_extract,
            'processStatus': 2,
            'companyIds': [int(cid)],
            'companyCodes': [dbutil.get_company_code(db, cid)],
            'topic_id': tpid
        }
        nid = mongo.article.news.insert(news_doc)
        send_msg(tpc, 'topic_company')
        tpm = dbutil.update_topic_message(db, tpid,
                                         dbutil.get_company_name(db, cid),
                                         'P', 10, str(nid), detail_id,
                                         comments)
        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpm, 'topic_message')
def score():
    """Dump candidate companies and four sub-scores per tag to dumps/rank.

    For each of six hard-coded tags, collects up to 300 candidate
    companies (top by completeness score, by yellow-tag count, by recent
    message count), then writes one TSV row per distinct company:
        tag, name, brief, url, s1..s4
    s1: completeness score (capped at 1); s2: yellow-tag based score;
    s3: log-scaled message count; s4: tag-relation confidence.
    """
    db = dbcon.connect_torndb()
    with codecs.open('dumps/rank', 'w', 'utf-8') as fo:
        for tag in [u'大数据', u'小程序', u'短视频', u'民宿', u'足球', u'咖啡']:
            cids = []
            tid = dbutil.get_tag_id(db, tag)[0]
            # top 100 by completeness score (type 37010)
            complete = db.query(
                'select rel.companyId cid from company_tag_rel rel, company_scores s '
                'where (rel.active="Y" or rel.active is null) and rel.companyId=s.companyId '
                'and s.type=37010 and tagId=%s order by score desc limit 100;',
                tid)
            cids.extend([c.cid for c in complete])
            # top 100 by count of active yellow (type 11100) tags
            yellows = db.query(
                'select companyId cid, count(*) c from company_tag_rel rel, tag '
                'where tag.id=tagId and tag.type=11100 and (tag.active is null or tag.active="Y") '
                'and (rel.active="Y" or rel.active is null) and companyId in '
                '(select distinct companyId from company_tag_rel where tagId=%s '
                'and (active is null or active="Y")) group by companyId order by c desc limit 100;',
                tid)
            cids.extend([c.cid for c in yellows])
            # top 100 by count of active messages since 2018-02-01
            msgs = db.query(
                'select msg.companyId cid, count(*) c from company_message msg, company_tag_rel rel '
                'where msg.active="Y" and msg.companyId=rel.companyId and msg.publishTime>"2018-02-01" '
                'and rel.tagId=%s and (rel.active="Y" or rel.active is null) group by msg.companyId '
                'order by c desc limit 100;', tid)
            cids.extend([c.cid for c in msgs])
            cids = set(cids)  # de-duplicate across the three candidate lists
            for cid in cids:
                name = dbutil.get_company_name(db, cid)
                brief = dbutil.get_company_brief(db, cid)
                url = 'http://www.xiniudata.com/#/company/%s/overview' % dbutil.get_company_code(
                    db, cid)
                s1 = dbutil.get_company_score(db, cid, 37010)
                s1 = 1 if s1 >= 0.5 else s1
                # NOTE(review): under Python 2 the /9 and /4 below floor to 0
                # if the operands are all ints — confirm the helpers return
                # floats (log10 makes s3 a float, s2 depends on the deduction).
                s2 = (len(dbutil.get_company_tags_yellow(db, cid, False)) + 1 -
                      dbutil.get_company_yellow_time_deduction(db, cid)) / 9
                s3 = (log10(
                    len(dbutil.get_company_messages(db, cid, 'Y', '2018-02-01'))
                    + 1)) / 4
                s4 = db.get(
                    'select confidence from company_tag_rel where companyId=%s and tagId=%s;',
                    cid, tid).confidence
                fo.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                         (tag, name, brief, url, s1, round(s2, 2), round(
                             s3, 2), round(s4, 2)))
def create_incremental(index):
    """Run forever, consuming kafka messages and updating search index `index`.

    index -- ES cluster selector: 1 or 2 picks a configured host;
             anything else logs an error and uses the default config.

    Handles actions: 'keyword' (rebuild completion keywords),
    'create' (index one company), 'delete' (drop or rebuild depending
    on whether only an alias/artifact was removed).
    """
    global logger_index, consumer_search, producer_search
    if int(index) == 1:
        host, port = tsbconfig.get_es_config_1()
    elif int(index) == 2:
        host, port = tsbconfig.get_es_config_2()
        # client = IndexCreator(Elasticsearch([{'host': host, 'port': port}]))
    else:
        host, port = tsbconfig.get_es_config()
        # client = IndexCreator(Elasticsearch([{'host': host, 'port': port}]))
        logger_index.error('Not legal elasticsearch config %s, using default' %
                           index)
    client = IndexCreator(Elasticsearch([{'host': host, 'port': port}]))
    i_client = InteriorIndexCreator(
        Elasticsearch([{
            'host': host,
            'port': port
        }]))
    db = dbcon.connect_torndb()
    init_kafka(index)  # sets up the module-level consumer_search
    # outer loop restarts consumption after any non-message-level failure
    while True:
        logger_index.info('Incremental create search%s index starts' % index)
        try:
            for message in consumer_search:
                try:
                    logger_index.info(
                        "%s:%d:%d: key=%s value=%s" %
                        (message.topic, message.partition, message.offset,
                         message.key, message.value))
                    action = json.loads(message.value).get('action', 'create')
                    # sb create a new tag
                    if action == 'keyword':
                        client.create_indice_completion_keywords(db,
                                                                 update=True)
                        # consumer_search.commit()
                        logger_index.info('Update keyword')
                        continue
                    cid = json.loads(message.value).get('id') or json.loads(
                        message.value).get('_id')
                    if action == 'create':
                        client.create_single(db, cid)
                        i_client.create_index(db, cid)
                        logger_index.info('incremental %s index created' % cid)
                    elif action == 'delete':
                        # alias/artifact deletion: rebuild the doc rather
                        # than dropping the whole company from the index
                        if json.loads(message.value).get('aliasId', False):
                            client.create_single(db, cid)
                            i_client.create_index(db, cid)
                            logger_index.info('incremental %s alias deleted' %
                                              cid)
                        elif json.loads(message.value).get(
                                'artifactId', False):
                            client.create_single(db, cid)
                            i_client.create_index(db, cid)
                            logger_index.info(
                                'incremental %s artifact deleted' % cid)
                        else:
                            client.delete_index(
                                'company', dbutil.get_company_code(db, cid))
                            client.delete_index('completion', cid)
                            i_client.create_index(db, cid)
                            logger_index.info('incremental %s index deleted' %
                                              cid)
                    # commit offsets only after the message was processed
                    consumer_search.commit()
                except Exception, e:
                    # keep consuming on per-message failures
                    logger_index.exception('Incr exception# %s \n # %s' %
                                           (message, e))
        except Exception, e:
            logger_index.exception('Incr outside exception # %s' % e)
def create_single(self, db, cid):
    """
    create a single index for a particular company, completion id consists of its type and original id, including
    cxxxx, fxxx, axxxx, pxxxx, nxxxx, standing for company, full, artifact, product, nick
    kxxxx, keyword

    Builds the `company` document (ranking score, names/aliases, team,
    tags, description, round, investors, members, location, establish
    date) and the `completion` suggestion entry for company `cid`.
    """
    # check whether to index this cid
    if not dbutil.get_company_index_type(db, cid):
        self.logger.info('should not index %s' % cid)
        return
    company = {}
    alias = set()
    company_score = dbutil.get_company_score(db, cid, 37020)
    company['ranking_score'] = company_score
    # normalized short name: lowercase, spaces stripped
    name = dbutil.get_company_name(db, cid).lower().replace(' ', '')
    code = dbutil.get_company_code(db, cid)
    company['cid'] = code
    completion = {
        'id': cid,
        '_name': name,
        '_code': code,
        '_prompt': 'name',
    }
    # First, Names
    # short name, plus its pinyin transliteration
    alias.add(name.lower())
    alias.add(''.join(lazy_pinyin(name.lower())))
    # full name
    full = dbutil.get_company_corporate_name(db, cid, False)
    if full and full.strip():
        alias.add(full.lower())
        # TODO temp solution: also index the name with city prefixes removed
        alias.add(full.lower().replace(u'北京', '').replace(u'上海',
                                                          '').replace(u'深圳',
                                                                      ''))
    # artifact name
    aresults = dbutil.get_artifact_idname_from_cid(db, cid, True)
    if aresults:
        alias.update([
            self.valid_name(aname) for _, aname in aresults
            if self.valid_name(aname)
        ])
    # alias (skipped when suspiciously many, >= 20)
    aliass = dbutil.get_alias_idname(db, cid)
    if aliass and len(aliass) < 20:
        alias.update([
            self.valid_name(aname) for _, aname in aliass
            if self.valid_name(aname)
        ])
    # corporate name
    corporate = dbutil.get_company_corporate_name(db, cid)
    if corporate and corporate.strip():
        alias.add(corporate.lower())
    # corporate full name
    corporate_full = dbutil.get_company_corporate_name(db, cid, False)
    if corporate_full and corporate_full.strip():
        alias.add(corporate_full.lower())
    # corporate alias (same >= 20 sanity cap)
    corporate_alias = dbutil.get_corporate_alias(db, cid)
    if corporate_alias and len(corporate_alias) < 20:
        alias.update([
            self.valid_name(aname) for aname in corporate_alias
            if self.valid_name(aname)
        ])
    # check if there is a relevant digital coin
    dt = dbutil.get_company_digital_coin_info(db, cid)
    if dt:
        alias.add(dt.symbol.lower())
        # short name
        if dt.name:
            alias.add(dt.name.lower().replace(' ', ''))
        # english name
        if dt.enname:
            alias.add(dt.enname.lower())
    # create indice names
    completion['completionName'] = list(alias)
    company['name'] = name.lower()
    company['alias'] = self.analyze_names(alias)
    # Second, team identify, investor identify
    team = self.identifier.identify(cid)
    if team and len(team) > 0:
        company['team'] = team
    # tag 309129 marks investor companies
    if dbutil.exist_company_tag(db, cid, 309129):
        company['investor'] = 44010
    # Third, keywords
    # regular tag
    tags_info = dbutil.get_company_tags_idname(db,
                                               cid,
                                               tag_out_type=(11000, 11001,
                                                             11002))
    if tags_info:
        for tid, tname, weight in tags_info:
            company.setdefault('tags', []).append(tname.lower())
    # yellows, --> forget y take this out
    yellows = dbutil.get_company_tags_yellow(db, cid)
    if yellows:
        company['yellows'] = [yellow.lower() for yellow in yellows]
    # Forth, description: segmented, stopwords and single chars removed
    desc = dbutil.get_company_solid_description(db, cid)
    if desc and desc.strip():
        desc = filter(lambda x: (x not in self.stopwords) and len(x) > 1,
                      list(self.seg.cut4search(desc)))
        company['description'] = (' '.join(desc)).lower()
    # Fifth, round and investors and members
    company['round'] = dbutil.get_company_round(db, cid)
    company['investors'] = dbutil.get_company_investor_names(db, cid)
    company['members'] = [
        name for _, name in dbutil.get_member_idname(db, cid)
    ]
    # Sixth, location
    lid, lname = dbutil.get_company_location(db, cid)
    company['location'] = lid
    # Seventh, establish date, create date, count of company message
    establish_date = dbutil.get_company_establish_date(db, cid)
    try:
        # YYYYMM as an int; establish_date may be None/invalid, hence the guard
        company['established'] = int(establish_date.strftime('%Y%m'))
    except Exception, e:
        pass
def memorize(self, tid, today=None):
    """Record daily engagement statistics for tag `tid` into mongo.

    Upserts one document per subtype into keywords.trend_statistc,
    keyed by (tag, date, subtype):
        company_visit     -- company-page visits for tagged companies
        company_subscribe -- subscriptions to tagged companies
        news_relevant     -- news search hits for the tag name
        news_read         -- reads of those news items
        search_precise    -- logged-in searches for the exact tag name

    today -- reference datetime; defaults to now. The log windows below
             cover (today-32h, today-8h], presumably to align with a
             timezone offset — TODO confirm.
    """
    global logger_tt
    if not today:
        today = datetime.today()
    yesterday = today - timedelta(days=1)
    today_int = int(today.strftime('%Y%m%d'))
    tag = dbutil.get_tag_info(self.db, tid, 'name')
    logger_tt.info('Start to process %s' % tid)
    # relevant companies
    cids = dbutil.get_company_from_tags(self.db, [tid])
    codes = [dbutil.get_company_code(self.db, cid) for cid in cids]
    visits = self.mongo.log.user_log.find({
        'time': {
            '$gt': today - timedelta(hours=32),
            '$lte': today - timedelta(hours=8)
        },
        'requestURL': "/xiniudata-api/api2/service/company/basic",
        'jsonRequest.payload.code': {
            '$in': codes
        }
    })
    # visits = list(visits)
    # visits = [visit['jsonRequest']['payload']['code'] in codes for visit in visits]
    # upsert (last arg True) keyed by tag/date/subtype
    self.mongo.keywords.trend_statistc.update(
        {
            'tag': tid,
            'date': datetime.fromordinal(today.date().toordinal()),
            'subtype': 'company_visit'
        }, {'$set': {
            'type': 'company',
            'weight': len(list(visits))
        }}, True)
    subscriptions = dbutil.get_company_subscription_details(
        self.db, yesterday.strftime('%Y-%m-%d'), today.strftime('%Y-%m-%d'),
        *cids)
    self.mongo.keywords.trend_statistc.update(
        {
            'tag': tid,
            'date': datetime.fromordinal(today.date().toordinal()),
            'subtype': 'company_subscribe'
        }, {'$set': {
            'type': 'company',
            'weight': len(subscriptions)
        }}, True)
    # logger_tt.info('Company done')
    # relevant news: full-text search for the tag name on today's news
    news = self.search_client.search('general',
                                     input=tag,
                                     filters={
                                         'date': today_int
                                     },
                                     size=500).get('news', {})
    news = list(news.get('data', []))
    self.mongo.keywords.trend_statistc.update(
        {
            'tag': tid,
            'date': datetime.fromordinal(today.date().toordinal()),
            'subtype': 'news_relevant'
        }, {'$set': {
            'type': 'news',
            'weight': len(news)
        }}, True)
    # logger_tt.info('News searched')
    news_read = self.mongo.log.user_log.find({
        'time': {
            '$gt': today - timedelta(hours=32),
            '$lte': today - timedelta(hours=8)
        },
        'requestURL': self.news_read_url,
        'jsonRequest.payload.newsId': {
            '$in': news
        }
    })
    self.mongo.keywords.trend_statistc.update(
        {
            'tag': tid,
            'date': datetime.fromordinal(today.date().toordinal()),
            'subtype': 'news_read'
        }, {'$set': {
            'type': 'news',
            'weight': len(list(news_read))
        }}, True)
    # logger_tt.info('News done')
    # search: exact-input searches from logged-in users only
    search = self.mongo.log.search.find({
        'time': {
            '$gt': today - timedelta(hours=32),
            '$lte': today - timedelta(hours=8)
        },
        'query.input': tag,
        'userId': {
            '$ne': None
        }
    })
    self.mongo.keywords.trend_statistc.update(
        {
            'tag': tid,
            'date': datetime.fromordinal(today.date().toordinal()),
            'subtype': 'search_precise'
        }, {'$set': {
            'type': 'search',
            'weight': len(list(search))
        }}, True)
def test_get_code():
    """Smoke test: write company 261's code into a scratch file."""
    optimizer = Download_Optimization()
    with open("test_do", 'w') as out:
        out.write(dbutil.get_company_code(optimizer.db, 261))
def create_single(self, db, cid):
    """Build the universal-index document for company `cid`.

    Assembles names/aliases (incl. pinyin, corporate names, artifacts,
    digital-coin names), tags and nested industry/topic tags, sector,
    description, round/status, investors, members, location and
    establish date into a single `company` dict.
    """
    global logger_universal_index
    # check whether to index this cid
    if not dbutil.get_company_index_type(db, cid):
        logger_universal_index.info('should not index %s' % cid)
        return
    company = {}
    alias, artifacts = set(), set()  # NOTE(review): artifacts appears unused here
    company['ranking_score'] = dbutil.get_company_score(db, cid, 37020)
    # normalized short name: lowercase, spaces stripped
    name = dbutil.get_company_name(db, cid).lower().replace(' ', '')
    code = dbutil.get_company_code(db, cid)
    company['id'] = code
    # short name, plus its pinyin transliteration
    alias.add(name.lower())
    alias.add(''.join(lazy_pinyin(name.lower())))
    # full name
    full = dbutil.get_company_corporate_name(db, cid, False)
    if full and full.strip():
        alias.add(full.lower())
        # also index the name with common city prefixes removed
        alias.add(full.lower().replace(u'北京', '').replace(u'上海', '').replace(
            u'深圳', '').replace(u'成都', ''))
    # artifact name
    aresults = dbutil.get_artifact_idname_from_cid(db, cid, True)
    if aresults:
        alias.update([
            self.valid_name(aname) for _, aname in aresults
            if self.valid_name(aname)
        ])
    # alias (skipped when suspiciously many, >= 20)
    aliass = dbutil.get_alias_idname(db, cid)
    if aliass and len(aliass) < 20:
        alias.update([
            self.valid_name(aname) for _, aname in aliass
            if self.valid_name(aname)
        ])
    # corporate name
    corporate = dbutil.get_company_corporate_name(db, cid)
    if corporate and corporate.strip():
        alias.add(corporate.lower())
    # corporate full name
    corporate_full = dbutil.get_company_corporate_name(db, cid, False)
    if corporate_full and corporate_full.strip():
        alias.add(corporate_full.lower())
    # corporate alias (same >= 20 sanity cap)
    corporate_alias = dbutil.get_corporate_alias(db, cid)
    if corporate_alias and len(corporate_alias) < 20:
        alias.update([
            self.valid_name(aname) for aname in corporate_alias
            if self.valid_name(aname)
        ])
    # check if there is a relevant digital coin
    dt = dbutil.get_company_digital_coin_info(db, cid)
    if dt:
        alias.add(dt.symbol.lower())
        # short name
        if dt.name:
            alias.add(dt.name.lower().replace(' ', ''))
        # english name
        if dt.enname:
            alias.add(dt.enname.lower())
    # create indice names
    company['name'] = name.lower()
    company['alias'] = self.analyze_names(alias)
    # tag
    tags_info = dbutil.get_company_tags_idname(db,
                                               cid,
                                               tag_out_type=(11000, 11001,
                                                             11002))
    if tags_info:
        for tid, tname, weight in tags_info:
            company.setdefault('tags', []).append(tname.lower())
            company.setdefault('features', []).append(tid)
    company['nested_tag'] = []
    for industry in dbutil.get_company_industries(db, cid):
        company.setdefault('nested_tag', []).append({
            'id': industry.industryId,
            'published': industry.publishTime,
            "category": "industry"
        })
    for topic in dbutil.get_company_topics(db, cid):
        msg_publish = dbutil.get_topic_message_company_publish(db, topic)
        company.setdefault('nested_tag', []).append({
            'id': topic.topicId,
            'published': msg_publish,
            "category": "topic"
        })
        # topics may carry an extra searchable tag name
        topic_tag = self.topic_tags.get(topic.topicId)
        if topic_tag:
            company.setdefault('tags', []).append(topic_tag.lower())
    sectors = dbutil.get_company_sector_tag(db, cid)
    company['sector'] = sectors
    # description: segmented, stopwords and single chars removed
    desc = dbutil.get_company_solid_description(db, cid)
    if desc and desc.strip():
        desc = filter(lambda x: (x not in self.stopwords) and len(x) > 1,
                      list(self.seg.cut4search(desc)))
        company['description'] = (' '.join(desc)).lower()
    # round and investors and members
    round = dbutil.get_company_round(db, cid)  # NOTE: shadows builtin round
    # round 0 (unknown) is indexed as sentinel 1000
    company['round'] = 1000 if round == 0 else round
    company['sort_round'] = dbutil.get_round_sort(db, company.get('round'))
    status = dbutil.get_company_status(db, cid)
    if status in {2020, 2025}:
        company['status'] = status
    elif dbutil.get_company_ipo_status(db, cid):
        company['status'] = -1
    else:
        company['status'] = -2
    company['investors'] = dbutil.get_company_investor_names(db, cid)
    company['investorId'] = dbutil.get_company_investors(db, cid)
    company['members'] = [
        name for _, name in dbutil.get_member_idname(db, cid)
    ]
    # location
    lid, lname = dbutil.get_company_location(db, cid)
    company['location'] = lid
    # establish date, create date, count of company message
    establish_date = dbutil.get_company_establish_date(db, cid)
    try:
        # YYYYMM as an int; establish_date may be None/invalid, hence the guard
        company['established'] = int(establish_date.strftime('%Y%m'))
    except Exception, e:
        pass