Пример #1
0
    def expand(self):
        """Add companies that are frequently related to the topic's members.

        Does nothing when the topic currently has fewer than 5 companies.
        Otherwise two tallies are built, both restricted to ids that are not
        already members:

        * ``comps`` -- ids returned by ``dbutil.get_company_comps`` for every
          member;
        * ``candidates`` -- the first element of every entry found in the
          ``candidates`` field of the ``comps.candidates`` Mongo documents
          whose ``company`` is a member.

        The most frequent ids of each tally whose count also exceeds a share
        of the member count are written through
        ``dbutil.update_topic_company`` with ``confidence=0.51``.
        """

        members = {
            row.companyId
            for row in dbutil.get_topic_companies(self.db, self.tpid)
        }
        total = len(members)
        if total < 5:
            return

        cursor = self.mongo.comps.candidates.find(
            {'company': {
                '$in': list(members)
            }})
        candidate_lists = [doc.get('candidates', []) for doc in cursor]
        candidates = Counter(
            entry[0] for entry in chain.from_iterable(candidate_lists)
            if entry[0] not in members)
        comps = Counter(
            cid for cid in chain.from_iterable(
                [dbutil.get_company_comps(self.db, member)
                 for member in members])
            if cid not in members)

        # ``//`` keeps the limits integral whatever division mode is active;
        # ``most_common`` cannot take a float.
        for cid, hits in comps.most_common(min(total // 2, 50)):
            if hits > total // 5:
                dbutil.update_topic_company(self.db,
                                            self.tpid,
                                            cid,
                                            confidence=0.51)
        for cid, hits in candidates.most_common(min(total // 5, 30)):
            if hits > total * 0.6:
                dbutil.update_topic_company(self.db,
                                            self.tpid,
                                            cid,
                                            confidence=0.51)
Пример #2
0
def fa_relevant_track():
    """Refresh topic 52 from the ids returned by ``get_all_fund_raising``.

    For each company id a topic message (de-duplicated variant) and a
    topic-company row are upserted. When the message upsert returns a truthy
    id, the two are linked and both ids are pushed through ``send_msg``.
    The topic's last-message time is updated once after the loop.
    """

    # topic52
    global logger_track
    topic_id = 52
    db = dbcon.connect_torndb()
    week_ago = datetime.now() - timedelta(days=7)

    for cid in dbutil.get_all_fund_raising(db):
        logger_track.info('Processing %s' % cid)

        brief = dbutil.get_company_brief(db, cid)
        name = dbutil.get_company_name(db, cid)
        msg = u'%s, %s开启了新一轮融资' % (brief, name)

        # 'Y' only when the company's verify flag is 'Y' and a brief exists.
        verified = dbutil.get_company_verify(db, cid) == 'Y'
        active = 'Y' if (verified and brief) else 'P'

        latest_fa = dbutil.get_company_latest_fa(db, cid, week_ago)
        tpm = dbutil.update_topic_message_withoutdup(db,
                                                     topic_id,
                                                     msg,
                                                     active,
                                                     80,
                                                     latest_fa,
                                                     detail_id=cid)
        tpc = dbutil.update_topic_company(db, topic_id, cid, active)
        if not tpm:
            continue

        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpm, 'topic_message')
        send_msg(tpc, 'topic_company')
        logger_track.info('Updated %s' % cid)

    dbutil.update_last_message_time(db, topic_id)
Пример #3
0
def __topic_29(db, funding):
    """
    Topic 29: "BAT has made a move in these sectors again".

    Applies when the funding has at least one investor among ids
    187, 217, 117. A topic message is upserted; when that returns a truthy
    id the message id and the topic-company id are pushed through
    ``send_msg``.
    """

    tracked_ids = {187, 217, 117}
    matched = set(dbutil.get_funding_investor_ids(db,
                                                  funding.id)) & tracked_ids
    if not matched:
        logger_track.info('29 not for %s' % funding.id)
        return

    publish_status = dbutil.get_topic_auto_pubilsh_status(db, 29)
    active = 'Y' if publish_status == 'Y' else 'P'
    cid = funding.companyId
    investor_names = ','.join(
        dbutil.get_investor_name(db, iid) for iid in matched)
    brief = dbutil.get_company_brief(db, cid)
    name = dbutil.get_company_name(db, cid)
    msg = u'%s投资了%s,%s' % (investor_names, brief, name)

    tpm = dbutil.update_topic_message(db, 29, msg, active, 70, funding.id)
    if tpm:
        send_msg(tpm, 'topic_message')
        tpc = dbutil.update_topic_company(db, 29, cid, active)
        if tpc:
            dbutil.update_topic_message_company(db, tpm, tpc)
        # NOTE(review): sent even when tpc is falsy -- confirm this is intended.
        send_msg(tpc, 'topic_company')
    logger_track.info('29 for %s, add tpm %s' % (funding.id, tpm))
Пример #4
0
def __topic_28(db, funding):
    """
    Topic 28: Sequoia, ZhenFund, Matrix Partners, IDG.

    Applies when the funding has at least one investor among ids
    114, 122, 125, 109. A topic message is upserted; when that returns a
    truthy id the message id and the topic-company id are pushed through
    ``send_msg``.
    """

    topic_id = 28
    funding_investors = set(dbutil.get_funding_investor_ids(db, funding.id))
    hits = funding_investors & {114, 122, 125, 109}
    if not hits:
        logger_track.info('28 not for %s' % funding.id)
        return

    if dbutil.get_topic_auto_pubilsh_status(db, topic_id) == 'Y':
        active = 'Y'
    else:
        active = 'P'
    cid = funding.companyId
    names = [dbutil.get_investor_name(db, iid) for iid in hits]
    msg = u'%s投资了%s,%s' % (','.join(names),
                           dbutil.get_company_brief(db, cid),
                           dbutil.get_company_name(db, cid))

    tpm = dbutil.update_topic_message(db, topic_id, msg, active, 70,
                                      funding.id)
    if tpm:
        send_msg(tpm, 'topic_message')
        tpc = dbutil.update_topic_company(db, topic_id, cid, active)
        if tpc:
            dbutil.update_topic_message_company(db, tpm, tpc)
        # NOTE(review): sent even when tpc is falsy -- confirm this is intended.
        send_msg(tpc, 'topic_company')
    logger_track.info('28 for %s, add tpm %s' % (funding.id, tpm))
Пример #5
0
def __topic_27(db, funding):
    """
    Topic 27: daily exit events.

    Only fundings whose ``round`` is 1110 or 1120 are handled. Round 1120
    produces the "acquired by <investorsRaw>" message, round 1110 the
    "completed listing" message.
    """

    global logger_track
    if funding.round not in (1110, 1120):
        logger_track.info('27 not for %s' % funding.id)
        return

    publish_status = dbutil.get_topic_auto_pubilsh_status(db, 27)
    active = 'Y' if publish_status == 'Y' else 'P'
    cid = funding.companyId
    brief = dbutil.get_company_brief(db, cid)
    name = dbutil.get_company_name(db, cid)
    if funding.round == 1120:
        msg = u'%s,%s被%s收购' % (brief, name, funding.investorsRaw)
    else:
        msg = u'%s,%s完成上市' % (brief, name)

    tpm = dbutil.update_topic_message(db, 27, msg, active, 70, funding.id)
    if tpm:
        send_msg(tpm, 'topic_message')
        tpc = dbutil.update_topic_company(db, 27, cid, active)
        if tpc:
            dbutil.update_topic_message_company(db, tpm, tpc)
        # NOTE(review): sent even when tpc is falsy -- confirm this is intended.
        send_msg(tpc, 'topic_company')
    logger_track.info('27 for %s, add tpm %s' % (funding.id, tpm))
Пример #6
0
    def fit_company(self, cid):
        """Attach company ``cid`` to this topic if it matches every feature group.

        Returns False as soon as one entry of ``self.features`` shares no tag
        with the company's feature tags. Otherwise a topic message and a
        topic-company row are upserted, tracking messages are sent for the
        truthy ids, and True is returned.
        """

        global producer_track, logger_track
        company_features = set(dbutil.get_company_feature_tags(self.db, cid))
        # Every feature group must overlap the company's tags.
        if not all(
                company_features.intersection(tags)
                for tags in self.features.values()):
            return False

        features_used = list(chain.from_iterable(self.features.values()))
        # The comment, when present, supplies the message text and its
        # relate/detail ids.
        comment = dbutil.get_company_tags_comment(self.db, cid, features_used)
        if comment:
            message_args = (comment.get('message'), self.auto_publish,
                            comment.get('relate_type'),
                            comment.get('relate_id'),
                            comment.get('detail_id'),
                            comment.get('comments'))
        else:
            message_args = (u'添加了一家新公司', self.auto_publish, 60, cid)
        tpmid = dbutil.update_topic_message(self.db, self.tpid,
                                            *message_args)
        tpcid = dbutil.update_topic_company(self.db, self.tpid, cid,
                                            self.auto_publish)

        logger_track.info(
            '%s seem to meet requirements, tpm %s, tpc %s, topic %s' %
            (cid, tpmid, tpcid, self.tpid))
        if tpmid:
            self.send_track_msg(tpmid, 'topic_message')
            if tpcid:
                self.send_track_msg(tpcid, 'topic_company')
                task = json.dumps({
                    'source': 'track_topic',
                    'id': cid,
                    'detail': tpcid,
                    'from': 'nlp'
                })
                self.send_msg('task_company', task)
            dbutil.update_topic_message_company(self.db, tpmid, tpcid)

        return True
Пример #7
0
    def fit(self, client):
        """Populate the topic from a search query, then optionally expand it.

        When ``self.rules`` is set, ``client.search('topic', query=...)`` is
        run and the company codes under ``company.data`` are visited in
        reverse order. Each code is resolved to a company id; the company is
        added to the topic when every entry of ``self.features`` shares at
        least one tag with the company's feature tags. Afterwards
        ``self.expand()`` runs if ``self.auto_expand`` is truthy.
        """

        if self.rules:
            result = client.search('topic', query=self.rules)
            codes = result.get('company', {}).get('data', [])
            # reversed() leaves the list owned by the search result untouched.
            for code in reversed(codes or []):
                row = self.db.get('select id from company where code=%s',
                                  code)
                if not row:
                    # The search hit has no matching row in ``company``;
                    # skip it instead of failing on ``None.id``.
                    continue
                cid = row.id
                company_features = set(
                    dbutil.get_company_feature_tags(self.db, cid))
                if all(
                        set(tags) & company_features
                        for tags in self.features.values()):
                    dbutil.update_topic_company(self.db, self.tpid, cid)
        if self.auto_expand:
            self.expand()
Пример #8
0
def __update_company_news(db,
                          mongo,
                          cids,
                          tpid,
                          content=u'发现一家公司',
                          fund_extract=-5,
                          detail_id=None,
                          comments=None):
    """Add companies to a topic and create a news stub for each new one.

    For every id in ``cids`` the topic-company row is upserted with status
    'P'. Only when that returns a truthy id and the company was not in the
    topic beforehand is a document inserted into ``mongo.article.news``
    (titled with the company name, body ``content``). A topic message
    pointing at that document is then created, linked to the topic-company
    row, and both ids are pushed through ``send_msg``.
    """

    for cid in cids:
        existed = dbutil.exist_topic_company(db, tpid, cid)
        tpc = dbutil.update_topic_company(db, tpid, cid, 'P')
        if not tpc or existed:
            continue

        # One timestamp and one name lookup, so the document's three time
        # fields agree and the title matches the topic message text.
        now = datetime.utcnow()
        name = dbutil.get_company_name(db, cid)
        nid = mongo.article.news.insert({
            'date': now,
            'createTime': now,
            'modifyTime': now,
            'title': name,
            'contents': [{
                'content': content,
                'rank': 1
            }],
            'type': 61000,
            'createUser': 139,
            'fund_extract': fund_extract,
            'processStatus': 2,
            'companyIds': [int(cid)],
            'companyCodes': [dbutil.get_company_code(db, cid)],
            'topic_id': tpid
        })
        send_msg(tpc, 'topic_company')
        tpm = dbutil.update_topic_message(db, tpid, name, 'P', 10, str(nid),
                                          detail_id, comments)
        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpm, 'topic_message')
Пример #9
0
def __topic_26(db, funding):
    """
    Topic 26: daily investment and financing bulletin.

    Builds a message from the company's brief and name, worded according to
    ``funding.round``, and upserts it as a topic message. When that returns a
    truthy id, the message id and the topic-company id are pushed through
    ``send_msg``.
    """

    global logger_track, round_desc
    publish_status = dbutil.get_topic_auto_pubilsh_status(db, 26)
    active = 'Y' if publish_status == 'Y' else 'P'
    cid = funding.companyId
    stage = funding.round
    brief = dbutil.get_company_brief(db, cid)
    name = dbutil.get_company_name(db, cid)

    if stage in (0, 1111):
        msg = u'%s, %s完成了新一轮融资' % (brief, name)
    elif stage == 1131:
        msg = u'%s, %s完成了战略合并' % (brief, name)
    else:
        # The remaining wordings all embed the round description.
        if stage in (1105, 1110):
            template = u'%s, %s%s上市'
        elif stage == 1120:
            template = u'%s, %s%s'
        elif stage in (1106, 1130, 1140, 1150, 1160):
            template = u'%s, %s完成了%s'
        else:
            template = u'%s, %s完成了%s融资'
        msg = template % (brief, name, round_desc.get(stage))

    tpm = dbutil.update_topic_message(db, 26, msg, active, 70, funding.id)
    if tpm:
        send_msg(tpm, 'topic_message')
        tpc = dbutil.update_topic_company(db, 26, cid, active)
        if tpc:
            dbutil.update_topic_message_company(db, tpm, tpc)
        # NOTE(review): sent even when tpc is falsy -- confirm this is intended.
        send_msg(tpc, 'topic_company')
    logger_track.info('26 for %s, add tpm %s' % (funding.id, tpm))
Пример #10
0
def __topic_11(db, mongo, funding):
    """
    Topic 11: unicorns / large financing (over 10000w RMB).

    Skips fundings without a ``newsId``. The investment amount is multiplied
    by a per-currency factor when ``funding.precise == 'Y'`` and used as-is
    otherwise. When it reaches 100000000, the title of the linked news
    document becomes a topic message. When the message upsert returns a
    truthy id, the message id and the topic-company id are pushed through
    ``send_msg``.
    """

    global logger_track

    if not funding.newsId:
        return

    # ``or 0`` covers a stored null amount in both branches, so the threshold
    # comparison below never sees None.
    investment = funding.get('investment', 0) or 0
    if funding.precise == 'Y':
        # Presumably conversion factors to RMB keyed by currency code --
        # TODO confirm.
        factor = {
            3010: 6.5,
            3020: 1,
            3030: 5,
            3040: 7,
            3050: 8,
            3070: 0.8
        }.get(funding['currency'], 0)
        investment = factor * investment
    if investment < 100000000:
        return

    active = 'Y' if dbutil.get_topic_auto_pubilsh_status(
        db, 11) == 'Y' else 'P'
    news = mongo.article.news.find_one({'_id': ObjectId(funding.newsId)})
    if news is None:
        # The referenced news document is missing; there is no title to
        # publish.
        logger_track.info('11 skipped for %s, news %s not found' %
                          (funding.id, funding.newsId))
        return
    title = news.get('title', '')

    tpm = dbutil.update_topic_message(db, 11, title, active, 10,
                                      funding.newsId)
    if tpm:
        send_msg(tpm, 'topic_message')
        tpc = dbutil.update_topic_company(db, 11, funding.companyId, active)
        if tpc:
            dbutil.update_topic_message_company(db, tpm, tpc)
        # NOTE(review): sent even when tpc is falsy -- confirm this is intended.
        send_msg(tpc, 'topic_company')
    logger_track.info('11 for %s, add tpm %s' % (funding.id, tpm))
Пример #11
0
    def track_topic_30(self, task):
        """
        Topic 30: first media coverage.

        Loads the news document ``task['news_id']`` and stops when it is older
        than ``self.news_timeliness`` days or carries feature 578349. For each
        company id in the task whose establish date is within the last five
        years and which is referenced by exactly one news document, a topic
        message and a topic-company row are upserted. When the topic
        auto-publishes ('Y'), both ids are sent to "track_message_v2".
        """

        global producer_news_task
        news = list(
            self.mongo.article.news.find({'_id':
                                          ObjectId(task['news_id'])}))[0]
        published = news.get('date')
        if published and published < (
                datetime.now() - timedelta(days=self.news_timeliness)):
            return
        # exclude funding news
        if 578349 in news.get('features', []):
            return

        oldest_establish = (datetime.now() - timedelta(days=365 * 5)).date()
        for cid in task.get('companyIds', []):
            # skip companies established more than 5 years ago
            if dbutil.get_company_establish_date(self.db,
                                                 cid) < oldest_establish:
                continue
            # only companies whose sole news document is this one
            if len(list(self.mongo.article.news.find({'companyIds':
                                                      cid}))) != 1:
                continue

            active = 'Y' if dbutil.get_topic_auto_pubilsh_status(
                self.db, 30) == 'Y' else 'P'
            tpm = dbutil.update_topic_message(self.db, 30,
                                              news.get('title', ''), active,
                                              10, task['news_id'])
            tpc = dbutil.update_topic_company(self.db, 30, cid, active)
            if tpm:
                dbutil.update_topic_message_company(self.db, tpm, tpc)
            if active != 'Y':
                continue

            payloads = [
                json.dumps({
                    'id': tpm,
                    'type': 'topic_message',
                    'action': 'create'
                }),
                json.dumps({
                    'id': tpc,
                    'type': 'topic_company',
                    'action': 'create'
                }),
            ]
            try:
                for payload in payloads:
                    producer_news_task.send_messages("track_message_v2",
                                                     payload)
            except FailedPayloadsError:
                # Re-initialise Kafka and resend both payloads once.
                init_kafka()
                for payload in payloads:
                    producer_news_task.send_messages("track_message_v2",
                                                     payload)
Пример #12
0
    def fit_news(self, record):
        """Attach a news record to this topic if it matches features and searches.

        Returns False when ``processStatus`` is not 1, when the record has no
        features, when a ``self.features`` entry is not matched, or when a
        term of ``self.searches`` is absent from the joined contents.
        Otherwise a topic message is upserted and True is returned. When the
        upsert returns a truthy id, the message and every related
        topic-company row are announced through ``self.send_msg``.
        """

        if record.get('processStatus', 0) != 1:
            return False
        news_features = set(record.get('features') or ())
        # Cheap exit before any company tag lookup.
        if not news_features:
            return False

        # A missing or null ``companyIds`` is treated as "no companies".
        company_ids = record.get('companyIds') or []
        company_features = set(
            chain.from_iterable(
                dbutil.get_company_feature_tags(self.db, cid)
                for cid in company_ids))

        for k, v in self.features.items():
            wanted = set(v)
            if k >= 11500:
                # keys from 11500 up must be matched by the news itself
                if not (wanted & news_features):
                    return False
            elif not ((wanted & company_features) or
                      (wanted & news_features)):
                # lower keys may be matched by either company or news tags
                return False

        # search dimension
        if self.searches:
            contents = ' '.join([
                item.get('content', '') for item in record.get('contents', [])
            ])
            if any(term not in contents for term in self.searches):
                return False

        # update topic message
        tpmsg = dbutil.update_topic_message(self.db, self.tpid,
                                            record.get('title', ''),
                                            self.auto_publish, 10,
                                            record['_id'])
        if tpmsg:
            msg = {
                'type': 'topic_message',
                'id': tpmsg,
                'action': 'create',
                'from': 'nlp'
            }
            self.send_msg("track_message_v2", json.dumps(msg))
            # update topic company and send msg
            for cid in company_ids:
                tpc = dbutil.update_topic_company(self.db, self.tpid, cid,
                                                  self.auto_publish)
                if not tpc:
                    continue
                msg = {
                    'type': 'topic_company',
                    'id': tpc,
                    'action': 'create',
                    'from': 'nlp'
                }
                # relate topic company and message
                dbutil.update_topic_message_company(self.db, tpmsg, tpc)
                self.send_msg("track_message_v2", json.dumps(msg))
                self.send_msg(
                    'task_company',
                    json.dumps({
                        'source': 'track_topic',
                        'id': cid,
                        'detail': tpc
                    }))
        return True