Exemplo n.º 1
0
def comps():
    """Answer a comps query with one page of comparable companies as JSON.

    The request body is parsed as JSON and its ``payload`` object is read for:

    * ``company`` -- id handed to the ``dbutil`` comps lookups.
    * ``tag`` -- tag used to narrow the comps; ``0`` (the default) means
      no tag filtering.
    * ``start`` / ``size`` -- paging window over the comps (defaults 0 and 5).

    The response wraps a single ``company`` entry holding the total
    ``count`` of comps and the requested page in ``data`` (a list of
    ``{'id': <company code>}`` dicts). When no tag is given the entry also
    carries ``tags`` as returned by ``dbutil.prompt_tag_filter``.
    """
    # logger_comps is only read here, so no ``global`` declaration is needed.
    query = json.loads(request.data).get('payload')
    logger_comps.info('Comps Query, %s' % query)
    cid = query.get('company')
    tag = query.get('tag', 0)
    start = query.get('start', 0)
    size = query.get('size', 5)

    unfiltered = tag == 0
    if unfiltered:
        comps_candidates = dbutil.get_company_comps(g.db, cid)
        logger_comps.info(comps_candidates)
    else:
        tag = dbutil.get_tag_id(g.db, tag)[0]
        comps_candidates = dbutil.get_filtered_company_comps(g.db, cid, tag)

    # Slice before resolving company codes: the page is identical, but only
    # the requested entries trigger a lookup, and no ``map`` result is
    # subscripted (``map`` returns a non-subscriptable iterator on Python 3).
    page = list(comps_candidates)[start:start + size]
    company = {
        'count': len(comps_candidates),
        'data': [{'id': dbutil.get_company_code(g.db, x)} for x in page],
    }
    if unfiltered:
        company['tags'] = dbutil.prompt_tag_filter(g.db, comps_candidates)
    return make_response(jsonify({'company': company}))
Exemplo n.º 2
0
    def match(self, cid):
        """Score funds against company ``cid`` and dump the ranking to disk.

        Steps:

        1. Build a ``name -> weight`` dict from the company's tags and run
           it through ``self.transformer``.
        2. Compute ``cosine_similarity`` between ``self.fund_profiles`` and
           the transformed tags, keyed by ``self.fund_mapping`` (row index
           -> fund key; presumably an investor id -- verify).
        3. Adjust the score of every investor found on the company's comps.
        4. Write one tab-separated line per fund to ``dumps/fund``, in
           descending score order.

        Returns nothing; the only output is the file.
        """
        from collections import Counter

        tags = {
            name: weight
            for _, name, weight in dbutil.get_company_tags_idname(
                self.db, cid, tag_out_type=(11000, 11001, 11002, 11100, 11054))
        }
        tags = self.transformer.transform(tags)
        similarities = cosine_similarity(self.fund_profiles, tags)
        # Only the first column of each similarity row is used.
        similarities = {
            self.fund_mapping.get(index): row[0]
            for index, row in enumerate(similarities)
        }
        # Disabled filters, kept for reference:
        # similarities = filter(lambda item: 1020 <= self.fund_rounds.get(item[0]) < 1040, similarities)
        # similarities = filter(lambda item: self.activeness.get(item[0]) > 3, similarities)
        investor_comps = list(
            chain.from_iterable(
                dbutil.get_company_investors(self.db, comp)
                for comp in dbutil.get_company_comps(self.db, cid)))
        # Count every investor once up front instead of calling list.count()
        # per investor, which rescanned the whole list each time.
        occurrences = Counter(investor_comps)
        for ic in set(investor_comps):
            # NOTE(review): evaluates as (similarity * count) + 1, so an
            # investor without a similarity entry ends up with exactly 1 --
            # confirm this grouping is intended.
            similarities[ic] = similarities.get(ic, 0) * occurrences[ic] + 1
        similarities = sorted(similarities.items(), key=lambda x: -x[1])
        famous = set(dbutil.get_online_investors(self.db))
        with codecs.open('dumps/fund', 'w', 'utf-8') as fo:
            for iid, weight in similarities:
                fo.write('%s\t%s\t%s\t%s\t%s\t%s\n' %
                         (self.funds.get(iid), weight, iid
                          in famous, self.fund_rounds.get(iid),
                          self.activeness.get(iid), self.locations.get(iid)))
Exemplo n.º 3
0
    def expand(self):
        """Add frequently co-occurring companies to topic ``self.tpid``.

        Does nothing when the topic has fewer than five companies.
        Otherwise two tallies are built over companies that are not already
        in the topic:

        * ``comps`` -- how often each company is returned by
          ``dbutil.get_company_comps`` for the topic's companies.
        * ``candidates`` -- how often each company appears as the first
          element of an entry in the ``candidates`` lists stored in
          ``self.mongo.comps.candidates`` for the topic's companies.

        The most common entries of each tally that clear their threshold
        are written back through ``dbutil.update_topic_company`` with
        ``confidence=0.51``.
        """
        companies = set(
            c.companyId
            for c in dbutil.get_topic_companies(self.db, self.tpid))
        total = len(companies)
        if total < 5:
            return

        candidate_lists = [
            doc.get('candidates', [])
            for doc in self.mongo.comps.candidates.find(
                {'company': {
                    '$in': list(companies)
                }})
        ]
        candidates = Counter(
            entry[0] for entry in chain.from_iterable(candidate_lists)
            if entry[0] not in companies)
        comps = Counter(
            comp_id
            for cid in companies
            for comp_id in dbutil.get_company_comps(self.db, cid)
            if comp_id not in companies)

        # Floor division keeps these limits and thresholds integral on both
        # Python 2 and 3; with true division, most_common() would receive a
        # float and raise TypeError.
        for comp_id, hits in comps.most_common(min(total // 2, 50)):
            if hits > total // 5:
                dbutil.update_topic_company(self.db,
                                            self.tpid,
                                            comp_id,
                                            confidence=0.51)
        for candidate_id, hits in candidates.most_common(min(total // 5, 30)):
            if hits > total * 0.6:
                dbutil.update_topic_company(self.db,
                                            self.tpid,
                                            candidate_id,
                                            confidence=0.51)
Exemplo n.º 4
0
def company_sample():
    """Write a random sample of recently tasked companies and their comps.

    Takes the ``companyId`` of the 500 newest ``visit_local`` documents in
    ``mongo.task.company`` (newest by ``_id``), draws up to 100 of them and
    writes two files:

    * ``files/sample`` -- the sampled ids, one per line.
    * ``files/sample.old`` -- one ``<id>#<comp>,<comp>,...`` line per
      sampled id, using ``dbutil.get_company_comps``.
    """
    db = dbcon.connect_torndb()
    mongo = dbcon.connect_mongo()
    recent = mongo.task.company.find({
        'types': 'visit_local'
    }).sort('_id', DESCENDING).limit(500)
    pool = [r.get('companyId') for r in recent]
    # Cap the sample size at the pool size: assuming ``sample`` is
    # random.sample, asking for more items than exist raises ValueError.
    cids = sample(pool, min(100, len(pool)))
    with codecs.open('files/sample', 'w', 'utf-8') as fo:
        fo.write('\n'.join([str(cid) for cid in cids]))
    with codecs.open('files/sample.old', 'w', 'utf-8') as fo:
        for cid in cids:
            comp_ids = dbutil.get_company_comps(db, cid)
            fo.write('%s#%s\n' % (cid, ','.join(
                [str(comp_id) for comp_id in comp_ids])))