示例#1
0
def tag_id_rank_list_by_txt(txt):
    """Score candidate tags for ``txt`` and keep those whose name occurs in it.

    The text is lower-cased, then every ``(word, weight)`` pair produced by
    ``tf_idf_seg_txt`` is looked up in ``db_tag_bayes``.  A hit is a packed
    string that is decoded as an array of unsigned ints and read two values
    at a time as ``(tag_id, bayes)``; each tag accumulates
    ``bayes * weight``.

    A scored tag is kept only if it is present in ``ID2NAME`` and at least one
    of its names is found in the lower-cased text: when ``sp_txt(name)``
    yields pieces, any piece (converted with ``str``) being a substring is
    enough; when it yields nothing, the name itself must be a substring.
    NOTE(review): names are compared against lower-cased text without being
    lower-cased here -- presumably ``ID2NAME`` already stores them that way;
    confirm.

    Returns:
        A list of ``(tag_id, score)`` tuples ordered by descending score.
    """
    txt = txt.lower()

    # Pass 1: accumulate a weighted score for every tag id the words vote for.
    scores = defaultdict(int)
    for word, weight in tf_idf_seg_txt(txt):
        packed = db_tag_bayes.get(word)
        if not packed:
            continue
        values = array('I')
        values.fromstring(packed)
        for tag_id, bayes in chunkiter(values, 2):
            scores[tag_id] += bayes * weight

    def _name_in_text(name):
        # sp_txt is drained completely before testing, as a plain list.
        pieces = list(sp_txt(name))
        if not pieces:
            return name in txt
        return any(str(piece) in txt for piece in pieces)

    # Pass 2: highest score first, dropping tags that are never mentioned.
    ranked = sorted(scores.iteritems(), key=itemgetter(1), reverse=True)
    return [
        (tag_id, score)
        for tag_id, score in ranked
        if tag_id in ID2NAME
        and any(_name_in_text(name) for name in ID2NAME[tag_id])
    ]
示例#2
0
def tag_id_rank_list_by_txt(txt):
    """Return ``(tag_id, score)`` pairs for tags that ``txt`` actually mentions.

    Steps:

    1. Lower-case ``txt``.
    2. For each ``(word, weight)`` from ``tf_idf_seg_txt(txt)``, fetch the
       packed entry for the word from ``db_tag_bayes``; decode it as an
       unsigned-int array and walk it in chunks of two, read as
       ``(tag_id, bayes)``.  Add ``bayes * weight`` to that tag's total.
    3. Visit the tags from highest to lowest total.  Skip tags missing from
       ``ID2NAME``.  Keep a tag when any of its names matches the text: if
       ``sp_txt(name)`` yields pieces, one piece (via ``str``) contained in
       the text suffices; if it yields none, the whole name must be contained
       in the text.

    Returns:
        A list of ``(tag_id, score)`` tuples, highest score first.
    """
    txt = txt.lower()

    totals = defaultdict(int)
    for word, weight in tf_idf_seg_txt(txt):
        raw = db_tag_bayes.get(word)
        if not raw:
            continue
        decoded = array("I")
        decoded.fromstring(raw)
        for tag_id, bayes in chunkiter(decoded, 2):
            totals[tag_id] += bayes * weight

    def _mentions(name):
        # Materialise the split first so an empty split falls back to the
        # raw name comparison.
        parts = list(sp_txt(name))
        if parts:
            return any(str(part) in txt for part in parts)
        return name in txt

    by_score = sorted(totals.iteritems(), key=itemgetter(1), reverse=True)

    result = []
    for tag_id, score in by_score:
        if tag_id not in ID2NAME:
            continue
        # any() stops at the first matching name.
        if any(_mentions(name) for name in ID2NAME[tag_id]):
            result.append((tag_id, score))
    return result
示例#3
0
def loads_id_score(id_score):
    """Decode a packed ``id_score`` string into a list of chunks.

    The string is loaded into an unsigned-int (``'I'``) array and handed to
    ``chunkiter`` with a chunk size of 2; the resulting chunks are returned
    as a list.  Presumably each chunk is an ``(id, score)`` pair -- confirm
    against ``chunkiter`` and the writer of this format.
    """
    packed = array('I')
    packed.fromstring(id_score)
    pairs = chunkiter(packed, 2)
    return list(pairs)