def update_standard_tags():
    """Recompute and store the ``standard`` field of every document in ``db.tags``.

    For each tag document the tag name is passed to ``StandardTags.transform``.
    When the result is empty, or its first entry already starts with the tag's
    own name, ``standard`` is set to an empty list; otherwise ``standard`` is
    set to the transform result. The document is then written back with
    ``db.tags.update`` and progress is reported through ``prog_d``.

    A warning is logged for every write whose result has a falsy ``"ok"``.
    """
    stdtag = StandardTags()
    # NOTE: the {"$where": "this.standard == null"} filter is disabled, so the
    # count below (and the loop) covers every tag, not only unprocessed ones.
    total = db.tags.find().count()
    logging.debug("remaining %d tags without standard.", total)
    for i, t in enumerate(db.tags.find(timeout=False)):
        name = t["name"]
        taglst = stdtag.transform(name)
        if not taglst or taglst[0][0] == name:
            # Nothing to map to, or the tag is already its own standard form.
            logging.debug("skip tag: %s", name)
            t["standard"] = []
        else:
            t["standard"] = taglst
            logging.debug("%d, tag update standard %s --> %s", i, name, taglst[0][0])
        # Replace the stored document with the modified copy.
        ret = db.tags.update({"_id": t["_id"]}, t)
        if not ret["ok"]:
            # logging.warn is a deprecated alias; logging.warning is the supported name.
            logging.warning("tag update failed. tag: %s", name)
        prog_d("tag update", i, total)
def update_standard_tags():
    """Refresh the ``standard`` field on every tag document in ``db.tags``.

    Each tag name is run through ``StandardTags.transform``. If the transform
    yields nothing, or its first entry's first element equals the tag name,
    the tag gets ``standard = []``; otherwise the transform result is stored.
    The document is written back via ``db.tags.update``; a falsy ``'ok'`` in
    the write result is logged as a warning. ``prog_d`` is called once per
    document to report progress.
    """
    stdtag = StandardTags()
    # NOTE: with the {"$where":"this.standard == null"} filter commented out,
    # both the count and the cursor below span all tags.
    total = db.tags.find().count()
    logging.debug('remaining %d tags without standard.', total)
    cursor = db.tags.find(timeout=False)
    for i, t in enumerate(cursor):
        tag_name = t['name']
        taglst = stdtag.transform(tag_name)
        if not taglst or taglst[0][0] == tag_name:
            # No standard form found, or the tag already is the standard form.
            logging.debug('skip tag: %s', tag_name)
            t['standard'] = []
        else:
            t['standard'] = taglst
            logging.debug('%d, tag update standard %s --> %s',
                          i, tag_name, taglst[0][0])
        # Write the whole modified document back under its original _id.
        ret = db.tags.update({"_id": t['_id']}, t)
        if not ret['ok']:
            # Use logging.warning: logging.warn is a deprecated alias.
            logging.warning('tag update failed. tag: %s', tag_name)
        prog_d('tag update', i, total)
示例#3
0
                # prog_d('solve Matrix row', c, total)
                # c += 1
            
        return MImatrix

    def _calMIvalue(self, a_set, b_set):
        pab = math.fabs(float(len(a_set&b_set)) / self.root)
        pa  = math.fabs(float(len(a_set)) / self.root)
        pb  = math.fabs(float(len(b_set)) / self.root)
        Iab = pab * math.log((pab+1) / (pa*pb))
        Ha  = -pa * math.log(pa)
        Hb  = -pb * math.log(pb)
        return float(Iab) / (float(Ha + Hb) / 2)

# Module-level instances, created once when this module is imported.
rsdb = RecsysDatabase()
stdtag = StandardTags()

# set PROG before using this function
def prog_d(dstr, line=-1, total=100):
    """Log a throttled progress message, or a completion message.

    With ``line >= 0`` the percentage ``line / total`` (plus one) is computed
    and a "Processing" line is logged, but only if that percentage is listed
    in the module-level ``PROG_SCALE`` and differs from the last percentage
    reported, which is remembered in the module-level ``PROG_REC``. Both
    globals must exist before the first such call.

    With a negative ``line`` (the default) a "Finishing" line is logged.

    Args:
        dstr: Label describing the task being reported.
        line: Number of items processed so far; negative means "finished".
        total: Total number of items. When it is 0 nothing is logged, because
            no percentage can be computed.
    """
    global PROG_REC
    if line < 0:
        logging.info('-=-=- Finishing ' + dstr)
        return
    if total == 0:
        # No percentage exists for an empty job; avoid ZeroDivisionError.
        return
    progress = int(float(line) / float(total) * 100 + 1)
    # Only report the configured milestones, and each one only once in a row.
    if progress not in PROG_SCALE or progress == PROG_REC:
        return
    PROG_REC = progress
    dstr += ' %d%%(%d/%d) -=-=-' % (progress, line, total)
    logging.info('-=-=- Processing ' + dstr)
示例#4
0
# -*- coding: utf-8 -*- 
from book_recsys import *
from stdtag import StandardTags
# word2vec = pickle.load(open('dump/Word2VecMtrx.dmp'))


# Module-level StandardTags instance; _loadStart() is invoked once, at import time.
stdtag = StandardTags()
stdtag._loadStart()

def _accWeight(node_dict, level, fno, a=0.0):
    up_lev = {}
    for nd in node_dict.items():
        p_mat = level[fno][2]
        for upnd in p_mat[nd[0]].items(): # 遍历连接着的上层节点
            if upnd[1] > 0:
                if upnd[0] not in up_lev:
                    up_lev[upnd[0]] = 0.0
                up_lev[upnd[0]] += nd[1]*upnd[1]*(1+a) # 权重叠加
    return up_lev

def findLevel(tag, level):
    """Return the index of the first entry of ``level`` whose element 0 contains ``tag``.

    Returns ``None`` when no entry contains the tag.
    """
    matches = (idx for idx, entry in enumerate(level) if tag in entry[0])
    return next(matches, None)

def getWacWeight(tag, level, iter_level=-1):
    #print '\r\n\r\n\r\n\r\n', tag
    if iter_level == -1:
        iter_level = len(level)-1
    ret = {}
    num = findLevel(tag, level)
示例#5
0
# -*- coding: utf-8 -*-
from book_recsys import *
from stdtag import StandardTags
# word2vec = pickle.load(open('dump/Word2VecMtrx.dmp'))

# Module-level StandardTags instance; _loadStart() is invoked once, at import time.
stdtag = StandardTags()
stdtag._loadStart()


def _accWeight(node_dict, level, fno, a=0.0):
    up_lev = {}
    for nd in node_dict.items():
        p_mat = level[fno][2]
        for upnd in p_mat[nd[0]].items():  # 遍历连接着的上层节点
            if upnd[1] > 0:
                if upnd[0] not in up_lev:
                    up_lev[upnd[0]] = 0.0
                up_lev[upnd[0]] += nd[1] * upnd[1] * (1 + a)  # 权重叠加
    return up_lev


def findLevel(tag, level):
    """Locate ``tag`` among the entries of ``level``.

    Each entry's element 0 is tested for membership of ``tag``; the index of
    the first entry that contains it is returned, or ``None`` if none does.
    """
    position = 0
    for entry in level:
        members = entry[0]
        if tag in members:
            return position
        position += 1
    return None


def getWacWeight(tag, level, iter_level=-1):
    #print '\r\n\r\n\r\n\r\n', tag
    if iter_level == -1:
        iter_level = len(level) - 1