def update_standard_tags():
    """Recompute and store the ``standard`` field of every document in ``db.tags``.

    Each tag's ``name`` is passed to ``StandardTags.transform``.  When the
    transform returns nothing, or the first element of its first entry
    (``taglst[0][0]``) equals the tag's own name, ``standard`` is set to an
    empty list; otherwise the whole transform result is stored.  The
    document is then written back with ``db.tags.update`` and progress is
    reported through ``prog_d``.

    A failed write (falsy ``ret["ok"]``) is logged as a warning and the loop
    continues with the next tag.
    """
    stdtag = StandardTags()
    total = db.tags.find().count()
    logging.debug("remaining %d tags without standard.", total)

    # The narrower filter {"$where": "this.standard == null"} is disabled, so
    # every tag is visited and `total` counts all tags.
    cursor = db.tags.find(timeout=False)
    try:
        for i, t in enumerate(cursor):
            name = t["name"]
            taglst = stdtag.transform(name)
            if not taglst or taglst[0][0] == name:
                logging.debug("skip tag: %s", name)
                t["standard"] = []
            else:
                t["standard"] = taglst
                logging.debug(
                    "%d, tag update standard %s --> %s", i, name, taglst[0][0]
                )
            ret = db.tags.update({"_id": t["_id"]}, t)
            if not ret["ok"]:
                logging.warning("tag update failed. tag: %s", name)
            prog_d("tag update", i, total)
    finally:
        # A cursor opened with timeout=False is not expired by the server, so
        # release it explicitly even when the loop body raises.
        cursor.close()
def update_standard_tags():
    """Recompute and store the ``standard`` field of every document in ``db.tags``.

    Each tag's ``name`` is passed to ``StandardTags.transform``.  When the
    transform returns nothing, or the first element of its first entry
    (``taglst[0][0]``) equals the tag's own name, ``standard`` is set to an
    empty list; otherwise the whole transform result is stored.  The
    document is then written back with ``db.tags.update`` and progress is
    reported through ``prog_d``.

    A failed write (falsy ``ret['ok']``) is logged as a warning and the loop
    continues with the next tag.
    """
    stdtag = StandardTags()
    total = db.tags.find().count()
    logging.debug('remaining %d tags without standard.', total)

    # The narrower filter {"$where": "this.standard == null"} is disabled, so
    # every tag is visited and `total` counts all tags.
    cursor = db.tags.find(timeout=False)
    try:
        for i, t in enumerate(cursor):
            name = t['name']
            taglst = stdtag.transform(name)
            if not taglst or taglst[0][0] == name:
                logging.debug('skip tag: %s', name)
                t['standard'] = []
            else:
                t['standard'] = taglst
                logging.debug(
                    '%d, tag update standard %s --> %s', i, name, taglst[0][0]
                )
            ret = db.tags.update({"_id": t['_id']}, t)
            if not ret['ok']:
                logging.warning('tag update failed. tag: %s', name)
            prog_d('tag update', i, total)
    finally:
        # A cursor opened with timeout=False is not expired by the server, so
        # release it explicitly even when the loop body raises.
        cursor.close()
# prog_d('solve Matrix row', c, total) # c += 1 return MImatrix
# NOTE(review): the line above is the tail of a definition that begins outside
# this excerpt; it is kept verbatim rather than reconstructed.


# NOTE(review): this takes `self` and reads `self.root`, so it is presumably a
# method of a class whose header is not visible here -- confirm and re-indent.
def _calMIvalue(self, a_set, b_set):
    """Score the overlap of two sets, normalised by their mean entropy.

    With ``p(x) = |len(x) / self.root|`` the value returned is::

        p(a & b) * log((p(a & b) + 1) / (p(a) * p(b)))
        ----------------------------------------------
             (-p(a) * log p(a) - p(b) * log p(b)) / 2

    The name suggests a mutual-information measure (TODO confirm).

    Raises ``ZeroDivisionError`` when ``self.root`` is 0, when either set is
    empty (``p(a) * p(b)`` is 0), or when the two entropy terms sum to 0.
    """
    p_joint = math.fabs(float(len(a_set & b_set)) / self.root)
    p_a = math.fabs(float(len(a_set)) / self.root)
    p_b = math.fabs(float(len(b_set)) / self.root)

    info = p_joint * math.log((p_joint + 1) / (p_a * p_b))
    entropy_a = -p_a * math.log(p_a)
    entropy_b = -p_b * math.log(p_b)

    mean_entropy = float(entropy_a + entropy_b) / 2
    return float(info) / mean_entropy


rsdb = RecsysDatabase()
stdtag = StandardTags()


def prog_d(dstr, line=-1, total=100):
    """Log a progress or completion message for the task named ``dstr``.

    The module globals ``PROG_SCALE`` and ``PROG_REC`` must be set before
    this is called with ``line >= 0``.

    With ``line >= 0`` the percentage ``int(line / total * 100 + 1)`` is
    computed.  A "Processing" message is logged only when that percentage is
    in ``PROG_SCALE`` and differs from the last one reported (``PROG_REC``),
    which is then updated.  With a negative ``line`` (the default) a
    "Finishing" message is logged instead.
    """
    global PROG_REC
    if line >= 0:
        percent = int(float(line) / float(total) * 100 + 1)
        if percent not in PROG_SCALE or percent == PROG_REC:
            # Not a reporting step, or this step was already reported.
            return
        PROG_REC = percent
        suffix = ' %d%%(%d/%d) -=-=-' % (percent, line, total)
        logging.info('-=-=- Processing ' + dstr + suffix)
    else:
        logging.info('-=-=- Finishing ' + dstr)
# -*- coding: utf-8 -*-
from book_recsys import *
from stdtag import StandardTags

# word2vec = pickle.load(open('dump/Word2VecMtrx.dmp'))
stdtag = StandardTags()
stdtag._loadStart()


def _accWeight(node_dict, level, fno, a=0.0):
    """Push the weights in ``node_dict`` one level up and return the totals.

    For every ``(node, weight)`` pair in ``node_dict`` the mapping
    ``level[fno][2][node]`` is read.  Each ``(upper, link)`` entry in it
    with ``link > 0`` contributes ``weight * link * (1 + a)`` to the total
    kept for ``upper``.

    Returns a dict mapping each reached upper-level key to its summed
    weight (totals start from ``0.0``).
    """
    totals = {}
    for node, weight in node_dict.items():
        links = level[fno][2][node]
        # Walk the upper-level nodes connected to this node.
        for upper, link in links.items():
            if link > 0:
                # Accumulate this node's contribution to the upper node.
                totals[upper] = totals.get(upper, 0.0) + weight * link * (1 + a)
    return totals


def findLevel(tag, level):
    """Return the index of the first entry of ``level`` whose item 0 contains ``tag``.

    Returns ``None`` when no entry contains the tag.
    """
    matches = (idx for idx, entry in enumerate(level) if tag in entry[0])
    return next(matches, None)


def getWacWeight(tag, level, iter_level=-1):
    # NOTE(review): this definition continues past the end of the visible
    # excerpt; only the visible part is reproduced here, unchanged.
    #print '\r\n\r\n\r\n\r\n', tag
    if iter_level == -1:
        iter_level = len(level)-1
    ret = {}
    num = findLevel(tag, level)
# -*- coding: utf-8 -*-
from book_recsys import *
from stdtag import StandardTags

# word2vec = pickle.load(open('dump/Word2VecMtrx.dmp'))
stdtag = StandardTags()
stdtag._loadStart()


def _accWeight(node_dict, level, fno, a=0.0):
    """Push the weights in ``node_dict`` one level up and return the totals.

    For every ``(node, weight)`` pair in ``node_dict`` the mapping
    ``level[fno][2][node]`` is read.  Each ``(upper, link)`` entry in it
    with ``link > 0`` contributes ``weight * link * (1 + a)`` to the total
    kept for ``upper``.

    Returns a dict mapping each reached upper-level key to its summed
    weight (totals start from ``0.0``).
    """
    totals = {}
    for node, weight in node_dict.items():
        links = level[fno][2][node]
        # Walk the upper-level nodes connected to this node.
        for upper, link in links.items():
            if link > 0:
                # Accumulate this node's contribution to the upper node.
                totals[upper] = totals.get(upper, 0.0) + weight * link * (1 + a)
    return totals


def findLevel(tag, level):
    """Return the index of the first entry of ``level`` whose item 0 contains ``tag``.

    Returns ``None`` when no entry contains the tag.
    """
    matches = (idx for idx, entry in enumerate(level) if tag in entry[0])
    return next(matches, None)


def getWacWeight(tag, level, iter_level=-1):
    # NOTE(review): this definition continues past the end of the visible
    # excerpt; only the visible part is reproduced here, unchanged.
    #print '\r\n\r\n\r\n\r\n', tag
    if iter_level == -1:
        iter_level = len(level) - 1