def main(): t = {} for k,v in NAME2ID.iteritems(): t[name_tidy(k)] = v print "#coding: utf-8" print "NAME2ID = ", pprint(t)
def main(): t = {} for k, v in NAME2ID.iteritems(): t[name_tidy(k)] = v print "#coding: utf-8" print "NAME2ID = ", pprint(t)
def main(): id2alias = {} for zhihu_topic in ZHIHU_TOPIC: id = zhihu_topic[0] alias_list = zhihu_topic[5] id2alias[int(id)] = alias_list name2id = {} for i in Zsite.where(cid=CID_TAG): tag_list = map(str.strip, i.name.split("/")) zhihu_id = MY2ID[i.id] alias_list = id2alias.get(zhihu_id,()) tag_list.extend(alias_list) for name in tag_list: name2id[name.lower()] = i.id print """ #coding:utf-8 NAME2ID = """, pprint(name2id)
# NAME_RANK[name] = rank
# if _name != name:
#     NAME_RANK[_name] = name

# MY2Z maps a local tag id to the NAME_ID entry found for its name.
# `count` tallies the tags for which nothing was found; the visible code
# never prints it.
count = 0
MY2Z = {}
for tag in Zsite.where(cid=CID_TAG):
    # Persist the stripped name first, so the lookups below use the same
    # value that is stored.
    tag.name = tag.name.strip()
    tag.save()

    if tag.name in NAME_ID:
        # The whole name is known.
        MY2Z[tag.id] = NAME_ID[tag.name]
    else:
        # Otherwise use the first "/"-separated part that is known.
        parts = map(str.strip, tag.name.split("/"))
        for part in parts:
            if part in NAME_ID:
                MY2Z[tag.id] = NAME_ID[part]
                break

    if tag.id not in MY2Z:
        count += 1

import _env
from zkit.pprint import pprint
from yajl import dumps

# Dump the inverse table (NAME_ID value -> local tag id).  If several tags
# share a value, the one iterated last is kept.
pprint(dict((z_id, my_id) for my_id, z_id in MY2Z.iteritems()))
#print dumps(NAME_KEYWORD)
#print dumps(NAME_RANK)
sp2id = defaultdict(list) for k, v in NAME2ID.iteritems(): for i in sp_txt(k): sp2id[i].append(k) word_parent = defaultdict(set) for k, v in NAME2ID.iteritems(): for i in sp_txt(k): for j in sp2id[i]: if j != k and k in j: #print k, j word_parent[NAME2ID[j]].add(NAME2ID[k]) id2name = dict((k, v) for v, k in NAME2ID.iteritems()) #for id, pid_list in word_parent.iteritems(): # print id2name[id] # for i in pid_list: # print id2name[i], # print "\n" word_parent = dict((k, tuple(v)) for k, v in word_parent.iteritems()) print 'PTAG = ', pprint(word_parent)
#coding:utf-8
import _env
from zkit.google.greader import Reader
from config import GREADER_USERNAME, GREADER_PASSWORD
from zkit.pprint import pprint

# Walk the Douban book-review feed through the reader client, dump every
# entry with pprint, and collect a fixed set of its fields in `result`.
# NOTE(review): this runs at import time, not only when executed as a script.
reader = Reader(GREADER_USERNAME, GREADER_PASSWORD)
result = []
for feed in reader.feed("feed/http://book.douban.com/feed/review/book"):
    pprint(feed)
    data = {
        'title': feed['title'],
        'author': feed['author'],
        'content': feed['content'],
        'updated': feed['updated'],
        'id': feed['id'],
        'published': feed['published'],
    }
    result.append(data)


if __name__ == "__main__":
    pass
#""" RESULT = {} for tag in Zsite.where(cid=CID_TAG): name_list = map(str.strip, tag.name.split('/')) for name in name_list: RESULT[name.replace('·', '.').lower()] = tag.id for i in ZHIHU_TOPIC: id = i[0] if id not in ID2MY: continue rename = i[-1] for name in rename: name = name.lower() if name in RESULT and RESULT[name] != ID2MY[id]: zsite = Zsite.mc_get(ID2MY[id]) print zsite.id, zsite.name zsite = Zsite.mc_get(RESULT[name]) print zsite.id, zsite.name else: continue print """ #coding:utf-8 TAG2ID = """, pprint(RESULT)
# from zkit.pprint import pprint # # pprint(url2id) #""" RESULT = {} for tag in Zsite.where(cid=CID_TAG): name_list = map(str.strip, tag.name.split('/')) for name in name_list: RESULT[name.replace('·', '.').lower()] = tag.id for i in ZHIHU_TOPIC: id = i[0] if id not in ID2MY: continue rename = i[-1] for name in rename: name = name.lower() if name in RESULT and RESULT[name] != ID2MY[id]: zsite = Zsite.mc_get(ID2MY[id]) print zsite.id, zsite.name zsite = Zsite.mc_get(RESULT[name]) print zsite.id, zsite.name else: continue print """ #coding:utf-8 TAG2ID = """, pprint(RESULT)
from collections import defaultdict from zkit.pprint import pprint sp2id = defaultdict(list) for k, v in NAME2ID.iteritems(): for i in sp_txt(k): sp2id[i].append(k) word_parent = defaultdict(set) for k, v in NAME2ID.iteritems(): for i in sp_txt(k): for j in sp2id[i]: if j != k and k in j: #print k, j word_parent[NAME2ID[j]].add(NAME2ID[k]) id2name = dict((k, v) for v, k in NAME2ID.iteritems()) #for id, pid_list in word_parent.iteritems(): # print id2name[id] # for i in pid_list: # print id2name[i], # print "\n" word_parent = dict((k, tuple(v)) for k, v in word_parent.iteritems()) print 'PTAG = ', pprint(word_parent)