示例#1
0
def merge(path):
    for pos, i in enumerate(glob(path + "/*")):
        print pos, i
        for word, topic_freq in tofromfile.fromfile(i).iteritems():

            if len(word.strip()) <= 3:
                continue

            word = name_tidy(word)
            s = [word]
            for topic, freq in topic_freq.iteritems():
                topic = int(topic)
                redis.hincrby(word, topic, int(freq * 100))
示例#2
0
def merge(path):
    for pos, i in enumerate(glob(path+"/*")):
        print pos, i
        for word, topic_freq in tofromfile.fromfile(i).iteritems():

            if len(word.strip()) <= 3:
                continue

            word = name_tidy(word)
            s = [word]
            for topic, freq in topic_freq.iteritems():
                topic = int(topic)
                redis.hincrby(word, topic, int(freq*100))
示例#3
0
def merge():
    CACHE_PATH = "/home/work/wanfang/tag"
    for pos, i in enumerate(glob(CACHE_PATH+"/*")):
        for word, topic_freq in tofromfile.fromfile(i).iteritems():

            if len(word.strip()) <= 3:
                continue

            word = name_tidy(word)
            s = [word]
            for topic, freq in topic_freq.iteritems():
                topic = int(topic)
                s.append((topic, freq))
            print dumps(s)
示例#4
0
def merge():
    CACHE_PATH = "/home/work/wanfang/tag"
    for pos, i in enumerate(glob(CACHE_PATH + "/*")):
        for word, topic_freq in tofromfile.fromfile(i).iteritems():

            if len(word.strip()) <= 3:
                continue

            word = name_tidy(word)
            s = [word]
            for topic, freq in topic_freq.iteritems():
                topic = int(topic)
                s.append((topic, freq))
            print dumps(s)
示例#5
0
 def fromfile(self, path):
     """Replace this object's internal dict with data loaded from *path*.

     Returns self so calls can be chained (fluent style).
     """
     # NOTE: `fromfile` below resolves to the module-level loader, not this
     # method — a method's own name is not in scope inside its body.
     loaded = fromfile(path)
     self._dict = loaded
     return self
示例#6
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import _env
from config import ZDATA_PATH
from zkit.tofromfile import fromfile
from idf import tf_idf as _tf_idf
from os.path import join
from mmseg import seg_txt

IDF = fromfile(join(ZDATA_PATH, 'data/idf'))


def tf_idf(word_list):
    """Return tf-idf scores for *word_list* using the module-level IDF table."""
    return _tf_idf(word_list, IDF)


def tf_idf_seg_txt(txt):
    """Segment raw text with mmseg and return tf-idf scores for its words."""
    # Map full-width sentence punctuation to spaces so the segmenter
    # sees clean token boundaries.
    normalized = txt.replace('。', ' ').replace(',', ' ')
    words = list(seg_txt(normalized))
    return tf_idf(words)


from kyotocabinet import DB
from collections import defaultdict
from array import array
from zkit.zitertools import chunkiter
from operator import itemgetter
from zdata.tag.name2id import NAME2ID
from zkit.txt_cleanup import sp_txt

ID2NAME = defaultdict(list)
示例#7
0
 def extend_by_file(self, filename):
     """Fold the (count, df-mapping) pair stored in *filename* into self.

     Adds the document count to self._count and accumulates each term's
     document frequency into self._df (presumably a defaultdict(int) —
     plain dicts would raise KeyError on new terms; confirm in __init__).
     """
     other_count, other_df = fromfile(filename)
     self._count += other_count
     for term, df in other_df.iteritems():
         self._df[term] += df
示例#8
0
 def fromfile(self, path):
     """Reload the internal dict from *path* and return self for chaining."""
     # The call goes to the module-level fromfile() loader; the method name
     # only shadows it as a class attribute, not inside this body.
     data = fromfile(path)
     self._dict = data
     return self
示例#9
0
 def extend_by_file(self, filename):
     """Merge the counts serialized in *filename* into self._count / self._df."""
     loaded_count, loaded_df = fromfile(filename)
     self._count += loaded_count
     # Accumulate per-term document frequencies; self._df must support
     # += on unseen keys (presumably a defaultdict — confirm at call site).
     for key in loaded_df:
         self._df[key] += loaded_df[key]
示例#10
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import _env
from config import ZDATA_PATH
from zkit.tofromfile import fromfile
from idf import tf_idf as _tf_idf
from os.path import join
from mmseg import seg_txt

IDF = fromfile(join(ZDATA_PATH, "data/idf"))


def tf_idf(word_list):
    """Return tf-idf weights for *word_list* against the module-level IDF table."""
    return _tf_idf(word_list, IDF)


def tf_idf_seg_txt(txt):
    """Segment *txt* and return tf-idf weights for the resulting word list."""
    # Full-width punctuation would confuse the segmenter; turn it into spaces.
    cleaned = txt.replace("。", " ").replace(",", " ")
    return tf_idf(list(seg_txt(cleaned)))


from kyotocabinet import DB
from collections import defaultdict
from array import array
from zkit.zitertools import chunkiter
from operator import itemgetter
from zdata.tag.name2id import NAME2ID
from zkit.txt_cleanup import sp_txt

ID2NAME = defaultdict(list)