示例#1
0
 def __init__(self, path, normalize=True):
     """Load the vocabularies and the matrix stored under ``path``.

     The word and context vocabularies are read from
     ``path + '.words.vocab'`` and ``path + '.contexts.vocab'``; the matrix
     itself is read with ``load_matrix(path)`` and its ``data`` array is
     replaced by its natural logarithm.

     :param path: common prefix of the matrix and vocabulary files.
     :param normalize: stored as ``self.normal``; when true,
         ``self.normalize()`` is invoked once loading is done.
     """
     words_file = path + '.words.vocab'
     contexts_file = path + '.contexts.vocab'
     self.wi, self.iw = load_vocabulary(words_file)
     self.ci, self.ic = load_vocabulary(contexts_file)

     self.m = load_matrix(path)
     stored = self.m
     stored.data = np.log(stored.data)

     self.normal = normalize
     if not normalize:
         return
     self.normalize()
示例#2
0
 def __init__(self, path, normalize=True):
     """Build the representation from the files that share the prefix ``path``.

     Steps, in order: read both vocabularies (``.words.vocab`` and
     ``.contexts.vocab`` appended to ``path``), read the matrix with
     ``load_matrix``, take the natural log of the matrix's ``data`` array,
     record ``normalize`` in ``self.normal`` and, if it is true, call
     ``self.normalize()``.
     """
     vocab_prefix = path
     self.wi, self.iw = load_vocabulary(vocab_prefix + '.words.vocab')
     self.ci, self.ic = load_vocabulary(vocab_prefix + '.contexts.vocab')

     self.m = load_matrix(path)
     loaded = self.m
     loaded.data = np.log(loaded.data)

     self.normal = normalize
     if self.normal:
         self.normalize()
示例#3
0
文件: explicit.py 项目: cheapmon/wot
 def load(cls, path, normalize=True, restricted_context=None, **kwargs):
     """Alternate constructor: build an instance from the data at ``path``.

     The matrix is read with ``load_matrix(path)`` and the two vocabularies
     with ``load_vocabulary(matrix, path)``; all three are handed to ``cls``
     together with ``normalize`` and ``restricted_context`` (as keywords).
     Any extra keyword arguments are accepted but not used.
     """
     matrix = load_matrix(path)
     words, contexts = load_vocabulary(matrix, path)
     options = {
         'normalize': normalize,
         'restricted_context': restricted_context,
     }
     return cls(matrix, words, contexts, **options)
示例#4
0
 def __init__(self, path, normalize=True, k=1):
     """Initialise via ``Explicit.__init__`` and then shift the matrix by ``log(k)``.

     ``Explicit.__init__`` is called with normalisation switched off; the
     vocabularies and the matrix are then read from ``path`` again and
     ``np.log(k)`` is subtracted from the matrix's ``data`` array.
     ``self.normalize()`` runs at the end when ``normalize`` is true.

     NOTE(review): the reload after ``Explicit.__init__`` replaces whatever
     the base initialiser stored in ``self.m``; the base class is not
     visible from here, so confirm the reload is intended.
     NOTE(review): ``self.normal`` is not assigned in this method, so it
     keeps the value set by the base initialiser.
     """
     Explicit.__init__(self, path, False)

     words_file = path + '.words.vocab'
     contexts_file = path + '.contexts.vocab'
     self.wi, self.iw = load_vocabulary(words_file)
     self.ci, self.ic = load_vocabulary(contexts_file)

     self.m = load_matrix(path)
     stored = self.m
     stored.data = stored.data - np.log(k)

     if not normalize:
         return
     self.normalize()
示例#5
0
 def __init__(self, path, normalize=True, glen=5):
     """Load vocabularies, counts and the matrix that share the prefix ``path``.

     :param path: prefix of the input files; ``'.words.vocab'`` and
         ``'.contexts.vocab'`` are appended for the vocabularies, while
         ``self.load_counts`` and ``load_matrix`` receive ``path`` itself.
     :param normalize: stored as ``self.normal``; when true,
         ``self.normalize()`` is called last.
     :param glen: stored unchanged as ``self.glen``.

     The matrix's ``data`` array is replaced by its natural logarithm.
     """
     words_file = path + '.words.vocab'
     contexts_file = path + '.contexts.vocab'
     self.wi, self.iw = load_vocabulary(words_file)
     self.ci, self.ic = load_vocabulary(contexts_file)

     counts = self.load_counts(path)
     self.sz, self.ng_freqs = counts

     self.m = load_matrix(path)
     stored = self.m
     stored.data = np.log(stored.data)

     self.normal = normalize
     self.glen = glen
     if not normalize:
         return
     self.normalize()
示例#6
0
 def __init__(self, path, normalize=True):
     """Initialise via ``Explicit.__init__`` and then binarise the log matrix.

     After the base initialiser (called with normalisation off) the
     vocabularies and matrix are read from ``path`` again. The matrix's
     ``data`` array is replaced by its natural logarithm, then every value
     ``<= 0`` is set to 0 and every value ``> 0`` is set to 1.
     ``self.normalize()`` runs at the end when ``normalize`` is true.

     NOTE(review): the reload replaces whatever ``Explicit.__init__`` put in
     ``self.m``; the base class is not visible here, so confirm this is
     intended.
     NOTE(review): ``self.normal`` is not assigned in this method, so it
     keeps the value set by the base initialiser.
     """
     Explicit.__init__(self, path, False)

     words_file = path + '.words.vocab'
     contexts_file = path + '.contexts.vocab'
     self.wi, self.iw = load_vocabulary(words_file)
     self.ci, self.ic = load_vocabulary(contexts_file)

     self.m = load_matrix(path)
     stored = self.m
     stored.data = np.log(stored.data)

     # Threshold in place, non-positive logs first, then positive ones.
     values = stored.data
     values[values <= 0] = 0
     values[values > 0] = 1

     if not normalize:
         return
     self.normalize()
示例#7
0
文件: explicit.py 项目: cheapmon/wot
 def load(cls,
          path,
          normalize=True,
          restricted_context=None,
          thresh=None,
          neg=1):
     """Alternate constructor: build an instance from the data at ``path``.

     ``thresh`` is forwarded to ``load_matrix`` as its second argument; the
     vocabularies come from ``load_vocabulary(matrix, path)``. ``normalize``
     and ``restricted_context`` are passed to ``cls`` positionally (4th and
     5th argument) and ``neg`` by keyword.
     """
     matrix = load_matrix(path, thresh)
     words, contexts = load_vocabulary(matrix, path)
     positional = (matrix, words, contexts, normalize, restricted_context)
     return cls(*positional, neg=neg)
示例#8
0
 def load(cls,
          path,
          normalize=True,
          restricted_context=None,
          thresh=None,
          neg=1):
     """Alternate constructor: build an instance from the data at ``path``.

     ``thresh`` is accepted but NOT forwarded: according to the original
     author's note, the ``load_matrix`` available here takes only the path,
     so the matrix is loaded without thresholding. ``normalize`` and
     ``restricted_context`` are passed to ``cls`` positionally (4th and 5th
     argument) and ``neg`` by keyword.
     """
     # load_matrix is deliberately called with the path only (see docstring).
     matrix = load_matrix(path)
     words, contexts = load_vocabulary(matrix, path)
     positional = (matrix, words, contexts, normalize, restricted_context)
     return cls(*positional, neg=neg)
示例#9
0
def main(proc_num, queue, out_dir, in_dir, context_size):
    """Worker loop: write a per-word count pickle for every year in ``queue``.

    For each year taken from ``queue`` the matrix
    ``in_dir + str(year) + ".bin"`` and the index
    ``in_dir + str(year) + "-index.pkl"`` are loaded, the matrix is summed
    along axis 1, the sums are divided by ``2 * context_size`` and the
    resulting ``{word: int}`` mapping is pickled to
    ``out_dir + "/" + str(year) + "-counts.pkl"``.

    Args:
        proc_num: Worker identifier; only used as a prefix in progress output.
        queue: Queue of years, drained with non-blocking ``get`` until it
            raises ``Empty``.
        out_dir: Output directory, created through ``ioutils.mkdir``.
        in_dir: Prefix prepended directly to the input file names (no path
            separator is inserted, so it must already end with one if it is
            a directory).
        context_size: The sums are divided by twice this value.
            NOTE(review): presumably the co-occurrence window size -- confirm.

    Progress messages use single-argument ``print(...)`` calls so the
    function parses on Python 3 while producing the same output on Python 2.
    """
    ioutils.mkdir(out_dir)
    print("%s Start loop" % (proc_num,))
    while True:  # one iteration per year left in the queue
        try:
            year = queue.get(block=False)
        except Empty:
            print("%s Finished" % (proc_num,))
            break

        print("%s - Loading mat for year %s" % (proc_num, year))
        year_prefix = in_dir + str(year)
        year_mat = load_matrix(year_prefix + ".bin")
        index = ioutils.load_pickle(year_prefix + "-index.pkl")

        print("%s - Processing data for year %s" % (proc_num, year))
        # Sum along axis 1 and scale by twice the context size.
        row_totals = year_mat.sum(1) / (2 * context_size)
        num_rows = len(row_totals)
        # Words whose index lies outside the matrix are skipped.
        counts = {
            word: int(row_totals[row])
            for word, row in index.items() if row < num_rows
        }
        ioutils.write_pickle(counts, out_dir + "/" + str(year) +
                             "-counts.pkl")
示例#10
0
 def load(cls, path, normalize=True, restricted_context=None, **kwargs):
     """Alternate constructor: read matrix and vocabularies, then build ``cls``.

     ``load_matrix(path)`` supplies the matrix and
     ``load_vocabulary(matrix, path)`` the word and context vocabularies.
     ``normalize`` and ``restricted_context`` are forwarded to ``cls`` as
     keyword arguments; additional keyword arguments are ignored.
     """
     matrix = load_matrix(path)
     words, contexts = load_vocabulary(matrix, path)
     return cls(
         matrix,
         words,
         contexts,
         normalize=normalize,
         restricted_context=restricted_context,
     )
示例#11
0
 def load(cls, path, normalize=True, restricted_context=None, thresh=None, neg=1):
     """Alternate constructor: read matrix and vocabularies, then build ``cls``.

     ``thresh`` is passed on to ``load_matrix`` as its second argument.
     ``normalize`` and ``restricted_context`` reach ``cls`` positionally
     (4th and 5th argument); ``neg`` is passed by keyword.
     """
     matrix = load_matrix(path, thresh)
     words, contexts = load_vocabulary(matrix, path)
     return cls(
         matrix,
         words,
         contexts,
         normalize,
         restricted_context,
         neg=neg,
     )