Пример #1
0
 def _generate_tree(self):
     """Assemble the tree dict for this object.

     Returns:
         A dict with the keys "tokens", "topics" and "matrix". "topics" is
         ``False`` when ``self.topics`` is None, so the key is always present.
     """
     packed_tokens = merge_strings(self.tokens)
     if self.topics is None:
         # False is the placeholder stored when there are no topics.
         packed_topics = False
     else:
         packed_topics = merge_strings(self.topics)
     matrix_parts = disassemble_sparse_matrix(self.matrix)
     return {
         "tokens": packed_tokens,
         "topics": packed_topics,
         "matrix": matrix_parts,
     }
Пример #2
0
 def _generate_tree(self) -> dict:
     """Build the tree dict for this object, flattening nested containers into arrays.

     Starts from a copy of the instance ``__dict__``, removes the attributes a
     bare ``Model()`` also has, and then overwrites the "frequencies",
     "checker", "tokens" and "wv" entries with array-based representations.

     Returns:
         The resulting dict.
     """
     tree = self.__dict__.copy()
     # Drop every attribute that a freshly constructed Model() instance carries.
     for key in vars(Model()):
         del tree[key]
     # Frequencies become two parallel sequences ordered by sorted key.
     freqkeys = [""] * len(self.frequencies)
     freqvals = numpy.zeros(len(self.frequencies), dtype=numpy.uint32)
     for i, (key, val) in enumerate(sorted(self.frequencies.items())):
         freqkeys[i] = key
         freqvals[i] = val
     tree["frequencies"] = {"keys": merge_strings(freqkeys), "vals": freqvals}
     # Shallow copy of the checker attributes; "_deletes" and "_words" are replaced below.
     tree["checker"] = self.checker.__dict__.copy()
     # First pass over _deletes: record each key, the length of its list, and
     # collect the set of distinct strings that occur in any list.
     delstrs = set()
     delindexes = numpy.zeros(len(self.checker._deletes), dtype=numpy.uint32)
     dellengths = numpy.zeros_like(delindexes)
     for i, (key, dss) in enumerate(self.checker._deletes.items()):
         delindexes[i] = key
         dellengths[i] = len(dss)
         for ds in dss:
             delstrs.add(ds)
     # Sort the distinct strings so that each one gets a stable integer id.
     delstrs = sorted(delstrs)
     delstrs_map = {s: i for i, s in enumerate(delstrs)}
     # Second pass: concatenate all lists, as string ids, into one flat array.
     # "lengths" tells how many consecutive entries belong to each key.
     deldata = numpy.zeros(sum(dellengths), dtype=numpy.uint32)
     offset = 0
     for di in delindexes:
         dss = self.checker._deletes[di]
         for j, ds in enumerate(dss):
             deldata[offset + j] = delstrs_map[ds]
         offset += len(dss)
     tree["checker"]["_deletes"] = {
         "strings": merge_strings(delstrs),
         "indexes": delindexes,
         "lengths": dellengths,
         "data": deldata,
     }
     # NOTE(review): wordvals is sized by len(_words) but indexed through
     # delstrs_map, so this relies on the keys of _words being exactly the
     # strings collected from _deletes - confirm against the checker.
     wordvals = numpy.zeros(len(self.checker._words), dtype=numpy.uint32)
     for key, val in self.checker._words.items():
         wordvals[delstrs_map[key]] = val
     tree["checker"]["_words"] = wordvals
     tree["tokens"] = merge_strings(self.tokens)
     # Vocabulary strings and counts are laid out by each entry's .index.
     vocab_strings = [""] * len(self.wv.vocab)
     vocab_counts = numpy.zeros(len(vocab_strings), dtype=numpy.uint32)
     for key, val in self.wv.vocab.items():
         vocab_strings[val.index] = key
         vocab_counts[val.index] = val.count
     # The mapping is stored inverted: array position = mapped value, element = key.
     hash2index = numpy.zeros(len(self.wv.hash2index), dtype=numpy.uint32)
     for key, val in self.wv.hash2index.items():
         hash2index[val] = key
     tree["wv"] = {
         "vocab": {"strings": merge_strings(vocab_strings), "counts": vocab_counts},
         "vectors": self.wv.vectors,
         "min_n": self.wv.min_n,
         "max_n": self.wv.max_n,
         "bucket": self.wv.bucket,
         "num_ngram_vectors": self.wv.num_ngram_vectors,
         "vectors_ngrams": self.wv.vectors_ngrams,
         "hash2index": hash2index,
     }
     return tree
Пример #3
0
 def _generate_tree(self):
     """Assemble the tree dict: the language list plus one sub-dict per language.

     Returns:
         A dict with a "langs" entry and, for every language key in
         ``self._library_names``, a dict holding "library_names" and
         "library_metadata" (both built from sorted inputs).
     """
     result = {"langs": self._langs}
     for language, names in self._library_names.items():
         packed_names = merge_strings(sorted(names))
         # Each metadata bucket is sorted before packing, like the names.
         packed_metadata = {}
         for meta_key, meta_libs in self._library_metadata[language].items():
             packed_metadata[meta_key] = merge_strings(sorted(meta_libs))
         result[language] = {
             "library_names": packed_names,
             "library_metadata": packed_metadata,
         }
     return result
Пример #4
0
 def _generate_tree(self):
     """Assemble the tree dict with the matrix, files, deps and per-file lookups.

     Returns:
         A dict with the keys "matrix", "files", "deps", "ind_to_langs" and
         "ind_to_repos". The last two are listed in file-index order.
     """
     matrix_parts = disassemble_sparse_matrix(self._matrix)
     packed_files = merge_strings(self._files)
     packed_deps = merge_strings(self._deps)
     # Walk indices 0..len(files)-1 so both lookups line up with "files".
     file_indices = range(len(self._files))
     langs_by_index = [self._ind_to_langs[index] for index in file_indices]
     packed_langs = merge_strings(langs_by_index)
     repos_by_index = [self._ind_to_repos[index] for index in file_indices]
     packed_repos = merge_strings(repos_by_index)
     return {
         "matrix": matrix_parts,
         "files": packed_files,
         "deps": packed_deps,
         "ind_to_langs": packed_langs,
         "ind_to_repos": packed_repos,
     }
Пример #5
0
 def _generate_tree(self):
     """Assemble the tree dict with docs, tokens and their frequencies.

     Tokens are placed at the positions given by ``self._order``, and the
     frequency array uses the same positions.

     Returns:
         A dict with the keys "docs", "tokens" and "freqs".
     """
     size = len(self)
     ordered_tokens = [None] * size
     frequencies = numpy.zeros(size, dtype=numpy.float32)
     for token, position in self._order.items():
         ordered_tokens[position] = token
         frequencies[position] = self._df[token]
     result = {"docs": self.docs}
     result["tokens"] = merge_strings(ordered_tokens)
     result["freqs"] = frequencies
     return result
Пример #6
0
 def _generate_tree(self):
     """Assemble the tree dict with docs, tokens and their frequencies.

     Returns:
         A dict with the keys "docs", "tokens" and "freqs", where "freqs" is
         a float32 array holding ``self._df[token]`` for each token, in the
         order returned by ``self.tokens()``.
     """
     token_list = self.tokens()
     values = []
     for token in token_list:
         values.append(self._df[token])
     frequencies = numpy.array(values, dtype=numpy.float32)
     result = {"docs": self.docs}
     result["tokens"] = merge_strings(token_list)
     result["freqs"] = frequencies
     return result
Пример #7
0
 def _generate_tree(self):
     """Assemble the tree dict, flattening the communities into data/indptr arrays.

     All communities are concatenated into one uint32 "data" array;
     ``indptr[i]:indptr[i + 1]`` is the slice of "data" that belongs to
     community ``i``.

     Returns:
         A dict with the keys "data", "indptr" and "elements".
     """
     sizes = [len(members) for members in self.communities]
     data = numpy.zeros(sum(sizes), dtype=numpy.uint32)
     indptr = numpy.zeros(len(self.communities) + 1, dtype=numpy.int64)
     start = 0
     for slot, (members, size) in enumerate(zip(self.communities, sizes), start=1):
         stop = start + size
         data[start:stop] = members
         # indptr[0] stays 0; every later slot holds the running end offset.
         indptr[slot] = stop
         start = stop
     elements = merge_strings(self.id_to_element)
     return {"data": data, "indptr": indptr, "elements": elements}
Пример #8
0
 def _generate_tree(self):
     """Assemble the tree dict, storing the communities as one flat array plus offsets.

     Returns:
         A dict with the keys "data" (all community members concatenated,
         uint32), "indptr" (int64 offsets; community ``i`` occupies
         ``data[indptr[i]:indptr[i + 1]]``) and "elements".
     """
     total = 0
     for members in self.communities:
         total += len(members)
     data = numpy.zeros(total, dtype=numpy.uint32)
     indptr = numpy.zeros(len(self.communities) + 1, dtype=numpy.int64)
     end = 0
     for index, members in enumerate(self.communities):
         begin = end
         end = begin + len(members)
         data[begin:end] = members
         # The first offset is left at 0; slot index + 1 closes community index.
         indptr[index + 1] = end
     result = {"data": data, "indptr": indptr}
     result["elements"] = merge_strings(self.id_to_element)
     return result
Пример #9
0
 def _generate_tree(self):
     """Assemble the tree dict with one flattened entry per scheme in ``self.levels``.

     For every scheme, the per-class level sequences are written back to back
     into a single int32 array, and the class names are kept in the same order.

     Returns:
         A dict ``{"schemes": {name: {"levels": array, "classes": ...}}}``.
     """
     schemes = {}
     for name, class_levels in self.levels.items():
         # The width of one row is taken from the first class of the scheme.
         width = len(next(iter(class_levels.values())))
         count = len(class_levels)
         flat_levels = numpy.zeros(count * width, dtype=numpy.int32)
         labels = []
         for row, (label, values) in enumerate(class_levels.items()):
             labels.append(label)
             flat_levels[row * width:(row + 1) * width] = values
         schemes[name] = {
             "levels": flat_levels,
             "classes": merge_strings(labels),
         }
     return {"schemes": schemes}
Пример #10
0
 def _generate_tree(self):
     """Assemble the tree dict with the keys "cc", "elements" and "buckets".

     Returns:
         A dict holding ``self.id_to_cc`` as is, the elements passed through
         ``merge_strings`` and the buckets passed through
         ``disassemble_sparse_matrix``.
     """
     result = {"cc": self.id_to_cc}
     result["elements"] = merge_strings(self.id_to_element)
     result["buckets"] = disassemble_sparse_matrix(self.id_to_buckets)
     return result
Пример #11
0
Файл: id2vec.py Проект: y1026/ml
 def _generate_tree(self):
     """Assemble the tree dict with the keys "embeddings" and "tokens".

     Returns:
         A dict holding ``self.embeddings`` as is and ``self.tokens`` passed
         through ``merge_strings``.
     """
     result = {}
     result["embeddings"] = self.embeddings
     result["tokens"] = merge_strings(self.tokens)
     return result
Пример #12
0
 def _generate_tree(self):
     """Assemble the tree dict with the keys "cc", "elements" and "buckets".

     Returns:
         A dict with ``self.id_to_cc`` unchanged, the result of
         ``merge_strings`` on the elements and the result of
         ``disassemble_sparse_matrix`` on the buckets.
     """
     components = self.id_to_cc
     packed_elements = merge_strings(self.id_to_element)
     bucket_parts = disassemble_sparse_matrix(self.id_to_buckets)
     return {
         "cc": components,
         "elements": packed_elements,
         "buckets": bucket_parts,
     }
Пример #13
0
 def _generate_tree(self):
     """Assemble the tree dict with the keys "documents", "matrix" and "tokens".

     Returns:
         A dict with the documents and tokens passed through
         ``merge_strings`` and the matrix passed through
         ``disassemble_sparse_matrix``.
     """
     result = {"documents": merge_strings(self._documents)}
     result["matrix"] = disassemble_sparse_matrix(self._matrix)
     result["tokens"] = merge_strings(self.tokens)
     return result
Пример #14
0
 def _generate_tree(self) -> dict:
     """Extend the parent's tree dict with the sorted identifiers.

     Returns:
         The dict produced by the parent ``_generate_tree`` with an extra
         "identifiers" entry.
     """
     parent_tree = super()._generate_tree()
     # Sort first, then pack the sorted list.
     ordered_identifiers = sorted(self._identifiers)
     parent_tree["identifiers"] = merge_strings(ordered_identifiers)
     return parent_tree