Exemplo n.º 1
0
 def _to_dict_to_save(self):
     """Build the dictionary that gets persisted for this object.

     Returns:
         dict with three entries, in this order: "repository" (stored as
         is), "filenames" (passed through ``merge_strings``) and "uasts"
         (every UAST serialized with ``SerializeToString()`` and the
         resulting list passed through ``merge_strings``).
     """
     tree = {"repository": self.repository}
     tree["filenames"] = merge_strings(self.filenames)
     # UASTs are serialized one by one before being merged.
     serialized = [node.SerializeToString() for node in self.uasts]
     tree["uasts"] = merge_strings(serialized)
     return tree
 def _to_dict(self):
     """Convert this object's fields into a plain dictionary.

     Returns:
         dict with the keys "X" and "y_text" (both passed through
         ``merge_strings``), "y_pos" (stored as is) and "y_uast" (each
         item serialized with ``SerializeToString()`` and then merged
         with ``merge_strings``).
     """
     tree = {"X": merge_strings(self.X)}
     tree["y_text"] = merge_strings(self.y_text)
     tree["y_pos"] = self.y_pos
     # Serialize every UAST first, then merge the serialized list.
     serialized = [node.SerializeToString() for node in self.y_uast]
     tree["y_uast"] = merge_strings(serialized)
     return tree
Exemplo n.º 3
0
 def save(self, output, deps=None):
     """Write the model to ``output``.

     Args:
         output: destination forwarded unchanged to ``write_model``.
         deps: dependencies unpacked into ``generate_meta``; must be
             non-empty.

     Raises:
         ValueError: if ``deps`` is missing or empty.
     """
     if not deps:
         raise ValueError(
             "You must specify DocumentFrequencies dependency to save BOW.")
     self._meta = generate_meta(self.NAME, ast2vec.__version__, *deps)
     # Assemble the payload first so the write call stays short.
     tree = {
         "repos": merge_strings(self._repos),
         "matrix": disassemble_sparse_matrix(self.matrix),
         "tokens": merge_strings(self.tokens),
     }
     write_model(self._meta, tree, output)
Exemplo n.º 4
0
 def save(self, output, deps: Union[None, list] = None) -> None:
     """Write the model to ``output``.

     Args:
         output: destination forwarded unchanged to ``write_model``.
         deps: dependencies unpacked into ``generate_meta``. When missing
             or empty, ``self.meta["dependencies"]`` is used instead.
     """
     deps = deps or self.meta["dependencies"]
     self._meta = generate_meta(self.NAME, ast2vec.__version__, *deps)
     tree = {"tokens": merge_strings(self.tokens)}
     # Absent topics are stored as the literal False instead of a merged
     # string block.
     if self.topics is None:
         tree["topics"] = False
     else:
         tree["topics"] = merge_strings(self.topics)
     tree["matrix"] = disassemble_sparse_matrix(self.matrix)
     write_model(self._meta, tree, output)
Exemplo n.º 5
0
 def save(self, output, deps=None):
     """Write the embeddings and their tokens to ``output``.

     Args:
         output: destination forwarded unchanged to ``write_model``.
         deps: optional dependencies unpacked into ``generate_meta``;
             a missing or empty value is treated as no dependencies.
     """
     deps = deps or ()
     self._meta = generate_meta(self.NAME, ast2vec.__version__, *deps)
     tree = {"embeddings": self.embeddings}
     tree["tokens"] = merge_strings(self.tokens)
     write_model(self._meta, tree, output)
Exemplo n.º 6
0
 def save(self, output, deps=None):
     """Write the tokens and the matrix to ``output``.

     Args:
         output: destination forwarded unchanged to ``write_model``.
         deps: optional dependencies unpacked into ``generate_meta``;
             a missing or empty value is treated as no dependencies.
     """
     deps = deps or ()
     self._meta = generate_meta(self.NAME, ast2vec.__version__, *deps)
     tree = {"tokens": merge_strings(self.tokens)}
     tree["matrix"] = disassemble_sparse_matrix(self.matrix)
     write_model(self._meta, tree, output)
Exemplo n.º 7
0
 def save(self, output, deps=None):
     """Write docs, tokens and their frequencies to ``output``.

     Args:
         output: destination forwarded unchanged to ``write_model``.
         deps: optional dependencies unpacked into ``generate_meta``;
             a missing or empty value is treated as no dependencies.
     """
     deps = deps or ()
     self._meta = generate_meta(self.NAME, ast2vec.__version__, *deps)
     tokens = self.tokens()
     # Frequencies are looked up in self._df in the same order as the
     # tokens and stored as float32.
     freqs = numpy.array(
         [self._df[token] for token in tokens], dtype=numpy.float32)
     tree = {"docs": self.docs}
     tree["tokens"] = merge_strings(tokens)
     tree["freqs"] = freqs
     write_model(self._meta, tree, output)
Exemplo n.º 8
0
 def test_empty_split_save_load_merge(self):
     """Round-trip an empty list through merge_strings, ASDF and split_strings."""
     originals = []
     packed = merge_strings(originals)
     empty_bytes = numpy.array([], dtype="S1")
     assert_array_equal(packed["strings"], empty_bytes)
     empty_lengths = numpy.array([], dtype=int)
     assert_array_equal(packed["lengths"], empty_lengths)
     self.assertIsNone(packed["str"])

     # Write the merged tree to an in-memory ASDF file and read it back.
     stream = BytesIO()
     written = asdf.AsdfFile(packed)
     written.write_to(stream)
     stream.seek(0)
     loaded = asdf.open(stream)

     restored = split_strings(loaded.tree)
     self.assertEqual(originals, restored)
Exemplo n.º 9
0
 def test_merge_strings(self):
     """Check the structure merge_strings returns for three short strings."""
     merged = merge_strings(["a", "bc", "def"])
     self.assertIsInstance(merged, dict)
     for key in ("strings", "lengths"):
         self.assertIn(key, merged)

     # All strings end up concatenated in a single bytes element.
     blob = merged["strings"]
     self.assertIsInstance(blob, numpy.ndarray)
     self.assertEqual(blob.shape, (1, ))
     self.assertEqual(blob[0], b"abcdef")

     # One length per input string, in input order.
     lengths = merged["lengths"]
     self.assertIsInstance(lengths, numpy.ndarray)
     self.assertEqual(lengths.shape, (3, ))
     for index, expected in enumerate((1, 2, 3)):
         self.assertEqual(lengths[index], expected)
Exemplo n.º 10
0
 def save(self, output, deps=None):
     """Write docs, tokens and frequencies to ``output`` unless empty.

     When there are no tokens, nothing is written and a warning is
     logged instead.

     Args:
         output: destination forwarded unchanged to ``write_model``.
         deps: optional dependencies unpacked into ``generate_meta``;
             a missing or empty value is treated as no dependencies.
     """
     deps = deps or ()
     self._meta = generate_meta(self.NAME, 0, *deps)
     tokens = self.tokens()
     freqs = numpy.array(
         [self._df[token] for token in tokens], dtype=numpy.float32)
     if not tokens:
         # An empty model is skipped rather than written out.
         self._log.warning("Did not write %s because the model is empty",
                           output)
         return
     tree = {"docs": self.docs}
     tree["tokens"] = merge_strings(tokens)
     tree["freqs"] = freqs
     write_model(self._meta, tree, output)
Exemplo n.º 11
0
 def _generate_tree(self):
     """Build the tree holding this object's tokens and matrix.

     Returns:
         dict with "tokens" (passed through ``merge_strings``) and
         "matrix" (passed through ``disassemble_sparse_matrix``).
     """
     tree = {"tokens": merge_strings(self.tokens)}
     tree["matrix"] = disassemble_sparse_matrix(self.matrix)
     return tree
Exemplo n.º 12
0
 def test_invalid_merge_strings(self):
     """merge_strings must raise TypeError for a bare string and for ints."""
     invalid_inputs = ("abcd", [0, 1, 2, 3])
     for candidate in invalid_inputs:
         with self.assertRaises(TypeError):
             merge_strings(candidate)
Exemplo n.º 13
0
 def _to_dict_to_save(self):
     """Extend the parent's dictionary with the merged sources.

     Returns:
         The dictionary produced by the parent class's
         ``_to_dict_to_save`` with an extra "sources" entry holding
         ``merge_strings(self.sources)``.
     """
     tree = super(Source, self)._to_dict_to_save()
     tree.update(sources=merge_strings(self.sources))
     return tree