def _to_dict_to_save(self):
    """Assemble the serializable tree of arrays written to disk for this model."""
    serialized_uasts = [node.SerializeToString() for node in self.uasts]
    return {
        "repository": self.repository,
        "filenames": merge_strings(self.filenames),
        "uasts": merge_strings(serialized_uasts),
    }
def _to_dict(self):
    """Build the dict of packed arrays representing this model's payload."""
    packed_uasts = merge_strings(
        [tree.SerializeToString() for tree in self.y_uast])
    return {
        "X": merge_strings(self.X),
        "y_text": merge_strings(self.y_text),
        "y_pos": self.y_pos,
        "y_uast": packed_uasts,
    }
def save(self, output, deps=None):
    """Persist the BOW model to *output*.

    :param output: Destination path for the serialized model.
    :param deps: Dependency metadata; must be non-empty (the
        DocumentFrequencies dependency is required).
    :raises ValueError: if *deps* is None or empty.
    """
    if not deps:
        raise ValueError(
            "You must specify DocumentFrequencies dependency to save BOW.")
    self._meta = generate_meta(self.NAME, ast2vec.__version__, *deps)
    tree = {
        "repos": merge_strings(self._repos),
        "matrix": disassemble_sparse_matrix(self.matrix),
        "tokens": merge_strings(self.tokens),
    }
    write_model(self._meta, tree, output)
def save(self, output, deps: Union[None, list] = None) -> None:
    """Persist the model to *output*.

    When *deps* is not supplied, the dependencies already recorded in the
    existing metadata are reused.
    """
    if not deps:
        deps = self.meta["dependencies"]
    self._meta = generate_meta(self.NAME, ast2vec.__version__, *deps)
    # "topics" is stored as False when no topic strings are attached.
    topics_entry = (merge_strings(self.topics)
                    if self.topics is not None else False)
    tree = {
        "tokens": merge_strings(self.tokens),
        "topics": topics_entry,
        "matrix": disassemble_sparse_matrix(self.matrix),
    }
    write_model(self._meta, tree, output)
def save(self, output, deps=None):
    """Store the embeddings and their token index in *output*."""
    deps = deps or tuple()
    self._meta = generate_meta(self.NAME, ast2vec.__version__, *deps)
    tree = {
        "embeddings": self.embeddings,
        "tokens": merge_strings(self.tokens),
    }
    write_model(self._meta, tree, output)
def save(self, output, deps=None):
    """Store the token list and the disassembled sparse matrix in *output*."""
    deps = deps or tuple()
    self._meta = generate_meta(self.NAME, ast2vec.__version__, *deps)
    tree = {
        "tokens": merge_strings(self.tokens),
        "matrix": disassemble_sparse_matrix(self.matrix),
    }
    write_model(self._meta, tree, output)
def save(self, output, deps=None):
    """Write the document frequencies model to *output*."""
    deps = deps or tuple()
    self._meta = generate_meta(self.NAME, ast2vec.__version__, *deps)
    vocab = self.tokens()
    frequencies = numpy.array([self._df[tok] for tok in vocab],
                              dtype=numpy.float32)
    tree = {
        "docs": self.docs,
        "tokens": merge_strings(vocab),
        "freqs": frequencies,
    }
    write_model(self._meta, tree, output)
def test_empty_split_save_load_merge(self):
    """A merge/split round-trip through ASDF must preserve an empty list."""
    merged = merge_strings([])
    assert_array_equal(merged["strings"], numpy.array([], dtype="S1"))
    assert_array_equal(merged["lengths"], numpy.array([], dtype=int))
    self.assertIsNone(merged["str"])
    stream = BytesIO()
    asdf.AsdfFile(merged).write_to(stream)
    stream.seek(0)
    reloaded = asdf.open(stream)
    restored = split_strings(reloaded.tree)
    self.assertEqual([], restored)
def test_merge_strings(self):
    """merge_strings concatenates the strings and records each length."""
    merged = merge_strings(["a", "bc", "def"])
    self.assertIsInstance(merged, dict)
    for key in ("strings", "lengths"):
        self.assertIn(key, merged)
        self.assertIsInstance(merged[key], numpy.ndarray)
    self.assertEqual(merged["strings"].shape, (1,))
    self.assertEqual(merged["strings"][0], b"abcdef")
    self.assertEqual(merged["lengths"].shape, (3,))
    for index, expected in enumerate((1, 2, 3)):
        self.assertEqual(merged["lengths"][index], expected)
def save(self, output, deps=None):
    """Write the model to *output*, skipping (with a warning) empty models."""
    deps = deps or tuple()
    # NOTE(review): the version argument is hard-coded to 0 here while
    # sibling save() implementations pass the package version — confirm
    # this is intentional.
    self._meta = generate_meta(self.NAME, 0, *deps)
    vocab = self.tokens()
    frequencies = numpy.array([self._df[tok] for tok in vocab],
                              dtype=numpy.float32)
    if not vocab:
        self._log.warning("Did not write %s because the model is empty",
                          output)
        return
    tree = {
        "docs": self.docs,
        "tokens": merge_strings(vocab),
        "freqs": frequencies,
    }
    write_model(self._meta, tree, output)
def _generate_tree(self):
    """Return the tree of packed arrays to serialize for this model."""
    packed_tokens = merge_strings(self.tokens)
    packed_matrix = disassemble_sparse_matrix(self.matrix)
    return {"tokens": packed_tokens, "matrix": packed_matrix}
def test_invalid_merge_strings(self):
    """merge_strings rejects a bare string and lists of non-strings."""
    for bad_input in ("abcd", [0, 1, 2, 3]):
        with self.assertRaises(TypeError):
            merge_strings(bad_input)
def _to_dict_to_save(self):
    """Extend the parent's save tree with the packed source code strings.

    :return: dict produced by the parent class with an extra "sources"
        entry holding the merged source strings.
    """
    # Zero-argument super() — the Python 3 idiom; equivalent to the
    # legacy super(Source, self) form inside this class body.
    save_dict = super()._to_dict_to_save()
    save_dict["sources"] = merge_strings(self.sources)
    return save_dict