def save(self, output, deps=None):
    """
    Write this model's tokens and matrix to `output`.

    :param output: destination passed through to write_model() unchanged.
    :param deps: optional iterable of dependencies forwarded to \
                 generate_meta(); a falsy value means no dependencies.
    """
    dependencies = deps or ()
    self._meta = generate_meta(self.NAME, ast2vec.__version__, *dependencies)
    tree = {
        "tokens": merge_strings(self.tokens),
        "matrix": disassemble_sparse_matrix(self.matrix),
    }
    write_model(self._meta, tree, output)
def save(self, output, deps=None):
    """
    Write this model's repositories and matrix to `output`.

    :param output: destination passed through to write_model() unchanged.
    :param deps: dependencies forwarded to generate_meta(); at least two \
                 must be given.
    :raises ValueError: if `deps` is falsy or holds fewer than two items.
    """
    if not (deps and len(deps) >= 2):
        message = ("You must specify DocumentFrequencies and Id2Vec dependencies "
                   "to save NBOW.")
        raise ValueError(message)
    self._meta = generate_meta(self.NAME, ast2vec.__version__, *deps)
    tree = {
        "repos": merge_strings(self._repos),
        "matrix": disassemble_sparse_matrix(self._matrix),
    }
    write_model(self._meta, tree, output)
def save(self, output, deps: Union[None, list] = None) -> None:
    """
    Write this model's tokens, topics and matrix to `output`.

    :param output: destination passed through to write_model() unchanged.
    :param deps: dependencies forwarded to generate_meta(); when falsy, \
                 the "dependencies" entry of `self.meta` is used instead.
    """
    dependencies = deps or self.meta["dependencies"]
    self._meta = generate_meta(self.NAME, ast2vec.__version__, *dependencies)
    tokens = merge_strings(self.tokens)
    # Missing topics are stored as the literal False rather than merged.
    if self.topics is None:
        topics = False
    else:
        topics = merge_strings(self.topics)
    matrix = disassemble_sparse_matrix(self.matrix)
    tree = {"tokens": tokens, "topics": topics, "matrix": matrix}
    write_model(self._meta, tree, output)
def save(self, output, deps=None):
    """
    Write this model's repositories, matrix and tokens to `output`.

    Nothing is written when `self.tokens` is falsy; a warning is logged
    instead.

    :param output: destination passed through to write_model() unchanged.
    :param deps: dependencies forwarded to generate_meta(); required.
    :raises ValueError: if `deps` is falsy.
    """
    if not deps:
        message = "You must specify DocumentFrequencies dependency to save BOW."
        raise ValueError(message)
    self._meta = generate_meta(self.NAME, ast2vec.__version__, *deps)
    if not self.tokens:
        # The metadata was still regenerated above; only the write is skipped.
        self._log.warning("Did not write %s because the model is empty", output)
        return
    tree = {
        "repos": merge_strings(self._repos),
        "matrix": disassemble_sparse_matrix(self.matrix),
        "tokens": merge_strings(self.tokens),
    }
    write_model(self._meta, tree, output)
def test_disassemble_sparse_matrix_empty(self):
    """Check the disassembled form of an all-zero 10x10 CSR matrix."""
    dense = numpy.zeros((10, 10), dtype=numpy.float32)
    result = disassemble_sparse_matrix(csr_matrix(dense))
    self.assertIsInstance(result, dict)
    for key in ("shape", "format", "data"):
        self.assertIn(key, result)
    self.assertEqual(result["shape"], dense.shape)
    self.assertEqual(result["format"], "csr")
    parts = result["data"]
    self.assertIsInstance(parts, list)
    self.assertEqual(len(parts), 3)
    # With no stored elements the first two arrays are empty and the third
    # contains only zeros.
    for empty_part in parts[:2]:
        self.assertEqual(empty_part.size, 0)
    self.assertTrue((parts[2] == 0).all())
def test_disassemble_sparse_matrix(self):
    """
    Check that a non-empty CSR matrix is disassembled into its shape,
    format name and the (data, indices, indptr) arrays.
    """
    arr = numpy.zeros((10, 10), dtype=numpy.float32)
    numpy.random.seed(0)
    # Each row of `coords` is one (row, column) pair. The two columns must be
    # passed as separate index arrays to set individual cells; indexing with
    # the 2-D array itself selects whole rows and turns the fixture into a
    # dense matrix of ones.
    coords = numpy.random.randint(0, 10, (50, 2))
    arr[coords[:, 0], coords[:, 1]] = 1
    mat = csr_matrix(arr)
    dis = disassemble_sparse_matrix(mat)
    self.assertIsInstance(dis, dict)
    for key in ("shape", "format", "data"):
        self.assertIn(key, dis)
    self.assertEqual(dis["shape"], arr.shape)
    self.assertEqual(dis["format"], "csr")
    self.assertIsInstance(dis["data"], (tuple, list))
    self.assertEqual(len(dis["data"]), 3)
    self.assertTrue((dis["data"][0] == mat.data).all())
    self.assertTrue((dis["data"][1] == mat.indices).all())
    self.assertTrue((dis["data"][2] == mat.indptr).all())
def _generate_tree(self):
    """
    Build the dict holding this model's "tokens" and "matrix" entries.

    :return: dict with the merged tokens and the disassembled matrix.
    """
    tree = {}
    tree["tokens"] = merge_strings(self.tokens)
    tree["matrix"] = disassemble_sparse_matrix(self.matrix)
    return tree