示例#1
0
    def test_word_vector_resource(self):
        """Load a generated word-vector file and build an embedding from it.

        A small text file with one ``word v1 ... vN`` line per word is
        written under the storage's ``external`` directory, read back with
        ``WordVector.load()``, and then passed to
        ``Vocabulary.make_embedding()``. The generated file is removed when
        the test finishes, whether it passes or fails.
        """
        path = os.path.join(os.path.dirname(__file__), "./data")
        storage = Storage(path)

        vocab = Vocabulary()
        vocab.set(["you", "loaded", "word", "vector", "now"])

        vector_size = 50
        # Only three of the five vocabulary words get a vector.
        word2vec = [
            "you " + " ".join(["0"] * vector_size),
            "word " + " ".join(["1"] * vector_size),
            "now " + " ".join(["2"] * vector_size),
        ]
        word2vec_file = Path(storage.path("external/word2vec_dummyr.txt"))
        try:
            with word2vec_file.open(mode="w", encoding="utf-8") as f:
                f.write("\n".join(word2vec))

            wv = WordVector(word2vec_file)
            key_vector = wv.load()
            known_words = vocab.get()
            for k in key_vector:
                # assertIn reports the missing key and the container on
                # failure, unlike assertTrue(k in ...).
                self.assertIn(k, known_words)
                self.assertEqual(len(key_vector[k]), vector_size)

            embed = vocab.make_embedding(word2vec_file)
            self.assertEqual(embed.shape, (len(vocab.get()), vector_size))
        finally:
            # Do not leave the generated fixture behind in the data folder.
            if word2vec_file.exists():
                word2vec_file.unlink()
示例#2
0
 def test_chazutsu(self):
     """Download DUC2004 through chazutsu and read it back via Storage.

     This is a smoke test: it prints the first rows of the loaded data
     and makes no assertions. The downloaded dataset directory is always
     removed afterwards.
     NOTE(review): presumably needs network access for the download.
     """
     path = os.path.join(os.path.dirname(__file__), "../data")
     storage = Storage(path)
     r = chazutsu.datasets.DUC2004().download(storage.path("raw"))
     try:
         df = storage.chazutsu(r.root).data()
         print(df.head(5))
     finally:
         # Clean up even when loading or printing fails, so a broken run
         # does not leave the downloaded files under the data directory.
         shutil.rmtree(r.root)
示例#3
0
    def test_read_file(self):
        """Check that ``DataFile.fetch()`` yields the same items as ``to_array()``.

        Both results are materialized as lists and compared by length and
        then element by element.
        """
        path = os.path.join(os.path.dirname(__file__), "../data")
        storage = Storage(path)
        csv = DataFile(storage.path("raw/sample_dataset.csv"))

        content = list(csv.to_array())
        fetched = list(csv.fetch(progress=True))
        # zip() stops at the shorter input, so without this check the test
        # would pass even if fetch() returned fewer rows (or none at all).
        self.assertEqual(len(content), len(fetched))
        for c, f in zip(content, fetched):
            self.assertEqual(c, f)
示例#4
0
    def test_convert(self):
        """Exercise ``DataFile.convert()`` with each of its keyword options.

        Covers changing the data directory, adding an attribute suffix,
        renaming an existing attribute, and changing the file extension.
        Each resulting ``path`` is compared with the expected location
        after both sides go through ``resolve``.
        """
        data_root = os.path.join(os.path.dirname(__file__), "../data")
        storage = Storage(data_root)
        source = DataFile(storage.path("raw/sample_dataset.csv"))

        def assert_same_path(data_file, expected):
            # Compare resolved forms of both paths.
            self.assertEqual(resolve(data_file.path), resolve(expected))

        # raw -> interim
        assert_same_path(
            source.convert(data_dir_to="interim"),
            os.path.join(data_root, "./interim/sample_dataset.csv"))

        # Append an attribute to the file name.
        with_attribute = source.convert(add_attribute="preprocessed")
        assert_same_path(
            with_attribute,
            storage.path("raw/sample_dataset__preprocessed.csv"))

        # Rename the attribute that was just added.
        assert_same_path(
            with_attribute.convert(
                attribute_to={"preprocessed": "converted"}),
            storage.path("raw/sample_dataset__converted.csv"))

        # Swap the extension.
        assert_same_path(
            source.convert(ext_to=".txt"),
            storage.path("raw/sample_dataset.txt"))
示例#5
0
 def test_path(self):
     """Check that ``Storage.path("raw")`` points at ``<root>/raw``.

     Both sides are passed through ``resolve`` before comparison.
     """
     data_root = os.path.join(os.path.dirname(__file__), "../../data")
     storage = Storage(data_root)
     expected = os.path.join(data_root, "raw")
     actual = storage.path("raw")
     self.assertEqual(resolve(actual), resolve(expected))