示例#1
0
 def test_corpus(self):
     """Smoke-test the corpus loaders and the download/remove helpers.

     Every loader only has to return something other than ``None``; the
     ``remove`` calls are likewise only checked for a non-``None`` result.
     """
     loaders = (
         countries,
         provinces,
         thai_negations,
         thai_stopwords,
         thai_syllables,
         thai_words,
     )
     for load in loaders:
         self.assertIsNotNone(load())

     # Fetch the "test" corpus before running the removal checks.
     download("test")
     for corpus_name in ("test", "tnc_freq"):
         self.assertIsNotNone(remove(corpus_name))
示例#2
0
    def test_corpus(self):
        """Exercise the corpus accessors and the download/remove life cycle.

        The first half checks the return types of the built-in word lists.
        The second half is order-dependent: it downloads, re-downloads,
        queries and removes the "test" corpus step by step.
        """
        # Built-in word lists and place names are exposed as frozensets.
        for word_list in (
            thai_negations,
            thai_stopwords,
            thai_syllables,
            thai_words,
            countries,
            provinces,
        ):
            self.assertIsInstance(word_list(), frozenset)

        # The detailed province view is a list with as many entries as the
        # plain view.
        self.assertIsInstance(provinces(details=True), list)
        plain_count = len(provinces(details=False))
        detailed_count = len(provinces(details=True))
        self.assertEqual(plain_count, detailed_count)

        for name_list in (thai_female_names, thai_male_names):
            self.assertIsInstance(name_list(), frozenset)

        # URL does not exist, should get 404 response
        missing_page = get_corpus_db(
            "https://example.com/XXXXXX0lkjasd/SXfmskdjKKXXX"
        )
        self.assertIsInstance(missing_page, Response)
        # A string that is not a valid URL gives None.
        self.assertIsNone(get_corpus_db("XXXlkja3sfdXX"))

        # Corpus does not exist: the detail lookup is an empty dict.
        unknown_detail = get_corpus_db_detail("XXXmx3KSXX")
        self.assertEqual(unknown_detail, {})

        # Download the first time, then force a download, then download
        # again while the corpus is already present.
        self.assertTrue(download("test"))
        self.assertTrue(download(name="test", force=True))
        self.assertTrue(download(name="test"))

        # URL does not exist.
        wrong_url_result = download(name="test", url="wrongurl")
        self.assertFalse(wrong_url_result)
        # Corpus name does not exist.
        unknown_name_result = download(name="XxxXXxxx817d37sf")
        self.assertFalse(unknown_name_result)

        # The corpus exists at this point, so both lookups return a value.
        self.assertIsNotNone(get_corpus_db_detail("test"))
        self.assertIsNotNone(get_corpus_path("test"))

        # Removing works once; a second removal finds nothing.
        self.assertTrue(remove("test"))
        self.assertFalse(remove("test"))

        # Querying the path of a non-existing corpus gives None.
        self.assertIsNone(get_corpus_path("XXXkdjfBzc"))

        # Download a specific version, then clean up.
        self.assertTrue(download(name="test", version="0.1"))
        self.assertTrue(remove("test"))
示例#3
0
 def rm(self, argv):
     """Handle ``thainlp data rm``: remove a dataset and print the outcome.

     :param argv: argument list; items from index 3 onward are parsed as
         this sub-command's arguments (a single ``dataset_name``).
     """
     cli = argparse.ArgumentParser(
         usage="thainlp data rm <dataset_name>",
         description="Remove a dataset",
     )
     cli.add_argument("dataset_name", type=str, help="dataset/corpus's name")
     parsed = cli.parse_args(argv[3:])

     # A truthy result from corpus.remove() is reported as a successful
     # removal; anything else is reported as "Not found."
     removed = corpus.remove(parsed.dataset_name)
     print("Removed successfully." if removed else "Not found.")
示例#4
0
    def test_corpus(self):
        """Exercise the corpus accessors, default DB and versioned downloads.

        The first half checks the return types of the built-in word lists.
        The second half is order-dependent: it downloads, queries and
        removes the "test" corpus, then walks through a series of
        explicitly versioned downloads.
        """
        # Built-in word lists and place names are exposed as frozensets.
        for word_list in (
            thai_negations,
            thai_stopwords,
            thai_syllables,
            thai_words,
            countries,
            provinces,
        ):
            self.assertIsInstance(word_list(), frozenset)

        # The detailed province view is a list with as many entries as the
        # plain view.
        self.assertIsInstance(provinces(details=True), list)
        plain_count = len(provinces(details=False))
        detailed_count = len(provinces(details=True))
        self.assertEqual(plain_count, detailed_count)

        # Family names: a frozenset whose elements are strings.
        self.assertIsInstance(thai_family_names(), frozenset)
        sample_family_name = list(thai_family_names())[0]
        self.assertIsInstance(sample_family_name, str)
        for name_list in (thai_female_names, thai_male_names):
            self.assertIsInstance(name_list(), frozenset)

        # URL does not exist, should get 404 response
        missing_page = get_corpus_db(
            "https://example.com/XXXXXX0lkjasd/SXfmskdjKKXXX"
        )
        self.assertIsInstance(missing_page, Response)
        # A string that is not a valid URL gives None.
        self.assertIsNone(get_corpus_db("XXXlkja3sfdXX"))

        # Corpus does not exist: the detail lookup is an empty dict, with
        # or without an explicit version.
        self.assertEqual(get_corpus_db_detail("XXXmx3KSXX"), {})
        versioned_detail = get_corpus_db_detail("XXXmx3KSXX", version="0.2")
        self.assertEqual(versioned_detail, {})

        # Download the first time, then force a download, then download
        # again while the corpus is already present.
        self.assertTrue(download("test"))
        self.assertTrue(download(name="test", force=True))
        self.assertTrue(download(name="test"))

        # URL does not exist.
        wrong_url_result = download(name="test", url="wrongurl")
        self.assertFalse(wrong_url_result)
        # Corpus name does not exist.
        unknown_name_result = download(name="XxxXXxxx817d37sf")
        self.assertFalse(unknown_name_result)

        # The corpus exists at this point, so both lookups return a value.
        self.assertIsNotNone(get_corpus_db_detail("test"))
        self.assertIsNotNone(get_corpus_path("test"))

        # Default-DB lookups: None for "test" and for thainer 1.2, a value
        # for thainer with version 1.5.1 or with no version given.
        self.assertIsNone(get_corpus_default_db("test"))
        self.assertIsNotNone(get_corpus_default_db("thainer", "1.5.1"))
        self.assertIsNotNone(get_corpus_default_db("thainer"))
        self.assertIsNone(get_corpus_default_db("thainer", "1.2"))

        # Removing works once; a second removal finds nothing.
        self.assertTrue(remove("test"))
        self.assertFalse(remove("test"))

        # Querying the path of a non-existing corpus gives None.
        self.assertIsNone(get_corpus_path("XXXkdjfBzc"))

        # These versions are expected to be refused.
        refused_versions = ("0.0", "0.0.0", "0.0.1", "0.0.2", "0.0.3", "0.0.4")
        for refused in refused_versions:
            self.assertFalse(download(name="test", version=refused))

        self.assertIsNotNone(download(name="test", version="0.0.5"))
        self.assertTrue(download("test"))
        self.assertIsNotNone(remove("test"))  # remove existing

        # These versions only need to produce a non-None result.
        for later in ("0.0.6", "0.0.7", "0.0.8", "0.0.9", "0.0.10"):
            self.assertIsNotNone(download(name="test", version=later))

        # Version 0.0.11 is expected to raise with a hash-mismatch message.
        with self.assertRaises(Exception) as context:
            self.assertIsNotNone(download(name="test", version="0.0.11"))
        raised_text = str(context.exception)
        self.assertTrue("Hash does not match expected." in raised_text)

        self.assertIsNotNone(download(name="test", version="0.1"))
        self.assertIsNotNone(remove("test"))
示例#5
0
 def remove(args):
     """Remove the corpus named by ``args.name``; the result is discarded."""
     target = args.name
     corpus.remove(target)