Exemplo n.º 1
0
    def test_corpus(self):
        """Exercise corpus loaders and the download/remove life cycle."""
        # Word-list loaders are all expected to hand back frozensets.
        for load_words in (
            thai_negations,
            thai_stopwords,
            thai_syllables,
            thai_words,
            countries,
            provinces,
        ):
            self.assertIsInstance(load_words(), frozenset)

        # The detailed province listing is a list with one entry per province.
        self.assertIsInstance(provinces(details=True), list)
        plain_count = len(provinces(details=False))
        detailed_count = len(provinces(details=True))
        self.assertEqual(plain_count, detailed_count)

        self.assertIsInstance(thai_family_names(), frozenset)
        first_family_name = list(thai_family_names())[0]
        self.assertIsInstance(first_family_name, str)
        for load_names in (thai_female_names, thai_male_names):
            self.assertIsInstance(load_names(), frozenset)

        # URL does not exist, should get 404 response
        missing_page = get_corpus_db(
            "https://example.com/XXXXXX0lkjasd/SXfmskdjKKXXX"
        )
        self.assertIsInstance(missing_page, Response)
        # Invalid URL
        self.assertIsNone(get_corpus_db("XXXlkja3sfdXX"))

        # Looking up a corpus that does not exist yields an empty dict.
        unknown_corpus = "XXXmx3KSXX"
        self.assertEqual(get_corpus_db_detail(unknown_corpus), {})
        self.assertEqual(
            get_corpus_db_detail(unknown_corpus, version="0.2"), {}
        )

        # Download / query / remove round trip for the "test" corpus.
        self.assertTrue(download("test"))  # first download
        self.assertTrue(download(name="test", force=True))  # forced
        self.assertTrue(download(name="test"))  # already present
        self.assertFalse(download(name="test", url="wrongurl"))  # bad URL
        self.assertFalse(download(name="XxxXXxxx817d37sf"))  # unknown name
        self.assertIsNotNone(get_corpus_db_detail("test"))
        self.assertIsNotNone(get_corpus_path("test"))
        self.assertTrue(remove("test"))  # present, so removal succeeds
        self.assertFalse(remove("test"))  # already gone
        self.assertIsNone(get_corpus_path("XXXkdjfBzc"))  # unknown corpus

        # These version strings are expected to be refused.
        for rejected in ("0.0", "0.0.0", "0.0.1", "0.0.2", "0.0.3", "0.0.4"):
            self.assertFalse(download(name="test", version=rejected))
        self.assertIsNotNone(download(name="test", version="0.0.5"))
        self.assertTrue(download("test"))
        self.assertIsNotNone(remove("test"))  # remove existing

        for accepted in ("0.0.6", "0.0.7", "0.0.8", "0.0.9", "0.0.10"):
            self.assertIsNotNone(download(name="test", version=accepted))

        # Version 0.0.11 is expected to fail the hash check.
        with self.assertRaises(Exception) as raised:
            self.assertIsNotNone(download(name="test", version="0.0.11"))
        failure_text = str(raised.exception)
        self.assertTrue("Hash does not match expected." in failure_text)

        self.assertIsNotNone(download(name="test", version="0.1"))
        self.assertIsNotNone(remove("test"))
Exemplo n.º 2
0
def tag_provinces(tokens: List[str]) -> List[Tuple[str, str]]:
    """
    Recognize names of Thailand provinces in a list of tokens.

    Each token is checked for membership in the collection returned by
    ``provinces()``; the match is exact and neighbouring tokens are not
    taken into account.

    :param list[str] tokens: a list of words
    :return: a list of ``(token, tag)`` tuples in IOB format, where the
        tag is ``B-LOCATION`` for a token found in the province list and
        ``O`` for every other token
    :rtype: list[tuple[str, str]]

    :Example:
    ::

        from pythainlp.tag import tag_provinces

        text = ['หนองคาย', 'น่าอยู่']
        tag_provinces(text)
        # output: [('หนองคาย', 'B-LOCATION'), ('น่าอยู่', 'O')]

        text = ['อำเภอ', 'ฝาง','เป็น','ส่วน','หนึ่ง','ของ', 'จังหวัด', \\
            'เชียงใหม่']
        tag_provinces(text)
        # output: [('อำเภอ', 'O'), ('ฝาง', 'O'), ('เป็น', 'O'), ('ส่วน', 'O'),
        #   ('หนึ่ง', 'O'), ('ของ', 'O'), ('จังหวัด', 'O'),
        #   ('เชียงใหม่', 'B-LOCATION')]
    """
    # Load the province names once, outside the per-token membership test.
    province_list = provinces()

    return [
        (token, "B-LOCATION" if token in province_list else "O")
        for token in tokens
    ]
Exemplo n.º 3
0
    def test_corpus(self):
        """Check loader return types and the "test" corpus life cycle."""
        # Every word-list loader is expected to return a frozenset.
        frozenset_loaders = (
            thai_negations,
            thai_stopwords,
            thai_syllables,
            thai_words,
            countries,
            provinces,
            thai_female_names,
            thai_male_names,
        )
        for load in frozenset_loaders:
            self.assertIsInstance(load(), frozenset)

        # corpus does not exist
        self.assertEqual(get_corpus_db_detail("XXX"), {})

        first_download = download("test")  # download the first time
        self.assertTrue(first_download)
        forced_download = download(name="test", force=True)
        self.assertTrue(forced_download)
        repeat_download = download(name="test")  # corpus already present
        self.assertTrue(repeat_download)

        bad_url_download = download(name="test", url="wrongurl")
        self.assertFalse(bad_url_download)  # URL not exist
        unknown_download = download(name="XxxXXxxx817d37sf")
        self.assertFalse(unknown_download)  # corpus name not exist

        self.assertIsNotNone(get_corpus_db_detail("test"))  # corpus exists
        self.assertTrue(remove("test"))  # remove existing
        self.assertFalse(remove("test"))  # remove non-existing

        # Download a specific version, then clean up again.
        self.assertTrue(download(name="test", version="0.1"))
        self.assertTrue(remove("test"))
Exemplo n.º 4
0
 def test_corpus(self):
     """Smoke-test the corpus loaders, then download and remove corpora."""
     loaders = (
         countries,
         provinces,
         thai_negations,
         thai_stopwords,
         thai_syllables,
         thai_words,
     )
     # Each loader only has to return something other than None.
     for load in loaders:
         self.assertIsNotNone(load())

     # The download result itself is not checked here.
     download("test")
     for corpus_name in ("test", "tnc_freq"):
         self.assertIsNotNone(remove(corpus_name))
Exemplo n.º 5
0
 def test_corpus(self):
     """Verify that loaders return data and that remove() returns a value."""
     # None of the corpus loaders may return None.
     for fetch_corpus in (
         countries,
         provinces,
         thai_negations,
         thai_stopwords,
         thai_syllables,
         thai_words,
     ):
         loaded = fetch_corpus()
         self.assertIsNotNone(loaded)

     # Fetch the "test" corpus (result unchecked), then remove two corpora.
     download("test")
     removed_test = remove("test")
     self.assertIsNotNone(removed_test)
     removed_tnc = remove("tnc_freq")
     self.assertIsNotNone(removed_tnc)
Exemplo n.º 6
0
 def test_corpus(self):
     """Check the loaders, corpus metadata lookup, download and removal."""
     # Every loader must return something other than None.
     for load in (
         countries,
         provinces,
         thai_negations,
         thai_stopwords,
         thai_syllables,
         thai_words,
         thai_female_names,
         thai_male_names,
     ):
         self.assertIsNotNone(load())

     # An unknown corpus name yields an empty detail dict.
     unknown_detail = get_corpus_db_detail("XXX")
     self.assertEqual(unknown_detail, {})

     # Here download() is expected to return None, forced or not.
     self.assertIsNone(download("test"))
     self.assertIsNone(download("test", force=True))

     known_detail = get_corpus_db_detail("test")
     self.assertIsNotNone(known_detail)

     # The first removal returns a value; the second one is falsy.
     self.assertIsNotNone(remove("test"))
     self.assertFalse(remove("test"))
Exemplo n.º 7
0
def tag_provinces(tokens: List[str]) -> List[Tuple[str, str]]:
    """
    Tag the tokens that are names of Thailand provinces.

    Takes a list of words and returns a list of ``(word, tag)`` tuples:
    the tag is ``B-LOCATION`` when the word is found in the collection
    returned by ``provinces()`` and ``O`` otherwise.

    Example::
     >>> text = ['หนองคาย', 'น่าอยู่']
     >>> tag_provinces(text)
     [('หนองคาย', 'B-LOCATION'), ('น่าอยู่', 'O')]
    """
    known_provinces = provinces()
    return [
        (word, "B-LOCATION") if word in known_provinces else (word, "O")
        for word in tokens
    ]
Exemplo n.º 8
0
def tag_provinces(tokens: List[str]) -> List[Tuple[str, str]]:
    """
    Mark the tokens that name a Thailand province.

    Matching is an exact membership test against the collection returned
    by ``provinces()``; no surrounding context is considered.

    :param list[str] tokens: a list of words
    :return: a list of tuples pairing every token with its IOB tag,
        ``B-LOCATION`` for a province name and ``O`` for anything else
    :rtype: list[tuple[str, str]]

    :Example:
    ::

        from pythainlp.tag import tag_provinces

        text = ['หนองคาย', 'น่าอยู่']
        tag_provinces(text)
        # output: [('หนองคาย', 'B-LOCATION'), ('น่าอยู่', 'O')]
    """
    known_provinces = provinces()
    return [
        (word, "B-LOCATION" if word in known_provinces else "O")
        for word in tokens
    ]
Exemplo n.º 9
0
def update():
    """
    Rebuild the module-level place ``p`` from the current getter values.

    Both province and district are used when neither ``getprovince()`` nor
    ``getdistrict()`` returns an empty string; only the province is used
    when just the province is non-empty; otherwise a hard-coded default
    place is used.
    """
    global p
    # NOTE(review): the getters are called again when building Place instead
    # of reusing the values that were just checked — presumably they are
    # cheap and return the same value between calls; confirm.
    if getprovince() != '' and getdistrict() != '':
        p = Place(province=getprovince(), amphoe=getdistrict())
    elif getprovince() != '':
        # Province known but district empty: build the place from it alone.
        p = Place(province=getprovince())
    else:
        # No province available: fall back to the hard-coded default place.
        p = Place(province="หนองคาย", amphoe="เมืองหนองคาย")


# Forecast client built from `key` and the current place `p`; now() below
# calls its forecast_daily().
# NOTE(review): `key` and `p` must already be defined before this line runs —
# their definitions are not visible in this part of the file.
d = PyTMD(key, p)
# Captures (non-greedily) the text between <LOCATION> and </LOCATION> tags.
pattern = r'\<LOCATION\>(.*?)\<\/LOCATION\>'
tokenizer = RegexpTokenizer(pattern)
# presumably a named-entity tagger for Thai text — confirm against its import
ner = ThaiNameTagger()
# Result of provinces(), loaded once when the module is imported.
province_list = provinces()


def now():
    global d, p
    data = d.forecast_daily(day("today"))[0]['forecasts'][0]['data']
    text = """
    สภาพอากาศที่{place}ตอนนี้ มีสภาพอากาศ{cond}
    มีอุณหภูมิ {temp} องศาเซลเซียส
    อุณหภูมิสูงสุด {temp_max} องศาเซลเซียส
    และมีอุณหภูมิต่ำสุด {temp_min} องศาเซลเซียสค่ะ
    """.format(place=p.province,
               cond=cond2txt(data['cond']),
               temp=int(data['tc']),
               temp_max=int(data['tc_max']),
               temp_min=int(data['tc_min'])).replace("    ", "")