Пример #1
0
 def test_year_notyear_number(self):
     # A plain number phrase passed to the year-aware converter should come
     # back as the ordinary number (999).
     #
     # No try/except wrapper here: unittest already records unexpected
     # exceptions together with their traceback, and letting assertEqual
     # fail directly keeps its expected/actual diff in the report.
     expected = 999
     actual = TextToNumEng.convertTryYear('nine hundred ninety nine')
     self.assertEqual(actual, expected)
Пример #2
0
 def test_year_invalid(self):
     # 'nineteen hundred four' is treated as an invalid year phrase: the
     # converter may reject it either by raising or by returning None.
     try:
         actual = TextToNumEng.convertTryYear('nineteen hundred four')
     except Exception:
         # Raising is an accepted way of rejecting the input.
         return
     # Checked outside the try block so that a failing assertion is not
     # caught by the handler above; with the assertion inside the try, the
     # resulting AssertionError was swallowed and the test could never fail.
     self.assertIsNone(actual)
Пример #3
0
 def test_year_2015(self):
     # "twenty fifteen" is expected to convert to the year 2015.
     self.assertEqual(TextToNumEng.convertTryYear('twenty fifteen'), 2015)
Пример #4
0
 def test_year_1992(self):
     # "nineteen ninety two" is expected to convert to the year 1992.
     self.assertEqual(
         TextToNumEng.convertTryYear('nineteen ninety two'),
         1992,
     )
Пример #5
0
 def test_year_984(self):
     # A three-digit year spoken as "<digit> <two-digit number>" is expected
     # to convert to 984.
     self.assertEqual(TextToNumEng.convertTryYear("nine eighty four"), 984)
Пример #6
0
 def test_year_911(self):
     # Input mixing a number word with literal digits ("nine 11") is
     # expected to convert to 911.
     self.assertEqual(TextToNumEng.convertTryYear("nine 11"), 911)
Пример #7
0
 def test_year_1996(self):
     # "nineteen ninety six" is expected to convert to the year 1996.
     self.assertEqual(
         TextToNumEng.convertTryYear("nineteen ninety six"),
         1996,
     )
Пример #8
0
    def getNormOptions(word_string, extended=True):
        """Collect alternative normalized forms of ``word_string``.

        Args:
            word_string: The token or phrase to normalize.
            extended: When true and ``word_string`` contains more than one
                whitespace-separated word, also produce hyphenated, joined
                and punctuation-stripped variants.

        Returns:
            A ``defaultdict(set)`` keyed by option category. All-digit input
            can yield ``'textnum'`` and ``'textyear'``; any other input can
            yield ``'extended'``, ``'text'`` and ``'num'``. A category is
            only present when at least one option was produced for it.
        """
        options_dict = defaultdict(set)

        if word_string.isdigit():
            word_number = int(word_string)

            # The conversions below are best-effort: a failure simply means
            # that no option is offered for that category. The converter is
            # called before the dict is indexed so that a failure does not
            # leave an empty category behind.

            # Convert digits to words
            try:
                num_text = NumToTextEng.convert(word_number)
                options_dict['textnum'].add(num_text)
            except Exception:
                pass

            # Convert digits to year
            try:
                num_text = NumToTextEng.convertTryYear(word_number)
                options_dict['textyear'].add(num_text)
            except Exception:
                pass
        else:
            words = word_string.split()
            if extended and len(words) > 1:
                options_dict['extended'].update({
                    # Hyphenation
                    '-'.join(words),
                    # All one single word
                    ''.join(words),
                    # Strip punctuation (both sides)
                    word_string.strip(string.punctuation),
                    # Strip left punctuation only
                    word_string.lstrip(string.punctuation),
                    # Strip right punctuation only
                    word_string.rstrip(string.punctuation),
                })

            wstr = word_string
            # Dehyphenation
            if '-' in word_string:
                wstr = word_string.replace('-', ' ')
                options_dict['text'].add(wstr)

            # Convert words to digits (best-effort, see above)
            try:
                num = TextToNumEng.convertTryYear(wstr)
            except Exception:
                num = None
            # A None result means "no conversion"; without this guard the
            # literal string 'None' would be offered as a numeric option.
            if num is not None:
                options_dict['num'].add(str(num))

            # Contractions
            if "'" in word_string:
                # Expand contractions
                cont_options = ContractionsEng.expandOptions(word_string)
            else:
                # Apply contractions
                cont_options = ContractionsEng.contractOptions(word_string)
            if cont_options:
                options_dict['text'].update(cont_options)

        return options_dict