Пример #1
0
 def test_prepare_drop_hyphen_and_apostrophe_at_start_or_end_word(self):
     """Hyphens/apostrophes on the edge of a word must not reach the result."""
     raw_text = 'date some- \'text ’date text'
     expected = {'some': 1, 'text': 2, 'date': 2}

     self.stream = StringIO(raw_text)
     counted = DictionaryForText(self.stream).prepare()

     self.assertEqual(expected, counted)
Пример #2
0
 def test_prepare_simple(self):
     """Words split by spaces and a newline are counted per word."""
     self.stream = StringIO('text date \nand more text')
     analyzer = DictionaryForText(self.stream)
     expected = {'text': 2, 'date': 1, 'and': 1, 'more': 1}
     actual = analyzer.prepare()
     self.assertEqual(expected, actual)
Пример #3
0
 def test_prepare_drop_end_s(self):
     """'cats' is folded into 'cat' and reported by get_drop_end_s()."""
     self.stream = StringIO('two cats are cat and cat')
     analyzer = DictionaryForText(self.stream)
     expected_counts = {'two': 1, 'cat': 3, 'are': 1, 'and': 1}
     expected_dropped = {'cats': 'cat'}

     counts = analyzer.prepare()

     self.assertEqual(expected_counts, counts)
     self.assertEqual(expected_dropped, analyzer.get_drop_end_s())
Пример #4
0
 def test_prepare_drop_proper_name(self):
     """'Murzik' and 'Venik' are excluded and listed as proper names."""
     self.stream = StringIO('Two cats are two tails, Murzik and Venik.')
     analyzer = DictionaryForText(self.stream)
     expected_counts = {
         'two': 2, 'cats': 1, 'are': 1, 'tails': 1, 'and': 1,
     }
     expected_names = {'Murzik': 1, 'Venik': 1}

     counts = analyzer.prepare()

     self.assertEqual(expected_counts, counts)
     self.assertEqual(expected_names, analyzer.get_drop_proper_name())
Пример #5
0
    def test_prepare_text_drop_short_words(self):
        """'is' and 'a' are left out of the counts and reported as short."""
        self.stream = StringIO('cat is a word')
        analyzer = DictionaryForText(self.stream)
        expected_counts = {'cat': 1, 'word': 1}
        expected_short = {'is': 1, 'a': 1}

        counts = analyzer.prepare()

        self.assertEqual(expected_counts, counts)
        self.assertEqual(expected_short, analyzer.get_drop_short())
Пример #6
0
 def test_prepare_text_with_punctuation_mark_at_end(self):
     """Punctuation after a word does not affect which word is counted."""
     raw_text = 'text, date. and? more! text: text; text…; text — text.'
     expected = {'text': 6, 'date': 1, 'and': 1, 'more': 1}

     self.stream = StringIO(raw_text)
     analyzer = DictionaryForText(self.stream)

     self.assertEqual(expected, analyzer.prepare())
Пример #7
0
 def test_prepare_drop_end_apostrophe_s(self):
     """The possessive "cat's" (and plural 'cats') are counted as 'cat'."""
     self.stream = StringIO('that cat and this cat are cat\'s cats')
     analyzer = DictionaryForText(self.stream)
     expected_counts = {
         'that': 1, 'cat': 4, 'and': 1, 'this': 1, 'are': 1,
     }
     expected_dropped = {'cat\'s': 'cat'}

     counts = analyzer.prepare()

     self.assertEqual(expected_counts, counts)
     self.assertEqual(
         expected_dropped, analyzer.get_drop_end_apostrophe_s()
     )
Пример #8
0
 def test_prepare_text_with_punctuation_marks_and_other_symbols(self):
     """Brackets, quotes, dashes and a leading digit are stripped from words."""
     self.stream = StringIO('''
         text, «date». and? {more}! [text]: "text"; text…; (text)
         “text” — text 2date
     ''')
     expected = {'text': 7, 'date': 2, 'and': 1, 'more': 1}
     analyzer = DictionaryForText(self.stream)
     actual = analyzer.prepare()
     self.assertEqual(expected, actual)
Пример #9
0
 def test_prepare_drop_end_es(self):
     """'classes' is merged into 'class' and reported by get_drop_end_es()."""
     self.stream = StringIO('classes are object of class class')
     analyzer = DictionaryForText(self.stream)
     expected_counts = {'are': 1, 'object': 1, 'class': 3}
     expected_dropped = {'classes': 'class'}

     counts = analyzer.prepare()

     self.assertEqual(expected_counts, counts)
     self.assertEqual(expected_dropped, analyzer.get_drop_end_es())
Пример #10
0
 def test_prepare_drop_end_ed(self):
     """Check that prepare() merges words ending in 'ed' into their base.

     'called' and 'created' must be absent from the counts, their
     occurrences must be added to 'call' and 'create', and the mapping
     must be reported by get_drop_end_ed().
     """
     self.stream = StringIO(
         'the call called. create it. it will be created.'
     )
     dic = DictionaryForText(self.stream)
     words = dic.prepare()
     self.assertEqual(2, words['call'])
     self.assertEqual(2, words['create'])
     # assertNotIn names the offending key and container on failure,
     # whereas assertFalse(x in y) only reports "True is not false".
     self.assertNotIn('called', words)
     self.assertNotIn('created', words)
     self.assertEqual(
         {'called': 'call', 'created': 'create'}, dic.get_drop_end_ed()
     )
Пример #11
0
    def test_prepare_drop_end_ing(self):
        """Check that prepare() merges 'running' into 'run' only.

        'thing' and 'during' must keep their own entries even though the
        text also contains 'th' and 'dur'; only 'running' is reported by
        get_drop_end_ing().
        """
        self.stream = StringIO(
            'th the thing are running during I am run too. Music is dur'
        )
        dic = DictionaryForText(self.stream)
        words = dic.prepare()
        self.assertEqual(1, words['thing'])
        self.assertEqual(1, words['during'])
        self.assertEqual(2, words['run'])

        # assertNotIn names the offending key and container on failure,
        # whereas assertFalse(x in y) only reports "True is not false".
        self.assertNotIn('running', words)
        self.assertEqual(
            {'running': 'run'}, dic.get_drop_end_ing()
        )
Пример #12
0
 def test_prepare_drop_end_ies(self):
     """'goodies' is merged into 'goody' and reported by get_drop_end_ies()."""
     raw_text = 'those goodies are one goody for you and one one for you'
     expected_counts = {
         'those': 1, 'are': 1, 'one': 3, 'goody': 2, 'for': 2,
         'you': 2, 'and': 1,
     }
     expected_dropped = {'goodies': 'goody'}

     self.stream = StringIO(raw_text)
     analyzer = DictionaryForText(self.stream)
     counts = analyzer.prepare()

     self.assertEqual(expected_counts, counts)
     self.assertEqual(expected_dropped, analyzer.get_drop_end_ies())
Пример #13
0
 def test_drop_end_apostrophe_s(self):
     """Both the straight and the curly apostrophe forms fold into 'item'."""
     end_key = 'end_apostrophe_s'
     source = {'item\'s': 2, 'item': 3, 'class': 4, 'item’s': 5}
     expected_keep = {'item': 10, 'class': 4}
     expected_drop = {'item\'s': 'item', 'item’s': 'item'}

     keep, drop = DictionaryForText._drop_ends(source, end_key)

     self.assertEqual(expected_keep, keep)
     self.assertEqual(expected_drop, drop[end_key])
Пример #14
0
    def test_get_content_after_close_stream(self):
        """The `text` attribute still equals the input after stream close."""
        original = 'Some text date.'
        self.stream = StringIO(original)
        analyzer = DictionaryForText(self.stream)

        # Close the stream first; `text` is read only afterwards.
        self.stream.close()

        self.assertEqual(original, analyzer.text)
Пример #15
0
 def test_drop_end_es(self):
     """For 'end_es', 'classes' folds into 'class' but 'drives' is kept."""
     end_key = 'end_es'
     source = {'drive': 2, 'drives': 3, 'class': 4, 'classes': 5}
     expected_keep = {'drive': 2, 'drives': 3, 'class': 9}
     expected_drop = {'classes': 'class'}

     keep, drop = DictionaryForText._drop_ends(source, end_key)

     self.assertEqual(expected_keep, keep)
     self.assertEqual(expected_drop, drop[end_key])
Пример #16
0
 def test_drop_end_ies(self):
     """For 'end_ies', 'entities' folds into 'entity' with summed counts."""
     end_key = 'end_ies'
     source = {'entity': 2, 'entities': 3, 'class': 4}
     expected_keep = {'entity': 5, 'class': 4}
     expected_drop = {'entities': 'entity'}

     keep, drop = DictionaryForText._drop_ends(source, end_key)

     self.assertEqual(expected_keep, keep)
     self.assertEqual(expected_drop, drop[end_key])
Пример #17
0
 def test_get_content(self):
     """The `text` attribute equals the full content of the given stream."""
     original = 'Some text date.'
     self.stream = StringIO(original)
     analyzer = DictionaryForText(self.stream)
     self.assertEqual(original, analyzer.text)
Пример #18
0
 def test_drop_end_s(self):
     """For 'end_s', 'items' and 'drives' fold into their singular forms."""
     end_key = 'end_s'
     source = {'items': 2, 'item': 3, 'class': 4, 'drive': 5, 'drives': 6}
     expected_keep = {'item': 5, 'class': 4, 'drive': 11}
     expected_drop = {'items': 'item', 'drives': 'drive'}

     keep, drop = DictionaryForText._drop_ends(source, end_key)

     self.assertEqual(expected_keep, keep)
     self.assertEqual(expected_drop, drop[end_key])
Пример #19
0
 def test_prepare_camel_case_words(self):
     """'BlockBreakEvent' is split and its count added to each part."""
     source = {
         'BlockBreakEvent': 1, 'Event': 1, 'when': 1,
         'Block': 1, 'Break': 1,
     }
     expected = {'Block': 2, 'Break': 2, 'Event': 2, 'when': 1}

     result = DictionaryForText._prepare_camel_case_words(source)

     self.assertEqual(expected, result)
Пример #20
0
    def test_has_end_ed(self):
        """_end_ed_checker is true for '-ed' forms and false for base words."""
        checker = DictionaryForText._end_ed_checker

        for word in ('called', 'created', 'dropped'):
            self.assertTrue(checker(word))

        for word in ('call', 'create', 'cat'):
            self.assertFalse(checker(word))
Пример #21
0
 def test_exceptions(self):
     """Look-alike endings ('speed', 'goods', 'https') are left untouched.

     Only 'seeds' and 'bees' are folded (via 'end_s'); nothing is dropped
     for 'end_ed'.
     """
     source = {
         'seed': 2, 'see': 3, 'seeds': 1, 'spe': 4, 'speed': 5,
         'bee': 2, 'bees': 3, 'be': 4, 'goods': 2, 'good': 3,
         'http': 2, 'https': 3,
     }
     end_keys = ('end_ed', 'end_s', 'end_es')
     expected_keep = {
         'seed': 3, 'see': 3, 'spe': 4, 'speed': 5, 'bee': 5, 'be': 4,
         'goods': 2, 'good': 3, 'http': 2, 'https': 3,
     }

     keep, drop = DictionaryForText._drop_ends(source, end_keys)

     self.assertEqual(expected_keep, keep)
     self.assertEqual({'seeds': 'seed', 'bees': 'bee'}, drop['end_s'])
     self.assertEqual({}, drop['end_ed'])
Пример #22
0
 def test_drop_end_ing(self):
     """For 'end_ing', '-ing' forms fold into their base; 'string' stays."""
     end_key = 'end_ing'
     source = {
         'make': 1, 'making': 1,
         'running': 2, 'run': 2,
         'craft': 4, 'crafting': 4,
         'string': 5,
     }
     expected_keep = {'make': 2, 'run': 4, 'craft': 8, 'string': 5}
     expected_drop = {
         'making': 'make',
         'running': 'run',
         'crafting': 'craft',
     }

     keep, drop = DictionaryForText._drop_ends(source, end_key)

     self.assertEqual(expected_keep, keep)
     self.assertEqual(expected_drop, drop[end_key])
Пример #23
0
 def test_drop_end_ed(self):
     """For 'end_ed', '-ed' forms fold into their base with summed counts."""
     end_key = 'end_ed'
     source = {
         'called': 2, 'call': 3,
         'word': 1,
         'create': 4, 'created': 5,
         'drop': 1, 'dropped': 1,
     }
     expected_keep = {'call': 5, 'word': 1, 'create': 9, 'drop': 2}
     expected_drop = {
         'called': 'call',
         'created': 'create',
         'dropped': 'drop',
     }

     keep, drop = DictionaryForText._drop_ends(source, end_key)

     self.assertEqual(expected_keep, keep)
     self.assertEqual(expected_drop, drop[end_key])
Пример #24
0
 def test_proper_name_checker(self):
     """Only 'Murzik' is accepted by _proper_name_checker in this sample."""
     checker = DictionaryForText._proper_name_checker

     self.assertTrue(checker('Murzik'))

     for word in ('RDX', 'ClickEvent', 'clickEvent', 'mouse'):
         self.assertFalse(checker(word))
Пример #25
0
 def test_drop_proper_name(self):
     """'Two' merges into the existing 'two'; 'Venik' is dropped as a name."""
     source = {'Two': 1, 'two': 2, 'cat': 3, 'Venik': 1}
     expected_keep = {'two': 3, 'cat': 3}
     expected_drop = {'Venik': 1}

     keep, drop = DictionaryForText._drop_proper_name(source)

     self.assertEqual(expected_keep, keep)
     self.assertEqual(expected_drop, drop)
Пример #26
0
 def test_has_uppercase(self):
     """_check_has_uppercase is true when any position holds a capital."""
     checker = DictionaryForText._check_has_uppercase

     for word in ('CAT', 'Cat', 'cAt', 'caT'):
         self.assertTrue(checker(word))

     self.assertFalse(checker('cat'))
Пример #27
0
 def test_drop_uppercase(self):
     """'Cat' is merged into 'cat' and recorded in the drop mapping."""
     source = {'cat': 2, 'Cat': 3, 'item': 4}
     expected_keep = {'cat': 5, 'item': 4}
     expected_drop = {'Cat': 'cat'}

     keep, drop = DictionaryForText._drop_upper_case(source)

     self.assertEqual(expected_keep, keep)
     self.assertEqual(expected_drop, drop)
Пример #28
0
 def test_drop_short_words(self):
     """Keys of length 0, 1 and 2 move to `drop`; longer keys are kept."""
     source = {'word': 2, 'a': 3, '': 4, 'cat': 5, 'as': 6}
     expected_keep = {'word': 2, 'cat': 5}
     expected_drop = {'a': 3, '': 4, 'as': 6}

     keep, drop = DictionaryForText._drop_short_words(source)

     self.assertEqual(expected_keep, keep)
     self.assertEqual(expected_drop, drop)
Пример #29
0
 def test_short_checker(self):
     """_short_checker is true up to two characters and false for three."""
     checker = DictionaryForText._short_checker

     for word in ('', 'a', 'ab'):
         self.assertTrue(checker(word))

     self.assertFalse(checker('abc'))
Пример #30
0
    def test_has_end_ing(self):
        """_end_ing_checker is true for '-ing' forms, false for base words."""
        checker = DictionaryForText._end_ing_checker

        for word in ('making', 'dropping'):
            self.assertTrue(checker(word))

        for word in ('make', 'drop'):
            self.assertFalse(checker(word))