Пример #1
0
    def get_cleaner():
        """Build the cleaner used for CHAT data.

        Returns:
            acqdiv.parsers.chat.cleaners.cleaner.CHATCleaner:
            A newly constructed cleaner.
        """
        cleaner = CHATCleaner()
        return cleaner
Пример #2
0
 def test_utterance_cross_clean(self):
     """Check that utterance_cross_clean returns the given tiers as-is."""
     raw = ''
     actual = 'ha be'
     target = 'ha be'
     segments = 'h_a b_e'
     glosses = '1sg pl'
     poses = 'V N'
     # The raw utterance is passed in but is not part of the result.
     expected = (actual, target, segments, glosses, poses)
     result = CHATCleaner.utterance_cross_clean(
         raw, actual, target, segments, glosses, poses)
     self.assertEqual(result, expected)
Пример #3
0
    def test_clean_utterance_mixed_things_to_clean(self):
        """Run clean_utterance on input needing every kind of cleaning.

        The input combines:
        - redundant whitespace
        - a terminator
        - untranscribed material
        - events
        - a null-event
        - a repetition
        - scoped symbols
        - a pause between words
        """
        raw = ("+^ that's [x 2] xxx (..) mine ↓ &=vocalizes ; <0you"
               " pig <she said   [=! cries]>> [=! slaps leg] +/.")
        expected = "that's that's ??? mine pig she said"
        self.assertEqual(CHATCleaner.clean_utterance(raw), expected)
Пример #4
0
 def test_clean_word_mixed(self):
     """Check clean_word on markers, drawls, pauses and blocking together."""
     word = '^ka:l^e@e'
     cleaned = CHATCleaner.clean_word(word)
     self.assertEqual(cleaned, 'kale')
Пример #5
0
 def test_clean_word_already_clean(self):
     """Check that clean_word leaves an already clean word unchanged."""
     word = 'ka'
     self.assertEqual(CHATCleaner.clean_word(word), 'ka')
Пример #6
0
 def test_clean_utterance_null(self):
     """Check that a null utterance is cleaned to an empty string."""
     cleaned = CHATCleaner.clean_utterance('0[=! applauses]')
     self.assertEqual(cleaned, '')
Пример #7
0
 def test_clean_utterance_empty_string(self):
     """Check that clean_utterance maps an empty string to an empty string."""
     empty = ''
     cleaned = CHATCleaner.clean_utterance(empty)
     self.assertEqual(cleaned, '')
Пример #8
0
 def test_clean_utterance_clean_utt(self):
     """Check that clean_utterance keeps an already clean utterance as-is."""
     expected = "that's mine she said"
     cleaned = CHATCleaner.clean_utterance("that's mine she said")
     self.assertEqual(cleaned, expected)
Пример #9
0
 def test_clean_pos(self):
     """Check that clean_pos_raw returns the given POS value unchanged."""
     raw_pos = 'he'
     cleaned = CHATCleaner.clean_pos_raw(raw_pos)
     self.assertEqual(cleaned, raw_pos)
Пример #10
0
 def test_clean_gloss(self):
     """Check that clean_gloss_raw returns the given gloss unchanged."""
     raw_gloss = 'he'
     cleaned = CHATCleaner.clean_gloss_raw(raw_gloss)
     self.assertEqual(cleaned, raw_gloss)
Пример #11
0
 def test_clean_segment(self):
     """Check that clean_segment returns the given segment unchanged."""
     raw_segment = 'he'
     cleaned = CHATCleaner.clean_segment(raw_segment)
     self.assertEqual(cleaned, raw_segment)
Пример #12
0
 def test_clean_date_regular_date(self):
     """Check clean_date on a regular date: '12-SEP-1997' -> '1997-09-12'."""
     raw_date = '12-SEP-1997'
     cleaned = CHATCleaner.clean_date(raw_date)
     self.assertEqual(cleaned, '1997-09-12')
Пример #13
0
 def test_clean_pos_word(self):
     """Check that clean_pos_word returns the given POS word unchanged."""
     word = 'V'
     cleaned = CHATCleaner.clean_pos_word(word)
     self.assertEqual(cleaned, word)
Пример #14
0
 def test_clean_gloss_word(self):
     """Check that clean_gloss_word returns the given gloss word unchanged."""
     word = 'wh'
     cleaned = CHATCleaner.clean_gloss_word(word)
     self.assertEqual(cleaned, word)
Пример #15
0
 def test_clean_seg_word(self):
     """Check that clean_seg_word returns the given segment word unchanged."""
     word = 'ke'
     cleaned = CHATCleaner.clean_seg_word(word)
     self.assertEqual(cleaned, word)
Пример #16
0
 def test_clean_word_empty_string(self):
     """Check that clean_word maps an empty string to an empty string."""
     empty = ''
     cleaned = CHATCleaner.clean_word(empty)
     self.assertEqual(cleaned, '')
Пример #17
0
 def test_clean_seg_tier(self):
     """Check that clean_seg_tier returns the given segment tier unchanged."""
     tier = 'ha be'
     cleaned = CHATCleaner.clean_seg_tier(tier)
     self.assertEqual(cleaned, tier)
Пример #18
0
 def test_clean_date_empty_string(self):
     """Check that clean_date maps an empty string to an empty string."""
     empty = ''
     cleaned = CHATCleaner.clean_date(empty)
     self.assertEqual(cleaned, '')