Exemplo n.º 1
0
    def test_mark_with_noneng(self):
        """Check that mark() wraps the expected words in NonEng.

        The input consists of a string literal, a multiline comment and a
        one-line comment. In the expected result "Wirklich", 'ц', "blanco"
        and "DIESELBE" are wrapped in NonEng; every other token is unchanged.
        """
        string_literal = StringLiteral([
            SplitContainer([Word.from_("A"), Word.from_("Wirklich")]),
        ])
        multiline_comment = MultilineComment([
            SplitContainer.from_single_token('ц'),
            SplitContainer(
                [Word.from_("blanco"), Underscore(), Word.from_("english")]),
        ])
        one_line_comment = OneLineComment([
            SplitContainer([Word.from_("DIESELBE"), Word.from_("8")]),
        ])

        actual = mark(
            [string_literal, multiline_comment, one_line_comment], {})

        marked_string_literal = StringLiteral([
            SplitContainer([Word.from_("A"), NonEng(Word.from_("Wirklich"))]),
        ])
        marked_multiline_comment = MultilineComment([
            SplitContainer([NonEng(Word.from_('ц'))]),
            # The SplitContainer constructor is called directly here: per the
            # original author's note, the split container cannot set the
            # wordStart prefix when its first subword is wrapped in NonEng.
            SplitContainer([
                NonEng(Word.from_("blanco")),
                Underscore(),
                Word.from_("english"),
            ]),
        ])
        marked_one_line_comment = OneLineComment([
            # Same reason as above: the first subword is wrapped in NonEng,
            # so the container is built through its constructor.
            SplitContainer([NonEng(Word.from_("DIESELBE")), Word.from_("8")]),
        ])
        expected = [
            marked_string_literal,
            marked_multiline_comment,
            marked_one_line_comment,
        ]

        self.assertEqual(expected, actual)
Exemplo n.º 2
0
    def test_5(self):
        text = '''
// this code won't compile but the preprocessing still has to be done corrrectly
'''
        expected_result = [
            NewLine(),
            OneLineComment([
                SplitContainer.from_single_token('this'),
                SplitContainer.from_single_token('code'),
                SplitContainer.from_single_token('won'), "'",
                SplitContainer.from_single_token('t'),
                SplitContainer.from_single_token('compile'),
                SplitContainer.from_single_token('but'),
                SplitContainer.from_single_token('the'),
                SplitContainer.from_single_token('preprocessing'),
                SplitContainer.from_single_token('still'),
                SplitContainer.from_single_token('has'),
                SplitContainer.from_single_token('to'),
                SplitContainer.from_single_token('be'),
                SplitContainer.from_single_token('done'),
                SplitContainer.from_single_token('corrrectly')
            ]),
            NewLine(),
            NewLine()
        ]

        self.__test_apply_preprocessors(text, expected_result)
Exemplo n.º 3
0
    def test_to_repr_with_enonlycontents(self):
        """Check to_repr() on tokens that contain NonEng-wrapped words.

        The configuration sets EN_ONLY=2, COM_STR=0, SPLIT=3,
        TABS_NEWLINES=1, MARK_LOGS=1 and CAPS=1, and the n-gram splitting
        type is NUMBERS_AND_CUSTOM with no custom splittings. The result is
        compared against a fixed sequence of literals and `pl` placeholders.
        """
        prep_config = PrepConfig({
            PrepParam.EN_ONLY: 2,
            PrepParam.COM_STR: 0,
            PrepParam.SPLIT: 3,
            PrepParam.TABS_NEWLINES: 1,
            PrepParam.MARK_LOGS: 1,
            PrepParam.CAPS: 1,
        })
        ngram_split_config = NgramSplitConfig(
            splitting_type=NgramSplittingType.NUMBERS_AND_CUSTOM,
            sc_splittings={})

        literal_words = (
            "ich", "weiss", "nicht", "was", "soll", "es",
            "bedeuten", "dass", "ich", "so", "traurig", "bin",
        )
        tokens = [
            Number([1, DecimalPoint(), 1]),
            "*",
            SplitContainer([NonEng(Word.from_("dinero"))]),
            StringLiteral([NonEng(Word.from_(w)) for w in literal_words]),
            NewLine(),
            MultilineComment([
                SplitContainer([NonEng(Word.from_('ц'))]),
                SplitContainer([
                    NonEng(Word.from_("blanco")),
                    Underscore(),
                    Word.from_("english"),
                ]),
            ]),
            NewLine(),
            Tab(),
            OneLineComment([
                SplitContainer(
                    [NonEng(Word.from_("DIESELBE")), Word.from_("8")]),
            ]),
        ]

        actual = to_repr(prep_config, tokens, ngram_split_config)

        # The grouping below follows the order of the input tokens; which
        # input produces which group is presumed from that order.
        number_and_operator = [
            pl['word_start'], '1', '.', '1', pl['word_end'], "*",
        ]
        identifier = [pl['non_eng']]
        string_literal = ['"', pl["non_eng_content"], '"']
        multiline_comment = [
            '/*',
            pl['non_eng'],
            pl['word_start'], pl['non_eng'], '_', 'english', pl['word_end'],
            '*/',
        ]
        one_line_comment = [
            '//',
            pl['word_start'], pl['capitals'], pl['non_eng'], "8",
            pl['word_end'],
            pl['olc_end'],
        ]
        expected = (number_and_operator + identifier + string_literal
                    + multiline_comment + one_line_comment)

        self.assertEqual(expected, actual)
Exemplo n.º 4
0
    def test_mark_all_eng(self):
        """Check that mark() returns an equal token list when nothing is wrapped.

        The original description of this case: all words are English, so
        nothing is changed.

        The expected value is built separately from the list handed to
        mark(). Comparing the result with the very same list object that was
        passed in would still succeed if mark() modified its input in place,
        so the test would not detect such a change.
        """
        def build_tokens():
            # Returns a fresh, structurally identical token list on each call.
            return [
                StringLiteral([
                    OneLineCommentStart(),
                    SplitContainer([
                        Word.from_("test"),
                        Word.from_("my"),
                        Word.from_("class")
                    ])
                ]),
                NewLine(),
                OneLineComment([
                    MultilineCommentEnd(),
                    SplitContainer.from_single_token("lifeisgood")
                ]),
                NewLine(),
                StringLiteral([
                    MultilineCommentStart(),
                    SplitContainer.from_single_token("!")
                ]),
                NewLine(),
                MultilineComment([NewLine()]),
                NewLine()
            ]

        actual = mark(build_tokens(), {})

        # (expected, actual) order, as in the sibling tests in this file.
        self.assertEqual(build_tokens(), actual)
Exemplo n.º 5
0
    def test_process_comments_and_str_literals(self):
        """Positive scenario for process_comments_and_str_literals().

        The flat input stream is, line by line:

        1. Quote, OneLineCommentStart, the container test/_/my/Class, Quote
        2. OneLineCommentStart, MultilineCommentEnd
        3. Quote, MultilineCommentStart, "!", Quote
        4. MultilineCommentStart
        5. MultilineCommentEnd

        with a NewLine after each line. The expected output groups lines 1
        and 3 into StringLiteral tokens, line 2 into a OneLineComment and
        lines 4-5 into a MultilineComment that contains the NewLine between
        them.
        """
        identifier = SplitContainer([
            Word.from_("test"),
            Underscore(),
            Word.from_("my"),
            Word.from_("Class"),
        ])
        first_line = [Quote(), OneLineCommentStart(), identifier, Quote()]
        second_line = [OneLineCommentStart(), MultilineCommentEnd()]
        third_line = [
            Quote(),
            MultilineCommentStart(),
            SplitContainer.from_single_token("!"),
            Quote(),
        ]
        tokens = (
            first_line + [NewLine()]
            + second_line + [NewLine()]
            + third_line + [NewLine()]
            + [MultilineCommentStart(), NewLine()]
            + [MultilineCommentEnd(), NewLine()]
        )

        actual = process_comments_and_str_literals(tokens, {})

        expected_first_literal = StringLiteral([
            OneLineCommentStart(),
            SplitContainer([
                Word.from_("test"),
                Underscore(),
                Word.from_("my"),
                Word.from_("Class"),
            ]),
        ])
        expected_second_literal = StringLiteral([
            MultilineCommentStart(),
            SplitContainer.from_single_token("!"),
        ])
        expected = [
            expected_first_literal,
            NewLine(),
            OneLineComment([MultilineCommentEnd()]),
            NewLine(),
            expected_second_literal,
            NewLine(),
            MultilineComment([NewLine()]),
            NewLine(),
        ]

        self.assertEqual(expected, actual)
Exemplo n.º 6
0
    StringLiteral(
        [SplitContainer([Word.from_("A"),
                         NonEng(Word.from_("Wirklich"))])]),
    NewLine(),
    MultilineComment([
        SplitContainer([NonEng(Word.from_('ц'))]),
        SplitContainer([
            NonEng(Word.from_("blanco")),
            Underscore(),
            Word.from_("english")
        ])
    ]),
    NewLine(),
    Tab(),
    OneLineComment(
        [SplitContainer([NonEng(Word.from_("DIESELBE")),
                         Word.from_("8")])])
]


class TeprTest(unittest.TestCase):
    def test_both_enonly_and_nosplit(self):
        with self.assertRaises(ValueError):
            prep_config = PrepConfig({
                PrepParam.EN_ONLY: 1,
                PrepParam.COM_STR: 0,
                PrepParam.SPLIT: 0,
                PrepParam.TABS_NEWLINES: 1,
                PrepParam.MARK_LOGS: 1,
                PrepParam.CAPS: 1
            })