Python cut_by_number示例，lexos.processors.prepare.cutter.cut_by_number Python示例

示例#1

0

显示文件

文件： test_file_cutter.py 项目： WheatonCS/Lexos

 def test_cut_by_number_excess_chunks(self):
     """Asking for more segments than words pads the result with ''."""
     # Each case: (input text, requested segments, expected segments).
     cases = (
         ("RemovewhitespaceonChinese?", 3,
          ["RemovewhitespaceonChinese?", "", ""]),
         ("This text has too few words!", 8,
          ["This ", "text ", "has ", "too ", "few ", "words!", "", ""]),
         ("Reeeeeeeeeeeeeeeeeeeeeeeally long word", 6,
          ["Reeeeeeeeeeeeeeeeeeeeeeeally ", "long ", "word", "", "", ""]),
     )
     for source, count, expected in cases:
         assert cut_by_number(text=source, num_segment=count) == expected

示例#2

0

显示文件

 def test_cut_by_number_excess_chunks(self):
     """Surplus segments come back as empty strings at the end of the list."""
     single_word = cut_by_number(text="RemovewhitespaceonChinese?",
                                 num_segment=3)
     assert single_word == ["RemovewhitespaceonChinese?", "", ""]

     six_words = cut_by_number(text="This text has too few words!",
                               num_segment=8)
     assert six_words == [
         "This ", "text ", "has ", "too ", "few ", "words!", "", "",
     ]

     three_words = cut_by_number(
         text="Reeeeeeeeeeeeeeeeeeeeeeeally long word", num_segment=6)
     assert three_words == [
         "Reeeeeeeeeeeeeeeeeeeeeeeally ", "long ", "word", "", "", "",
     ]

示例#3

0

显示文件

 def test_cut_by_number_bad_math(self):
     """Check that non-positive ``num_segment`` values are rejected.

     Each call must raise ``AssertionError`` whose message equals
     ``SEG_NON_POSITIVE_MESSAGE``. The test also fails when no error is
     raised at all, so it cannot pass vacuously.
     """
     # Cases kept from the original test: zero segments ("divide by zero")
     # and a negative segment count ("invalid index").
     for text, num_segment in (("Danger zone!", 0), ("Oh gawd...", -1)):
         try:
             cut_by_number(text=text, num_segment=num_segment)
         except AssertionError as error:
             assert str(error) == SEG_NON_POSITIVE_MESSAGE
         else:
             # The ``else`` branch runs only when the call returned
             # normally; raising here is outside the ``try`` body, so it
             # is not swallowed by the ``except`` clause above.
             raise AssertionError(
                 "cut_by_number did not reject num_segment={}".format(
                     num_segment))

示例#4

0

显示文件

文件： test_file_cutter.py 项目： WheatonCS/Lexos

 def test_cut_by_number_bad_math(self):
     """Check that ``num_segment`` values of 0 and -1 are rejected.

     Both calls must raise ``AssertionError`` with the message
     ``SEG_NON_POSITIVE_MESSAGE``; a call that returns normally now fails
     the test instead of being silently accepted.
     """
     # Divide by zero exception
     try:
         cut_by_number(text="Danger zone!", num_segment=0)
     except AssertionError as error:
         assert str(error) == SEG_NON_POSITIVE_MESSAGE
     else:
         # Reached only when no exception was raised; this raise sits
         # outside the ``try`` body so the handler above cannot catch it.
         raise AssertionError("cut_by_number accepted num_segment=0")

     # Invalid index exception
     try:
         cut_by_number(text="Oh gawd...", num_segment=-1)
     except AssertionError as error:
         assert str(error) == SEG_NON_POSITIVE_MESSAGE
     else:
         raise AssertionError("cut_by_number accepted num_segment=-1")

示例#5

0

显示文件

文件： test_file_cutter.py 项目： WheatonCS/Lexos

    def test_cut_by_number_spacing(self):
        """Check whitespace handling at segment boundaries.

        The expected values show each segment keeping the whitespace that
        follows it, while whitespace at the very start of the text is absent
        from the result.
        """
        # Each case: (input text, requested segments, expected segments).
        cases = [
            # cut after space
            ("Hanging space ", 2, ["Hanging ", "space "]),
            ("Other  whitespace\n is\tfine!\n\n", 4,
             ["Other  ", "whitespace\n ", "is\t", "fine!\n\n"]),
            ("      <-There are six spaces here", 5,
             ["<-There ", "are ", "six ", "spaces ", "here"]),
            ("\n\n\n\n\nword\n\n\n\n\n", 1, ["word\n\n\n\n\n"]),
        ]
        for source, count, expected in cases:
            assert cut_by_number(text=source, num_segment=count) == expected

示例#6

0

显示文件

    def test_cut_by_number_spacing(self):
        """Check that cuts land after whitespace, per the expected lists."""
        # cut after space
        trailing_space = cut_by_number(text="Hanging space ", num_segment=2)
        assert trailing_space == ["Hanging ", "space "]

        mixed_whitespace = cut_by_number(
            text="Other  whitespace\n is\tfine!\n\n", num_segment=4)
        assert mixed_whitespace == [
            "Other  ", "whitespace\n ", "is\t", "fine!\n\n",
        ]

        # Input starts with six spaces; none appear in the expected output.
        leading_spaces = cut_by_number(
            text="      <-There are six spaces here", num_segment=5)
        assert leading_spaces == [
            "<-There ", "are ", "six ", "spaces ", "here",
        ]

        # Leading newlines are absent from the result; trailing ones remain.
        wrapped_word = cut_by_number(text="\n\n\n\n\nword\n\n\n\n\n",
                                     num_segment=1)
        assert wrapped_word == ["word\n\n\n\n\n"]

示例#7

0

显示文件

文件： test_file_cutter.py 项目： WheatonCS/Lexos

    def test_cut_by_number_lines(self):
        """Check cutting of text whose only separators are newlines/U+3000."""
        # Each case: (input text, requested segments, expected segments).
        cases = [
            ("Latinisalanguagewithnospaces\n"
             "Youmayfindthisdifficulttoread!",
             2,
             ["Latinisalanguagewithnospaces\n",
              "Youmayfindthisdifficulttoread!"]),
            ("line\nline\nline\nline\nline",
             2,
             ["line\nline\nline\n", "line\nline"]),
            ("Languageswithoutanyspacesmayhave\n"
             "uneven\nchunks",
             3,
             ["Languageswithoutanyspacesmayhave\n", "uneven\n", "chunks"]),
            ("Ithinkthisiswhy\u3000Chinesetextcanbesplit",
             2,
             ["Ithinkthisiswhy\u3000", "Chinesetextcanbesplit"]),
        ]
        for source, count, expected in cases:
            assert cut_by_number(text=source, num_segment=count) == expected

示例#8

0

显示文件

    def test_cut_by_number_lines(self):
        """Check segments split at newlines and at the U+3000 space."""
        two_lines = cut_by_number(
            text="Latinisalanguagewithnospaces\n"
                 "Youmayfindthisdifficulttoread!",
            num_segment=2)
        assert two_lines == [
            "Latinisalanguagewithnospaces\n",
            "Youmayfindthisdifficulttoread!",
        ]

        # Five lines into two segments: the first segment gets three lines.
        five_lines = cut_by_number(text="line\nline\nline\nline\nline",
                                   num_segment=2)
        assert five_lines == ["line\nline\nline\n", "line\nline"]

        uneven = cut_by_number(
            text="Languageswithoutanyspacesmayhave\n"
                 "uneven\nchunks",
            num_segment=3)
        assert uneven == [
            "Languageswithoutanyspacesmayhave\n", "uneven\n", "chunks",
        ]

        ideographic_space = cut_by_number(
            text="Ithinkthisiswhy\u3000Chinesetextcanbesplit", num_segment=2)
        assert ideographic_space == [
            "Ithinkthisiswhy\u3000", "Chinesetextcanbesplit",
        ]

示例#9

0

显示文件

文件： test_file_cutter.py 项目： WheatonCS/Lexos

    def test_cut_by_number_normal(self):
        """Check ordinary space-separated text, including uneven splits."""
        odd_text = "Odd number of words in this text"

        # Each case: (input text, requested segments, expected segments).
        matching = [
            ("Text", 1, ["Text"]),
            ("This text has five words", 5,
             ["This ", "text ", "has ", "five ", "words"]),
            (odd_text, 6,
             ["Odd number ", "of ", "words ", "in ", "this ", "text"]),
        ]
        for source, count, expected in matching:
            assert cut_by_number(text=source, num_segment=count) == expected

        # Extra words go to the leading segments, not the trailing ones.
        wrong_split = ["Odd", "number ", "of ", "words ", "in ", "this text"]
        assert cut_by_number(text=odd_text, num_segment=6) != wrong_split

        almost = cut_by_number(text="Almost enough words here but not quite",
                               num_segment=4)
        assert almost == ["Almost enough ", "words here ", "but not ", "quite"]

示例#10

0

显示文件

    def test_cut_by_number_normal(self):
        """Check plain word-based cutting for even and uneven word counts."""
        one_word = cut_by_number(text="Text", num_segment=1)
        assert one_word == ["Text"]

        five_words = cut_by_number(text="This text has five words",
                                   num_segment=5)
        assert five_words == ["This ", "text ", "has ", "five ", "words"]

        seven_words = cut_by_number(text="Odd number of words in this text",
                                    num_segment=6)
        assert seven_words == [
            "Odd number ", "of ", "words ", "in ", "this ", "text",
        ]

        # The surplus word is expected in the first segment, not the last.
        seven_words_again = cut_by_number(
            text="Odd number of words in this text", num_segment=6)
        assert seven_words_again != [
            "Odd", "number ", "of ", "words ", "in ", "this text",
        ]

        uneven = cut_by_number(text="Almost enough words here but not quite",
                               num_segment=4)
        assert uneven == [
            "Almost enough ", "words here ", "but not ", "quite",
        ]