def test_cut_by_words_overlap(self):
    """Check ``cut_by_words`` output when ``overlap`` is non-zero.

    In the expected lists, consecutive segments repeat ``overlap`` words
    of the segment before them, and every segment except the last keeps
    its trailing space.
    """
    repeated_words = cut_by_words(
        text="test test test", seg_size=2, overlap=1, last_prop=.5)
    assert repeated_words == ["test test ", "test test", "test"]

    animal_words = cut_by_words(
        text="dog cat duck bird", seg_size=3, overlap=2, last_prop=.5)
    assert animal_words == ["dog cat duck ", "cat duck bird", "duck bird"]
def test_cut_by_words_no_whitespace(self):
    """Check that text without inner whitespace is treated as whole words.

    A single unbroken token comes back as one segment; two tokens with
    ``seg_size=1`` come back as two segments, the first keeping its
    trailing space.
    """
    single_token = cut_by_words(
        text="testtest", seg_size=1, overlap=0, last_prop=1)
    assert single_token == ["testtest"]

    two_tokens = cut_by_words(
        text="helloworld helloworld", seg_size=1, overlap=0, last_prop=1)
    assert two_tokens == ["helloworld ", "helloworld"]
def test_cut_by_words(self):
    """Check basic, non-overlapping segmentation by ``cut_by_words``.

    Covers a lone space (expected ``[""]``), two words cut one per
    segment, and a 22-word text cut four words at a time, which is
    expected to give five full segments followed by a two-word tail.
    """
    only_space = cut_by_words(text=" ", seg_size=1, overlap=0, last_prop=1)
    assert only_space == [""]

    word_per_segment = cut_by_words(
        text="test test", seg_size=1, overlap=0, last_prop=1)
    assert word_per_segment == ["test ", "test"]

    long_text = ("abc abc abc abc abc abc abc abc abc abc abc "
                 "abc abc abc abc abc abc abc abc abc abc "
                 "abc")
    expected_segments = [
        "abc abc abc abc ",
        "abc abc abc abc ",
        "abc abc abc abc ",
        "abc abc abc abc ",
        "abc abc abc abc ",
        "abc abc",
    ]
    four_per_segment = cut_by_words(
        text=long_text, seg_size=4, overlap=0, last_prop=.5)
    assert four_per_segment == expected_segments
def test_cut_by_words_overlap(self):
    """Exercise ``cut_by_words`` with overlapping segments.

    Each case pairs the keyword arguments of one call with the exact list
    of segments it must return; neighbouring expected segments share
    ``overlap`` words.
    """
    cases = (
        (
            {"text": "test test test", "seg_size": 2, "overlap": 1,
             "last_prop": .5},
            ["test test ", "test test", "test"],
        ),
        (
            {"text": "dog cat duck bird", "seg_size": 3, "overlap": 2,
             "last_prop": .5},
            ["dog cat duck ", "cat duck bird", "duck bird"],
        ),
    )
    for call_kwargs, expected in cases:
        assert cut_by_words(**call_kwargs) == expected
def test_cut_by_words(self):
    """Exercise plain (``overlap=0``) segmentation by ``cut_by_words``.

    Cases: a lone space, two words cut one per segment, and 22 words cut
    four per segment with a two-word tail kept as its own segment.
    """
    twenty_two_words = ("abc abc abc abc abc abc abc abc abc abc abc "
                        "abc abc abc abc abc abc abc abc abc abc "
                        "abc")
    cases = (
        ({"text": " ", "seg_size": 1, "overlap": 0, "last_prop": 1},
         [""]),
        ({"text": "test test", "seg_size": 1, "overlap": 0, "last_prop": 1},
         ["test ", "test"]),
        ({"text": twenty_two_words, "seg_size": 4, "overlap": 0,
          "last_prop": .5},
         ["abc abc abc abc ", "abc abc abc abc ", "abc abc abc abc ",
          "abc abc abc abc ", "abc abc abc abc ", "abc abc"]),
    )
    for call_kwargs, expected in cases:
        assert cut_by_words(**call_kwargs) == expected
def test_cut_by_words_zero_chunks_precondition(self):
    """Check that ``cut_by_words`` rejects ``seg_size=0``.

    Every call must raise an ``AssertionError`` whose text equals
    ``SEG_NON_POSITIVE_MESSAGE``; returning normally fails the test.
    """
    for text in (" ", "test test"):
        try:
            _ = cut_by_words(text=text, seg_size=0, overlap=0, last_prop=.5)
        except AssertionError as error:
            assert str(error) == SEG_NON_POSITIVE_MESSAGE
        else:
            # Raised from ``else`` rather than inside ``try`` so the handler
            # above cannot catch it and misreport a missing exception as a
            # message mismatch.
            raise AssertionError("zero_division error did not raise")
def test_cut_by_words_proportion(self):
    """Check how ``last_prop`` affects the final segment.

    In the expected values a short trailing remainder is folded into the
    previous segment when ``last_prop`` is 1 or more, and kept as its own
    segment when ``last_prop`` is .5. ``overlap`` is 0 throughout.
    """
    three_words = "test test test"
    four_words = "test test test test"
    five_words = "test test test test test"

    # (text, seg_size, last_prop, expected segments)
    cases = (
        (three_words, 2, 1, ["test test test"]),
        (three_words, 2, .5, ["test test ", "test"]),
        (three_words, 2, 1.5, ["test test test"]),
        (three_words, 2, 2, ["test test test"]),
        (four_words, 2, 1, ["test test ", "test test"]),
        (five_words, 2, .5, ["test test ", "test test ", "test"]),
        (five_words, 2, 1, ["test test ", "test test test"]),
        (five_words, 3, 1, ["test test test test test"]),
    )
    for text, seg_size, last_prop, expected in cases:
        segments = cut_by_words(
            text=text, seg_size=seg_size, overlap=0, last_prop=last_prop)
        assert segments == expected
def test_seg_size_assertion_error(self):
    """Check that an ``overlap`` not smaller than ``seg_size`` is rejected.

    With ``seg_size=1`` and ``overlap`` of 1 or 2, ``cut_by_words`` must
    raise an ``AssertionError`` whose text equals
    ``LARGER_SEG_SIZE_MESSAGE``; returning normally fails the test.
    """
    for text, overlap in (("test test", 1), ("test test test", 2)):
        try:
            _ = cut_by_words(
                text=text, seg_size=1, overlap=overlap, last_prop=.5)
        except AssertionError as error:
            assert str(error) == LARGER_SEG_SIZE_MESSAGE
        else:
            # Raised from ``else`` rather than inside ``try`` so the handler
            # above cannot catch it and misreport a missing exception as a
            # message mismatch.
            raise AssertionError("did not throw error")
def test_cut_by_words_zero_chunks_precondition(self):
    """Verify the ``seg_size=0`` precondition of ``cut_by_words``.

    For both sample texts the call has to fail with an ``AssertionError``
    carrying ``SEG_NON_POSITIVE_MESSAGE``. A call that returns normally is
    reported as a failure.
    """
    for sample_text in (" ", "test test"):
        try:
            _ = cut_by_words(
                text=sample_text, seg_size=0, overlap=0, last_prop=.5)
        except AssertionError as error:
            assert str(error) == SEG_NON_POSITIVE_MESSAGE
        else:
            # Kept out of the ``try`` body: raised there, this sentinel
            # would be caught by the ``except`` clause and surface only as
            # a confusing message mismatch.
            raise AssertionError("zero_division error did not raise")
def test_cut_by_words_neg_overlap_precondition(self):
    """Check that ``cut_by_words`` rejects a negative ``overlap``.

    The call must raise an ``AssertionError`` whose text equals
    ``NEG_OVERLAP_LAST_PROP_MESSAGE``; returning normally fails the test.
    """
    try:
        _ = cut_by_words(text="test", seg_size=1, overlap=-1, last_prop=.5)
    except AssertionError as error:
        assert str(error) == NEG_OVERLAP_LAST_PROP_MESSAGE
    else:
        # Raised from ``else`` rather than inside ``try`` so the handler
        # above cannot catch it and misreport a missing exception as a
        # message mismatch.
        raise AssertionError("did not throw error")
def test_cut_by_words_neg_chunk_precondition(self):
    """Check that ``cut_by_words`` rejects a negative ``seg_size``.

    The call must raise an ``AssertionError`` whose text equals
    ``SEG_NON_POSITIVE_MESSAGE``; returning normally fails the test.
    """
    try:
        _ = cut_by_words(text="test", seg_size=-1, overlap=0, last_prop=.5)
    except AssertionError as error:
        assert str(error) == SEG_NON_POSITIVE_MESSAGE
    else:
        # Raised from ``else`` rather than inside ``try`` so the handler
        # above cannot catch it and misreport a missing exception as a
        # message mismatch.
        raise AssertionError("did not throw error")
def test_seg_size_assertion_error(self):
    """Verify that ``overlap >= seg_size`` is refused by ``cut_by_words``.

    Both calls use ``seg_size=1`` with an ``overlap`` of 1 or 2 and are
    expected to raise an ``AssertionError`` carrying
    ``LARGER_SEG_SIZE_MESSAGE``. A call that returns normally is reported
    as a failure.
    """
    invalid_calls = (
        {"text": "test test", "seg_size": 1, "overlap": 1, "last_prop": .5},
        {"text": "test test test", "seg_size": 1, "overlap": 2,
         "last_prop": .5},
    )
    for call_kwargs in invalid_calls:
        try:
            _ = cut_by_words(**call_kwargs)
        except AssertionError as error:
            assert str(error) == LARGER_SEG_SIZE_MESSAGE
        else:
            # Kept out of the ``try`` body: raised there, this sentinel
            # would be caught by the ``except`` clause and surface only as
            # a confusing message mismatch.
            raise AssertionError("did not throw error")
def test_cut_by_words_proportion(self):
    """Exercise the ``last_prop`` argument of ``cut_by_words``.

    The expected lists show a short final remainder merged into the
    preceding segment for ``last_prop`` values of 1, 1.5 and 2, and left
    as a separate segment for ``last_prop=.5``.
    """
    def cut(word_count, seg_size, last_prop):
        # Build "test test ... test" with ``word_count`` words and cut it
        # without overlap.
        text = " ".join(["test"] * word_count)
        return cut_by_words(
            text=text, seg_size=seg_size, overlap=0, last_prop=last_prop)

    # Three words, two per segment: only last_prop=.5 keeps the tail apart.
    assert cut(3, 2, 1) == ["test test test"]
    assert cut(3, 2, .5) == ["test test ", "test"]
    assert cut(3, 2, 1.5) == ["test test test"]
    assert cut(3, 2, 2) == ["test test test"]

    # Four words divide evenly, so there is no remainder.
    assert cut(4, 2, 1) == ["test test ", "test test"]

    # Five words leave a remainder for both segment sizes.
    assert cut(5, 2, .5) == ["test test ", "test test ", "test"]
    assert cut(5, 2, 1) == ["test test ", "test test test"]
    assert cut(5, 3, 1) == ["test test test test test"]
def test_cut_by_words_neg_overlap_precondition(self):
    """Verify the negative-``overlap`` precondition of ``cut_by_words``.

    Calling with ``overlap=-1`` is expected to raise an ``AssertionError``
    carrying ``NEG_OVERLAP_LAST_PROP_MESSAGE``. A call that returns
    normally is reported as a failure.
    """
    try:
        _ = cut_by_words(text="test", seg_size=1, overlap=-1, last_prop=.5)
    except AssertionError as error:
        assert str(error) == NEG_OVERLAP_LAST_PROP_MESSAGE
    else:
        # Kept out of the ``try`` body: raised there, this sentinel would
        # be caught by the ``except`` clause and surface only as a
        # confusing message mismatch.
        raise AssertionError("did not throw error")
def test_cut_by_words_neg_chunk_precondition(self):
    """Verify the negative-``seg_size`` precondition of ``cut_by_words``.

    Calling with ``seg_size=-1`` is expected to raise an ``AssertionError``
    carrying ``SEG_NON_POSITIVE_MESSAGE``. A call that returns normally is
    reported as a failure.
    """
    try:
        _ = cut_by_words(text="test", seg_size=-1, overlap=0, last_prop=.5)
    except AssertionError as error:
        assert str(error) == SEG_NON_POSITIVE_MESSAGE
    else:
        # Kept out of the ``try`` body: raised there, this sentinel would
        # be caught by the ``except`` clause and surface only as a
        # confusing message mismatch.
        raise AssertionError("did not throw error")
def test_cut_by_words_no_whitespace(self):
    """Exercise ``cut_by_words`` on tokens that contain no whitespace.

    One unbroken token is expected back unchanged as a single segment;
    two such tokens with ``seg_size=1`` are expected as two segments, the
    first keeping its trailing space.
    """
    cases = (
        ("testtest", ["testtest"]),
        ("helloworld helloworld", ["helloworld ", "helloworld"]),
    )
    for text, expected in cases:
        segments = cut_by_words(
            text=text, seg_size=1, overlap=0, last_prop=1)
        assert segments == expected