Example #1
def test_text2words_02():
    with open('dataset/ensemble_method.txt', 'r') as file:
        texts = file.read().replace('\n', '')

    splitter = Splitter()
    splitter.text2words(texts=texts)
    assert len(splitter.words) == 571
    assert splitter.words[0] == 'write'
Example #2
def test_text2words_02():
    file_name = os.path.join(DATA_DIR, 'ensemble_method.txt')
    with open(file_name, 'r', encoding="utf8") as file:
        texts = file.read().replace('\n', '')

    splitter = Splitter()
    splitter.text2words(texts=texts)
    assert len(splitter.words) <= 571
    assert splitter.words[0] == 'write'
Example #3
def test_text2words_03():
    # TODO
    with open('dataset/negotiation_tips.txt', 'r') as file:
        texts = file.read().replace('\n', '')

    def cleanerizer(texts):
        text_1 = re.sub(r"[(\[].*?[)\]]", "", texts)
        text_2 = re.sub(r'-', r'', text_1)
        return text_2

    splitter = Splitter()
    splitter.text2words(texts=cleanerizer(texts))
    print(splitter.words)
    splitter.text2words(texts=texts)
    print(splitter.words)
Example #4
def test_text2words_03():
    # TODO
    file_name = os.path.join(DATA_DIR, 'negotiation_tips.txt')
    with open(file_name, 'r') as file:
        texts = file.read().replace('\n', '')

    def cleanerizer(texts):
        text_1 = re.sub(r"[(\[].*?[)\]]", "", texts)
        text_2 = re.sub(r'-', r'', text_1)
        return text_2

    splitter = Splitter()
    splitter.text2words(texts=cleanerizer(texts))
    print(splitter.words)
    splitter.text2words(texts=texts)
    print(splitter.words)
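For reference, here is what the two substitutions inside cleanerizer do. The sample sentence below is made up for this illustration and is not taken from negotiation_tips.txt:

import re

# Illustrative only: a made-up sentence, not a line from the dataset.
sample = "BATNA (best alternative to a negotiated agreement) is a well-known term."

# The first pass drops the shortest span enclosed by ( or [ and ) or ].
no_brackets = re.sub(r"[(\[].*?[)\]]", "", sample)
# The second pass deletes hyphens, gluing hyphenated words together.
no_hyphens = re.sub(r'-', r'', no_brackets)

print(no_brackets)  # BATNA  is a well-known term.
print(no_hyphens)   # BATNA  is a wellknown term.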
Example #5
def test_text2words_01():
    texts = ["This is an awesome book to learn NLP. DistilBERT is an amazing NLP model. We can interchangeably use " \
             "embedding, encoding, or vectorizing."]
    splitter = Splitter()
    splitter.text2words(texts=texts[0])
    assert splitter.words[0] == 'awesome'
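All five tests call the same Splitter().text2words API, but the Splitter class itself is not part of these snippets. As a rough sketch only, assuming text2words lowercases its input, keeps alphabetic tokens, and drops common English stop words (the stop-word list below is an assumption, not the project's actual one), a minimal stand-in could look like this:

import re

# Hypothetical stand-in for the real Splitter; the tokenization rule and the
# stop-word list are assumptions made for illustration only.
STOP_WORDS = {"a", "an", "and", "can", "in", "is", "it", "of", "or", "the", "this", "to", "we"}

class Splitter:
    def __init__(self):
        self.words = []

    def text2words(self, texts):
        # Lowercase the text, keep alphabetic tokens, and filter out stop words.
        tokens = re.findall(r"[a-z']+", texts.lower())
        self.words = [token for token in tokens if token not in STOP_WORDS]

With this sketch the assertion in Example #5 holds ('this', 'is', and 'an' are filtered out, so the first remaining word is 'awesome'); the exact word counts checked in Examples #1 and #2 naturally depend on the real implementation.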