def test_text2words_02():
    """Check the words extracted from the ensemble-method sample text.

    Reads ``dataset/ensemble_method.txt`` (relative to the current working
    directory), removes every newline, feeds the text to
    ``Splitter.text2words`` and verifies the size and first entry of
    ``splitter.words``.
    """
    # Decode explicitly as UTF-8: without ``encoding`` the result depends on
    # the platform's locale encoding and can differ (or fail) across machines.
    with open('dataset/ensemble_method.txt', 'r', encoding="utf8") as file:
        texts = file.read().replace('\n', '')

    splitter = Splitter()
    splitter.text2words(texts=texts)

    assert len(splitter.words) == 571
    assert splitter.words[0] == 'write'
def test_text2words_02():
    """Verify the word list built from ``ensemble_method.txt`` in ``DATA_DIR``.

    The file is read as UTF-8 and flattened to one line by dropping newline
    characters before being handed to ``Splitter.text2words``. The test
    expects at most 571 words, the first of which is ``'write'``.
    """
    sample_path = os.path.join(DATA_DIR, 'ensemble_method.txt')
    with open(sample_path, 'r', encoding="utf8") as handle:
        raw_text = handle.read()
    flattened = raw_text.replace('\n', '')

    splitter = Splitter()
    splitter.text2words(texts=flattened)

    word_count = len(splitter.words)
    assert word_count <= 571
    first_word = splitter.words[0]
    assert first_word == 'write'
def test_text2words_03():
    """Print the split words of the negotiation-tips text, cleaned and raw.

    Exploratory test with no assertions yet: it runs ``Splitter.text2words``
    first on a pre-cleaned copy of the text and then on the unmodified text,
    printing ``splitter.words`` after each run for manual comparison.
    """
    # TODO

    def cleanerizer(raw_text):
        """Drop bracketed/parenthesised spans, then remove every hyphen."""
        # Non-greedy match from an opening '(' or '[' to the nearest ')' or ']'.
        without_brackets = re.sub(r"[(\[].*?[)\]]", "", raw_text)
        return re.sub(r'-', r'', without_brackets)

    with open('dataset/negotiation_tips.txt', 'r') as source:
        content = source.read()
    texts = content.replace('\n', '')

    splitter = Splitter()

    # First pass: cleaned text.
    cleaned = cleanerizer(texts)
    splitter.text2words(texts=cleaned)
    print(splitter.words)

    # Second pass: same splitter instance, original text.
    splitter.text2words(texts=texts)
    print(splitter.words)
def test_text2words_03():
    """Show how pre-cleaning changes the words found in ``negotiation_tips.txt``.

    Loads the file from ``DATA_DIR``, strips newlines, and prints
    ``splitter.words`` twice: once for the text with bracketed spans and
    hyphens removed, once for the untouched text. Nothing is asserted yet.
    """
    # TODO
    file_name = os.path.join(DATA_DIR, 'negotiation_tips.txt')
    with open(file_name, 'r') as stream:
        texts = stream.read().replace('\n', '')

    def cleanerizer(document):
        """Return *document* without (...) / [...] spans and without hyphens."""
        # Inner call removes bracketed spans; outer call deletes hyphens.
        return re.sub(r'-', r'', re.sub(r"[(\[].*?[)\]]", "", document))

    splitter = Splitter()
    # Run the cleaned variant first, then the raw one, on the same instance.
    for candidate in (cleanerizer(texts), texts):
        splitter.text2words(texts=candidate)
        print(splitter.words)
def test_text2words_01():
    """Check the first word produced for a short three-sentence sample.

    The sample starts with "This is an awesome ..."; the test expects
    ``splitter.words[0]`` to be ``'awesome'`` (presumably because the leading
    words are filtered out by ``Splitter`` -- confirm against its
    implementation).
    """
    sentence = (
        "This is an awesome book to learn NLP. "
        "DistilBERT is an amazing NLP model. "
        "We can interchangeably use embedding, encoding, or vectorizing."
    )

    splitter = Splitter()
    splitter.text2words(texts=sentence)

    first_word = splitter.words[0]
    assert first_word == 'awesome'