def test_restaurant_dataset():
    """Push the restaurant data file through the adapter and pin the counts.

    The test reads the raw sentences, validates them, expands them into
    classifier examples, checks the share of each sentiment label, and
    finally saves the examples under ``data_dir``.
    """
    with open(restaurant_data, 'r') as handle:
        raw_sentences = adapter.read_sentences(handle)
    assert len(raw_sentences) == 3041

    stats = {'rejected': 0, 'conflicts': 0}
    # Materialise the validated sentences: they are counted here and
    # iterated again below.
    sentences = list(adapter.validate_sentences(raw_sentences, stats))
    assert len(sentences) == 1978
    assert stats['rejected'] == 1063
    assert stats['conflicts'] == 91

    # Flatten the per-sentence examples into a single list.
    examples = []
    for sentence in sentences:
        examples.extend(adapter.generate_classifier_examples(sentence))
    assert len(examples) == 3602

    # Check distribution of polarities
    def occurrences(label):
        return sum(1 for item in examples if item.sentiment == label)

    per_label = [occurrences(label) for label in Sentiment]
    ratios = np.array(per_label) / len(examples)
    # The labels are in the order: [neutral, negative, positive]
    assert ratios.round(2).tolist() == [0.18, 0.22, 0.6]

    file_path = os.path.join(data_dir, 'classifier-semeval-restaurant-train.bin')
    absa.utils.save(examples, file_path)
def test_validate_sentence():
    """Validate a three-sentence fixture and check which sentence survives.

    Exactly one sentence (the one whose text is ``test 3``) is expected to
    come out of ``adapter.validate_sentences``, with the stats recording
    two rejected sentences and two conflicts.
    """
    # Adjacent literals are concatenated into one single-line XML document.
    xml_text = (
        ' <sentences>'
        ' <sentence id="1">test 1</sentence>'
        ' <sentence id="2">'
        ' <text>test 2</text>'
        ' <aspectTerms>'
        ' <aspectTerm polarity="conflict"/>'
        ' </aspectTerms>'
        ' </sentence>'
        ' <sentence id="3">'
        ' <text>test 3</text>'
        ' <aspectTerms>'
        ' <aspectTerm polarity="neutral"/>'
        ' <aspectTerm polarity="conflict"/>'
        ' </aspectTerms>'
        ' </sentence>'
        ' </sentences> '
    )
    stream = io.StringIO(xml_text)
    parsed = adapter.read_sentences(stream)

    stats = {'rejected': 0, 'conflicts': 0}
    # Unpacking into a one-element pattern also asserts that exactly one
    # sentence is produced.
    [survivor] = adapter.validate_sentences(parsed, stats)

    assert survivor.find('text').text == 'test 3'
    assert stats == {'rejected': 2, 'conflicts': 2}
def test_read_sentences():
    """Check that ``adapter.read_sentences`` returns all three sentences."""
    # Adjacent literals are concatenated into one single-line XML document.
    xml_text = (
        ' <sentences>'
        ' <sentence id="1">test 1</sentence>'
        ' <sentence id="2">test 2</sentence>'
        ' <sentence id="3">test 3</sentence>'
        ' </sentences> '
    )
    stream = io.StringIO(xml_text)
    parsed = adapter.read_sentences(stream)
    assert len(parsed) == 3
def test_generate_classifier_examples():
    """Check the examples generated for a sentence with two aspect terms.

    The fixture sentence carries a neutral ``cord`` term and a positive
    ``battery life`` term; one example is expected per term, both sharing
    the lower-cased sentence text.
    """
    text = (
        'I charge it at night and skip taking the cord '
        'with me because of the good battery life.'
    )
    # Adjacent literals are concatenated into one single-line XML document;
    # only the <text> fragment needs interpolation.
    xml_text = (
        ' <sentences>'
        ' <sentence id="1">'
        f' <text>{text}</text>'
        ' <aspectTerms>'
        ' <aspectTerm term="cord" polarity="neutral" from="41" to="45"/>'
        ' <aspectTerm term="battery life" polarity="positive" from="74" to="86"/>'
        ' </aspectTerms>'
        ' </sentence>'
        ' </sentences> '
    )
    stream = io.StringIO(xml_text)
    # The one-element pattern asserts that a single sentence was read.
    [sentence] = adapter.read_sentences(stream)

    first, second = adapter.generate_classifier_examples(sentence)

    assert first.aspect == 'cord'
    assert first.sentiment == Sentiment.neutral
    assert second.aspect == 'battery life'
    assert second.sentiment == Sentiment.positive
    assert first.text == second.text == text.lower()