def test_correct_amount_of_events_should_be_extracted():
    """
    Extraction over the test file marked_sentences.txt must yield
    exactly 19 events.
    """
    miner = Event()
    miner.read_file("test/test_articles/marked_sentences.txt")
    extracted_events = miner.start_extraction()
    event_count = len(extracted_events)
    assert_equal(event_count, 19)
def test_read_from_file_should_deliver_text():
    """
    After reading an article file, the text held by the Event must not
    be the empty string.
    """
    article_path = "test/test_articles/01_wikipedia_syrian_civil_war.txt"
    miner = Event()
    miner.read_file(article_path)
    assert_not_equal(miner.text, "")
def test_remove_references():
    """
    Print whether bracketed references survive remove_references.

    Reads the Syrian civil war test article, passes its text through
    remove_references and prints "CHECK" if a bracketed number such as
    "[12]" can still be found, otherwise "NO".
    """
    e = Event()
    e.read_file("test/test_articles/01_wikipedia_syrian_civil_war.txt")
    text = remove_references(e.text)
    # Raw string: "\[" and "\d" are invalid escape sequences in a plain
    # literal and raise a warning on current Python versions. The pattern
    # value itself is unchanged.
    if re.search(str(r'\[\d{1,3}]'), text):
        print("CHECK")
    else:
        print("NO")
def test_references_should_be_removed():
    """
    Check that references are removed from the text.

    May need refining later: the check searches for the same kind of
    bracketed-number pattern that the removal itself is said to use, so
    this test is likely to pass regardless.
    """
    e = Event()
    e.read_file("test/test_articles/01_wikipedia_syrian_civil_war.txt")
    # Search for references in brackets, like "[12]"; none should be found.
    # Raw string: "\[" and "\d" are invalid escape sequences in a plain
    # literal and raise a warning on current Python versions.
    assert_false(re.search(str(r'\[\d{1,3}]'), remove_references(e.text)))
def extract_events(self):
    """
    Run eventminer over every raw goldmaster event text and record the
    outcome.

    For each entry of self.event_text_raw a fresh Event reads the text
    and start_accuracy_extraction() is called. The fields of the returned
    result set are appended to the matching *_eventminer lists so they
    can later be compared with the initial goldmaster data.
    """
    def unchanged(value):
        return value

    def to_utf8(value):
        return value.encode("utf-8")

    # (target list attribute, result-set key, conversion), in append order.
    # Only the event text is encoded; start_month and end_day are stored
    # as strings, everything else is stored as returned.
    columns = (
        ("event_text_eventminer", "event", to_utf8),
        ("rule_nr_eventminer", "rule_nr", unchanged),
        ("location_eventminer", "location", unchanged),
        ("start_day_eventminer", "start_day", unchanged),
        ("start_month_eventminer", "start_month", str),
        ("start_year_eventminer", "start_year", unchanged),
        ("end_day_eventminer", "end_day", str),
        ("end_month_eventminer", "end_month", unchanged),
        ("end_year_eventminer", "end_year", unchanged),
    )

    for raw_text in self.event_text_raw:
        miner = Event()
        miner.read_text(raw_text)
        extracted = miner.start_accuracy_extraction()
        for attr_name, key, convert in columns:
            getattr(self, attr_name).append(convert(extracted[key]))
def extract_events(self):
    """
    Analyze each goldmaster event with eventminer and collect the results.

    Every text in self.event_text_raw is read by a new Event, and the
    result set of start_accuracy_extraction() (event, rule number,
    location, start and end dates) is appended field by field to the
    corresponding *_eventminer lists for later comparison with the
    goldmaster data.
    """
    for goldmaster_text in self.event_text_raw:
        extractor = Event()
        extractor.read_text(goldmaster_text)
        found = extractor.start_accuracy_extraction()

        # The event text is stored UTF-8 encoded.
        self.event_text_eventminer.append(found["event"].encode("utf-8"))
        self.rule_nr_eventminer.append(found["rule_nr"])
        self.location_eventminer.append(found["location"])

        # Start date: only the month is converted to a string.
        self.start_day_eventminer.append(found["start_day"])
        self.start_month_eventminer.append(str(found["start_month"]))
        self.start_year_eventminer.append(found["start_year"])

        # End date: only the day is converted to a string.
        self.end_day_eventminer.append(str(found["end_day"]))
        self.end_month_eventminer.append(found["end_month"])
        self.end_year_eventminer.append(found["end_year"])
def test_read_from_file():
    """Read random_test.txt into an Event and print the loaded text."""
    sample_path = "test/test_articles/random_test.txt"
    reader = Event()
    reader.read_file(sample_path)
    print(reader.text)
def flask_start_extraction(wiki_text):
    """
    Run the event extraction on the given text.

    The text is assigned to a new Event, replaced by the output of
    remove_references, and the result of start_extraction() is returned.
    """
    miner = Event()
    # Assign first, then clean what the Event holds, as two separate steps.
    miner.text = wiki_text
    cleaned_text = remove_references(miner.text)
    miner.text = cleaned_text
    extraction_result = miner.start_extraction()
    return extraction_result
def test_start_extraction():
    """
    Run start_extraction on random_test.txt after its references have
    been removed; the result is not checked.
    """
    miner = Event()
    miner.read_file("test/test_articles/random_test.txt")
    without_references = remove_references(miner.text)
    miner.text = without_references
    miner.start_extraction()