def print_trees(self, text):
    """Parse ``text`` and print every parsed sentence together with its tree.

    Each item produced by ``self.parse_text(text)`` is indexed positionally:
    element 0 is printed as-is, element 1 is wrapped in a ``Sentence`` whose
    ``print_tree()`` is then called. An empty line is printed after each item.
    """
    for parsed in self.parse_text(text):
        # The Sentence is built before anything is printed for this item, so
        # a failing construction leaves no partial output behind.
        tree_source = Sentence(parsed[1])
        print(parsed[0])
        tree_source.print_tree()
        print('')
def read_text(self, text, aux_text=None, reset_context=True):
    """Parse ``text`` and run ``read_sentence`` on every parsed sentence.

    On first use (``self.parser is None``) a ``Parser`` and a
    ``Disambiguation`` helper are created and cached on the instance.

    Args:
        text: the text handed to ``self.parser.parse_text``.
        aux_text: optional extra text; when truthy it is appended to ``text``
            (newline-separated) in the stored context.
        reset_context: when true, ``self.aux_text`` is overwritten from
            ``text`` / ``aux_text``; when false it is left untouched.

    Returns:
        A list of ``(parse[0], outcome)`` tuples, one per parse, where
        ``outcome`` is what ``self.read_sentence`` returned for
        ``Sentence(parse[1])``.
    """
    if self.parser is None:
        self.debug_msg('creating parser...')
        self.parser = Parser()
        self.disamb = Disambiguation(self.hg, self.parser)

    nlp_parses = self.parser.parse_text(text)

    # NOTE(review): the aux_text handling is treated as part of the
    # reset_context branch, i.e. aux_text is ignored when reset_context is
    # false -- confirm against the intended behaviour.
    if reset_context:
        self.aux_text = '%s\n%s' % (text, aux_text) if aux_text else text

    # All sentences are read first; the debug dump only starts afterwards.
    parses = []
    for item in nlp_parses:
        parses.append((item[0], self.read_sentence(Sentence(item[1]))))

    for _, outcome in parses:
        self.debug_msg('== extra ==')
        for edge in outcome.edges:
            self.debug_msg(ed.edge2str(edge))

    return parses
def test(infile, model_type='rf'):
    """Evaluate a ``Hypergen`` model on the test parses read from ``infile``.

    For every parse, a fresh ``Hypergen(model_type=model_type)`` is tested
    against the expected transformations and a ``wrong / total`` line is
    printed. Afterwards the accumulated prediction counts, the accumulated
    true-value counts and the overall error rate are printed.

    Args:
        infile: passed through to ``read_parses(infile, test_set=True)``.
        model_type: passed through to the ``Hypergen`` constructor.

    Returns:
        None. All results are written to standard output.
    """
    acc_total = 0
    acc_wrong = 0
    acc_predictions = Counter()
    acc_true_values = Counter()

    for parse in read_parses(infile, test_set=True):
        # Only fields 1 (sentence JSON) and 3 (comma-separated transformation
        # codes) are used; fields 0 and 2 are presumably the sentence text and
        # the outcome string, which this evaluation does not need.
        sentence = Sentence(json_str=parse[1].strip())
        transfs = [int(token) for token in parse[3].split(',')]
        total = len(transfs)

        hgforest = Hypergen(model_type=model_type)
        hgforest.test(sentence, transfs)
        wrong = hgforest.wrong
        print('%s / %s' % (wrong, total))

        acc_total += total
        acc_wrong += wrong
        # In-place Counter addition has the same semantics as ``+`` (counts
        # are summed, non-positive results dropped) without rebuilding the
        # accumulator through sum() on every iteration.
        acc_predictions += Counter(hgforest.test_predictions)
        acc_true_values += Counter(hgforest.test_true_values)

    for header, counts in (('PREDICTIONS:', acc_predictions),
                           ('TRUE_VALUES:', acc_true_values)):
        print(header)
        for transf, count in counts.items():
            print('%s: %s' % (hgtransf.to_string(transf), count))

    # With no parses (or no transformations at all) there is nothing to
    # divide by; report that instead of raising ZeroDivisionError.
    if acc_total == 0:
        print('error rate: n/a (no transformations were tested)')
        return

    error_rate = (float(acc_wrong) / float(acc_total)) * 100.
    print('error rate: %.3f%%' % error_rate)
# NOTE(review): the statement run on the next line is the tail of a definition
# whose header lies outside this excerpt. It is kept exactly as found because
# its nesting cannot be recovered from here.
for i in range(len(token_seq)): token_seq[i].position_in_sentence = i sents.append((sentence_text, token_seq)) return sents


# Parse `text` with self.parse_text and, for each result, print element 0
# followed by the tree of a Sentence built from element 1, then an empty line.
def print_trees(self, text):
    parses = self.parse_text(text)
    for p in parses:
        # The Sentence is built from p[1] before p[0] is printed.
        s = Sentence(p[1])
        print(p[0])
        s.print_tree()
        print('')


# Manual smoke test: parse one sample sentence and print its tree.
if __name__ == '__main__':
    test_text = u"""Some subspecies of mosquito might be 1st to be genetically wiped out."""
    # Alternative French-language sample input, kept for manual testing:
    # test_text = u"""Des millions de Français n’ont pas accès à une connexion."""

    print('Starting parser...')
    parser = Parser()

    print('Parsing...')
    result = parser.parse_text(test_text)

    # Each result is indexed positionally: r[0] is printed as-is and r[1] is
    # handed to Sentence.
    for r in result:
        sentence = Sentence(r[1])
        print(r[0])
        sentence.print_tree()
        print('')
# NOTE(review): the statement run on the next line is the tail of a definition
# whose header lies outside this excerpt. It is kept exactly as found because
# its nesting cannot be recovered from here.
sents = [] for span in parsed_data.sents: sent = [self.__spacy2token(parsed_data[i]) for i in range(span.start, span.end)] sents.append(sent) return sents


# Manual smoke test: parse a multi-sentence sample text and print every
# sentence followed by its tree.
if __name__ == '__main__':
    test_text = u""" Alan Mathison Turing was a pioneering English computer scientist, mathematician, logician, cryptanalyst and theoretical biologist. He was highly influential in the development of theoretical computer science, providing a formalisation of the concepts of algorithm and computation with the Turing machine, which can be considered a model of a general purpose computer. Turing is widely considered to be the father of theoretical computer science and artificial intelligence. The psychologist George Kelley (1955) noted that humans do not enter a world that is inherently structured; we must give the world a structure that we ourselves create. Some subspecies of mosquito might be 1st to be genetically wiped out. """

    print('Starting parser...')
    parser = Parser()

    print('Parsing...')
    result = parser.parse_text(test_text)

    # Here each result is passed to Sentence whole (not indexed).
    for r in result:
        sentence = Sentence(r)
        print(sentence)
        sentence.print_tree()
        print('')
def process_sentence(self, sentence):
    """Process ``sentence`` starting at its root token and return the tree.

    The value returned by ``self.process_token`` for ``sentence.root()`` is
    stored as ``self.tree.root_id``; ``remove_redundant_nesting()`` is then
    called on the tree before it is returned.
    """
    root_token = sentence.root()
    self.tree.root_id = self.process_token(root_token)
    self.tree.remove_redundant_nesting()
    return self.tree


def transform(sentence):
    """Run a freshly created ``AlphaStage`` on ``sentence``; return its result."""
    return AlphaStage().process_sentence(sentence)


# Manual smoke test: parse a sample text, then print each sentence, its tree
# and the result of transforming it.
if __name__ == "__main__":
    test_text = """ My name is James Bond. """

    print("Starting parser...")
    parser = Parser()

    print("Parsing...")
    result = parser.parse_text(test_text)
    print(result)

    for parsed in result:
        sent = Sentence(parsed)
        print(sent)
        sent.print_tree()
        tree = transform(sent)
        print(tree)
# NOTE(review): the statement on the next line is the tail of a definition
# whose header lies outside this excerpt. It is kept exactly as found.
return elem_id, transf


# Process `sentence` from its root token: element 0 of what process_token
# returns becomes the tree's root id, and the sentence and tree are returned
# wrapped in a ParserOutput.
def process_sentence(self, sentence):
    # process_token's result is indexed here; presumably it is the
    # (elem_id, transf) pair returned just above -- confirm.
    self.tree.root_id = self.process_token(sentence.root())[0]
    return ParserOutput(sentence, self.tree)


# Run a freshly created AlphaForest on `sentence` and return its result.
def transform(sentence):
    alpha = AlphaForest()
    return alpha.process_sentence(sentence)


# Manual smoke test: parse a sample sentence, transform it and print the
# resulting tree as a hyperedge string without namespaces.
if __name__ == '__main__':
    # learn('cases.csv', 'alpha_forest.model')

    test_text = """ Satellites from NASA and other agencies have been tracking sea ice changes since 1979. """
    # test_text = 'Telmo is going to the gym.'

    print('Starting parser...')
    parser = Parser()

    print('Parsing...')
    result = parser.parse_text(test_text)

    # Only r[1] of each result is used; it is handed to Sentence.
    for r in result:
        s = Sentence(r[1])
        t = transform(s)
        print(t.tree.to_hyperedge_str(with_namespaces=False))