def test_parser(args):
    parser = create_parser(lang=args.lang, parser_class=args.parser)
    total = 0
    wrong = 0
    sentence = None
    with open(args.infile) as f:
        for line in f:
            if sentence:
                total += 1
                correct_edge = hedge(line.strip())
                parser_output = parser.parse(sentence)
                parsed_sentence = parser_output['parses'][0]
                edge = parsed_sentence['main_edge']
                sent = parsed_sentence['spacy_sentence']
                if edge != correct_edge:
                    wrong += 1
                    print_tree(sent.root)
                    print('expected:')
                    print(correct_edge)
                    print('result:')
                    print(edge)
                sentence = None
            else:
                sentence = line.strip()
    print('%s wrong out of %s.' % (wrong, total))
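# A minimal sketch of the test-file layout that test_parser appears to expect,
# inferred from the loop above: lines alternate between a sentence and the
# hyperedge the parser is expected to produce for it. The placeholder contents
# below are illustrative assumptions, not taken from the original code or data.
#
#   <sentence 1>
#   <expected main edge for sentence 1, in hedge notation>
#   <sentence 2>
#   <expected main edge for sentence 2, in hedge notation>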
def __init__(self, hg=None, sequence=None, lang=None, corefs=False,
             parser=None, parser_class=None):
    self.hg = hg
    self.sequence = sequence
    self.lang = lang
    if parser_class:
        plang = parser_lang(parser_class)
        if lang:
            if lang != plang:
                msg = ('specified language ({}) and parser language ({}) '
                       'do not match'.format(lang, plang))
                raise RuntimeError(msg)
        else:
            self.lang = plang
    if parser is None:
        self.parser = create_parser(lang=lang, parser_class=parser_class,
                                    lemmas=True, corefs=corefs)
    else:
        self.parser = parser
def get_parser(self, agent):
    if self.parser is None:
        corefs = self.corefs in {'resolve', 'replace'}
        self.parser = create_parser(lang=self.lang,
                                    parser_class=self.parser_class,
                                    lemmas=True,
                                    resolve_corefs=corefs)
    return self.parser
def manual_test(args):
    parser = create_parser(lang=args.lang, parser_class=args.parser)
    he = ManualEvaluation()
    sentences = []

    # read existing tests
    try:
        with open(args.outfile, 'r') as f:
            for line in f:
                parts = line.split('\t')
                if len(parts) == 4:
                    sentence = parts[0].strip()
                    sentences.append(sentence)
                    edge = hedge(parts[1].strip())
                    answer = parts[2].strip()
                    defects = list(hedge(edge_str)
                                   for edge_str in parts[3].split('&'))
                    he.apply_evaluation(answer, edge, defects)
    except FileNotFoundError:
        pass

    with open(args.infile, 'r') as f:
        for line in f:
            print('GLOBAL:')
            print(colored(str(he), 'white'))

            sentence = line.strip()
            if sentence not in sentences:
                sentences.append(sentence)
                parser_output = parser.parse(sentence)
                parsed_sentence = parser_output['parses'][0]
                edge = parsed_sentence['main_edge']
                if edge:
                    print('\n{}\n{}\n'.format(sentence, indented(edge)))
                    answer = he.input()
                    if answer == 'd':
                        defects = input_defects(sentence, edge)
                    else:
                        defects = []
                    he.apply_evaluation(answer, edge, defects)
                    defect_str = '&'.join(
                        [defect.to_str() for defect in defects])
                    row_str = '\t'.join(
                        (sentence, edge.to_str(), answer, defect_str))
                    with open(args.outfile, 'a') as of:
                        of.write('{}\n'.format(row_str))
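# For reference, a sketch of the tab-separated row that manual_test appends to
# args.outfile (and reads back on the next run). The placeholders are
# illustrative assumptions; only the four-column layout and the '&' separator
# for defect edges come from the code above.
#
#   <sentence>\t<main edge, via edge.to_str()>\t<answer>\t<defect 1>&<defect 2>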
def __init__(self, lang):
    self.parser = create_parser(name=lang)
    self.sentences = set()
    self.tokens = 0
    self.correct_edges = 0
    self.ignored = 0
    self.input_files = None
    self.sentence = None
    self.source = None
    self.atoms = None
    self.spacy_sentence = None
    self.token2atom = None
def extract_sentences(args):
    parser = create_parser(name=args.lang)
    sentences = []
    count = 0
    with open(args.infile, 'r') as infile:
        for line in infile:
            paragraph = line.strip()
            if len(paragraph) > 0:
                parse_results = parser.parse(paragraph)
                for parse in parse_results['parses']:
                    sentences.append(parse['text'])
                    count += 1
                    if count % 100 == 0:
                        print('{} sentences found'.format(count))
    # the output file is only written once, after shuffling
    random.shuffle(sentences)
    with open(args.outfile, 'w') as outfile:
        for sentence in sentences:
            outfile.write('{}\n'.format(sentence))
def generate_alpha_training_data(args):
    total_sentences = 0
    ignored_sentences = 0
    failed_parses = 0
    total_atoms = 0

    parser = create_parser(name=args.lang)

    with open(args.infile, 'r') as infile, open(args.outfile, 'w') as outfile:
        for line in infile.readlines():
            case = json.loads(line)
            sentence = case['sentence']
            atoms = case['atoms']

            parse_results = parser.parse(sentence)
            parse = parse_results['parses'][0]
            spacy_sentence = parse['spacy_sentence']

            if case['ignore']:
                ignored_sentences += 1
            elif len(atoms) == len(spacy_sentence):
                total_sentences += 1
                total_atoms += len(atoms)
                for i in range(len(atoms)):
                    atom = atoms[i]
                    token = spacy_sentence[i]

                    word_before = ''
                    word_after = ''
                    pos_before = ''
                    pos_after = ''
                    dep_before = ''
                    dep_after = ''
                    punct_before = False
                    punct_after = False
                    if i > 0:
                        word_before = str(spacy_sentence[i - 1])
                        pos_before = spacy_sentence[i - 1].pos_
                        dep_before = spacy_sentence[i - 1].dep_
                        if spacy_sentence[i - 1].pos_ == 'PUNCT':
                            punct_before = True
                    if i < len(atoms) - 1:
                        word_after = str(spacy_sentence[i + 1])
                        pos_after = spacy_sentence[i + 1].pos_
                        dep_after = spacy_sentence[i + 1].dep_
                        if spacy_sentence[i + 1].pos_ == 'PUNCT':
                            punct_after = True

                    head = token.head
                    is_root = head is None
                    has_lefts = token.n_lefts > 0
                    has_rights = token.n_rights > 0

                    outfile.write(('{}' + '\t{}' * 23 + '\n').format(
                        hedge(atom).type()[0],
                        str(token),
                        token.pos_,
                        token.tag_,
                        token.dep_,
                        str(head) if head else '',
                        head.pos_ if head else '',
                        head.tag_ if head else '',
                        head.dep_ if head else '',
                        is_root,
                        has_lefts,
                        has_rights,
                        token.ent_type_,
                        token.shape_[:2],
                        word_before,
                        word_after,
                        punct_before,
                        punct_after,
                        pos_before,
                        pos_after,
                        dep_before,
                        dep_after,
                        case['correct'],
                        case['source']))
            else:
                failed_parses += 1

    print('sentences: {}; ignored: {}; failed: {}; atoms: {}'.format(
        total_sentences, ignored_sentences, failed_parses, total_atoms))
    print('done.')
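# Column order of each training row written above, read directly from the
# format call: atom type (first letter), token text, POS, tag, dep, head text,
# head POS, head tag, head dep, is_root, has_lefts, has_rights, entity type,
# shape prefix (first two characters), word before, word after, punct_before,
# punct_after, pos_before, pos_after, dep_before, dep_after, correct flag,
# source.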
def get_parser(self, agent):
    if self.parser is None:
        self.parser = create_parser(name=self.lang,
                                    lemmas=True,
                                    resolve_corefs=True)
    return self.parser
from graphbrain.parsers import create_parser, print_tree


if __name__ == '__main__':
    text = """
    Satellites from NASA and other agencies have been tracking sea ice changes
    since 1979.
    """

    parser = create_parser(lang='en', lemmas=True)
    parse_results = parser.parse(text)
    for parse in parse_results['parses']:
        print_tree(parse['spacy_sentence'].root)
        print(parse['main_edge'])
        print('>> Extra edges:')
        for edge in parse['extra_edges']:
            print(edge)