def main():
    arg = argparser().parse_args()
    grammar = gram.get_grammar(arg.grammar)

    if '@' in arg.sentences and not (arg.u or arg.i or arg.d):
        arg.d = True

    # Sentences before the '@' separator are treated as positive items,
    # sentences after it as negative items.
    pos = []
    neg = []
    stype = pos

    for s in arg.sentences:
        if s == '@':
            stype = neg
        else:
            stype.append(s)

    try:
        if arg.json:
            print(export_json(pos, neg, grammar, arg.n, arg.frags,
                              arg.supers, arg.descendants, arg.tagger))
        else:
            print(typediff(pos, neg, grammar, arg))
    except delphin.AceError as err:
        sys.stderr.write(err.msg)
        return 2
def main():
    arg = argparser().parse_args()
    profiles = []

    if arg.command == 'index':
        if arg.multi:
            # Index every profile directory found under the given path,
            # skipping files and hidden entries.
            for name in os.listdir(arg.profile):
                path = os.path.join(arg.profile, name)
                if not os.path.isdir(path) or name.startswith('.'):
                    continue
                profiles.append(path)
        else:
            virtual_path = os.path.join(arg.profile, 'virtual')
            if os.path.exists(virtual_path):
                # A virtual profile lists its component profiles, one quoted
                # name per line, relative to the parent directory.
                with open(virtual_path) as file:
                    profiles = [os.path.join(arg.profile, '..', p.strip('"\n'))
                                for p in file]
            else:
                profiles.append(arg.profile)

        grammar = gram.get_grammar(arg.grammar)
        index(profiles, arg.treebank, grammar)
    elif arg.command == 'output':
        output(arg.path, arg.type)
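# A minimal sketch of how the 'virtual' file is interpreted above. The file
# contents here are made up; real [incr tsdb()] virtual profiles list one
# quoted profile name per line, resolved relative to the profile's parent
# directory (hence the join with '..'). The helper name is illustrative and
# not part of the original scripts.

def _expand_virtual(profile_dir, lines=('"wsj00a"\n', '"wsj00b"\n')):
    return [os.path.join(profile_dir, '..', p.strip('"\n')) for p in lines]

# _expand_virtual('tsdb/gold/wsj00') ->
#     ['tsdb/gold/wsj00/../wsj00a', 'tsdb/gold/wsj00/../wsj00b']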
def output(pickle_path, output_type):
    with open(pickle_path, 'rb') as f:
        type_stats = pickle.load(f)

    # The pickle filename encodes grammar alias, treebank name and tree count,
    # separated by '--' (see index(), which produces these files).
    x = os.path.splitext(os.path.basename(pickle_path))[0].split('--')
    grammar_name = x[0]
    treebank = x[1]
    trees = x[2]
    metadata = {'grammar': grammar_name, 'treebank': treebank, 'trees': trees}

    grammar = gram.get_grammar(grammar_name)
    hierarchy = delphin.load_hierarchy(grammar.types_path)
    signs = [x.name for x in hierarchy['sign'].descendants()
             if not x.name.startswith('glb')]

    if output_type == 'txt':
        lex_entries = filter(lambda x: x.endswith('_le'), signs)
        rules = filter(lambda x: not x.endswith('_le'), signs)
        unknowns = filter(lambda x: x.endswith('unknown_rel"'), type_stats.keys())
        all_types = [x for x in hierarchy.types if not x.startswith('glb')]
        txt_output(lex_entries, type_stats, metadata, 'lex')
        txt_output(rules, type_stats, metadata, 'rule')
        txt_output(type_stats.keys(), type_stats, metadata, 'everything')
        txt_output(unknowns, type_stats, metadata, 'unknowns')
    elif output_type == 'json':
        json_output(type_stats, metadata)
def main(): arg = argparser().parse_args() if arg.command == "make-data": if not os.path.exists(config.DATAPATH): os.makedirs(config.DATAPATH) if len(arg.grammars) == 0: grammars = gram.get_grammars() else: grammars = [gram.get_grammar(alias) for alias in arg.grammars] for grammar in grammars: try: make_data(grammar) except UtilError as e: sys.stderr.write(e.msg + "\n")
def main():
    global DEBUG
    arg = argparser().parse_args()
    DEBUG = arg.debug

    if arg.command == 'convert' and arg.align and arg.best > 1:
        sys.stderr.write("Align option requires that best = 1.\n")
        return 1
    elif arg.command == 'draw':
        arg.feature = None  # just hack this rather than working out when defined

    grammar = gram.get_grammar(arg.grammar)

    #if arg.command == 'draw' or arg.feature not in NONTDL_FEATURES:
    #if os.path.basename(arg.paths[0]) in ('vm6', 'vm13', 'vm31', 'vm32'):
    #    # note that this program doesn't support mixing speech and non-speech
    #    # profiles in the one invocation. If you need to use speech profiles,
    #    # just run with only speech or only non-speech profiles.
    #    grammar.read_tdl(speech=True)
    #else:
    #    grammar.read_tdl(speech=False)
    grammar.read_tdl(speech=False)

    try:
        # Do the thing!
        if arg.command == 'compare':
            print(compare(grammar, arg))
        elif arg.command in ('count', 'convert', 'draw'):
            results = get_results(grammar, arg)

            if arg.command == 'count':
                print(collection_features(list(results.values()),
                                          arg.feature, arg.descendants))
            elif arg.command == 'convert':
                print(convert_trees(results, arg.feature, arg.align, arg.paths,
                                    arg.failtok, arg.best, arg.backoff))
            elif arg.command == 'draw':
                draw(results)
    except delphin.AceError as e:
        print(e)

    return 0
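# Entry-point sketch for scripts like the one above, assuming the module is
# run directly; the original file may already define an equivalent guard
# elsewhere, in which case this is redundant.
if __name__ == '__main__':
    sys.exit(main())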
def index(profiles, treebank, in_grammar):
    stats_dict = defaultdict(delphin.TypeStats)
    trees = 0
    failures = []

    for path in profiles:
        grammar = in_grammar
        items_seen = set()
        print("processing {}".format(path))

        profile = os.path.basename(path)
        if profile in ERG_SPEECH_PROFILES:
            # Speech profiles need the speech variant of the grammar.
            alias = grammar.alias + '-speech'
            grammar = gram.get_grammar(alias)

        try:
            # for treebanked profiles:
            out = delphin.tsdb_query('select i-id derivation where t-active > 0', path)
            # for non-treebanked profiles:
            # out = delphin.tsdb_query('select i-id derivation where readings > 0', path)
        except delphin.TsdbError as e:
            sys.stderr.write(str(e) + '\n')
            continue

        if out == '':
            continue

        results = out.strip().split('\n')

        for result in results:
            iid, derivation = result.split(' | ')

            if iid in items_seen or iid in BLACKLIST:
                continue

            try:
                counts = get_types(derivation, grammar)
                for name, count in counts.items():
                    stats_dict[name].update(count)
            except delphin.AceError as e:
                e.other_data.append(iid)
                e.other_data.append(path)
                failures.append(e)
                sys.stderr.write(str(e) + '\n')
            else:
                items_seen.add(iid)
                trees += 1
                print(trees, iid)

    print("Processed {} trees".format(trees))

    num_failures = len(failures)
    if num_failures > 0:
        print("Failed to reconstruct {} trees".format(num_failures))
        print("See type-stats-errors.txt for details.")
        with open('type-stats-errors.txt', 'w') as f:
            errors_str = '\n'.join(str(e) for e in failures)
            # The file is opened in text mode, so write the string directly
            # rather than encoding it to bytes.
            f.write(errors_str + '\n\n')

    treebank_str = treebank.replace(' ', '_')
    filename = '{}--{}--{}.pickle'.format(grammar.alias, treebank_str, trees)

    with open(filename, 'wb') as f:
        pickle.dump(stats_dict, f)
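# A minimal sketch of the aggregation pattern used in index() above, with a
# stand-in class instead of the real delphin.TypeStats (whose exact fields are
# not shown here): defaultdict creates a fresh stats object the first time a
# type name is seen, and update() accumulates per-tree counts.

from collections import defaultdict


class _StatsSketch:
    """Illustrative stand-in for delphin.TypeStats; not part of the original code."""

    def __init__(self):
        self.trees = 0   # number of trees the type occurred in
        self.total = 0   # total number of occurrences

    def update(self, count):
        self.trees += 1
        self.total += count


def _demo():
    stats = defaultdict(_StatsSketch)
    for name, count in [('head_subj_phrase', 2), ('head_subj_phrase', 1)]:
        stats[name].update(count)
    assert stats['head_subj_phrase'].trees == 2
    assert stats['head_subj_phrase'].total == 3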