Exemplo n.º 1
0
def main():
    """Entry point: parse the given sentences and print a type comparison.

    Sentences appearing before an '@' marker form the positive set and
    those after it the negative set.  Returns 2 on an ACE error.
    """
    args = argparser().parse_args()
    grammar = gram.get_grammar(args.grammar)

    # A negative set was supplied but no comparison mode chosen:
    # default to difference mode.
    if '@' in args.sentences and not (args.u or args.i or args.d):
        args.d = True

    pos, neg = [], []
    current = pos
    for sentence in args.sentences:
        if sentence == '@':
            current = neg  # everything after the marker is a negative item
        else:
            current.append(sentence)

    try:
        if args.json:
            result = export_json(pos, neg, grammar, args.n, args.frags,
                                 args.supers, args.descendants, args.tagger)
        else:
            result = typediff(pos, neg, grammar, args)
        print(result)
    except delphin.AceError as err:
        sys.stderr.write(err.msg)
        return 2
Exemplo n.º 2
0
def main():
    """Entry point: index treebank profiles or emit output from a pickle."""
    args = argparser().parse_args()

    if args.command == 'index':
        paths = []
        if args.multi:
            # Treat each non-hidden subdirectory as a profile of its own.
            for entry in os.listdir(args.profile):
                candidate = os.path.join(args.profile, entry)
                if os.path.isdir(candidate) and not entry.startswith('.'):
                    paths.append(candidate)
        else:
            virtual = os.path.join(args.profile, 'virtual')
            if os.path.exists(virtual):
                # A 'virtual' file lists component profiles, one (possibly
                # quoted) name per line, relative to the parent directory.
                with open(virtual) as fh:
                    paths = [os.path.join(args.profile, '..', line.strip('"\n'))
                             for line in fh]
            else:
                paths = [args.profile]

        index(paths, args.treebank, gram.get_grammar(args.grammar))

    elif args.command == 'output':
        output(args.path, args.type)
Exemplo n.º 3
0
def output(pickle_path, output_type):
    """Render previously pickled type statistics as text files or JSON.

    The pickle's basename is expected to look like
    'grammar--treebank--trees.pickle'; those three fields become metadata.
    """
    with open(pickle_path, 'rb') as fh:
        type_stats = pickle.load(fh)

    stem = os.path.splitext(os.path.basename(pickle_path))[0]
    parts = stem.split('--')
    metadata = {'grammar': parts[0], 'treebank': parts[1], 'trees': parts[2]}

    grammar = gram.get_grammar(parts[0])
    hierarchy = delphin.load_hierarchy(grammar.types_path)
    # 'glb'-prefixed types are auto-generated glb types; exclude them.
    signs = [t.name for t in hierarchy['sign'].descendants()
             if not t.name.startswith('glb')]

    if output_type == 'txt':
        lex_entries = [s for s in signs if s.endswith('_le')]
        rules = [s for s in signs if not s.endswith('_le')]
        unknowns = [t for t in type_stats.keys()
                    if t.endswith('unknown_rel"')]
        # NOTE(review): all_types is computed but never used below —
        # kept for parity with the original; confirm before removing.
        all_types = [t for t in hierarchy.types if not t.startswith('glb')]
        txt_output(lex_entries, type_stats, metadata, 'lex')
        txt_output(rules, type_stats, metadata, 'rule')
        txt_output(type_stats.keys(), type_stats, metadata, 'everything')
        txt_output(unknowns, type_stats, metadata, 'unknowns')
    elif output_type == 'json':
        json_output(type_stats, metadata)
Exemplo n.º 4
0
def main():
    """Entry point for the 'make-data' command.

    Ensures the data directory exists, then builds data files for the
    requested grammars (or for every known grammar when none are named).
    Per-grammar failures are reported to stderr and skipped.
    """
    args = argparser().parse_args()

    if args.command != "make-data":
        return

    if not os.path.exists(config.DATAPATH):
        os.makedirs(config.DATAPATH)

    if args.grammars:
        grammars = [gram.get_grammar(alias) for alias in args.grammars]
    else:
        grammars = gram.get_grammars()

    for grammar in grammars:
        try:
            make_data(grammar)
        except UtilError as err:
            sys.stderr.write(err.msg + "\n")
Exemplo n.º 5
0
def main():
    """Entry point: compare, count, convert, or draw parse results.

    Returns 1 on invalid option combinations, otherwise 0.
    """
    global DEBUG
    args = argparser().parse_args()
    DEBUG = args.debug

    if args.command == 'convert' and args.align and args.best > 1:
        sys.stderr.write("Align option requires that best = 1.\n")
        return 1
    elif args.command == 'draw':
        # 'draw' never uses a feature; clearing it here is simpler than
        # working out exactly when it is defined.
        args.feature = None

    grammar = gram.get_grammar(args.grammar)
    # NOTE: speech profiles (vm6, vm13, vm31, vm32) would require
    # grammar.read_tdl(speech=True).  Mixing speech and non-speech profiles
    # in one invocation is not supported, so non-speech TDL is always loaded.
    grammar.read_tdl(speech=False)

    try:
        # Do the thing!
        command = args.command
        if command == 'compare':
            print(compare(grammar, args))
        elif command in ('count', 'convert', 'draw'):
            results = get_results(grammar, args)
            if command == 'count':
                print(collection_features(list(results.values()),
                                          args.feature, args.descendants))
            elif command == 'convert':
                print(convert_trees(results, args.feature, args.align,
                                    args.paths, args.failtok, args.best,
                                    args.backoff))
            else:  # 'draw'
                draw(results)
    except delphin.AceError as err:
        print(err)

    return 0
Exemplo n.º 6
0
def index(profiles, treebank, in_grammar):
    """Collect per-type statistics over treebanked profiles and pickle them.

    For each profile path, queries the [incr tsdb()] database for items with
    an active derivation, reconstructs each tree to count the grammar types
    it uses, and accumulates the counts in a TypeStats mapping.  Results are
    pickled to '<grammar>--<treebank>--<trees>.pickle'; reconstruction
    failures are logged to 'type-stats-errors.txt'.
    """
    stats_dict = defaultdict(delphin.TypeStats)
    trees = 0
    failures = []

    for path in profiles:
        grammar = in_grammar
        items_seen = set()
        print("processing {}".format(path))
        profile = os.path.basename(path)

        # Speech profiles need the speech variant of the grammar.
        if profile in ERG_SPEECH_PROFILES:
            grammar = gram.get_grammar(grammar.alias + '-speech')

        try:
            # for treebanked profiles:
            out = delphin.tsdb_query('select i-id derivation where t-active > 0', path)
            # for non-treebanked profiles use:
            # out = delphin.tsdb_query('select i-id derivation where readings > 0', path)
        except delphin.TsdbError as e:
            sys.stderr.write(str(e) + '\n')
            continue

        if out == '':
            continue

        for result in out.strip().split('\n'):
            iid, derivation = result.split(' | ')

            # Skip duplicates within a profile and known-bad items.
            if iid in items_seen or iid in BLACKLIST:
                continue

            try:
                counts = get_types(derivation, grammar)
                for name, count in counts.items():
                    stats_dict[name].update(count)
            except delphin.AceError as e:
                e.other_data.append(iid)
                e.other_data.append(path)
                failures.append(e)
                sys.stderr.write(str(e) + '\n')
            else:
                items_seen.add(iid)
                trees += 1
                print(trees, iid)

    print("Processed {} trees".format(trees))

    if failures:
        print("Failed to reconstruct {} trees".format(len(failures)))
        print("See type-stats-errors.txt for details.")

        # BUG FIX: the file is opened in text mode, so write str, not bytes.
        # The old code did errors_str.encode('utf8') + '\n\n' (bytes + str),
        # which raises TypeError; open with an explicit utf8 encoding instead.
        with open('type-stats-errors.txt', 'w', encoding='utf8') as f:
            f.write('\n'.join(str(e) for e in failures) + '\n\n')

    # BUG FIX: use in_grammar rather than the leaked loop variable 'grammar',
    # which is undefined when 'profiles' is empty and may be the '-speech'
    # variant of the last profile otherwise.
    filename = '{}--{}--{}.pickle'.format(
        in_grammar.alias, treebank.replace(' ', '_'), trees)

    with open(filename, 'wb') as f:
        pickle.dump(stats_dict, f)