Exemplo n.º 1
0
def main(argv):
    """
    Annotate one sentence and print the requested matchings.

    The logical form Boxer yields for ``args.sentence`` is always printed.
    Each of the ``frame_matching``, ``frame_element_matching`` and
    ``matching`` options that is set then prints the corresponding output
    of a ``PrologBoxerAnnotator`` built from the two knowledge-base files
    located under ``args.kb_path``.
    """
    args = parse_args(argv, _add_logger_args)
    config_logger(args)
    logger.info('Starting')

    from srl_nlp.analysers.boxer import BoxerLocalAPI
    boxer = BoxerLocalAPI()
    frame_rules = path.join(args.kb_path, args.kb_fr)
    element_rules = path.join(args.kb_path, args.kb_fe)
    annotator = PrologBoxerAnnotator(boxer, frame_rules, element_rules)

    def show(results):
        # One result per line, wrapped in single quotes.
        print('\'%s\n\'' % '\n'.join([str(item) for item in results]))

    print('LF: %s' % boxer.sentence2LF(args.sentence))

    if args.frame_matching:
        print('Frame Matching:')
        results, errors = annotator.frameMatching(args.sentence,
                                                  out_error=True,
                                                  lf_file_name=args.tmp_lf_file)
        logger.debug(errors)
        show(results)

    if args.frame_element_matching:
        print('\nFrame Element Matching:')
        results, errors = annotator.frameElementMatching(args.sentence,
                                                         out_error=True,
                                                         lf_file_name=args.tmp_lf_file)
        logger.debug(errors)
        show(results)

    if args.matching:
        print('\nMatching:')
        # NOTE(review): unlike the two calls above, out_error is not passed
        # here although two values are unpacked -- confirm against matching().
        results, errors = annotator.matching(args.sentence,
                                             lf_file_name=args.tmp_lf_file)
        logger.debug(errors)
        show(results)
        # TODO: printing the annotations as SemEval07 XML
        # (SemEval07XMLAdapter + anno.sem_annotations) is currently disabled.

    logger.info('Done')
Exemplo n.º 2
0
    def main(argv):
        """
        Build a shuffled example base from a corpus and write it to disk.

        The corpus at ``args.data_base_path`` is parsed with the parser
        selected by ``args.input_format``; the examples of every document
        are collected and shuffled with a generator seeded by ``args.seed``.

        When ``args.train_ratio`` is set, the examples are split so that
        train:test is approximately ``train_ratio``:1 and both parts are
        written under ``args.root``; otherwise everything is written as
        'train'.
        """
        args = parse_args(argv, _add_logger_args)
        config_logger(args)

        logger.info('Initialization')

        # Select the appropriate document parser for the input
        input_parser = PARSERS_AVAILABLE[args.input_format]()
        logger.info('Parsing {file}'.format(file=args.data_base_path))

        # Parse corpus
        with open(args.data_base_path) as db_file:
            docs = input_parser.parseXML(db_file)
        logger.info('Done parsing')
        logger.info('Creating base')
        examples = []

        for d_id, doc in enumerate(docs):
            # NOTE(review): a fresh BoxerLocalAPI is built for every document;
            # hoist it out of the loop if one instance can be shared.
            boxer = BoxerLocalAPI()
            examples.extend(examples_from_doc(boxer, d_id=d_id, doc=doc))

        # Shuffle examples reproducibly
        random.Random(args.seed).shuffle(examples)
        if args.train_ratio:
            # Slice bounds must be integers: without int() a float ratio
            # (or true division) yields a float and slicing raises TypeError.
            len_train = int(args.train_ratio * len(examples) /
                            (args.train_ratio + 1))
            train, test = examples[:len_train], examples[len_train:]

            write_to_file(args.root, train, 'train')
            write_to_file(args.root, test, 'test')
        else:
            write_to_file(args.root, examples, 'train')
Exemplo n.º 3
0
def _write_rules(out_file, header, rules):
    """
    Write ``header`` followed by one rule per line.

    The text is appended to ``out_file`` when it is set and written to
    stdout otherwise. Only a file opened here is closed: stdout must stay
    open so that later writes in the same run still succeed.
    """
    if out_file:
        logger.info('Saving to %s' % out_file)
        stream = open(out_file, 'a')
    else:
        logger.info('Writing to stdout')
        stream = stdout
    try:
        stream.write(header)
        for rule in rules:
            stream.write(rule)
            stream.write('\n')
    finally:
        if stream is not stdout:
            stream.close()


def main(argv):
    """
    Process the arguments and write out the theory.

    The theory is the union of the set of the deep role rules and the
    set of the frame matching rules. Each set is generated only when its
    option (``args.frame_element`` / ``args.frame_related``) is set, and is
    appended to ``args.out_file`` or, when no file is given, written to
    stdout.
    """
    args = parse_args(argv, _add_logger_args)
    config_logger(args)
    logger.info(args)
    logger.info('Starting FrameNet')
    parser = NetXMLParser()
    fn = parser.parse('framenet/fndata-1.7')

    # Frame matching rules
    if args.frame_related:
        logger.info('Initialization of frame matching rule inference')
        frame_matching_rules = make_frame_matching_rules(get_lus2frames(fn))
        _write_rules(args.out_file, "%% Frame Matching Rules\n",
                     frame_matching_rules)

    # Frame element (deep role) rules
    if args.frame_element:
        examples = get_all_examples(fn, args.example_file)

        logger.info('Starting Boxer')
        boxer = BoxerLocalAPI(expand_predicates=True)
        logger.info('Boxer started')
        if args.limit:
            # Only the first ``limit`` examples are used to build the theory.
            sliced_examples = examples[:int(args.limit)]
        else:
            sliced_examples = examples
        logger.info('There are %d examples being considered' %
                    len(sliced_examples))
        theory = make_theory(RuleGenerator(boxer), sliced_examples)
        _write_rules(args.out_file, "\n%% Deep Role Rules\n", theory)
Exemplo n.º 4
0
def main(argv):
    """
    Runs the pipeline in a predefined format of documents.

    Files listed in ``args.dir_path`` (optionally shuffled, skipping the
    first ``args.skip`` and limited to ``args.max_docs``) are fed one by one
    to the output processor chosen by ``args.output_format``. When
    ``args.break_output`` is set, the processor is saved and erased every
    ``break_output`` documents; otherwise it is saved once at the end.
    """
    args = parse_args(argv, add_logger_args)
    config_logger(args)

    logger.info('Initializing Boxer')
    # BoxerWebAPI() is the alternative backend to the local one used here.
    boxer = BoxerLocalAPI(TokenizerLocalAPI(),
                          CandCLocalAPI(),
                          expand_predicates=args.expand_boxer_predicates)
    logger.info('Initializing Dependence Tree')
    depTree = DependencyTreeLocalAPI()
    base_dir = args.dir_path
    file_paths = listdir(base_dir)

    if args.random:
        shuffle(file_paths)

    if args.max_docs is not None:  # limit the number of documents read
        length = min(int(args.max_docs), len(file_paths))
    else:
        length = len(file_paths)

    # The slice may hold fewer than ``length`` files when ``skip`` is
    # non-zero, so progress and the final dump rely on the real count.
    selected = file_paths[args.skip:(args.skip + length)]
    total = len(selected)

    # define the way the info will be stored and dumped
    if args.output_format == 'pl':
        output = Processor2PL()
    elif args.output_format == 'problog':
        output = Processor2ProbLog()
    else:
        output = Processor2JSON()

    out_count = 0
    # iterate through all selected files of the specified dir
    logger.info('Reading files')
    for count, file_path in enumerate(selected):
        with open(path.join(base_dir, file_path), 'r') as raw_text:
            output.add_doc(raw_text,
                           count,
                           boxer,
                           depTree,
                           replace=args.replace_entities)
        if args.break_output and (count + 1) % args.break_output == 0:
            # a full batch was read: save it to its own file and start anew
            output.save_output(args.out_file, out_count)
            output.erase()
            out_count += 1
        logger.info("%6.2f%% Read",
                    100 * float(count + 1) / total)  # logs progress

    # ensure the last (partial) batch receives its dump
    if args.break_output:
        if total % args.break_output != 0:
            output.save_output(args.out_file, out_count)
    else:
        output.save_output(args.out_file)
Exemplo n.º 5
0
def main(argv):
    """
    Walk ``args.dir_path`` with ``run_tree`` and output the collected results.

    ``run_tree`` fills a dictionary (passed as ``d``); it is dumped to
    ``args.output_file`` when that option is set and printed otherwise.
    """
    args = parse_args(argv, add_logger_args)
    config_logger(args)
    results = {}
    logger.info('Starting at %s', args.dir_path)
    run_tree(args.dir_path, _runAleph_out_parser, args.file_prefix, logger,
             d=results)
    if not args.output_file:
        print(results)
    else:
        with open(args.output_file, 'w') as sink:
            dump(results, sink)
    logger.info('Done')
Exemplo n.º 6
0
    def main(argv):
        """
        Annotate a corpus and write the result as XML.

        The corpus at ``args.data_base_path`` is parsed with the parser
        chosen by ``args.input_format``, evaluated with a
        ``PrologBoxerAnnotator`` and serialised with the parser chosen by
        ``args.output_format`` -- into ``args.annotation_output_path`` when
        it is set, otherwise through ``doc2XML`` without a file argument.
        """
        args = parse_args(argv, _add_logger_args)
        config_logger(args)

        logger.info('Initialization')

        boxer = BoxerLocalAPI()

        # Document parsers for reading the input and writing the output
        reader = PARSERS_AVAILABLE[args.input_format]()
        writer = PARSERS_AVAILABLE[args.output_format]()

        # Rule files live side by side under the knowledge-base directory
        frame_rules = path.join(args.kb_path, args.kb_fr)
        element_rules = path.join(args.kb_path, args.kb_fe)
        annotator = PrologBoxerAnnotator(boxer, frame_rules, element_rules)

        start_msg = '{anno} is parsing {file}'.format(anno=annotator,
                                                      file=args.data_base_path)
        logger.info(start_msg)

        with open(args.data_base_path) as corpus:
            docs = reader.parseXML(corpus)
        logger.info('Done parsing')

        out_docs = eval_corpus(annotator, boxer, docs,
                               skip_frame_matching=args.skip_frame_matching)

        # NOTE(review): only the first evaluated document is written out --
        # confirm that eval_corpus is expected to yield a single document.
        if not args.annotation_output_path:
            logger.info("Printing results to stdout")
            writer.doc2XML(out_docs[0])
        else:
            store_msg = "Storing results at {f_name}".format(
                f_name=args.annotation_output_path)
            logger.info(store_msg)
            with open(args.annotation_output_path, 'w') as xml_out:
                writer.doc2XML(out_docs[0], xml_out)
        logger.info('Done')
Exemplo n.º 7
0
def main(argv):
    """
    Write an experiment for the selected engine.

    Loads the JSON configuration (``tests.conf`` when no file is given),
    instantiates ``Aleph`` or ``ProbLog`` according to ``args.engine``,
    makes sure ``<dir_name>/<engine>`` exists and hands everything over to
    ``write_exp``.
    """
    args = parse_args(argv, add_logger_args)
    config_logger(args)

    # Fall back to the default configuration file name
    args.configuration_file = args.configuration_file or 'tests.conf'
    with open(args.configuration_file, 'r') as conf_file:
        conf = json.load(conf_file)

    engine = Aleph() if args.engine == 'aleph' else ProbLog()

    # Create the output dir and, inside it, the engine-specific dir
    engine_path = path.join(args.dir_name, args.engine)
    for directory in (args.dir_name, engine_path):
        ensure_dir(directory)

    write_exp(conf,
              args.kb,
              args.base,
              args.facts,
              args.negatives,
              engine_path,
              engine,
              args.copy_kb)
Exemplo n.º 8
0
    def main(argv):
        """
        Parse an XML corpus and optionally check and re-export it.

        The file ``args.input_file`` is parsed with the adapter selected by
        ``args.parser`` (``SemEval07XMLAdapter`` when the name is unknown).
        Depending on the options, every sentence is compared with its
        annotation-free rendering, the documents are pickled to
        ``args.output_file`` and/or written as XML to
        ``args.output_xml_file``.
        """
        args = parse_args(argv, _add_logger_args)
        config_logger(args)
        logger.info('Loading XML file')
        with open(args.input_file, 'r') as xml_in:
            tree = XMLTree.parse(xml_in)
        logger.info('XML tree ready')
        root = tree.getroot()

        adapter_cls = PARSERS_AVAILABLE.get(args.parser, SemEval07XMLAdapter)
        adapter = adapter_cls()

        logger.info('Parsing XML tree')
        try:
            docs = adapter.parseXML(root)
        except KeyError:
            # A KeyError here is reported as a hint to try another adapter.
            raise KeyError('Consider using another parser type by using the option --parser')
        logger.info('Done parsing XML tree')

        if args.check_examples:
            for doc in docs:
                for sentence in doc:
                    plain_text = sentence.get_fn_example().str_no_annotation()
                    print(plain_text)
                    print(sentence.get_fn_example())
                    # Flag sentences whose round trip does not give back the text
                    if plain_text != sentence.text:
                        logger.critical("{sent} was not properly processed".format(sent=sentence))

        if args.output_file is not None:
            logger.info('Writing pickle file')
            with open(args.output_file, 'wb') as pickle_out:
                pickle.dump(docs, pickle_out)

        if args.output_xml_file is not None:
            logger.info('Writing XML file')
            with open(args.output_xml_file, 'w') as xml_out:
                for doc in docs:
                    adapter.doc2XML(doc, xml_file=xml_out)
Exemplo n.º 9
0
def main(argv):
    """
    Apply ``_runAleph`` over the tree rooted at ``args.dir_path``.

    The traversal itself is delegated to ``run_tree``, which also receives
    ``args.file_prefix``.
    """
    args = parse_args(argv, _add_logger_args)
    config_logger(args)

    root_dir = args.dir_path
    logger.info('Starting at %s', root_dir)
    run_tree(root_dir, _runAleph, args.file_prefix)
    logger.info('Done')