Пример #1
0
def pgf_parse(args):
    """Parse each input sentence and print its best parse, one line each.

    Loads the PGF grammar named by ``args.pgfgrammar``, passes every item of
    ``args.inputstream`` through ``lexer()`` and
    ``translation_pipeline.web_lexer`` for ``args.srclang``, and asks
    ``getKBestParses`` for a 1-best parser with 'PN' and 'Symb' callbacks.

    For every parser result one tab-separated line is printed to
    ``args.outputstream``: the 1-based sentence index, the first value
    yielded by the parser (presumably the parse time -- confirm against
    ``getKBestParses``), then the first parse's leading field formatted as a
    float and its second field as a string. When the parser yields no
    parses the trailing field is left empty.
    """
    grammar = pgf.readPGF(args.pgfgrammar)
    # Function-scope import, kept where the original had it.
    import translation_pipeline

    def format_parse(parse):
        # Float-formatted first field and stringified second field.
        return "%f\t%s" % (parse[0], str(parse[1]))

    preprocessor = lexer()
    inputSet = translation_pipeline.web_lexer(
        grammar, args.srclang, imap(preprocessor, args.inputstream))
    callbacks = [
        ('PN', translation_pipeline.parseNames(grammar, args.srclang)),
        ('Symb', translation_pipeline.parseUnknown(grammar, args.srclang)),
    ]
    parser = getKBestParses(grammar, args.srclang, 1, callbacks)

    # `elapsed` replaces the original loop variable `time`, which shadowed
    # the name of the stdlib module inside this function.
    results = imap(parser, inputSet)
    for sentidx, (elapsed, parsesBlock) in enumerate(results, 1):
        best = format_parse(parsesBlock[0]) if parsesBlock else ''
        print >>args.outputstream, "%d\t%f\t%s" % (sentidx, elapsed, best)
Пример #2
0
def pgf_kparse(args):
    """Print the K-best parses of every input sentence.

    Loads the PGF grammar named by ``args.pgfgrammar``, passes every item of
    ``args.inputstream`` through ``lexer()`` and
    ``translation_pipeline.web_lexer`` for ``args.srclang``, and parses it
    with the ``args.K``-best parser built by ``getKBestParses`` (with 'PN'
    and 'Symb' callbacks). Each block of parses is rendered with
    ``printJohnsonRerankerFormat`` and printed to ``args.outputstream``,
    except when the rendering is exactly a single newline.
    """
    grammar = pgf.readPGF(args.pgfgrammar)
    # Function-scope import, kept where the original had it.
    import translation_pipeline

    preprocessor = lexer()
    inputSet = translation_pipeline.web_lexer(
        grammar, args.srclang, imap(preprocessor, args.inputstream))
    outputPrinter = printJohnsonRerankerFormat
    callbacks = [
        ('PN', translation_pipeline.parseNames(grammar, args.srclang)),
        ('Symb', translation_pipeline.parseUnknown(grammar, args.srclang)),
    ]
    parser = getKBestParses(grammar, args.srclang, args.K,
                            callbacks=callbacks)

    # The first element of each result is not used here; the original also
    # kept a sentence counter that was never read, which has been dropped.
    for _elapsed, parsesBlock in imap(parser, inputSet):
        strParses = str(outputPrinter(parsesBlock))
        # A bare newline is presumably what the formatter returns for a
        # block with nothing to report -- confirm against the formatter.
        if strParses != '\n':
            print >>args.outputstream, strParses
Пример #3
0
def pgf_kparse(args):
    """Write the K-best parses for each input sentence to the output stream.

    The grammar is read from ``args.pgfgrammar``. Items of
    ``args.inputstream`` are preprocessed by ``lexer()`` and
    ``translation_pipeline.web_lexer`` (language ``args.srclang``) and then
    parsed by the ``args.K``-best parser returned by ``getKBestParses``,
    which is given 'PN' and 'Symb' callbacks. Every block of parses is
    formatted by ``printJohnsonRerankerFormat``; any formatted block other
    than a lone newline is printed to ``args.outputstream``.
    """
    grammar = pgf.readPGF(args.pgfgrammar)
    # Function-scope import, kept where the original had it.
    import translation_pipeline

    preprocessor = lexer()
    inputSet = translation_pipeline.web_lexer(
        grammar, args.srclang, imap(preprocessor, args.inputstream))
    name_callback = translation_pipeline.parseNames(grammar, args.srclang)
    unknown_callback = translation_pipeline.parseUnknown(grammar,
                                                         args.srclang)
    parser = getKBestParses(
        grammar, args.srclang, args.K,
        callbacks=[('PN', name_callback), ('Symb', unknown_callback)])

    # Only the parses are needed; the first element of each result is
    # ignored. The unused per-sentence counter of the original is gone.
    for _elapsed, parsesBlock in imap(parser, inputSet):
        strParses = str(printJohnsonRerankerFormat(parsesBlock))
        if strParses != '\n':
            print >> args.outputstream, strParses
Пример #4
0
def pgf_parse(args):
    """Emit the single best parse of every input sentence.

    Reads the grammar from ``args.pgfgrammar``, preprocesses the items of
    ``args.inputstream`` with ``lexer()`` and
    ``translation_pipeline.web_lexer``, and parses them with a 1-best parser
    from ``getKBestParses``. Each result becomes one tab-separated line on
    ``args.outputstream``: 1-based sentence index, the first value yielded
    by the parser, and the rendered first parse (empty when there is none).
    """
    grammar = pgf.readPGF(args.pgfgrammar)
    import translation_pipeline

    normalise = lexer()
    srclang = args.srclang
    sentences = translation_pipeline.web_lexer(
        grammar, srclang, imap(normalise, args.inputstream))

    name_callback = translation_pipeline.parseNames(grammar, srclang)
    unknown_callback = translation_pipeline.parseUnknown(grammar, srclang)
    parser = getKBestParses(
        grammar, srclang, 1,
        [('PN', name_callback), ('Symb', unknown_callback)])

    for sentidx, (elapsed, parsesBlock) in enumerate(
            imap(parser, sentences), 1):
        if len(parsesBlock) > 0:
            best = parsesBlock[0]
            rendered = "%f\t%s" % (best[0], str(best[1]))
        else:
            rendered = ''
        print >> args.outputstream, "%d\t%f\t%s" % (
            sentidx, elapsed, rendered)
Пример #5
0
    def worker(sentence):
        """Parse one sentence and return ``(elapsed_seconds, parses)``.

        ``parses`` is a list of at most ``K`` two-item tuples built from
        what ``parser`` yields: the first item is passed through unchanged
        (presumably the parse score -- confirm), the second is converted
        with ``str`` when ``serializable`` is truthy and left as-is
        otherwise.

        One log line per sentence goes to ``sys.stderr``: the sentence id
        taken from ``sentid``, the elapsed time, and an error message when
        the sentence was skipped or parsing raised ``pgf.ParseError``. In
        the error case the parses collected so far are still returned.
        """
        sentence = sentence.strip()
        curid = next(sentid)
        tstart = time.time()
        kBestParses = []

        token_count = len(sentence.split())
        if token_count > max_length:
            # Temporary hack carried over from the original: skip very long
            # sentences so the parser does not get killed.
            elapsed = time.time() - tstart
            err = ("Sentence too long (%d tokens). "
                   "Might potentially run out of memory" % token_count)
            print >>sys.stderr, '%d\t%.4f\t%s' % (curid, elapsed, err)
            return elapsed, kBestParses

        try:
            callbacks = [
                ('PN', translation_pipeline.parseNames(
                    grammar, args.srclang, sentence)),
                ('Symb', translation_pipeline.parseUnknown(
                    grammar, args.srclang, sentence)),
            ]
            parses = parser(sentence, heuristics=0, callbacks=callbacks)
            for parseidx, parse in enumerate(parses):
                kBestParses.append(
                    (parse[0], str(parse[1]) if serializable else parse[1]))
                # Stop once K parses are collected. As in the original, a
                # non-positive K never matches and so does not limit the
                # number of parses.
                if parseidx == K - 1:
                    break
        except pgf.ParseError as err:
            elapsed = time.time() - tstart
            print >>sys.stderr, '%d\t%.4f\t%s' % (curid, elapsed, err)
            return elapsed, kBestParses

        elapsed = time.time() - tstart
        print >>sys.stderr, '%d\t%.4f' % (curid, elapsed)
        return elapsed, kBestParses