def pgf_parse(args):
    """Print the first parse of each input sentence to ``args.outputstream``.

    The grammar is read from ``args.pgfgrammar``; ``args.inputstream`` is
    passed through ``lexer()`` and ``translation_pipeline.web_lexer`` for
    ``args.srclang``; the parser comes from ``getKBestParses`` with K=1.

    One tab-separated line is printed per sentence: a 1-based sentence
    index, the first value yielded by the parser (formatted with ``%f``;
    presumably the parse time -- TODO confirm), and the formatted first
    entry of the parse block, or an empty string if the block is empty.

    NOTE(review): a later ``pgf_parse`` definition in this file rebinds the
    name and additionally passes callbacks -- confirm whether this variant
    is still needed.
    """
    grammar = pgf.readPGF(args.pgfgrammar)
    import translation_pipeline

    tokenize = lexer()
    sentences = translation_pipeline.web_lexer(
        grammar, args.srclang, imap(tokenize, args.inputstream))

    def describe(entry):
        # entry is indexed as a pair; presumably (score, tree) -- TODO confirm.
        return "%f\t%s" % (entry[0], str(entry[1]))

    parse = getKBestParses(grammar, args.srclang, 1)

    for position, (elapsed, parses) in enumerate(imap(parse, sentences), 1):
        if len(parses):
            best = str(describe(parses[0]))
        else:
            best = ''
        print >>args.outputstream, "%d\t%f\t%s" % (position, elapsed, best)
def pgf_kparse(args):
    """Print the parse block of each input sentence to ``args.outputstream``.

    The grammar is read from ``args.pgfgrammar``; ``args.inputstream`` is
    passed through ``lexer()`` and ``translation_pipeline.web_lexer`` for
    ``args.srclang``; the parser comes from ``getKBestParses`` with
    K=``args.K``.  Each parse block is rendered with
    ``printJohnsonRerankerFormat`` and printed, unless the rendering is
    exactly a single newline.

    NOTE(review): a later ``pgf_kparse`` definition in this file rebinds the
    name and additionally passes callbacks -- confirm whether this variant
    is still needed.
    """
    grammar = pgf.readPGF(args.pgfgrammar)
    import translation_pipeline

    tokenize = lexer()
    sentences = translation_pipeline.web_lexer(
        grammar, args.srclang, imap(tokenize, args.inputstream))
    render = printJohnsonRerankerFormat
    parse = getKBestParses(grammar, args.srclang, args.K)

    # The first element of each yielded pair is not used here.
    for _elapsed, parses in imap(parse, sentences):
        rendered = str(render(parses))
        if rendered == '\n':
            # A lone newline is skipped rather than printed.
            continue
        print >>args.outputstream, rendered
def pgf_kparse(args):
    """Print the parse block of each input sentence to ``args.outputstream``.

    The grammar is read from ``args.pgfgrammar``; ``args.inputstream`` is
    passed through ``lexer()`` and ``translation_pipeline.web_lexer`` for
    ``args.srclang``.  The parser comes from ``getKBestParses`` with
    K=``args.K`` and two callbacks: ``'PN'`` mapped to
    ``translation_pipeline.parseNames`` and ``'Symb'`` mapped to
    ``translation_pipeline.parseUnknown``.  Each parse block is rendered
    with ``printJohnsonRerankerFormat`` and printed, unless the rendering
    is exactly a single newline.
    """
    grammar = pgf.readPGF(args.pgfgrammar)
    import translation_pipeline

    tokenize = lexer()
    sentences = translation_pipeline.web_lexer(
        grammar, args.srclang, imap(tokenize, args.inputstream))
    render = printJohnsonRerankerFormat

    # Category name -> handler built for this grammar and source language.
    callbacks = [
        ('PN', translation_pipeline.parseNames(grammar, args.srclang)),
        ('Symb', translation_pipeline.parseUnknown(grammar, args.srclang)),
    ]
    parse = getKBestParses(
        grammar, args.srclang, args.K, callbacks=callbacks)

    # The first element of each yielded pair is not used here.
    for _elapsed, parses in imap(parse, sentences):
        rendered = str(render(parses))
        if rendered == '\n':
            # A lone newline is skipped rather than printed.
            continue
        print >>args.outputstream, rendered
def pgf_parse(args):
    """Print the first parse of each input sentence to ``args.outputstream``.

    The grammar is read from ``args.pgfgrammar``; ``args.inputstream`` is
    passed through ``lexer()`` and ``translation_pipeline.web_lexer`` for
    ``args.srclang``.  The parser comes from ``getKBestParses`` with K=1
    and two callbacks: ``'PN'`` mapped to
    ``translation_pipeline.parseNames`` and ``'Symb'`` mapped to
    ``translation_pipeline.parseUnknown``.

    One tab-separated line is printed per sentence: a 1-based sentence
    index, the first value yielded by the parser (formatted with ``%f``;
    presumably the parse time -- TODO confirm), and the formatted first
    entry of the parse block, or an empty string if the block is empty.
    """
    grammar = pgf.readPGF(args.pgfgrammar)
    import translation_pipeline

    tokenize = lexer()
    sentences = translation_pipeline.web_lexer(
        grammar, args.srclang, imap(tokenize, args.inputstream))

    def describe(entry):
        # entry is indexed as a pair; presumably (score, tree) -- TODO confirm.
        return "%f\t%s" % (entry[0], str(entry[1]))

    # Category name -> handler built for this grammar and source language.
    callbacks = [
        ('PN', translation_pipeline.parseNames(grammar, args.srclang)),
        ('Symb', translation_pipeline.parseUnknown(grammar, args.srclang)),
    ]
    # Callbacks are passed positionally, as the fourth argument.
    parse = getKBestParses(grammar, args.srclang, 1, callbacks)

    for position, (elapsed, parses) in enumerate(imap(parse, sentences), 1):
        if len(parses):
            best = str(describe(parses[0]))
        else:
            best = ''
        print >>args.outputstream, "%d\t%f\t%s" % (position, elapsed, best)