def do_parse(grammar, tagger_cls, parser_cls, input, topts, popts, backoff,
             npopts, options, identifier, multiprocessing=False,
             logfile=None, partition=None):
    """
    Function called for each input to do tagging and parsing and return the
    results. It's a separate function so that we can hand it over to worker
    processes to do multiprocessing.

    The statements below are written so that they are valid on both
    Python 2.6+ and Python 3 (single-argument C{print(...)}, explicit
    C{sys.stderr.write}, C{except ... as}).

    @param input: the input to parse. A C{str} has trailing newlines
        stripped and is converted with C{ChordInput.from_string}; an empty
        string means there is nothing to do.
    @param identifier: label for this input; echoed in the progress output
        and stored under C{'identifier'} in the response.
    @type multiprocessing: bool
    @param multiprocessing: if true, the tagger and parser are not stored in
        the response and errors are always reported through the response
        rather than through an interactive shell or C{sys.exit}.
    @type logfile: str
    @param logfile: filename to send logging output to. If None, will log
        to stderr
    @param partition: not used by this function body.
    @rtype: dict or None
    @return: C{None} if the input was an empty string or if the interactive
        shell was entered after a failed/interrupted parse. Otherwise a
        response dict with the keys C{tagger}, C{parser}, C{input},
        C{error}, C{messages}, C{time}, C{identifier}, C{results} and
        C{timed_out}. On failure C{error} is set and C{results} stays
        C{None}; on success C{results} holds the parser output.

    """
    # If the input's a string, preprocess it
    if isinstance(input, str):
        input = input.rstrip("\n")
        if not input:
            # Nothing to parse
            return None
        input = ChordInput.from_string(input)

    print("Processing input: %s (%s)" % (input, identifier))

    if logfile is None:
        # Sending logging output to stderr
        logger = create_plain_stderr_logger()
    else:
        logger = create_logger(filename=logfile)
        print("Logging parser progress to %s" % logfile)

    # Prepare an initial response
    # We'll fill in some values of this later
    response = {
        'tagger': None,
        'parser': None,
        'input': input,
        'error': None,
        'messages': [],
        'time': None,
        'identifier': identifier,
        'results': None,
        'timed_out': False,
    }
    # NOTE: the local names below are deliberately left as they were:
    # locals() is exported to the interactive shell's environment further
    # down, so renaming them would change what the shell user sees.
    tagger = None
    parser = None
    messages = []

    if options.short_progress:
        # Only output the short form of the progress reports
        progress = 2
    elif options.long_progress:
        progress = 1
    else:
        progress = 0

    # Start a timer now to time the parse
    timer = ExecutionTimer(clock=True)

    # Catch any errors and continue to the next input, instead of giving up
    try:
        logger.info("Tagging sequence (%d timesteps)" % len(input))

        # Prepare a suitable tagger component
        tagger = tagger_cls(grammar, input, options=topts.copy(),
                            logger=logger)
        if not multiprocessing:
            response['tagger'] = tagger

        # Create a parser using this tagger
        parser = parser_cls(grammar, tagger, options=popts.copy(),
                            backoff=backoff,
                            backoff_options=npopts.copy(),
                            logger=logger)
        if not multiprocessing:
            response['parser'] = parser

        try:
            # Parse to produce a list of results
            results = parser.parse(derivations=options.derivations,
                                   summaries=progress)
        except (KeyboardInterrupt, Exception) as err:
            if multiprocessing:
                # Don't go interactive if we're in a subprocess
                # Instead, just return with an error
                response.update({
                    'error': exception_tuple(str_tb=True),
                })
                return response

            # Drop into the shell
            if isinstance(err, KeyboardInterrupt):
                print("Dropping out on keyboard interrupt")
                print("Entering shell: use 'chart' command to see current "
                      "state of parse")
            elif options.error_shell:
                sys.stderr.write("Error parsing %s\n" % str(input))
                sys.stderr.write("The error was:\n")
                traceback.print_exc(file=sys.stderr)

            # If we keyboard interrupted, always go into the shell, so
            # the user can see how far we got
            if not (options.error_shell or
                    isinstance(err, KeyboardInterrupt)):
                # No shell requested: let the outer handler report it
                raise

            # Instead of exiting, enter the interactive shell
            print("")
            from jazzparser.shell import interactive_shell
            env = {}
            env.update(globals())
            env.update(locals())
            interactive_shell(parser.chart.parses, options, tagger, parser,
                              grammar.formalism, env, input_data=input)
            return None
    except (KeyboardInterrupt, Exception) as err:
        if multiprocessing:
            response.update({
                'error': exception_tuple(str_tb=True),
            })
            return response
        if isinstance(err, KeyboardInterrupt):
            print("Exiting on keyboard interrupt")
            sys.exit(1)
        response.update({
            'error': exception_tuple(str_tb=True),
            'messages': messages,
            'time': timer.get_time(),
        })
        return response

    # Parsed successfully: hand the results back to the caller. Previously
    # the results were computed and then dropped, so a successful parse
    # returned nothing usable.
    # NOTE(review): 'timed_out' keeps its default of False here; set it if
    # the parser exposes a timeout indicator.
    response.update({
        'results': results,
        'messages': messages,
        'time': timer.get_time(),
    })
    return response
def _result_callback(response):
    """
    Handle the response produced for one input: record it as completed,
    report an error or print/store the results, and print a progress
    summary.

    Besides its argument, this function reads names from the surrounding
    scope that are not defined here: C{options}, C{grammar},
    C{all_results}, C{output_dir}, C{get_output_filename}, C{time_parse},
    C{stdinput}, C{num_inputs} and C{multiprocessing}. It updates the
    globals C{completed_parses} and C{parse_exit_status}.

    The statements are written so that they are valid on both
    Python 2.6+ and Python 3.

    @type response: dict or None
    @param response: response dict with the keys read below
        (C{identifier}, C{results}, C{error}, C{input}, C{messages},
        C{time}, C{timed_out}, C{tagger}, C{parser}), or C{None}, in
        which case nothing is done.

    """
    # Declared up front: a global declaration applies to the whole function
    # body, and Python 3 rejects one that follows a use of the name.
    global completed_parses
    global parse_exit_status

    if response is None:
        # Empty input, or the subprocess doesn't want us to do anything
        return

    # Mark this input as completed
    completed_parses[response['identifier']] = True

    if response['results'] is None:
        # There was some error: check what it was
        error = response['error']
        sys.stderr.write("Error parsing %s\n" % str(response['input']))
        sys.stderr.write("The error was:\n")
        if error is None:
            # No results and no recorded error: report that instead of
            # crashing on error[2]
            sys.stderr.write("(no error details were recorded)\n")
        else:
            sys.stderr.write("%s\n" % (error[2],))
        parse_exit_status = 1
    else:
        # NOTE(review): the source this was reconstructed from had lost its
        # indentation; everything down to the interactive shell is assumed
        # to belong to this success branch. Confirm against history.

        # Keep this together with all the other processes' responses
        all_results.append(response)
        print("Parsed: %s" % response['input'])

        # Run any cleanup routines that the formalism defines
        grammar.formalism.clean_results(response['results'])

        # Remove complex results if atomic-only option has been set
        if options.atoms_only:
            response['results'] = remove_complex_categories(
                response['results'], grammar.formalism)

        if not options.no_results:
            print("Results:")
            list_results(response['results'])

        if output_dir is not None:
            # Try getting a gold standard analysis if one has been
            # associated with the input
            gold = response['input'].get_gold_analysis()

            # Get the results with their probabilities
            top_results = [(getattr(res, 'probability', None), res)
                           for res in response['results']]
            if options.topn is not None:
                # Limit the results that get stored: highest first.
                # Sort ascending then reverse, so that ties come out in
                # the same order as before.
                top_results = sorted(top_results)
                top_results.reverse()
                top_results = top_results[:options.topn]
            # Output the results to a file
            presults = ParseResults(
                top_results,
                signs=True,
                gold_parse=gold,
                timed_out=response['timed_out'],
                cpu_time=response['time'])
            filename = get_output_filename(response['identifier'])
            presults.save(filename)
            print("Parse results output to %s" % filename)

        if time_parse:
            print("Parse took %f seconds" % response['time'])

        if options.lh_analysis:
            sys.stderr.write("\nLonguet-Higgins tonal space analysis for "
                             "each result:\n")
            # Output the tonal space path for each result
            for i, result in enumerate(response['results']):
                path = grammar.formalism.sign_to_coordinates(result)
                coords, times = zip(*path)
                print("%d> %s" % (i, ", ".join(
                    ["%s@%s" % (crd, time) for (crd, time) in
                     zip(coordinates_to_roman_names(coords), times)])))

        if options.lh_coord:
            sys.stderr.write("\nLonguet-Higgins tonal space coordinates "
                             "for each result:\n")
            # Output the tonal space path for each result
            for i, result in enumerate(response['results']):
                path = grammar.formalism.sign_to_coordinates(result)
                print("%d> %s" % (i, ", ".join(
                    ["(%d,%d)@%s" % (x, y, t) for ((x, y), t) in path])))

        # Print out any messages the parse routine sent to us
        for message in response['messages']:
            print(message)

        # Print a summary of what we've completed
        num_completed = sum(1 for done in completed_parses.values() if done)
        if not stdinput:
            if not options.no_progress:
                print(format_table([
                    [str(ident),
                     "Complete" if completed_parses[ident] else ""]
                    for ident in sorted(completed_parses)]))
            if num_inputs is None:
                print("\nCompleted %d parses" % num_completed)
            else:
                print("\nCompleted %d/%d parses" % (num_completed,
                                                    num_inputs))

        # Enter interactive mode now if requested in options
        # Don't do this if we're in a process pool
        if not multiprocessing and options.interactive:
            print("")
            from jazzparser.shell import interactive_shell
            env = {}
            env.update(globals())
            env.update(locals())
            interactive_shell(response['results'], options,
                              response['tagger'], response['parser'],
                              grammar.formalism, env,
                              input_data=response['input'])

    print("")
    # Flush the output to make sure everything gets out before we start
    # the next one
    sys.stderr.flush()
    sys.stdout.flush()