Exemplo n.º 1
0
    def test_detect_input_type(self):
        """
        Check that detect_input_type() reports the same type name as
        input_type_name() gives for the returned object, for single and
        bulk inputs, and that the ``allowed`` restriction rejects an input
        whose type is not listed.

        """
        # Load some input: DbInput
        dbi = DbInput.from_file(DB_SEQUENCES_FILE, {'index': 0})
        # Run it through the preprocessor
        datatype, obj = detect_input_type(dbi)
        # Get the datatype from the type name lists
        datatype2 = input_type_name(type(obj))
        self.assertEqual(datatype, datatype2)

        # Do the same with ChordInput
        ci = ChordInput.from_file(CHORDS_FILE, options={'roman': True})
        datatype, obj = detect_input_type(ci)
        datatype2 = input_type_name(type(obj))
        self.assertEqual(datatype, datatype2)

        # Try some bulk input
        bulk = DbBulkInput.from_file(DB_SEQUENCES_FILE)
        datatype, obj = detect_input_type(bulk, allow_bulk=True)
        datatype2 = input_type_name(type(obj))
        self.assertEqual(datatype, datatype2)

        # Try restricting the allowed type
        datatype, obj = detect_input_type(ci, allowed=['chords'])
        # And this one should get rejected.
        # assertRaises forwards its extra positional and keyword arguments
        # straight to the callable, so the input and the restriction must be
        # passed as they would be in a direct call. Packing them into a
        # tuple and a dict would call detect_input_type((ci,), {...}) and
        # never test the restriction on ci itself.
        self.assertRaises(InputTypeError, detect_input_type, ci,
                          allowed=['db'])
Exemplo n.º 2
0
    def test_detect_input_type(self):
        """
        Check that detect_input_type() reports the same type name as
        input_type_name() gives for the returned object, for single and
        bulk inputs, and that the ``allowed`` restriction rejects an input
        whose type is not listed.

        """
        # Load some input: DbInput
        dbi = DbInput.from_file(DB_SEQUENCES_FILE, {"index": 0})
        # Run it through the preprocessor
        datatype, obj = detect_input_type(dbi)
        # Get the datatype from the type name lists
        datatype2 = input_type_name(type(obj))
        self.assertEqual(datatype, datatype2)

        # Do the same with ChordInput
        ci = ChordInput.from_file(CHORDS_FILE, options={"roman": True})
        datatype, obj = detect_input_type(ci)
        datatype2 = input_type_name(type(obj))
        self.assertEqual(datatype, datatype2)

        # Try some bulk input
        bulk = DbBulkInput.from_file(DB_SEQUENCES_FILE)
        datatype, obj = detect_input_type(bulk, allow_bulk=True)
        datatype2 = input_type_name(type(obj))
        self.assertEqual(datatype, datatype2)

        # Try restricting the allowed type
        datatype, obj = detect_input_type(ci, allowed=["chords"])
        # And this one should get rejected.
        # assertRaises forwards its extra positional and keyword arguments
        # straight to the callable, so the input and the restriction must be
        # passed as they would be in a direct call. Packing them into a
        # tuple and a dict would call detect_input_type((ci,), {...}) and
        # never test the restriction on ci itself.
        self.assertRaises(InputTypeError, detect_input_type, ci, allowed=["db"])
Exemplo n.º 3
0
 def test_from_string(self):
     # Smoke test: building a ChordInput from the CHORDS string with the
     # roman option switched on must complete without raising. The result
     # itself is not inspected.
     ChordInput.from_string(CHORDS, roman=True)
Exemplo n.º 4
0
 def test_from_file(self):
     # Smoke test: loading CHORDS_FILE with the roman option switched on
     # must complete without raising. The result itself is not inspected.
     load_options = {"roman": True}
     ChordInput.from_file(CHORDS_FILE, options=load_options)
Exemplo n.º 5
0
 # Get a filename for a logger for this input
 # When a logger directory is set, the log file is named after the slugified
 # input identifier inside that directory; otherwise a plain stderr logger
 # is created instead.
 if parse_logger_dir:
     parse_logger = os.path.join(parse_logger_dir, "%s.log" % \
                                         slugify(input_identifier))
     print >>sys.stderr, "Logging parser progress to %s" % parse_logger
     logger = create_logger(filename=parse_logger)
 else:
     logger = create_plain_stderr_logger()
 
 # Catch any errors and continue to the next input, instead of giving up
 try:
     # A plain string input has its trailing newlines stripped and is then
     # turned into a ChordInput
     if isinstance(input, str):
         input = input.rstrip("\n")
         if len(input) == 0:
             # NOTE(review): this leaves the enclosing function altogether
             # rather than moving on to the next input -- confirm intended
             return
         input = ChordInput.from_string(input)
     
     logger.info("Tagging sequence (%d timesteps)" % len(input))
     # Prepare a suitable tagger component
     # The tagger gets its own copy of the tagger options
     tagger = tagger_cls(grammar, input, options=topts.copy(), logger=logger)
     
 except KeyboardInterrupt:
     # Ctrl-C stops processing of all inputs: break out of the enclosing loop
     print "Exiting on keyboard interrupt"
     break
 except:
     # Any other failure is reported with a traceback and not re-raised.
     # NOTE(review): a bare except also traps SystemExit -- confirm intended
     print "Error tagging %s" % input_identifier
     traceback.print_exc()
     print
 else:
     # Tagged successfully
     # Get tags from the tagger as the parser would
Exemplo n.º 6
0
            # Get a filename for a logger for this input
            # When a logger directory is set, the log file is named after the
            # slugified input identifier inside that directory; otherwise a
            # plain stderr logger is created instead.
            if parse_logger_dir:
                parse_logger = os.path.join(parse_logger_dir, "%s.log" % \
                                                    slugify(input_identifier))
                print >> sys.stderr, "Logging parser progress to %s" % parse_logger
                logger = create_logger(filename=parse_logger)
            else:
                logger = create_plain_stderr_logger()

            # Catch any errors and continue to the next input, instead of giving up
            try:
                # A plain string input has its trailing newlines stripped and
                # is then turned into a ChordInput
                if isinstance(input, str):
                    input = input.rstrip("\n")
                    if len(input) == 0:
                        # NOTE(review): this leaves the enclosing function
                        # altogether rather than moving on to the next input
                        # -- confirm intended
                        return
                    input = ChordInput.from_string(input)

                logger.info("Tagging sequence (%d timesteps)" % len(input))
                # Prepare a suitable tagger component
                # The tagger gets its own copy of the tagger options
                tagger = tagger_cls(grammar,
                                    input,
                                    options=topts.copy(),
                                    logger=logger)

            except KeyboardInterrupt:
                # Ctrl-C stops processing of all inputs: break out of the
                # enclosing loop
                print "Exiting on keyboard interrupt"
                break
            except:
                # Any other failure is reported with a traceback and not
                # re-raised.
                # NOTE(review): a bare except also traps SystemExit --
                # confirm intended
                print "Error tagging %s" % input_identifier
                traceback.print_exc()
                print
Exemplo n.º 7
0
 def test_from_string(self):
     # Smoke test: building a ChordInput from the CHORDS string with the
     # roman option switched on must complete without raising. The result
     # itself is not inspected.
     ChordInput.from_string(CHORDS, roman=True)
Exemplo n.º 8
0
 def test_from_file(self):
     # Smoke test: loading CHORDS_FILE with the roman option switched on
     # must complete without raising. The result itself is not inspected.
     load_options = {'roman': True}
     ChordInput.from_file(CHORDS_FILE, options=load_options)
Exemplo n.º 9
0
def do_parse(grammar, tagger_cls, parser_cls, input, topts, popts, backoff, 
        npopts, options, identifier, multiprocessing=False, 
        logfile=None, partition=None):
    """
    Function called for each input to do tagging and parsing and return the 
    results. It's a separate function so that we can hand it over to worker 
    processes to do multiprocessing.
    
    A string input has its trailing newlines stripped and is converted with 
    C{ChordInput.from_string}. An empty string makes the function return 
    None straight away.
    
    @param tagger_cls: callable used to build the tagger from the grammar, 
        the input, a copy of C{topts} and the logger
    @param parser_cls: callable used to build the parser from the grammar, 
        the tagger, a copy of C{popts}, C{backoff}, a copy of C{npopts} and 
        the logger
    @param options: object from which C{short_progress}, C{long_progress}, 
        C{derivations} and C{error_shell} are read
    @param identifier: stored in the response under C{'identifier'} and 
        printed alongside the input
    @type multiprocessing: bool
    @param multiprocessing: if true, the tagger and parser are not stored 
        in the response, and any error (including a keyboard interrupt) is 
        reported in the response's C{'error'} entry instead of opening the 
        interactive shell or exiting
    @type logfile: str
    @param logfile: filename to send logging output to. If None, will log 
        to stderr
    @return: None for an empty string input or after the interactive shell 
        has been used. The response dictionary with C{'error'} filled in 
        when an exception is caught and reported
    
    """
    # If the input's a string, preprocess it
    if isinstance(input, str):
        input = input.rstrip("\n")
        if len(input) == 0:
            return
        input = ChordInput.from_string(input)
    
    print "Processing input: %s (%s)" % (input, identifier)
        
    if logfile is None:
        # Sending logging output to stderr
        logger = create_plain_stderr_logger()
    else:
        logger = create_logger(filename=logfile)
        print "Logging parser progress to %s" % logfile
    
    # Prepare an initial response
    # We'll fill in some values of this later
    response = {
        'tagger' : None,
        'parser' : None,
        'input' : input,
        'error' : None,
        'messages' : [],
        'time' : None,
        'identifier' : identifier,
        'results' : None,
        'timed_out' : False,
    }
    tagger = None
    parser = None
    messages = []
    
    # Progress level passed to the parser as ``summaries``: 2 for short 
    # progress reports, 1 for long ones, 0 when neither option is set. 
    # The short option wins if both are set.
    if options.short_progress:
        # Only output the short form of the progress reports
        progress = 2
    elif options.long_progress:
        progress = 1
    else:
        progress = 0
    
    # Start a timer now to time the parse
    timer = ExecutionTimer(clock=True)
    
    # Catch any errors and continue to the next input, instead of giving up
    try:
        ######### Do that parsing thang
        logger.info("Tagging sequence (%d timesteps)" % len(input))
        
        # Prepare a suitable tagger component
        tagger = tagger_cls(grammar, input, options=topts.copy(), logger=logger)
        # The tagger is only put in the response when not multiprocessing
        # NOTE(review): presumably because it can't be sent back from a 
        #  worker process -- confirm
        if not multiprocessing:
            response['tagger'] = tagger
        
        # Create a parser using this tagger
        parser = parser_cls(grammar, tagger, options=popts.copy(), 
                                backoff=backoff, 
                                backoff_options=npopts.copy(),
                                logger=logger)
        if not multiprocessing:
            response['parser'] = parser
        # Inner handler: deals with failures (and Ctrl-C) during the parse 
        #  itself, when a tagger and parser already exist
        try:
            # Parse to produce a list of results
            results = parser.parse(derivations=options.derivations, summaries=progress)
        except (KeyboardInterrupt, Exception), err:
            if multiprocessing:
                # Don't go interactive if we're in a subprocess
                # Instead, just return with an error
                response.update({
                    'error' : exception_tuple(str_tb=True),
                })
                return response
            else:
                # Drop into the shell
                if type(err) == KeyboardInterrupt:
                    print "Dropping out on keyboard interrupt"
                    print "Entering shell: use 'chart' command to see current state of parse"
                elif options.error_shell:
                    # The error is only printed here when the error shell 
                    #  option is on; otherwise it is re-raised below
                    print >> sys.stderr, "Error parsing %s" % str(input)
                    print >> sys.stderr, "The error was:"
                    traceback.print_exc(file=sys.stderr)
                # If we keyboard interrupted, always go into the shell, so 
                #  the user can see how far we got
                if options.error_shell or type(err) == KeyboardInterrupt:
                    # Instead of exiting, enter the interactive shell
                    print 
                    # Imported here rather than at module level
                    from jazzparser.shell import interactive_shell
                    # Give the shell access to this module's globals and 
                    #  this function's local variables
                    env = {}
                    env.update(globals())
                    env.update(locals())
                    interactive_shell(parser.chart.parses,options,tagger,parser,
                                grammar.formalism,env,input_data=input)
                    # Nothing is returned once the shell has been used
                    return
                else:
                    # Re-raised to the outer handler below, which records 
                    #  the error in the response
                    raise
    # Outer handler: covers tagger/parser construction as well as anything 
    #  re-raised by the inner handler
    except (KeyboardInterrupt, Exception), err:
        if multiprocessing:
            response.update({
                'error' : exception_tuple(str_tb=True),
            })
            return response
        else:
            if type(err) == KeyboardInterrupt:
                # Ctrl-C outside the parse call ends the whole process
                print "Exiting on keyboard interrupt"
                sys.exit(1)
            else:
                # Report the error to the caller along with the messages 
                #  gathered and the timer reading
                response.update({
                    'error' : exception_tuple(str_tb=True),
                    'messages' : messages,
                    'time' : timer.get_time(),
                })
                return response