Example #1
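A command-line script that looks up an annotated chord sequence by index in a sequence file and prints the key associated with each of its chords via keys_for_sequence.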
import sys
from optparse import OptionParser
# SequenceIndex and keys_for_sequence come from the surrounding project;
# the exact module paths depend on that codebase.

def main():
    usage = "%prog [options] <seq-file> <index>"
    description = "Outputs the key associated with each chord of a sequence "\
        "from an annotated corpus"
    parser = OptionParser(usage=usage, description=description)
    options, arguments = parser.parse_args()
    
    if len(arguments) < 2:
        print "You must specify a sequence file and index"
        sys.exit(1)
        
    index = int(arguments[1])
    # Get the chord sequence
    seq = SequenceIndex.from_file(arguments[0]).sequence_by_index(index)
    
    print keys_for_sequence(seq)
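For reference, the usage string above corresponds to a command-line invocation of the form below; the script name, file name, and index are placeholders rather than values from the source.

    python print_sequence_keys.py chords.seq 0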
Example #2
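A supervised training routine for an HP chord labeler model; keys_for_sequence is used to derive the key of every chord in the annotated training sequences before the transition distribution is trained.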
 def train(data, name, logger=None, options={}, chord_data=None):
     """
     Initializes and trains an HMM in a supervised fashion using the given 
     training data.
     
     """
     if len(data) == 0:
         raise ModelTrainError("empty training data set")
         
     # Prepare a dummy logger if none was given
     if logger is None:
         logger = create_dummy_logger()
     
     # Process the options dict
     options = HPChordLabeler.process_training_options(options)
     
     # Work out what kind of input data we've got
     # It should be a bulk input type: check what type the first input is
     input_type = detect_input_type(data[0], allowed=['segmidi', 'db-annotated'])
     
     logger.info(">>> Beginning training of HP chord labeler model '%s'" % name)
     # If we got midi tagger training data, it may include chord data as well
     if isinstance(data, MidiTaggerTrainingBulkInput) and \
                                             data.chords is not None:
         if chord_data is None:
             # Use the chord data in the input data
             logger.info("Midi training data; chord corpus data available")
             chord_inputs = data.chords
         else:
             # Use the chord data that was given explicitly
             chord_inputs = chord_data
         midi_inputs = data
     elif isinstance(data, DbBulkInput):
         logger.info("Only chord corpus training data")
         # This was only chord input, no midi data
         chord_inputs = data
         midi_inputs = None
     else:
         chord_inputs = chord_data
         # Presumably this is another form of midi training data
         midi_inputs = data
         logger.info("Midi training data; no chord data was included")
     
     # Get the chord vocab from the options
     logger.info("Model chord vocabulary: %s" % options['vocab'])
     vocab, vocab_mapping = CHORD_VOCABS[options['vocab']]
     
     # Initialize a model according to the chord types
     logger.info("Initializing emission distributions to favour chord "\
                 "notes with chord probability %s" % (options['chordprob']))
     model = HPChordLabeler.initialize_chords(options['chordprob'], \
                                         options['maxnotes'], vocab, \
                                         vocab_mapping, name=name)
     
     # If we have chord training data, use this to train the transition dist
     if chord_inputs is not None:
         logger.info("Training using chord data")
         
         # Construct the trees implicit in the annotations to get the 
         #  key of every chord
         logger.info("Preparing key data for annotated chord sequences")
         input_keys = [keys_for_sequence(dbinput) for dbinput in chord_inputs]
         
         # Run the supervised training of the transition distribution
         logger.info("Training transition distribution on chord sequences")
         model.train_transition_distribution(chord_inputs, input_keys)
         
     if midi_inputs is not None:
         logger.info("Training using midi data")
         
         # Preprocess the midi inputs so they're ready for the model training
         emissions = [midi_to_emission_stream(seq, 
                                              remove_empty=False)[0] \
                         for seq in midi_inputs]
         
         # Use the midi data to train emission number dist
         logger.info("Training emission number distribution")
         model.train_emission_number_distribution(emissions)
         
         ####### EM unsupervised training on the midi data
         # Pull out the options to pass to the trainer
         # These are a subset of the model training options
         bw_opt_names = [opt.name for opt in HPBaumWelchTrainer.OPTIONS]
         bw_opts = dict([(name,val) for (name,val) in options.items() \
                                         if name in bw_opt_names])
         # Create a Baum-Welch trainer
         trainer = HPBaumWelchTrainer(model, bw_opts)
         # Do the Baum-Welch training
         model = trainer.train(emissions, logger=logger)
     logger.info("Training complete")
     
     return model
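For illustration, a minimal sketch of how this trainer might be invoked. Calling train directly, the bulk-input loading step, and every literal value below are assumptions rather than details taken from the source.

# Hypothetical invocation, for illustration only.
data = load_annotated_bulk_input("annotated_corpus")   # placeholder loader returning a DbBulkInput
model = train(data, "my-chord-labeler",
              options={'vocab': 'default',     # assumed vocabulary name (looked up in CHORD_VOCABS)
                       'chordprob': 0.9,       # assumed chord-note probability
                       'maxnotes': 5})         # assumed maximum notes per emission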