Example #1
 def copy(self, mutable=False):
     """
     Creates a complete copy of the model, optionally making the 
     distributions mutable.
     
     """
     # Copy all the distributions
     initial_key_dist = prob_dist_to_dictionary_prob_dist(
                         self.initial_key_dist, mutable=mutable)
     initial_chord_dist = prob_dist_to_dictionary_prob_dist(
                         self.initial_chord_dist, mutable=mutable)
     key_transition_dist = prob_dist_to_dictionary_prob_dist(
                         self.key_transition_dist, mutable=mutable)
     chord_transition_dist = cond_prob_dist_to_dictionary_cond_prob_dist(
                         self.chord_transition_dist, mutable=mutable)
     emission_dist = cond_prob_dist_to_dictionary_cond_prob_dist(
                         self.emission_dist, mutable=mutable)
     note_number_dist = prob_dist_to_dictionary_prob_dist(
                         self.note_number_dist, mutable=mutable)
     
     return HPChordLabeler(initial_key_dist, 
                           initial_chord_dist,
                           key_transition_dist,
                           chord_transition_dist,
                           emission_dist,
                           note_number_dist, 
                           self.chord_vocab, 
                           self.max_notes,
                           self.chord_corpus_mapping, 
                           history = self.history,
                           description = self.description,
                           name = self.model_name)
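
A note on the mutable flag: a mutable copy's distributions can be updated in place (e.g. during EM training) without touching the original model. A minimal self-contained sketch of the same idea, using NLTK's MutableProbDist as a stand-in for the dictionary distributions the jazzparser helpers above produce:

    from nltk.probability import DictionaryProbDist, MutableProbDist

    original = DictionaryProbDist({'I': 0.7, 'IV': 0.2, 'V': 0.1})
    # A mutable copy can be re-estimated in place...
    mutable_copy = MutableProbDist(original, ['I', 'IV', 'V'],
                                   store_logs=False)
    mutable_copy.update('I', 0.5, log=False)
    mutable_copy.update('V', 0.3, log=False)
    # ...while the original distribution is unaffected
    print(original.prob('V'))      # 0.1
    print(mutable_copy.prob('V'))  # 0.3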
Example #2
 def update_model(self, model):
     """
     Replaces the distributions of the saved model with those of the given 
     model and saves it.
     
     """
     # Replicate the distributions of the source model so that we get 
     #  non-mutable distributions to store
     self.model.schema_transition_dist = \
         cond_prob_dist_to_dictionary_cond_prob_dist(model.schema_transition_dist)
     self.model.root_transition_dist = \
         cond_prob_dist_to_dictionary_cond_prob_dist(model.root_transition_dist)
     self.model.emission_dist = \
         cond_prob_dist_to_dictionary_cond_prob_dist(model.emission_dist)
     self.model.initial_state_dist = prob_dist_to_dictionary_prob_dist(
             model.initial_state_dist)
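
The conversion helpers here are jazzparser utilities. A rough, assumed equivalent of cond_prob_dist_to_dictionary_cond_prob_dist (not the actual implementation): read every P(sample|condition) off the source distribution and freeze it into plain dictionary form, which is what makes the stored copy non-mutable:

    from nltk.probability import (ConditionalFreqDist, ConditionalProbDist,
                                  MLEProbDist, DictionaryProbDist,
                                  DictionaryConditionalProbDist)

    def freeze_cond_dist(cpd):
        # Sample every condition into an immutable dictionary dist
        dists = {}
        for cond in cpd.conditions():
            probs = dict((s, cpd[cond].prob(s)) for s in cpd[cond].samples())
            dists[cond] = DictionaryProbDist(probs)
        return DictionaryConditionalProbDist(dists)

    cfd = ConditionalFreqDist()
    for cond, samp in [('C', 'F'), ('C', 'G'), ('G', 'C')]:
        cfd[cond].inc(samp)   # old-NLTK API, as in the examples here
    frozen = freeze_cond_dist(ConditionalProbDist(cfd, MLEProbDist))
    print(frozen['C'].prob('G'))   # 0.5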
Example #3
 def __init__(self, label_dist, emission_dist, label_dom, emission_dom, \
                     mutable=False):
     """
      @type label_dist: nltk conditional prob dist
      @param label_dist: transition distribution
      @type emission_dist: nltk conditional prob dist
      @param emission_dist: emission distribution
     @type label_dom: list
     @param label_dom: state domain
     @type emission_dom: list
     @param emission_dom: emission domain
     @type mutable: bool
     @param mutable: if true, the distributions stored will be mutable 
         dictionary distributions, so the model can be updated
     
     """
     self.order = 2
     
     self.label_dom = label_dom
     self.num_labels = len(label_dom)
     self.emission_dom = emission_dom
     self.num_emissions = len(emission_dom)
     
     self.label_dist = cond_prob_dist_to_dictionary_cond_prob_dist(\
                             label_dist, mutable=mutable)
     self.emission_dist = cond_prob_dist_to_dictionary_cond_prob_dist(\
                             emission_dist, mutable=mutable)
      # Marginalize the emission dist to get an unconditioned version
      # Note that each label contributes with equal weight and the 
      #  result is not renormalized, so the values are proportional to, 
      #  not equal to, the marginal probabilities
      observations = {}
      for label in emission_dist.conditions():
          for samp in emission_dist[label].samples():
              observations[samp] = observations.get(samp, 0.0) + \
                      emission_dist[label].prob(samp)
      self.observation_dist = DictionaryProbDist(observations)
     self.seen_labels = label_dom
     
     self.backoff_model = None
     
     # Initialize the various caches
     # These will be filled as we access probabilities
     self.clear_cache()
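
A quick self-contained check of the marginalization step above, with two labels and made-up numbers (plain dicts stand in for the NLTK distributions). Every label contributes with equal weight, so the totals sum to the number of labels rather than to 1.0:

    emission = {
        'maj': {0: 0.6, 4: 0.3, 7: 0.1},
        'min': {0: 0.6, 3: 0.3, 7: 0.1},
    }
    observations = {}
    for label in emission:
        for samp, p in emission[label].items():
            observations[samp] = observations.get(samp, 0.0) + p
    print(sorted(observations.items()))
    # [(0, 1.2), (3, 0.3), (4, 0.3), (7, 0.2)]
    print(sum(observations.values()))   # 2.0: one unit per label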
Example #4
 def update_model(self, model, save=True):
     """
      Replaces the distributions of the saved model with those of the 
      given model and, by default, saves it.
     
     @type save: bool
     @param save: save the model. Otherwise just updates the distributions.
     
     """
     self.model.key_transition_dist = \
         cond_prob_dist_to_dictionary_cond_prob_dist(
             model.key_transition_dist)
     self.model.chord_transition_dist = \
         cond_prob_dist_to_dictionary_cond_prob_dist(
             model.chord_transition_dist)
     self.model.emission_dist = \
         cond_prob_dist_to_dictionary_cond_prob_dist(model.emission_dist)
     self.model.chord_dist = prob_dist_to_dictionary_prob_dist(
             model.chord_dist)
     if save:
         self.model.save()
Example #5
 def train(self, emissions, logger=None, save_callback=None):
     """
     Performs unsupervised training using Baum-Welch EM.
     
     This is performed on a model that has already been initialized. 
     You might, for example, create such a model using 
     L{jazzparser.taggers.segmidi.chordclass.hmm.ChordClassHmm.initialize_chord_classes}.
     
     This is based on the training procedure in NLTK for HMMs:
     C{nltk.tag.hmm.HiddenMarkovModelTrainer.train_unsupervised}.
     
     @type emissions: L{jazzparser.data.input.MidiTaggerTrainingBulkInput} or 
         list of L{jazzparser.data.input.Input}s
     @param emissions: training MIDI data
     @type logger: logging.Logger
     @param logger: a logger to send progress logging to
     
     """
     if logger is None:
         from jazzparser.utils.loggers import create_dummy_logger
         logger = create_dummy_logger()
         
     self.model.add_history("Beginning Baum-Welch training on %s" % get_host_info_string())
     self.model.add_history("Training on %d MIDI sequences (with %s segments)" % \
         (len(emissions), ", ".join("%d" % len(seq) for seq in emissions)))
     logger.info("Beginning Baum-Welch training on %s" % get_host_info_string())
     
     # Get some options out of the module options
     max_iterations = self.options['max_iterations']
     convergence_logprob = self.options['convergence_logprob']
     split_length = self.options['split']
     truncate_length = self.options['truncate']
     save_intermediate = self.options['save_intermediate']
     processes = self.options['trainprocs']
     
     # Make a mutable distribution for each of the distributions 
     #  we'll be updating
     emission_mdist = cond_prob_dist_to_dictionary_cond_prob_dist(
                                 self.model.emission_dist, mutable=True)
     schema_trans_mdist = cond_prob_dist_to_dictionary_cond_prob_dist(
                                 self.model.schema_transition_dist, mutable=True)
     root_trans_mdist = cond_prob_dist_to_dictionary_cond_prob_dist(
                                 self.model.root_transition_dist, mutable=True)
     init_state_mdist = prob_dist_to_dictionary_prob_dist(
                                 self.model.initial_state_dist, mutable=True)
     
     # Get the sizes we'll need for the matrices
     num_schemata = len(self.model.schemata)
     num_root_changes = 12
     num_chord_classes = len(self.model.chord_classes)
     if self.model.metric:
         num_emission_conds = num_chord_classes * 4
     else:
         num_emission_conds = num_chord_classes
     num_emissions = 12
     
     # Enumerations to use for the matrices, so we know what they mean
     schema_ids = dict([(sch,i) for (i,sch) in enumerate(self.model.schemata+[None])])
     if self.model.metric:
         rs = range(4)
     else:
         rs = [0]
      # Map each (chord class name, r) pair to a matrix index
      emission_cond_ids = dict([(cond,i) for (i,cond) in enumerate(
              [(str(cclass.name),r) 
                  for cclass in self.model.chord_classes for r in rs])])
     
     # Construct a model using these mutable distributions so we can 
     #  evaluate using them
     model = ChordClassHmm(schema_trans_mdist, 
                        root_trans_mdist, 
                        emission_mdist, 
                        self.model.emission_number_dist, 
                        init_state_mdist, 
                        self.model.schemata, 
                        self.model.chord_class_mapping,
                        self.model.chord_classes, 
                        metric=self.model.metric,
                        illegal_transitions=self.model.illegal_transitions,
                        fixed_root_transitions=self.model.fixed_root_transitions)
     
     def _save():
         if save_callback is None:
             logger.error("Could not save model, as no callback was given")
         else:
             # If the writing fails, wait till I've had a chance to sort it 
             #  out and then try again. This happens when my AFS token runs 
             #  out
             while True:
                 try:
                     save_callback()
                 except (IOError, OSError), err:
                     print "Error writing model to disk: %s. " % err
                     raw_input("Press <enter> to try again... ")
                 else:
                     break
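
The example is cut off before the EM loop itself, which re-estimates the mutable distributions in place on each iteration. A minimal sketch of the shape of one such M-step update (made-up expected counts; the real loop derives them from forward-backward passes over the MIDI sequences):

    from nltk.probability import DictionaryProbDist, MutableProbDist

    samples = ['I', 'IV', 'V']
    uniform = DictionaryProbDist(dict((s, 1.0/3) for s in samples))
    mdist = MutableProbDist(uniform, samples, store_logs=False)

    # Expected counts accumulated during the E-step (made-up numbers)
    expected = {'I': 12.0, 'IV': 3.0, 'V': 5.0}
    total = sum(expected.values())
    for s in samples:
        mdist.update(s, expected[s] / total, log=False)
    print(mdist.prob('I'))   # 0.6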
Example #6
 def train_transition_distribution(self, inputs, input_keys, 
                                                         chord_mapping=None):
     """
     Train the transition distribution parameters in a supervised manner, 
     using chord corpus input.
     
     This is used as an initialization step to set transition parameters 
     before running EM on unannotated data.
     
     @type inputs: L{jazzparser.data.input.AnnotatedDbBulkInput}
     @param inputs: annotated chord training data
      @type input_keys: list of lists of (chord, key) pairs
      @param input_keys: the key associated with each chord. Should contain 
          a list for each input sequence, each of which should be the 
          length of the chord sequence
     @type chord_mapping: dict
     @param chord_mapping: a mapping from the chord labels of the corpus to 
         those we will use for this model, so that we can use the training 
         data. See L{jazzparser.misc.chordlabel.chord_vocabs} for mappings 
         and use C{get_mapping} to prepare a dict from them. This doesn't 
         have to be the same as the mapping stored in the model 
         (C{model.chord_corpus_mapping}) and won't overwrite it. If not 
         given, the model's corpus mapping will be used
     
     """
     self.add_history(
             "Training transition probabilities using %d annotated chord "\
             "sequences" % len(inputs))
     
     if chord_mapping is None:
         chord_mapping = self.get_mapping_from_corpus()
     
     # Prepare the label sequences that we'll train on
     sequences = []
     for seq in inputs:
         sequence = []
         for chord in seq.chords:
             sequence.append((chord.root, chord.type, chord.duration))
         sequences.append(sequence)
     
     # Apply the mapping to the chord data
     sequences = [ \
         [(root, chord_mapping.get(label, label), duration) for \
             (root, label, duration) in sequence] for sequence in sequences]
     
     # Repeat values with a duration > 1
     rep_sequences = []
     for seq in sequences:
         sequence = []
         for root,label,duration in seq:
             # Put it in once for each duration
             for i in range(duration):
                 sequence.append((root,label))
         rep_sequences.append(sequence)
     
     # Count up the observations
     initial_chord_counts = FreqDist()
     key_transition_counts = FreqDist()
     chord_transition_counts = ConditionalFreqDist()
     
     for sequence,seq_keys in zip(rep_sequences, input_keys):
         # Count the initial events
         root0, label0 = sequence[0]
         key0 = seq_keys[0][1]
         initial_chord_counts.inc(((root0-key0)%12,label0))
         # Don't count the initial key distribution: leave that uniform
         
         last_relroot = (root0 - key0) % 12
         last_label = label0
         last_key = key0
         
         for (root,label),(chord,key) in zip(sequence[1:], seq_keys[1:]):
             key_change = (key - last_key) % 12
             key_transition_counts.inc(key_change)
             
             # Take the root relative to the key we're in
             relroot = (root-key) % 12
             chord_transition_counts[(last_relroot,last_label)].inc(\
                                                         (relroot,label))
             
             last_key = key
             last_relroot = relroot
             last_label = label
         # Note the transition to the final state from this last state
         chord_transition_counts[(last_relroot,last_label)].inc(None)
     
     # Build the correct domains of these distributions
     possible_chords = [(root,label) for root in range(12) for label in \
                                     list(sorted(self.chord_vocab.keys()))]
     
     # Estimate the prob dists from these counts
     initial_chord_dist = prob_dist_to_dictionary_prob_dist(\
                             laplace_estimator(initial_chord_counts, \
                                     len(possible_chords)),
                                 samples=possible_chords)
     key_transition_dist = prob_dist_to_dictionary_prob_dist(\
                             laplace_estimator(key_transition_counts, 12),
                                 samples=range(12))
     chord_transition_dist = cond_prob_dist_to_dictionary_cond_prob_dist(\
                             ConditionalProbDist(chord_transition_counts,
                                 laplace_estimator, len(possible_chords)+1),
                                     conditions=possible_chords,
                                     samples=possible_chords+[None])
     
     # Replace the model's transition distributions
     self.initial_chord_dist = initial_chord_dist
     self.key_transition_dist = key_transition_dist
     self.chord_transition_dist = chord_transition_dist
     # Invalidate the cache
     self.clear_cache()
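
The laplace_estimator above is a jazzparser wrapper; assuming it behaves like NLTK's LaplaceProbDist, fixing the number of bins to the size of the full sample space is what gives unseen chords non-zero probability. The +1 on the bins for the chord transition distribution accounts for the extra final-state sample None. A small sketch:

    from nltk.probability import FreqDist, LaplaceProbDist

    counts = FreqDist()
    for obs in [(0, 'maj'), (0, 'maj'), (7, 'maj')]:
        counts.inc(obs)   # old-NLTK API, as in the examples here

    # Say the domain contains 24 possible chords
    dist = LaplaceProbDist(counts, bins=24)
    print(dist.prob((0, 'maj')))   # (2+1)/(3+24) = 1/9
    print(dist.prob((5, 'min')))   # unseen: (0+1)/(3+24) = 1/27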
Example #7
 def train_transition_distribution(self, inputs, grammar, contprob=0.3):
     """
     Train the transition distribution parameters in a supervised manner, 
     using chord corpus input.
     
     This is used as an initialization step to set transition parameters 
     before running EM on unannotated data.
     
     @type inputs: L{jazzparser.data.input.AnnotatedDbBulkInput}
     @param inputs: annotated chord training data
     @type contprob: float or string
     @param contprob: probability mass to reserve for staying on the 
         same state (self transitions). Use special value 'learn' to 
         learn the probabilities from the durations
     
     """
     self.add_history(
             "Training transition probabilities using %d annotated chord "\
             "sequences" % len(inputs))
     learn_cont = contprob == "learn"
     
     # Prepare the label sequences that we'll train on
     if learn_cont:
         # Repeat values with a duration > 1
         sequences = []
         for seq in inputs:
             sequence = []
             for chord,cat in zip(seq, seq.categories):
                 # Put it in once for each duration
                 for i in range(chord.duration):
                     sequence.append((chord,cat))
             sequences.append(sequence)
     else:
         sequences = [list(zip(sequence, sequence.categories)) for \
                                 sequence in inputs]
     
     # Prepare a list of transformations to apply to the categories
     label_transform = {}
     # First include all the categories we want to keep as they were
     for schema in self.schemata:
         label_transform[schema] = (schema, 0)
     # Then include any transformations the grammar defines
     for pos,mapping in grammar.equiv_map.items():
         label_transform[pos] = (mapping.target.pos, mapping.root)
     
     # Apply the transformation to all the training data
     training_samples = []
     for chord_cats in sequences:
         seq_samples = []
         for chord,cat in chord_cats:
             # Transform the label if it has a transformation
             if cat in label_transform:
                 use_cat, alter_root = label_transform[cat]
             else:
                 use_cat, alter_root = cat, 0
             root = (chord.root + alter_root) % 12
             seq_samples.append((str(use_cat), root))
         training_samples.append(seq_samples)
     
     training_data = sum([
         [(cat0, cat1, (root1 - root0) % 12)
                 for ((cat0,root0),(cat1,root1)) in \
                     group_pairs(seq_samples)] \
             for seq_samples in training_samples], [])
     
     # Count up the observations
     schema_transition_counts = ConditionalFreqDist()
     root_transition_counts = ConditionalFreqDist()
     for (label0, label1, root_change) in training_data:
         # Only use counts for categories the model's looking for
         if label0 in self.schemata and label1 in self.schemata:
             schema_transition_counts[label0].inc(label1)
             root_transition_counts[(label0,label1)].inc(root_change)
     
     # Transition probability to final state (end of sequence)
     for sequence in training_samples:
         # Inc the count of going from the label the sequence ends on to 
         #  the final state
         schema_transition_counts[sequence[-1][0]].inc(None)
         
     # Use Laplace (plus one) smoothing
     # We don't use the laplace_estimator because we want the conversion 
     #  to a dict prob dist to get all the labels, not just to discount 
     #  the ones it's seen
     for label0 in self.schemata:
         for label1 in self.schemata:
             for root_change in range(12):
                 # Exclude self-transition for now, unless we're learning it
                 if learn_cont or not (label0 == label1 and root_change == 0):
                     schema_transition_counts[label0].inc(label1)
                     root_transition_counts[(label0,label1)].inc(root_change)
             # We don't add a count for going to the final state: we don't 
             #  want to initialize it with too much weight
     
     # Estimate distribution from this frequency distribution
     schema_trans_dist = cond_prob_dist_to_dictionary_cond_prob_dist(\
             ConditionalProbDist(schema_transition_counts, mle_estimator, None), \
                 mutable=True, samples=self.schemata+[None])
     root_trans_dist = cond_prob_dist_to_dictionary_cond_prob_dist(\
             ConditionalProbDist(root_transition_counts, mle_estimator, None), \
                 mutable=True, samples=range(12))
     
      if not learn_cont:
          # Discount all probabilities to allow for self-transition probs
          # The transition dist is factored into schema and root-change 
          #  dists, so the reserved mass is spread over both: the exact 
          #  self-transition is the same schema with root change 0
          discount = logprob(1.0 - contprob)
          for label0 in self.schemata:
              root_self = root_trans_dist[(label0,label0)]
              self_schema = schema_trans_dist[label0].prob(label0)
              self_joint = self_schema * root_self.prob(0)
              # Give saved prob mass to self-transitions: reserved mass, 
              #  plus discounted same-schema moves with a root change
              new_self_schema = contprob + \
                  (1.0 - contprob) * (self_schema - self_joint)
              schema_trans_dist[label0].update(label0, 
                  logprob(new_self_schema))
              # Discount all other transitions to allow for this
              for label1 in self.schemata:
                  if label1 != label0:
                      schema_trans_dist[label0].update(label1, 
                          schema_trans_dist[label0].logprob(label1) + \
                          discount)
              # Rescale the self-schema root dist so the joint prob of 
              #  (same schema, root change 0) comes out as contprob
              scale = logprob((1.0 - contprob) * self_schema) - \
                  logprob(new_self_schema)
              for root_change in range(1, 12):
                  root_self.update(root_change, 
                      root_self.logprob(root_change) + scale)
              root_self.update(0, 
                  logprob(contprob) - logprob(new_self_schema))
     
     # Recreate the dict prob dist so it's not mutable any more
     schema_trans_dist = cond_prob_dist_to_dictionary_cond_prob_dist(schema_trans_dist)
     root_trans_dist = cond_prob_dist_to_dictionary_cond_prob_dist(root_trans_dist)
     
     ## Now for the initial distribution
     # Count up the observations
     initial_counts = FreqDist()
     for sequence in training_samples:
         initial_counts.inc(sequence[0][0])
     # Use Laplace (plus one) smoothing
     #for label in self.schemata:
     #    initial_counts.inc(label)
     
     # Estimate distribution from this frequency distribution
     initial_dist = prob_dist_to_dictionary_prob_dist(\
                 mle_estimator(initial_counts, None), samples=self.schemata)
     
     # Replace the model's transition distributions
     self.schema_transition_dist = schema_trans_dist
     self.root_transition_dist = root_trans_dist
     self.initial_state_dist = initial_dist
     # Invalidate the cache
     self.clear_cache()
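
A quick check of the mass reservation above with plain numbers: the exact self-transition gets contprob and every other transition is scaled by (1 - contprob), which keeps the distribution normalized as long as the self-transition starts with no mass (the smoothing loop arranges this by excluding it from the added counts):

    contprob = 0.3
    # Made-up transition probs with no mass on the self-transition ('A', 0)
    trans = {('A', 5): 0.5, ('B', 0): 0.3, ('B', 7): 0.2}
    new = dict((s, (1.0 - contprob) * p) for (s, p) in trans.items())
    new[('A', 0)] = contprob    # reserve mass for the self-transition
    print(sum(new.values()))    # 1.0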
Example #8
 def initialize_chord_classes(cls, tetrad_prob, max_notes, grammar, \
         illegal_transitions=[], fixed_root_transitions={}, metric=False):
     """
     Creates a new model with the distributions initialized naively to 
     favour simple chord-types, in a similar way to what R&S do in the paper. 
     
     The transition distribution is initialized so that everything is 
     equiprobable.
     
     @type tetrad_prob: float
     @param tetrad_prob: prob of a note in the tetrad. This prob is 
         distributed over the notes of the tetrad. The remaining prob 
         mass is distributed over the remaining notes. You'll want this 
         to be >0.33, so that tetrad notes are more probable than others.
     @type max_notes: int
     @param max_notes: maximum number of notes that can be generated in 
         each emission. Usually best to set to something high, like 100 - 
         it's just to make the distribution finite.
     @type grammar: L{jazzparser.grammar.Grammar}
     @param grammar: grammar from which to take the chord class definitions
     @type metric: bool
     @param metric: if True, creates a model with a metrical component 
         (dependence on metrical position). Default False
     
     """
     # Only use chord classes that are used by some morph item in the lexicon
     classes = [ccls for ccls in grammar.chord_classes.values() if ccls.used]
     
     # Create a probability distribution for the emission distribution
     dists = {}
     
     # Create the distribution for each possible r-value if we're creating 
     #  a metrical model
     if metric:
         r_vals = range(4)
     else:
         r_vals = [0]
     # Separate emission distribution for each chord class
     for ccls in classes:
         for r in r_vals:
             probabilities = {}
             # We assign two different probabilities: in tetrad or out
             # Don't assume the tetrad has 4 notes!
             in_tetrad_prob = tetrad_prob / len(ccls.notes)
             out_tetrad_prob = (1.0 - tetrad_prob) / (12 - len(ccls.notes))
             # Give a probability to every pitch class
             for d in range(12):
                 if d in ccls.notes:
                     probabilities[d] = in_tetrad_prob
                 else:
                     probabilities[d] = out_tetrad_prob
             dists[(ccls.name,r)] = DictionaryProbDist(probabilities)
     emission_dist = DictionaryConditionalProbDist(dists)
     
     # Take the state labels from the lexical entries in the grammar
     # Include only tonic categories that were generated from lexical 
     #  expansion rules - i.e. only tonic repetition categories
     schemata = grammar.midi_families.keys()
     
     # Check that the transition constraint specifications refer to existing 
     #  schemata
     for labels in illegal_transitions:
         for label in labels:
             if label not in schemata:
                 raise ValueError, "%s, given in illegal transition "\
                     "specification, is not a valid schema in the grammar" \
                     % label
     for labels in fixed_root_transitions:
         for label in labels:
             if label not in schemata:
                 raise ValueError, "%s, given in fixed root transition "\
                     "specification, is not a valid schema in the grammar" \
                     % label
     
     # Build from the grammar a mapping from lexical schemata (POSs) to 
     #  chord classes
     chord_class_mapping = {}
     for morph in grammar.morphs:
         if morph.pos in schemata:
             chord_class_mapping.setdefault(morph.pos, []).append(str(morph.chord_class.name))
     # Make sure that every label appears in the mapping
     for label in schemata:
         if label not in chord_class_mapping:
             chord_class_mapping[label] = []
     
     # Initialize transition distribution so every transition is equiprobable
     schema_transition_counts = ConditionalFreqDist()
     root_transition_counts = ConditionalFreqDist()
     for label0 in schemata:
         for label1 in schemata:
             # Increment the count once for each chord class associated 
             #  with this schema: schemata with 2 chord classes get 2 
             #  counts
             for cclass in chord_class_mapping[label1]:
                 schema_transition_counts[label0].inc(label1)
                 for root_change in range(12):
                     # Give one count to the root transition corresponding to this state transition
                     root_transition_counts[(label0,label1)].inc(root_change)
         # Give a count to finishing in this state
         schema_transition_counts[label0].inc(None)
     # Estimate distribution from this frequency distribution
     schema_trans_dist = ConditionalProbDist(schema_transition_counts, mle_estimator, None)
     root_trans_dist = ConditionalProbDist(root_transition_counts, mle_estimator, None)
     # Sample this to get dictionary prob dists
     schema_trans_dist = cond_prob_dist_to_dictionary_cond_prob_dist(schema_trans_dist)
     root_trans_dist = cond_prob_dist_to_dictionary_cond_prob_dist(root_trans_dist)
     
     # Do the same with the initial states (just schemata, not roots)
     initial_state_counts = FreqDist()
     for label in schemata:
         initial_state_counts.inc(label)
     initial_state_dist = mle_estimator(initial_state_counts, None)
     initial_state_dist = prob_dist_to_dictionary_prob_dist(initial_state_dist)
     
     # Also initialize the notes number distribution to uniform
     emission_number_counts = FreqDist()
     for i in range(max_notes):
         emission_number_counts.inc(i)
     emission_number_dist = mle_estimator(emission_number_counts, None)
     emission_number_dist = prob_dist_to_dictionary_prob_dist(emission_number_dist)
     
     # Create the model
     model = cls(schema_trans_dist, 
                   root_trans_dist, 
                   emission_dist, 
                   emission_number_dist, 
                   initial_state_dist, 
                   schemata, 
                   chord_class_mapping, 
                   classes, 
                   metric=metric, 
                   illegal_transitions=illegal_transitions,
                   fixed_root_transitions=fixed_root_transitions)
     model.add_history(\
         "Initialized model to chord type probabilities, using "\
         "tetrad probability %s. Metric: %s" % \
         (tetrad_prob, metric))
     
     return model
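
To make the initialization concrete, a quick check of the emission probabilities for a single chord class, with a made-up tetrad and tetrad_prob:

    tetrad_prob = 0.6
    notes = set([0, 4, 7, 10])   # e.g. a dominant seventh pitch-class set
    in_tetrad_prob = tetrad_prob / len(notes)                  # 0.15
    out_tetrad_prob = (1.0 - tetrad_prob) / (12 - len(notes))  # 0.05
    probabilities = dict((d, in_tetrad_prob if d in notes else
                             out_tetrad_prob) for d in range(12))
    print(sum(probabilities.values()))   # 1.0
    # Tetrad notes beat the others whenever tetrad_prob > 1/3, hence 
    # the advice in the docstring to keep it above 0.33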
Example #9
 def initialize_chord_types(cls, probs, model_name="default", chord_set="scale+dom7"):
     """
     Creates a new model with the distributions initialized naively to 
     favour simple chord-types, as R&S do in the paper. They don't say 
     what values they use for C{probs}, except that they're high, medium 
     and low respectively.
     
     The transition distribution is initialized so that everything is 
     equiprobable.
     
     @type probs: 3-tuple of floats
     @param probs: probability mass to assign to (0.) chord notes, (1.) 
         scale notes and (2.) other notes. The three values should sum to
          1.0 (but will be normalized if they don't)
     
     """
     prob_sum = sum(probs)
     probs = [p/prob_sum for p in probs]
     
     # Create a probability distribution for the emission 
     #  distribution
     dists = {}
     # Create the distribution for each possible r-value
     for r in range(4):
         probabilities = {}
         for d in [0,1,2]:
             probabilities[d] = probs[0]/3.0
         probabilities[3] = probs[1]
         probabilities[4] = probs[2]
         dists[r] = DictionaryProbDist(probabilities)
     emission_dist = DictionaryConditionalProbDist(dists)
     
     # These distributions will make everything equiprobable
     key_transition_counts = ConditionalFreqDist()
     chord_transition_counts = ConditionalFreqDist()
     chord_counts = {}
     # Get all possible labels
     label_dom = cls.get_label_dom(chord_set=chord_set)
     
     for label0 in label_dom:
         for label1 in label_dom:
             key,pkey = states_to_key_transition(label1, label0)
             # Give one count to the key transition corresponding to this state transition
             key_transition_counts[pkey].inc(key)
             # And one to the chord transition corresponding to this state transition
             if label0[0] == label1[0] and label0[1] == label1[1]:
                 # tonic = tonic', mode = mode'
                 chord_transition_counts[label0[2]].inc(label1[2])
             else:
                 chord_counts.setdefault(label1[2], 0)
                 chord_counts[label1[2]] += 1
     
     # Estimate distributions from these frequency distributions
     key_dist = ConditionalProbDist(key_transition_counts, mle_estimator, None)
     chord_trans_dist = ConditionalProbDist(chord_transition_counts, mle_estimator, None)
     chord_dist = DictionaryProbDist(chord_counts)
     # Sample these to get dictionary prob dists
     key_dist = cond_prob_dist_to_dictionary_cond_prob_dist(key_dist)
     chord_trans_dist = cond_prob_dist_to_dictionary_cond_prob_dist(chord_trans_dist)
     chord_dist = prob_dist_to_dictionary_prob_dist(chord_dist)
     
     model = cls(key_dist, \
                   chord_trans_dist, \
                   emission_dist, \
                   chord_dist, \
                   model_name=model_name,
                   chord_set=chord_set)
     model.add_history(\
         "Initialized model '%s' to chord type probabilities, using "\
         "parameters: %s, %s, %s" % (model_name, probs[0], probs[1], probs[2]))
     return model
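
The emission distribution built here is conditioned on the metrical position r; reading the code, samples 0-2 share the chord-note mass, 3 is a scale note and 4 is any other note. A minimal sketch of how such a conditional dictionary distribution is built and queried:

    from nltk.probability import (DictionaryProbDist,
                                  DictionaryConditionalProbDist)

    dists = dict((r, DictionaryProbDist({0: 0.2, 1: 0.2, 2: 0.2,
                                         3: 0.25, 4: 0.15}))
                 for r in range(4))
    emission_dist = DictionaryConditionalProbDist(dists)
    print(emission_dist[0].prob(3))   # P(scale note | r=0) = 0.25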