Example no. 1
    def _assemble_batch(self, melody, chords, with_correct=True):
        encoded_melodies = [[] for _ in self.encodings]
        relative_posns = [[] for _ in self.encodings]
        correct_notes = []
        chord_roots = []
        chord_types = []
        for m, c in zip(melody, chords):
            m = leadsheet.constrain_melody(m, self.bounds)
            for i, encoding in enumerate(self.encodings):
                e_m, r_p = encoding.encode_melody_and_position(m, c)
                encoded_melodies[i].append(e_m)
                relative_posns[i].append(r_p)
            correct_notes.append(
                Encoding.encode_absolute_melody(m, self.bounds.lowbound,
                                                self.bounds.highbound))
            c_roots, c_types = zip(*c)
            chord_roots.append(c_roots)
            chord_types.append(c_types)

        retlist = [
            np.array(chord_types, np.float32),
            np.array(chord_roots, np.int32)
        ]
        if with_correct:
            retlist.append(np.array(correct_notes, np.int32))
        retlist.extend(np.array(x, np.int32) for x in relative_posns)
        retlist.extend(np.array(x, np.int32) for x in encoded_melodies)
        return retlist
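Every example below repeats the same chord-unpacking idiom before building its batch arrays. As a point of reference, here is a minimal NumPy sketch with hypothetical chord encodings (the real root/type pairs come from the leadsheet parsing code, which is not shown here):

import numpy as np

# Each chord is a (root, type_vector) pair; zip(*c) splits one bar of
# chords into a tuple of roots and a tuple of type vectors, which are
# then stacked into batch arrays.
c = [(0, [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]),   # hypothetical C major encoding
     (7, [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0])]   # hypothetical G7 encoding
c_roots, c_types = zip(*c)
chord_roots = np.array([c_roots], np.int32)    # shape (n_batch, n_time)
chord_types = np.array([c_types], np.float32)  # shape (n_batch, n_time, 12)
print(chord_roots.shape, chord_types.shape)    # (1, 2) (1, 2, 12)
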
Example no. 2
        def _build(det_dropout):
            all_out_probs = []
            for encoding, lstmstack, encoded_melody, relative_pos in zip(self.encodings, self.lstmstacks, encoded_melodies, relative_posns):
                activations = lstmstack.do_preprocess_scan( timestep=T.tile(T.arange(n_time), (n_batch,1)) ,
                                                            relative_position=relative_pos,
                                                            cur_chord_type=chord_types,
                                                            cur_chord_root=chord_roots,
                                                            last_output=T.concatenate([T.tile(encoding.initial_encoded_form(), (n_batch,1,1)),
                                                                                encoded_melody[:,:-1,:] ], 1),
                                                            deterministic_dropout=det_dropout)

                out_probs = encoding.decode_to_probs(activations, relative_pos, self.bounds.lowbound, self.bounds.highbound)
                all_out_probs.append(out_probs)
            reduced_out_probs = functools.reduce((lambda x,y: x*y), all_out_probs)
            if self.normalize_artic_only:
                non_artic_probs = reduced_out_probs[:,:,:2]
                artic_probs = reduced_out_probs[:,:,2:]
                non_artic_sum = T.sum(non_artic_probs, 2, keepdims=True)
                artic_sum = T.sum(artic_probs, 2, keepdims=True)
                norm_artic_probs = artic_probs*(1-non_artic_sum)/artic_sum
                norm_out_probs = T.concatenate([non_artic_probs, norm_artic_probs], 2)
            else:
                normsum = T.sum(reduced_out_probs, 2, keepdims=True)
                normsum = T.maximum(normsum, constants.EPSILON)
                norm_out_probs = reduced_out_probs/normsum
            return Encoding.compute_loss(norm_out_probs, correct_notes, True)
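When normalize_artic_only is set, the product-of-experts distribution keeps its combined rest and continue probabilities and rescales only the articulated-note entries so that the whole vector sums to one. A small NumPy sketch with made-up probabilities (not taken from the model) shows the arithmetic:

import numpy as np

# Hypothetical product-of-experts output for a single timestep:
# [p(rest), p(continue), p(articulate note 0), p(articulate note 1), ...]
reduced = np.array([0.10, 0.20, 0.30, 0.10, 0.05], np.float32)
non_artic, artic = reduced[:2], reduced[2:]
norm_artic = artic * (1 - non_artic.sum()) / artic.sum()
norm_probs = np.concatenate([non_artic, norm_artic])
print(norm_probs)        # rest/continue unchanged, articulations rescaled
print(norm_probs.sum())  # 1.0
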
    def decode(self, chords, feat_strengths, feat_vects):
        assert self.decode_fun is not None, "Need to call setup_decode before decode"
        chord_roots = []
        chord_types = []
        for c in chords:
            c_roots, c_types = zip(*c)
            chord_roots.append(c_roots)
            chord_types.append(c_types)
        chosen = self.decode_fun(np.array(chord_roots, np.int32), np.array(chord_types, np.float32), feat_strengths, feat_vects)
        return [Encoding.decode_absolute_melody(c, self.bounds.lowbound, self.bounds.highbound) for c in chosen]
Example no. 4
    def _scan_fn(*inputs):
        # inputs is [ spec_sequences..., last_absolute_position, spec_taps..., spec_non_sequences... ]
        inputs = list(inputs)

        partitioned_inputs = [[] for _ in specs]
        for cur_part, spec in zip(partitioned_inputs, specs):
            cur_part.extend(inputs[:len(spec.sequences)])
            del inputs[:len(spec.sequences)]
        last_absolute_chosen = inputs.pop(0)
        for cur_part, spec in zip(partitioned_inputs, specs):
            cur_part.extend(inputs[:spec.num_taps])
            del inputs[:spec.num_taps]
        for cur_part, spec in zip(partitioned_inputs, specs):
            cur_part.extend(inputs[:len(spec.non_sequences)])
            del inputs[:len(spec.non_sequences)]

        scan_routs = [ lstmstack.sample_scan_routine(spec, *p_input) for lstmstack,spec,p_input in zip(lstmstacks, specs, partitioned_inputs) ]
        new_posns = []
        all_out_probs = []
        for scan_rout, encoding in zip(scan_routs, encodings):
            last_rel_pos, last_out, cur_kwargs = scan_rout.send(None)

            new_pos = encoding.get_new_relative_position(last_absolute_chosen, last_rel_pos, last_out, bounds.lowbound, bounds.highbound, **cur_kwargs)
            new_posns.append(new_pos)
            addtl_kwargs = {
                "last_output": last_out
            }

            out_activations = scan_rout.send((new_pos, addtl_kwargs))
            out_probs = encoding.decode_to_probs(out_activations,new_pos,bounds.lowbound, bounds.highbound)
            all_out_probs.append(out_probs)

        reduced_out_probs = functools.reduce((lambda x,y: x*y), all_out_probs)
        if normalize_artic_only:
            non_artic_probs = reduced_out_probs[:,:2]
            artic_probs = reduced_out_probs[:,2:]
            non_artic_sum = T.sum(non_artic_probs, 1, keepdims=True)
            artic_sum = T.sum(artic_probs, 1, keepdims=True)
            norm_artic_probs = artic_probs*(1-non_artic_sum)/artic_sum
            norm_out_probs = T.concatenate([non_artic_probs, norm_artic_probs], 1)
        else:
            normsum = T.sum(reduced_out_probs, 1, keepdims=True)
            normsum = T.maximum(normsum, constants.EPSILON)
            norm_out_probs = reduced_out_probs/normsum

        sampled_note = Encoding.sample_absolute_probs(srng, norm_out_probs)

        outputs = []
        for scan_rout, encoding, new_pos in zip(scan_routs, encodings, new_posns):
            encoded_output = encoding.note_to_encoding(sampled_note, new_pos, bounds.lowbound, bounds.highbound)
            scan_outputs = scan_rout.send(encoded_output)
            scan_rout.close()
            outputs.extend(scan_outputs)

        return [sampled_note, norm_out_probs] + all_out_probs + outputs
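The loops at the top of _scan_fn carve theano.scan's single flat argument list into one chunk per LSTM stack, taking the sequences first, then (after popping the last absolute position) the taps and the non-sequences. A plain-Python sketch with hypothetical sizes shows the slice-and-delete pattern used for each pass:

# Stand-in for scan's flat argument list and hypothetical per-stack
# sequence counts; the same pattern repeats for taps and non-sequences.
inputs = list(range(7))
seq_lens = [2, 1]                          # hypothetical len(spec.sequences) per stack
partitioned = [[] for _ in seq_lens]
for part, n in zip(partitioned, seq_lens):
    part.extend(inputs[:n])
    del inputs[:n]
print(partitioned)  # [[0, 1], [2]]
print(inputs)       # [3, 4, 5, 6] -- arguments left for the later passes
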
Example no. 5
    def generate(self, chords):
        assert self.generate_fun is not None, "Need to call setup_generate before generate"

        chord_roots = []
        chord_types = []
        for c in chords:
            c_roots, c_types = zip(*c)
            chord_roots.append(c_roots)
            chord_types.append(c_types)
        chosen = self.generate_fun(np.array(chord_roots, np.int32),np.array(chord_types, np.float32))
        return [Encoding.decode_absolute_melody(c, self.bounds.lowbound, self.bounds.highbound) for c in chosen]
        def _build(det_dropout):
            activations = self.lstmstack.do_preprocess_scan( timestep=T.tile(T.arange(n_time), (n_batch,1)) ,
                                                             relative_position=relative_pos,
                                                             cur_chord_type=chord_types,
                                                             cur_chord_root=chord_roots,
                                                             last_output=T.concatenate([T.tile(self.encoding.initial_encoded_form(), (n_batch,1,1)),
                                                                                   encoded_melody[:,:-1,:] ], 1),
                                                             deterministic_dropout=det_dropout)

            out_probs = self.encoding.decode_to_probs(activations, relative_pos, self.bounds.lowbound, self.bounds.highbound)
            return Encoding.compute_loss(out_probs, correct_notes, True)
    def generate_visualize(self, chords):
        assert self.generate_visualize_fun is not None, "Need to call setup_generate before generate_visualize"
        chord_roots = []
        chord_types = []
        for c in chords:
            c_roots, c_types = zip(*c)
            chord_roots.append(c_roots)
            chord_types.append(c_types)
        chosen, all_probs = self.generate_visualize_fun(chord_roots, chord_types)

        melody = [Encoding.decode_absolute_melody(c, self.bounds.lowbound, self.bounds.highbound) for c in chosen]
        return melody, chosen, all_probs
Example no. 8
    def generate_visualize(self, chords):
        assert self.generate_visualize_fun is not None, "Need to call setup_generate before generate_visualize"
        chord_roots = []
        chord_types = []
        for c in chords:
            c_roots, c_types = zip(*c)
            chord_roots.append(c_roots)
            chord_types.append(c_types)
        stuff = self.generate_visualize_fun(chord_roots, chord_types)
        chosen, all_probs = stuff[:2]

        melody = [Encoding.decode_absolute_melody(c, self.bounds.lowbound, self.bounds.highbound) for c in chosen]
        return melody, chosen, all_probs, stuff[2:]
    def decode_visualize(self, chords, feat_strengths, feat_vects):
        assert self.decode_visualize_fun is not None, "Need to call setup_decode before decode_visualize"
        chord_roots = []
        chord_types = []
        for c in chords:
            c_roots, c_types = zip(*c)
            chord_roots.append(c_roots)
            chord_types.append(c_types)
        stuff = self.decode_visualize_fun(np.array(chord_roots, np.int32), np.array(chord_types, np.float32), feat_strengths, feat_vects)
        chosen, all_probs = stuff[:2]

        melody = [Encoding.decode_absolute_melody(c, self.bounds.lowbound, self.bounds.highbound) for c in chosen]
        return melody, chosen, all_probs, stuff[2:]
Example no. 10
    def decode_visualize(self, chords, feat_strengths, feat_vects):
        assert self.decode_visualize_fun is not None, "Need to call setup_decode before decode_visualize"
        chord_roots = []
        chord_types = []
        for c in chords:
            c_roots, c_types = zip(*c)
            chord_roots.append(c_roots)
            chord_types.append(c_types)
        stuff = self.decode_visualize_fun(np.array(chord_roots, np.int32),
                                          np.array(chord_types, np.float32),
                                          feat_strengths, feat_vects)
        chosen, all_probs = stuff[:2]

        melody = [
            Encoding.decode_absolute_melody(c, self.bounds.lowbound,
                                            self.bounds.highbound)
            for c in chosen
        ]
        return melody, chosen, all_probs, stuff[2:]
    def _assemble_batch(self, melody, chords):
        encoded_melody = []
        relative_pos = []
        correct_notes = []
        chord_roots = []
        chord_types = []
        for m, c in zip(melody, chords):
            m = leadsheet.constrain_melody(m, self.bounds)
            e_m, r_p = self.encoding.encode_melody_and_position(m, c)
            encoded_melody.append(e_m)
            relative_pos.append(r_p)
            correct_notes.append(Encoding.encode_absolute_melody(m, self.bounds.lowbound, self.bounds.highbound))
            c_roots, c_types = zip(*c)
            chord_roots.append(c_roots)
            chord_types.append(c_types)
        return (np.array(chord_types, np.float32),
                np.array(chord_roots, np.int32),
                np.array(relative_pos, np.int32),
                np.array(encoded_melody, np.float32),
                np.array(correct_notes, np.int32))

    def _assemble_batch(self, melody, chords):
        encoded_melodies = [[] for _ in self.encodings]
        relative_posns = [[] for _ in self.encodings]
        correct_notes = []
        chord_roots = []
        chord_types = []
        for m, c in zip(melody, chords):
            m = leadsheet.constrain_melody(m, self.bounds)
            for i, encoding in enumerate(self.encodings):
                e_m, r_p = encoding.encode_melody_and_position(m, c)
                encoded_melodies[i].append(e_m)
                relative_posns[i].append(r_p)
            correct_notes.append(Encoding.encode_absolute_melody(m, self.bounds.lowbound, self.bounds.highbound))
            c_roots, c_types = zip(*c)
            chord_roots.append(c_roots)
            chord_types.append(c_types)
        return ([np.array(chord_types, np.float32),
                 np.array(chord_roots, np.int32),
                 np.array(correct_notes, np.int32)]
                + [np.array(x, np.int32) for x in relative_posns]
                + [np.array(x, np.int32) for x in encoded_melodies])
        def _scan_fn(*inputs):
            # inputs is [ spec_sequences..., last_absolute_position, spec_taps..., spec_non_sequences... ]
            inputs = list(inputs)
            last_absolute_chosen = inputs.pop(len(spec.sequences))
            scan_rout = self.lstmstack.sample_scan_routine(spec, *inputs)

            last_rel_pos, last_out, cur_kwargs = scan_rout.send(None)

            new_pos = self.encoding.get_new_relative_position(last_absolute_chosen, last_rel_pos, last_out, self.bounds.lowbound, self.bounds.highbound, **cur_kwargs)
            addtl_kwargs = {
                "last_output": last_out
            }

            out_activations = scan_rout.send((new_pos, addtl_kwargs))
            out_probs = self.encoding.decode_to_probs(out_activations,new_pos,self.bounds.lowbound, self.bounds.highbound)
            sampled_note = Encoding.sample_absolute_probs(self.srng, out_probs)
            encoded_output = self.encoding.note_to_encoding(sampled_note, new_pos, self.bounds.lowbound, self.bounds.highbound)
            scan_outputs = scan_rout.send(encoded_output)
            scan_rout.close()

            return [sampled_note, out_probs] + scan_outputs
        def _build(det_dropout):
            all_activations = []
            for encoding, enc_lstmstack, encoded_melody, relative_pos in zip(self.encodings, self.enc_lstmstacks, encoded_melodies, relative_posns):
                activations = enc_lstmstack.do_preprocess_scan( timestep=T.tile(T.arange(n_time), (n_batch,1)) ,
                                                            relative_position=relative_pos,
                                                            cur_chord_type=chord_types,
                                                            cur_chord_root=chord_roots,
                                                            cur_input=encoded_melody,
                                                            deterministic_dropout=det_dropout)
                all_activations.append(activations)
            reduced_activations = functools.reduce((lambda x,y: x+y), all_activations)
            queue_loss, feat_strengths, feat_vects, queue_info = self.qman.process(reduced_activations, extra_info=True)
            features = QueueManager.queue_transform(feat_strengths, feat_vects)

            all_out_probs = []
            for encoding, dec_lstmstack, encoded_melody, relative_pos in zip(self.encodings, self.dec_lstmstacks, encoded_melodies, relative_posns):
                activations = dec_lstmstack.do_preprocess_scan( timestep=T.tile(T.arange(n_time), (n_batch,1)) ,
                                                            relative_position=relative_pos,
                                                            cur_chord_type=chord_types,
                                                            cur_chord_root=chord_roots,
                                                            cur_feature=features,
                                                            last_output=T.concatenate([T.tile(encoding.initial_encoded_form(), (n_batch,1,1)),
                                                                                encoded_melody[:,:-1,:] ], 1),
                                                            deterministic_dropout=det_dropout)
                out_probs = encoding.decode_to_probs(activations, relative_pos, self.bounds.lowbound, self.bounds.highbound)
                all_out_probs.append(out_probs)

            reduced_out_probs = functools.reduce((lambda x,y: x*y), all_out_probs)
            normsum = T.sum(reduced_out_probs, 2, keepdims=True)
            normsum = T.maximum(normsum, constants.EPSILON)
            norm_out_probs = reduced_out_probs/normsum
            reconstruction_loss, reconstruction_info = Encoding.compute_loss(norm_out_probs, correct_notes, extra_info=True)

            queue_surrogate_loss_parts = self.qman.surrogate_loss(reconstruction_loss, queue_info)

            updates = []
            full_info = queue_info.copy()
            full_info.update(reconstruction_info)
            full_info["queue_loss"] = queue_loss
            full_info["reconstruction_loss"] = reconstruction_loss

            float_n_batch = T.cast(n_batch,'float32')
            if self.loss_mode == "add":
                full_loss = queue_loss + reconstruction_loss
            elif self.loss_mode is "priority":
                curviness = np.array(self.loss_mode_params[0], np.float32)*float_n_batch
                # ln( e^x + e^y - 1 )
                # ln( C(e^x + e^y - 1) ) - ln(C)
                # ln( e^c(e^x + e^y - 1) ) - c
                # ln( e^(x+c) + e^(y+c) - e^c ) - c
                # ln( e^(x-c) + e^(y-c) - e^(-c) ) + c
                # Now let c = maximum(x,y), d = minimum(x,y). WOLOG replace x=c, y=d
                # ln( e^(c-c) + e^(d-c) - e^(-c) ) + c
                # ln( 1 + e^(d-c) - e^(-c) ) + c
                x = reconstruction_loss/curviness
                y = queue_loss/curviness
                c = T.maximum(x,y)
                d = T.minimum(x,y)
                full_loss = (T.log( 1 + T.exp(d-c) - T.exp(-c)) + c)*curviness
            elif self.loss_mode is "cutoff":
                cutoff_val = np.array(self.loss_mode_params[0], np.float32)
                full_loss = T.switch(reconstruction_loss<cutoff_val*float_n_batch, reconstruction_loss+queue_loss, reconstruction_loss)
            elif self.loss_mode is "trigger":
                trigger_val = np.array(self.loss_mode_params[0], np.float32)
                trigger_speed = np.array(1.0/self.loss_mode_params[1], np.float32)
                trigger_is_on = theano.shared(np.array(0, np.int8))
                trigger_scale = theano.shared(np.array(0.0, np.float32))
                full_loss = reconstruction_loss + trigger_scale * queue_loss
                updates.append((trigger_is_on, T.or_(trigger_is_on, reconstruction_loss<trigger_val*float_n_batch)))
                updates.append((trigger_scale, T.switch(trigger_is_on, T.minimum(trigger_scale + trigger_speed, np.array(1.0,np.float32)), np.array(0.0,np.float32))))
                full_info["trigger_scale"] = trigger_scale

            if queue_surrogate_loss_parts is not None:
                surrogate_loss, addtl_updates = queue_surrogate_loss_parts
                full_loss = full_loss + surrogate_loss
                updates.extend(addtl_updates)
                full_info["surrogate_loss"] = surrogate_loss

            return full_loss, full_info, updates
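The comment block in the "priority" branch derives a numerically stable form of ln(e^x + e^y - 1) using c = max(x, y) and d = min(x, y). A quick NumPy check with made-up loss values (chosen only for illustration) confirms that the rewritten expression matches the direct one:

import numpy as np

x, y = 3.2, 0.7                                     # hypothetical scaled losses
direct = np.log(np.exp(x) + np.exp(y) - 1.0)
c, d = max(x, y), min(x, y)
stable = np.log(1.0 + np.exp(d - c) - np.exp(-c)) + c
print(direct, stable, np.isclose(direct, stable))   # same value, True
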
Example no. 15
        def _build(det_dropout):
            all_activations = []
            for encoding, enc_lstmstack, encoded_melody, relative_pos in zip(
                    self.encodings, self.enc_lstmstacks, encoded_melodies,
                    relative_posns):
                activations = enc_lstmstack.do_preprocess_scan(
                    timestep=T.tile(T.arange(n_time), (n_batch, 1)),
                    relative_position=relative_pos,
                    cur_chord_type=chord_types,
                    cur_chord_root=chord_roots,
                    cur_input=encoded_melody,
                    deterministic_dropout=det_dropout)
                all_activations.append(activations)
            reduced_activations = functools.reduce((lambda x, y: x + y),
                                                   all_activations)
            queue_loss, feat_strengths, feat_vects, queue_info = self.qman.process(
                reduced_activations, extra_info=True)
            features = QueueManager.queue_transform(feat_strengths, feat_vects)

            all_out_probs = []
            for encoding, dec_lstmstack, encoded_melody, relative_pos in zip(
                    self.encodings, self.dec_lstmstacks, encoded_melodies,
                    relative_posns):
                activations = dec_lstmstack.do_preprocess_scan(
                    timestep=T.tile(T.arange(n_time), (n_batch, 1)),
                    relative_position=relative_pos,
                    cur_chord_type=chord_types,
                    cur_chord_root=chord_roots,
                    cur_feature=features,
                    last_output=T.concatenate([
                        T.tile(encoding.initial_encoded_form(),
                               (n_batch, 1, 1)), encoded_melody[:, :-1, :]
                    ], 1),
                    deterministic_dropout=det_dropout)
                out_probs = encoding.decode_to_probs(activations, relative_pos,
                                                     self.bounds.lowbound,
                                                     self.bounds.highbound)
                all_out_probs.append(out_probs)

            reduced_out_probs = functools.reduce((lambda x, y: x * y),
                                                 all_out_probs)
            normsum = T.sum(reduced_out_probs, 2, keepdims=True)
            normsum = T.maximum(normsum, constants.EPSILON)
            norm_out_probs = reduced_out_probs / normsum
            reconstruction_loss, reconstruction_info = Encoding.compute_loss(
                norm_out_probs, correct_notes, extra_info=True)

            queue_surrogate_loss_parts = self.qman.surrogate_loss(
                reconstruction_loss, queue_info)

            updates = []
            full_info = queue_info.copy()
            full_info.update(reconstruction_info)
            full_info["queue_loss"] = queue_loss
            full_info["reconstruction_loss"] = reconstruction_loss

            float_n_batch = T.cast(n_batch, 'float32')
            if self.loss_mode == "add":
                full_loss = queue_loss + reconstruction_loss
            elif self.loss_mode is "priority":
                curviness = np.array(self.loss_mode_params[0],
                                     np.float32) * float_n_batch
                # ln( e^x + e^y - 1 )
                # ln( C(e^x + e^y - 1) ) - ln(C)
                # ln( e^c(e^x + e^y - 1) ) - c
                # ln( e^(x+c) + e^(y+c) - e^c ) - c
                # ln( e^(x-c) + e^(y-c) - e^(-c) ) + c
                # Now let c = maximum(x,y), d = minimum(x,y). WOLOG replace x=c, y=d
                # ln( e^(c-c) + e^(d-c) - e^(-c) ) + c
                # ln( 1 + e^(d-c) - e^(-c) ) + c
                x = reconstruction_loss / curviness
                y = queue_loss / curviness
                c = T.maximum(x, y)
                d = T.minimum(x, y)
                full_loss = (T.log(1 + T.exp(d - c) - T.exp(-c)) +
                             c) * curviness
            elif self.loss_mode is "cutoff":
                cutoff_val = np.array(self.loss_mode_params[0], np.float32)
                full_loss = T.switch(
                    reconstruction_loss < cutoff_val * float_n_batch,
                    reconstruction_loss + queue_loss, reconstruction_loss)
            elif self.loss_mode is "trigger":
                trigger_val = np.array(self.loss_mode_params[0], np.float32)
                trigger_speed = np.array(1.0 / self.loss_mode_params[1],
                                         np.float32)
                trigger_is_on = theano.shared(np.array(0, np.int8))
                trigger_scale = theano.shared(np.array(0.0, np.float32))
                full_loss = reconstruction_loss + trigger_scale * queue_loss
                updates.append(
                    (trigger_is_on,
                     T.or_(trigger_is_on,
                           reconstruction_loss < trigger_val * float_n_batch)))
                updates.append((trigger_scale,
                                T.switch(
                                    trigger_is_on,
                                    T.minimum(trigger_scale + trigger_speed,
                                              np.array(1.0, np.float32)),
                                    np.array(0.0, np.float32))))
                full_info["trigger_scale"] = trigger_scale

            if queue_surrogate_loss_parts is not None:
                surrogate_loss, addtl_updates = queue_surrogate_loss_parts
                full_loss = full_loss + surrogate_loss
                updates.extend(addtl_updates)
                full_info["surrogate_loss"] = surrogate_loss

            return full_loss, full_info, updates
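In the "trigger" mode the queue loss is phased in only after the reconstruction loss first drops below the trigger threshold, with trigger_scale ramping linearly from 0 to 1 at trigger_speed per update. A plain-Python sketch with hypothetical numbers (ignoring the n_batch scaling) mimics Theano's behaviour of applying both shared-variable updates with the old values:

trigger_val, trigger_speed = 2.0, 1.0 / 3            # hypothetical loss_mode_params
trigger_is_on, trigger_scale = False, 0.0
for recon_loss in [2.4, 2.1, 1.9, 1.8, 1.7, 1.6]:    # hypothetical per-update losses
    # Both updates read the old values, so the scale starts ramping one
    # update after the trigger fires and never resets once it is on.
    new_is_on = trigger_is_on or (recon_loss < trigger_val)
    new_scale = min(trigger_scale + trigger_speed, 1.0) if trigger_is_on else 0.0
    trigger_is_on, trigger_scale = new_is_on, new_scale
    print(recon_loss, round(trigger_scale, 2))       # scale: 0.0, 0.0, 0.0, 0.33, 0.67, 1.0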