Example No. 1
  def get_note_from_softmax(self, softmax):
    """Extracts a one-hot encoding of the most probable note.

    Args:
      softmax: Softmax probabilities over possible next notes.
    Returns:
      One-hot encoding of most probable note.
    """

    note_idx = np.argmax(softmax)
    note_enc = rl_tuner_ops.make_onehot([note_idx], rl_tuner_ops.NUM_CLASSES)
    return np.reshape(note_enc, (rl_tuner_ops.NUM_CLASSES))
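A minimal usage sketch, assuming the Magenta import path and an already-constructed RLTuner instance named `tuner`; the softmax values below are made up purely for illustration:

    import numpy as np
    from magenta.models.rl_tuner import rl_tuner_ops

    # Fake probabilities over the note vocabulary, just for illustration.
    softmax = np.random.dirichlet(np.ones(rl_tuner_ops.NUM_CLASSES))
    one_hot = tuner.get_note_from_softmax(softmax)  # `tuner`: an RLTuner instance (assumed)
    assert one_hot.shape == (rl_tuner_ops.NUM_CLASSES,)
    assert np.argmax(one_hot) == np.argmax(softmax)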
Example No. 2
    def fitness(self, should_print=False):
        """Scores this individual with the RL Tuner rewards (or, alternatively,
        with the VAE latent-space distance of its generated MIDI file)."""
        use_rl = True
        if not use_rl:
            # VAE-based fitness: how far the generated MIDI file is from the
            # reference in the VAE's latent space (closer is better).
            repertory = "output/"
            midi_file = repertory + str(self.ind) + ".mid"
            return -abs(self.vae.get_distance(midi_file, self.ind))

        # RL-based fitness: average the music-theory reward and the
        # reward-RNN reward over every note of the melody.
        self.rlt.num_notes_in_melody = self.number_of_notes
        self.rlt.reset_composition()
        to_mean_note_reward = []
        to_mean_rnn = []
        for note in self.sequence:
            one_hot = np.array(
                rl_tuner_ops.make_onehot([note.bit.pitch], 38)).flatten()
            note_reward = self.rlt.reward_music_theory(one_hot)
            if should_print:
                print(one_hot, note_reward)
            self.rlt.composition.append(np.argmax(one_hot))
            self.rlt.beat += 1
            to_mean_note_reward.append(note_reward)

            # The action call also returns the reward RNN's scores over all
            # 38 possible next notes; use them to score the chosen note.
            _, _, reward_scores = self.rlt.action(one_hot)
            reward_scores = np.reshape(reward_scores, (38,))
            note_rnn_reward = self.rlt.reward_from_reward_rnn_scores(
                one_hot, reward_scores)
            to_mean_rnn.append(note_rnn_reward)

        mean_note_reward = np.mean(to_mean_note_reward)
        mean_rnn_reward = np.mean(to_mean_rnn)
        return mean_rnn_reward + mean_note_reward
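The RL branch above reduces to one scoring rule: average music-theory reward plus average reward-RNN reward over the melody. Below is a standalone sketch of that rule for an arbitrary list of pitch indices, assuming `rlt` is an initialized RLTuner with the same 38-note vocabulary used above; the helper name `score_pitches` is hypothetical, not part of the project:

    import numpy as np
    from magenta.models.rl_tuner import rl_tuner_ops

    def score_pitches(rlt, pitches):
        """Average music-theory reward plus average reward-RNN reward (sketch)."""
        rlt.num_notes_in_melody = len(pitches)
        rlt.reset_composition()
        theory_rewards, rnn_rewards = [], []
        for pitch in pitches:
            one_hot = np.array(rl_tuner_ops.make_onehot([pitch], 38)).flatten()
            theory_rewards.append(rlt.reward_music_theory(one_hot))
            rlt.composition.append(pitch)
            rlt.beat += 1
            _, _, scores = rlt.action(one_hot)
            rnn_rewards.append(
                rlt.reward_from_reward_rnn_scores(one_hot, np.reshape(scores, (38,))))
        return np.mean(theory_rewards) + np.mean(rnn_rewards)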
Example No. 3
    def testRewardNetwork(self):
        graph = tf.Graph()
        self.session = tf.Session(graph=graph)
        note_rnn = note_rnn_loader.NoteRNNLoader(
            graph, scope='test', checkpoint_dir=None)
        note_rnn.initialize_new(self.session)

        rlt = rl_tuner.RLTuner(self.output_dir,
                               note_rnn_checkpoint_dir=self.checkpoint_dir)

        initial_note = rlt.prime_internal_models()
        print("INITIAL NOTE", initial_note)
        print("FIRST SCORE", rlt.reward_key(initial_note))

        action = rlt.action(initial_note, 100, enable_random=True)
        print("ACTION  CHOSEN ", action[0])
        print("ACTION  REWARD ", action[1])
        print("ACTION  NEXT OBS ", action[2])
        print("FINAL", rlt.reward_key(action[2]))

        print("ONE HOT CREATED")
        x = np.array(rl_tuner_ops.make_onehot([10], 24)).flatten()

        print(x)
        print("FINAL", rlt.reward_key(x))

        last_observation = rlt.prime_internal_models()
        rlt.num_notes_in_melody = 12
        rlt.reset_composition()

        for _ in range(rlt.num_notes_in_melody):
            _, new_observation, reward_scores = rlt.action(
                last_observation,
                0,
                enable_random=False,
                sample_next_obs=False)

            music_theory_reward = rlt.reward_music_theory(new_observation)

            print(music_theory_reward)
            print(new_observation)
            rlt.composition.append(np.argmax(new_observation))
            rlt.beat += 1
            last_observation = new_observation
        print("num note", rlt.num_notes_in_melody)

        rlt.reset_composition()
        # First test melody: one-hot vectors for every pitch index 0..11 in order.
        final = [np.eye(12, dtype=int)[i].tolist() for i in range(12)]

        avg = []
        print("BEGIN")
        for x in final:
            reward = rlt.reward_music_theory(x)
            print(x, reward)
            rlt.composition.append(np.argmax(x))
            rlt.beat += 1
            avg.append(reward)
            rlt.music_theory_reward_last_n += reward * rlt.reward_scaler
        print("AVG", np.mean(avg))

        rlt.reset_composition()
        # Second test melody, given as pitch indices and expanded to one-hot vectors.
        melody = [0, 4, 5, 4, 2, 0, 0, 11, 9, 1, 10, 0]
        final = [np.eye(12, dtype=int)[i].tolist() for i in melody]

        print("SECOND")
        avg = []
        for x in final:
            reward = rlt.reward_music_theory(x)
            print(x, reward)
            rlt.composition.append(np.argmax(x))
            rlt.beat += 1
            avg.append(reward)
            rlt.music_theory_reward_last_n += reward * rlt.reward_scaler
        print("AVG", np.mean(avg))