Example #1
    def testCompositionStats(self):
        rlt = rl_tuner.RLTuner(self.output_dir,
                               note_rnn_checkpoint_dir=self.checkpoint_dir,
                               output_every_nth=30)
        stat_dict = rlt.evaluate_music_theory_metrics(num_compositions=10)

        self.assertTrue(stat_dict['num_repeated_notes'] >= 0)
        self.assertTrue(len(stat_dict['autocorrelation1']) > 1)
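The tests in these examples all reference `self.output_dir` and `self.checkpoint_dir`, which the suite prepares in `setUp`. A minimal sketch of that fixture, assuming temporary directories and the same `note_rnn_loader` pattern used in Example #10 (the import path, scope name, and checkpoint filename here are illustrative, not taken from the original suite):

import os
import tempfile

import tensorflow as tf
from magenta.models.rl_tuner import note_rnn_loader

class RLTunerTest(tf.test.TestCase):

    def setUp(self):
        # Scratch directories for generated output and the Note RNN checkpoint.
        self.output_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
        self.checkpoint_dir = tempfile.mkdtemp(dir=self.get_temp_dir())

        # Save an untrained Note RNN so RLTuner has a checkpoint to restore.
        graph = tf.Graph()
        session = tf.Session(graph=graph)
        note_rnn = note_rnn_loader.NoteRNNLoader(
            graph, scope='test', checkpoint_dir=None)
        note_rnn.initialize_new(session)
        with graph.as_default():
            saver = tf.train.Saver(var_list=note_rnn.get_variable_name_dict())
            saver.save(session,
                       os.path.join(self.checkpoint_dir, 'model.ckpt'))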
Example #2
    def testAction(self):
        rlt = rl_tuner.RLTuner(self.output_dir,
                               note_rnn_checkpoint_dir=self.checkpoint_dir)

        initial_note = rlt.prime_internal_models()

        action = rlt.action(initial_note, 100, enable_random=False)
        self.assertTrue(action is not None)
Example #3
    def testRewardNetwork(self):
        rlt = rl_tuner.RLTuner(self.output_dir,
                               note_rnn_checkpoint_dir=self.checkpoint_dir)

        zero_state = rlt.q_network.get_zero_state()
        priming_note = rlt.get_random_note()

        reward_scores = rlt.get_reward_rnn_scores(priming_note, zero_state)
        self.assertTrue(reward_scores is not None)
Example #4
    def testInitialGeneration(self):
        rlt = rl_tuner.RLTuner(self.output_dir,
                               note_rnn_checkpoint_dir=self.checkpoint_dir)

        plot_name = 'test_initial_plot.png'
        rlt.generate_music_sequence(visualize_probs=True,
                                    prob_image_name=plot_name)
        output_path = os.path.join(self.output_dir, plot_name)
        self.assertTrue(os.path.exists(output_path))
Example #5
def main(_):
  hparams = rl_tuner_ops.default_hparams()
  if FLAGS.note_rnn_type == 'basic_rnn':
    hparams = rl_tuner_ops.basic_rnn_hparams()
  elif FLAGS.note_rnn_type == 'attention_rnn':
    hparams = rl_tuner_ops.attention_rnn_hparams()

  dqn_hparams = tf.contrib.training.HParams(random_action_probability=0.1,
                                            store_every_nth=1,
                                            train_every_nth=5,
                                            minibatch_size=32,
                                            discount_rate=0.5,
                                            max_experience=100000,
                                            target_network_update_rate=0.01)

  output_dir = os.path.join(FLAGS.output_dir, FLAGS.algorithm)
  output_ckpt = FLAGS.algorithm + '.ckpt'
  backup_checkpoint_file = os.path.join(FLAGS.note_rnn_checkpoint_dir,
                                        FLAGS.note_rnn_checkpoint_name)

  rlt = rl_tuner.RLTuner(output_dir,
                         midi_primer=FLAGS.midi_primer,
                         dqn_hparams=dqn_hparams,
                         reward_scaler=FLAGS.reward_scaler,
                         save_name=output_ckpt,
                         output_every_nth=FLAGS.output_every_nth,
                         note_rnn_checkpoint_dir=FLAGS.note_rnn_checkpoint_dir,
                         note_rnn_checkpoint_file=backup_checkpoint_file,
                         note_rnn_type=FLAGS.note_rnn_type,
                         note_rnn_hparams=hparams,
                         num_notes_in_melody=FLAGS.num_notes_in_melody,
                         exploration_mode=FLAGS.exploration_mode,
                         algorithm=FLAGS.algorithm)

  tf.logging.info('Saving images and melodies to: %s', rlt.output_dir)

  tf.logging.info('Training...')
  rlt.train(num_steps=FLAGS.training_steps,
            exploration_period=FLAGS.exploration_steps)

  tf.logging.info('Finished training. Saving output figures and composition.')
  rlt.plot_rewards(image_name='Rewards-' + FLAGS.algorithm + '.eps')

  rlt.generate_music_sequence(visualize_probs=True, title=FLAGS.algorithm,
                              prob_image_name=FLAGS.algorithm + '.png')

  rlt.save_model_and_figs(FLAGS.algorithm)

  tf.logging.info('Calculating music theory metric stats for 1000 '
                  'compositions.')
  statistics = rlt.evaluate_music_theory_metrics(num_compositions=1000)
  print("music theory evaluation statistic: {0}".format(statistics))
Example #6
    def testTraining(self):
        rlt = rl_tuner.RLTuner(
            self.output_dir, note_rnn_checkpoint_dir=self.checkpoint_dir,
            output_every_nth=30)
        rlt.train(num_steps=31, exploration_period=3)

        checkpoint_dir = os.path.dirname(rlt.save_path)
        checkpoint_files = [
            f for f in os.listdir(checkpoint_dir)
            if os.path.isfile(os.path.join(checkpoint_dir, f))]
        checkpoint_step_30 = [
            f for f in checkpoint_files
            if os.path.basename(rlt.save_path) + '-30' in f]

        self.assertTrue(len(checkpoint_step_30))

        self.assertTrue(len(rlt.rewards_batched) >= 1)
        self.assertTrue(len(rlt.eval_avg_reward) >= 1)
Example #7
def run_algorithm(alg, dqn_hparams, hparams, num_compositions):
  output_dir = os.path.join(FLAGS.output_dir, alg)
  output_ckpt = alg + '.ckpt'
  backup_checkpoint_file = os.path.join(FLAGS.note_rnn_checkpoint_dir,
                                        FLAGS.note_rnn_checkpoint_name)
  rlt = rl_tuner.RLTuner(output_dir,
                         midi_primer=FLAGS.midi_primer,
                         dqn_hparams=dqn_hparams,
                         reward_scaler=FLAGS.reward_scaler,
                         save_name=output_ckpt,
                         output_every_nth=FLAGS.output_every_nth,
                         note_rnn_checkpoint_dir=FLAGS.note_rnn_checkpoint_dir,
                         note_rnn_checkpoint_file=backup_checkpoint_file,
                         note_rnn_type=FLAGS.note_rnn_type,
                         note_rnn_hparams=hparams,
                         num_notes_in_melody=FLAGS.num_notes_in_melody,
                         exploration_mode=FLAGS.exploration_mode,
                         algorithm=alg)

  tf.logging.info('Saving images and melodies to: %s', rlt.output_dir)

  if (alg != "none_rl"):
    tf.logging.info('Training...')
    rlt.train(num_steps=FLAGS.training_steps,
              exploration_period=FLAGS.exploration_steps)

  tf.logging.info('Finished training. Saving output figures and composition.')
  rlt.plot_rewards(image_name='Rewards-' + alg + '.eps')

  rlt.generate_music_sequence(visualize_probs=True, title=alg,
                              prob_image_name=alg + '.png')

  rlt.save_model_and_figs(alg)

  tf.logging.info('Calculating music theory metric stats for %d '
                  'compositions.', num_compositions)
  rlt.evaluate_music_theory_metrics(num_compositions=num_compositions)
  return rlt
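Because `run_algorithm` takes the algorithm name as a parameter, comparing variants is just a loop over names. A hedged usage sketch ('q', 'psi', and 'g' are algorithm names RLTuner accepts, and 'none_rl' is the untrained baseline skipped at the training branch above; `dqn_hparams` and `hparams` would be built as in Example #5):

for alg in ['none_rl', 'q', 'psi', 'g']:
    run_algorithm(alg, dqn_hparams, hparams, num_compositions=100)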
Example #8
    def testInitializationAndPriming(self):
        rlt = rl_tuner.RLTuner(self.output_dir,
                               note_rnn_checkpoint_dir=self.checkpoint_dir)

        initial_note = rlt.prime_internal_models()
        self.assertTrue(initial_note is not None)
Example #9
# Model parameter settings
SAVE_PATH = "/tmp/rl_tuner/"
ALGORITHM = 'q'
REWARD_SCALER = 1
OUTPUT_EVERY_NTH = 50000
NUM_NOTES_IN_COMPOSITION = 32
PRIME_WITH_MIDI = True
NOTE_RNN_CHECKPOINT_FILE = 'note_rnn.ckpt'
MIDI_PRIMER = 'bach_846.mid'
PRIMING_MODE = 'single_midi'
NOTE_RNN_TYPE = 'default'

# Create an RLTuner object with the parameters specified above.
rl_net = rl_tuner.RLTuner(
    SAVE_PATH,
    # Hyperparameters
    reward_scaler=REWARD_SCALER,
    priming_mode=PRIMING_MODE,
    algorithm=ALGORITHM,
    note_rnn_checkpoint_file=NOTE_RNN_CHECKPOINT_FILE,
    note_rnn_type=NOTE_RNN_TYPE,
    # Other music related settings.
    num_notes_in_melody=NUM_NOTES_IN_COMPOSITION,
    input_size=rl_tuner_ops.NUM_CLASSES,
    num_actions=rl_tuner_ops.NUM_CLASSES,
    midi_primer=MIDI_PRIMER,
    # Logistics.
    output_every_nth=OUTPUT_EVERY_NTH)

# Generate and display a music sequence.
rl_net.generate_music_sequence(visualize_probs=True, title='post_rl')
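As written, `rl_net` generates directly from the primed Note RNN, so the sequence reflects no reinforcement learning yet. For a genuinely "post_rl" sequence, training would come first; a sketch using the same `train` call as Examples #5 and #6, with step counts chosen purely for illustration:

rl_net.train(num_steps=1000000, exploration_period=500000)
rl_net.generate_music_sequence(visualize_probs=True, title='post_rl')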
Example #10
class IndividualDrum(Individual):
    _count = 0
    vae = CVae()

    # reward = reward.RLTunerTest()
    # reward.setUp()

    graph = tf.Graph()
    session = tf.Session(graph=graph)
    note_rnn = note_rnn_loader.NoteRNNLoader(
        graph, scope='test', checkpoint_dir=os.getcwd() +
        "/src/")  # , midi_primer='/tmp/RL/nice.mid')
    # note_rnn = note_rnn_loader.NoteRNNLoader(graph, scope='test',
    #                                         checkpoint_dir=None)  # , midi_primer='/tmp/RL/nice.mid')
    note_rnn.initialize_new(session)
    with graph.as_default():
        saver = tf.train.Saver(var_list=note_rnn.get_variable_name_dict())
        saver.save(session, "/tmp/RL/")

    rlt = rl_tuner.RLTuner(os.getcwd() + "/output/",
                           note_rnn_checkpoint_dir=os.getcwd() + "/src/")

    # rlt = rl_tuner.RLTuner("/Users/Cyril_Musique/Documents/Cours/M2/MuGen/output",
    #                       note_rnn_checkpoint_dir=None)

    def overlapped_keys(self, key_to_check, bars):
        overlapped = []
        for key in bars:
            if key_to_check.pitch != key.pitch:
                if key_to_check.timestamp <= key.timestamp <= (
                        key_to_check.timestamp + key_to_check.duration):
                    overlapped.append(key)
                    # print("key ", key_to_check, " overlapped by ", key )
        return overlapped

    def check_collision(self, key_to_check, changed_pitch, bars):
        for key in bars:
            if (key_to_check.bit.pitch + changed_pitch) == key.bit.pitch:
                if key_to_check.bit.timestamp <= key.bit.timestamp <= (
                        key_to_check.bit.timestamp +
                        key_to_check.bit.duration):
                    return False
        return True

    def mutate(self):
        """
        for key in self.sequence:

            RL = True

            if not RL:
                #print("key",key)
                # probability to switch a key


                #if random.random() > 1 / len(self.sequence):
                #    change_pitch = random.randint(-1, 1)
                #    if self.check_collision(key, change_pitch, self.sequence) and 49 <= key.bit.pitch + change_pitch <= 58:
                #        key.bit.pitch += change_pitch
                #        #print("MUTATE KEY")
                #    self.sequence.remove(key)
                if random.random()>0.1:
                    self.sequence.remove(key)
                    self.generate_note()


                if random.random() > 1 / len(self.sequence):
                    if random.random()>0.5:
                        if key.bit.timestamp>0.5:
                            key.bit.timestamp -= 0.01

                    else:
                        if key.bit.timestamp< 7.5:
                            key.bit.timestamp += 0.01
            if RL:

                #random_note = random.randrange(0, self.number_of_notes - 1)
                #change_pitch = random.randint(-3, 3)



                if key.bit.pitch + change_pitch < 24 and key.bit.pitch + change_pitch > 0:
                    key.bit.pitch += change_pitch

                if random.random() > 1 / len(self.sequence):
                    change_pitch = random.randint(-3,3)
                    if key.bit.pitch + change_pitch<24 and key.bit.pitch + change_pitch>0:
                        key.bit.pitch += change_pitch

                #if random.random()>0.9:
                #    key.bit.duration/=2
                #    new_note = Note(key.bit.pitch, key.bit.timestamp+key.bit.duration, key.bit.duration)
                #    self.sequence.append(GeneDrum(new_note))
                #if random.random() > 0.5:
                #    index = self.sequence.index(key)
                #    self.sequence[index-1].bit.duration*=2
                #    self.sequence.remove(key)
        """
        if len(self.sequence) > 10:
            random_note = random.randrange(0, len(self.sequence))
            change_pitch = random.randint(-7, 7)
            key = self.sequence[random_note]
            if 0 < key.bit.pitch + change_pitch < 24:
                key.bit.pitch += change_pitch
            # if random.random() > 0.9:
            #    index = self.sequence.index(key)
            #    if index >0:
            #        self.sequence[index-1].bit.duration*=2
            #        self.sequence.remove(key)
        else:
            print("CANNOT")

    def crossover(self, other):
        fc = IndividualDrum(self.parameters, empty=True)
        sc = IndividualDrum(self.parameters, empty=True)
        fc.sequence = deepcopy(self.sequence)
        sc.sequence = deepcopy(other.sequence)
        return fc, sc

    def __init__(self, parameters, empty=False):
        super().__init__(parameters)
        IndividualDrum._count += 1
        self.ind = IndividualDrum._count
        max_number_of_notes = 8  # 16 for 2 bars
        self.length = 4  # *4 = 8 bars
        self.number_of_notes = max_number_of_notes * self.length  # randint(0, max_number_of_notes)
        if not empty:
            self.generate_seq()

    def create_midi_file(self, file_name=None):
        track = 0
        channel = 1
        tempo = 120  # In BPM
        volume = 100  # 0-127, as per the MIDI standard
        my_midi = MIDIFile(1)  # One track; format 1 creates the tempo track automatically.
        my_midi.addTempo(track, 0, tempo)
        # my_midi.addProgramChange(0, 10, 0, 0)
        my_midi.tracks[0].addChannelPressure(0, 4, 0)

        repertory = "output/"
        if file_name is not None:
            file = file_name + ".mid"
        else:
            file = str(self.ind) + ".mid"
        for note in self.sequence:
            # print(note)
            my_midi.addNote(track, channel, note.bit.pitch + 36,
                            note.bit.timestamp, note.bit.duration, volume)

        with open(repertory + file, "wb") as output_file:
            my_midi.writeFile(output_file)

    def generate_note(self):

        # allowed_pitch = [36, 38, 42, 46, 41, 45, 48, 51, 49]
        new_note = Note(random.randrange(0, 24),
                        round_down(round(uniform(0, 7.75), 2), 0.25),
                        0.25)  # quantized melody
        # new_note = Note(sample(allowed_pitch, 1)[0], round_down(round(uniform(0, 7.75), 2), 0.25), 0.25) #QUANTIZED
        # new_note = Note(random.sample(allowed_pitch, 1)[0], round(random.uniform(0, 7.75), 2), 0.25) #UNQUANTIZED
        if new_note not in self.sequence:
            self.sequence.append(GeneDrum(new_note))

    def generate_seq(self):

        for i in range(16):
            automate = _automate.create_automate()
            while not automate.has_finished():
                self.sequence.append(
                    GeneDrum(automate.next_state(position=i * 4)))

        # Create a PrettyMIDI object
        # pm = pretty_midi.PrettyMIDI()
        """
        RL = True
        if not RL:
            max_number_of_notes = 100
            #self.number_of_notes = randint(20, max_number_of_notes)
            for x in range(self.number_of_notes):
                self.generate_note()

        if RL:
            divide = 8/self.number_of_notes*self.length
            #print(divide)
            for i in range(self.number_of_notes):

                #self.sequence.append(GeneDrum(Note(  random.randrange(0,24),i*divide, divide  )))
                self.sequence.append(GeneDrum(Note(12, i * divide, divide)))

        #print(self.sequence)
        #self.create_midi_file()
        """
        '''
        midi_data = PrettyMIDI(repertory + file)
        a = None
        for instrument in midi_data.instruments:
            if instrument.is_drum:
                instrument.is_drum = False
                a = instrument.get_piano_roll()[36:48]
                a[a > 0] = 1
                a = np.pad(a, [(0, 0), (0, 400 - a.shape[1])], 'constant')
                a = a.astype(dtype=bool)
                # a = a.transpose()
                break
        for i in range(a.shape[0]):
            self.sequence.append(GeneDrum(   ))
        # print(len(self.sequence))
        # b = np.array(self.sequence)
        # print(b.shape)
        '''

    def fitness(self, should_print=False):

        RL = True
        if not RL:
            # self.create_midi_file()
            repertory = "output/"
            file = repertory + str(self.ind) + ".mid"

            return -abs(self.vae.get_distance(file, self.ind))
        else:
            '''
            self.rlt.train(num_steps=100000, exploration_period=500000)

            stat_dict = self.rlt.evaluate_music_theory_metrics(num_compositions=100)
            self.rlt.plot_rewards()
            stat3 = self.rlt.generate_music_sequence(visualize_probs=True, title='post_rl')
            print(stat3)
            exit(0)
            '''

            # print("NOTE COMPO: ")
            self.rlt.num_notes_in_melody = self.number_of_notes
            self.rlt.reset_composition()
            to_mean_note_reward = []
            to_mean_rnn = []
            for note in self.sequence:
                one_hot = np.array(
                    rl_tuner_ops.make_onehot([note.bit.pitch], 38)).flatten()
                note_reward = self.rlt.reward_music_theory(one_hot)
                # if should_print:
                #    print(one_hot,note_reward )
                self.rlt.composition.append(np.argmax(one_hot))
                self.rlt.beat += 1
                to_mean_note_reward.append(note_reward)
                a, b, c = self.rlt.action(one_hot)

                reward_scores = np.reshape(c, (38))

                # print(to_mean_rnn)

                note_rnn_reward = self.rlt.reward_from_reward_rnn_scores(
                    one_hot, reward_scores)
                to_mean_rnn.append(note_rnn_reward)

            # print(self.rlt.composition)
            mean_note_reward = np.mean(to_mean_note_reward)
            mean_rnn_reward = np.mean(to_mean_rnn)
            return mean_rnn_reward + mean_note_reward
            # # print(to_mean_note_reward)
            # if len(to_mean_note_reward) > 8:
            #     worst_score = np.min(to_mean_note_reward)
            #
            # # print("mean", mean_note_reward)
            # if len(to_mean_note_reward) > 8:
            #     # if should_print:
            #     #    print("Worst : ",worst_score, " mean :", mean_note_reward, " ALL ", worst_score * 0.8 + 0.2 * mean_note_reward)
            #     return worst_score * 0.8 + 0.2 * mean_note_reward
            # else:
            #     return -100

    def __eq__(self, other):
        if type(other) != type(self):
            return False
        # Sequences of different lengths are never equal; zip alone would
        # silently ignore the extra notes.
        if len(self.sequence) != len(other.sequence):
            return False
        for a, b in zip(self.sequence, other.sequence):
            if a.bit != b.bit:
                return False
        return True

    def __repr__(self):
        # r = f"I: {self.fitness()}"
        # for g in self.sequence:
        #    r += f'\n\t{g.bit}'
        r = str(self.ind)
        return r

    def __hash__(self):
        # A random hash would break the hash/eq contract; derive it from the
        # note sequence instead so equal individuals hash equally.
        return hash(tuple((g.bit.pitch, g.bit.timestamp, g.bit.duration)
                          for g in self.sequence))
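Throughout `fitness`, each pitch is scored only after being converted to a one-hot vector of width 38, the action-space size used here. A small standalone sketch of that conversion, reusing the `rl_tuner_ops.make_onehot` helper this class already depends on (the import path is an assumption):

import numpy as np
from magenta.models.rl_tuner import rl_tuner_ops

pitch = 12
one_hot = np.array(rl_tuner_ops.make_onehot([pitch], 38)).flatten()
assert one_hot.shape == (38,) and one_hot[pitch] == 1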
Example #11
    def testRewardNetwork(self):
        graph = tf.Graph()
        self.session = tf.Session(graph=graph)
        note_rnn = note_rnn_loader.NoteRNNLoader(
            graph, scope='test',
            checkpoint_dir=None)  #, midi_primer='/tmp/RL/nice.mid')
        note_rnn.initialize_new(self.session)
        #print("NOTESEQ", note_rnn.primer)
        #print("AFER")

        rlt = rl_tuner.RLTuner(self.output_dir,
                               note_rnn_checkpoint_dir=self.checkpoint_dir)

        initial_note = rlt.prime_internal_models()
        print("INITIAL NOTE", initial_note)
        print("FIRST SCORE", rlt.reward_key(initial_note))

        action = rlt.action(initial_note, 100, enable_random=True)
        print("ACTION  CHOSEN ", action[0])
        print("ACTION  REWARD ", action[1])
        print("ACTION  NEXT OBS ", action[2])
        print("FINAL", rlt.reward_key(action[2]))

        print("ONE HOT CREATED")
        x = np.array(rl_tuner_ops.make_onehot([10], 24)).flatten()

        print(x)
        print("FINAL", rlt.reward_key(x))

        last_observation = rlt.prime_internal_models()
        rlt.num_notes_in_melody = 12
        rlt.reset_composition()

        for _ in range(rlt.num_notes_in_melody):
            _, new_observation, reward_scores = rlt.action(
                last_observation,
                0,
                enable_random=False,
                sample_next_obs=False)

            music_theory_reward = rlt.reward_music_theory(new_observation)

            #music_theory_rewards = music_theory_reward * rlt.reward_scaler
            print(music_theory_reward)
            print(new_observation)
            rlt.composition.append(np.argmax(new_observation))
            rlt.beat += 1
            last_observation = new_observation
        print("num note", rlt.num_notes_in_melody)

        rlt.reset_composition()
        # Twelve one-hot test vectors, one per pitch position (an ascending scale).
        final = [list(row) for row in np.eye(12, dtype=int)]

        avg = []
        print("BEGIN")
        for x in final:
            reward = rlt.reward_music_theory(x)
            print(x, reward)
            rlt.composition.append(np.argmax(x))
            rlt.beat += 1
            avg.append(reward)
            rlt.music_theory_reward_last_n += reward * rlt.reward_scaler
        print("AVG", np.mean(avg))

        rlt.reset_composition()
        # A second test melody, written as the one-hot index of each step.
        pitches = [0, 4, 5, 4, 2, 0, 0, 11, 9, 1, 10, 0]
        final = [[1 if i == p else 0 for i in range(12)] for p in pitches]

        print("SECOND")
        avg = []
        for x in final:
            reward = rlt.reward_music_theory(x)
            print(x, reward)
            rlt.composition.append(np.argmax(x))
            rlt.beat += 1
            avg.append(reward)
            rlt.music_theory_reward_last_n += reward * rlt.reward_scaler
        print("AVG", np.mean(avg))