cruve = np.mean( [reward_rec[i:i + 10] for i in range(0, len(reward_rec), 10)], axis=1) plt.plot(range(len(cruve)), cruve, c='b') plt.xlabel(r'Epochs $ \times 10$') plt.ylabel('Rewards') # Save model critic_model.save_weights(r'./saved_model/DRL/crtic_3000.HDF5') print('model saved.') elif MODE == 'eval': # Load model critic_model = Network(num_actions, embeddings_matrix) _ = critic_model(inputs=[inputs, act_input]) critic_model.load_weights(r'./saved_model/DRL/crtic_3000.HDF5') print('model loaded.') actor_q_model = tf.keras.Model( inputs=critic_model.input, outputs=critic_model.get_layer('q_outputs').output) ones = np.ones(shape=(len(test_seg), num_actions)) pred = predict(actor_q_model, (input_validation, ones)) print('On validate data: ') metrics(y_test, pred) ones_tr = np.ones(shape=(len(train_seg), num_actions)) pred_tr = predict(actor_q_model, (input_train, ones_tr)) print('On train data: ') metrics(y_train, pred_tr)