directory = plot_dir, file_name = "3D_exp_MC_" + str(decks) + "_decks.png", show = False) """ """ # Extended state Q-learning Q_conv = rl.convert_to_sum_states(Q, env) V_conv = rl.convert_to_value_function(Q_conv) V_conv_filt = rl.fill_missing_sum_states(rl.filter_states(V_conv)) pl.plot_value_function(V_conv_filt, title = "Expanded state, " + str(decks) + " decks", directory = plot_dir, file_name = "3D_exp_" + str(decks) + "_decks.png") """ # Likewise make 3D plots for sumQ V_sum = rl.convert_to_value_function(sumQ) V_sum_filt = rl.fill_missing_sum_states(rl.filter_states(V_sum)) pl.plot_value_function(V_sum_filt, title="Sum state, " + str(decks) + " decks", directory=plot_dir, file_name="3D_sum_" + str(decks) + "_decks.png") """ # create line plots env_types = ["hand_MC", "hand", "sum"] fig, lgd = pl.plot_avg_reward_episode(directory, env_types, [str(decks)]) fig.savefig("{}/avgReturnEp_ndeck{}.png".format(plot_dir, decks), bbox_extra_artists=(lgd,), bbox_inches='tight') matplotlib.pyplot.close(fig) """ keys = list(sumQ.keys())
warmup=warmup) time_to_completion_sum = time.time() - start_time_sum print("Number of explored states (sum states): " + str(len(sumQ))) print("Cumulative avg. reward = " + str(sum_avg_reward)) print( "Training time: \n " + "Expanded state space MC: {} \n Expanded state space: {} \n Sum state space: {}" .format(time_to_completion_MC, time_to_completion_expanded, time_to_completion_sum)) # Convert Q (extended state) to sum state representation and make 3D plots # Extended state MC-learning Q_conv_MC = rl.convert_to_sum_states(Q_MC, env) V_conv_MC = rl.convert_to_value_function(Q_conv_MC) V_conv_filt_MC = rl.fill_missing_sum_states( rl.filter_states(V_conv_MC)) pl.plot_value_function( V_conv_filt_MC, title="Expanded state MC, " + str(decks) + " decks", directory=plot_dir, file_name="3D_exp_MC_" + str(decks) + "_decks.png") # Extended state Q-learning Q_conv = rl.convert_to_sum_states(Q, env) V_conv = rl.convert_to_value_function(Q_conv) V_conv_filt = rl.fill_missing_sum_states(rl.filter_states(V_conv)) pl.plot_value_function(V_conv_filt, title="Expanded state, " + str(decks) + " decks",