def plot_fishers(fishers_data, title=''):
    """Plot Fisher's exact correlation scores with a heatmap."""
    # try to infer the necessary figure size and set up the axis
    nrows, ncols = fishers_data.shape
    figsize = (ncols, nrows)
    fig, ax = plt.subplots(figsize=figsize)

    # check for inf values, since these cannot be plotted otherwise
    if np.inf in fishers_data.values or -np.inf in fishers_data.values:
        fishers_data = fishers_data.replace(np.inf, 300)
        fishers_data = fishers_data.replace(-np.inf, -300)
        title = title + "\nNB: Fisher's test (+/-)np.inf replaced with (+/-)300 for plotting"

    # plot with specifications
    heatmap(
        fishers_data.round().astype(int),
        ax=ax,
        # robust=True,
        fmt="d",
    )
    ax.set_title(title)
    ax.set_xlabel('')
    ax.set_ylabel('')
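# A minimal usage sketch (not part of the original source): the DataFrame below is
# hypothetical and only illustrates the expected input, a table of Fisher's-test
# scores. It assumes `heatmap` here is seaborn.heatmap, since the call above passes
# ax=, fmt=, and (commented) robust= -- the original imports are not shown.
if __name__ == '__main__':
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from seaborn import heatmap

    demo_scores = pd.DataFrame([[1.2, np.inf], [-np.inf, 0.4]],
                               index=['feat_a', 'feat_b'],
                               columns=['feat_c', 'feat_d'])
    plot_fishers(demo_scores, title='demo Fisher scores')
    plt.show()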
def final_plots_and_error(self, idx, target, err_func):
    self.final_err = err_func(self.final_y, self.final_yhat).mean().mean()

    fig = plt.figure()
    plot_ts(pd.concat([self.final_X, self.final_y], axis=1),
            idx=idx, c='steelblue', lw=2, label='actual')
    label_suffix = 'mean across obs'
    if isinstance(idx, str):
        label_suffix = idx
    plot_ts(self.final_yhat, idx=idx, c='indianred', lw=3.5,
            label=f'forecast for {label_suffix}')
    plt.title(f'actual and predicted {target}; err: {self.final_err:.4f}')

    fig = plt.figure()
    heatmap(df=pd.concat([self.final_X, self.final_yhat], axis=1),
            target=target,
            sort_col=self.final_X.columns[-1],
            forecast_line=self.n_forecast)
    return
def main():
    # Read in the data
    sample = pd.read_csv(READ_PATH)
    sample.info()
    print(sample.describe())
    print(sample.describe(include=['O']))

    # Create summary plots
    plotting.violin(sample)
    plotting.pairplot(sample)
    plotting.pairplot_kde(sample)
    plotting.heatmap(sample)
    plotting.swarmplot(sample)

    # Standardize variables
    X = standardize(sample)

    # Build models with k=2 through k=10
    models = []
    for k in range(2, 11):
        y_pred, km = build_cluster_model(X, k)
        models.append(('k%d_class' % k, km))
        sample['k%d_class' % k] = y_pred

    # Inertia analysis
    inertia = np.array([m[1].inertia_ for m in models])
    k_value = np.arange(2, 11)
    plotting.inertia(k_value, inertia)
    plotting.d_inertia(k_value, inertia)

    # Pair plots with new color coding
    for k in range(2, 11):
        plotting.pairplot(sample, group='k%d_class' % k)

    # Compare the k=2, k=3, and k=5 models with the original groupings
    k2_confusion_matrix(sample)
    k3_confusion_matrix(sample)
    k5_confusion_matrix(sample)

    # Feature-feature plots comparing predictions and truth
    plotting.compare_model(df=sample, model='k5_class', x='heartrate', y='height')
    plotting.compare_model(df=sample, model='k5_class', x='weight', y='height')
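# A plausible minimal sketch of the build_cluster_model helper used above, assuming
# scikit-learn's KMeans (the original helper is not shown in this snippet; the
# random_state parameter is an assumption added for reproducibility).
from sklearn.cluster import KMeans

def build_cluster_model(X, k, random_state=0):
    """Fit a k-means model with k clusters and return (cluster labels, fitted model)."""
    km = KMeans(n_clusters=k, random_state=random_state)
    y_pred = km.fit_predict(X)
    return y_pred, km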
plt.ioff()

#######################################
# cost function 1, gamma = 0.99
#######################################
gamma = .99

# Initialize the MarkovDecisionProcess object for method 1 of the reward
mdp1_a = MarkovDecisionProcess(transition=Transitions, reward=Reward_1,
                               method=1, gamma=gamma, epsilon=epsilon)

# value iteration with method 1
V1_a, error_v1_a = mdp1_a.value_iteration(maze.maze)
pi_v1_a = mdp1_a.best_policy(V1_a)
pl.heatmap(V1_a, pi_v1_a, maze.height, maze.width, 'VI', gamma, 1)
pl.plot_error(error_v1_a, 'VI', gamma, 1)

# policy iteration with method 1
error_p1_a, pi_p1_a, U1_a = mdp1_a.policy_iteration(maze.maze)
pl.heatmap(U1_a, pi_p1_a, maze.height, maze.width, 'PI', gamma, 1)
pl.plot_error(error_p1_a, 'PI', gamma, 1)

#######################################
# cost function 2, gamma = 0.99
#######################################
gamma = .99

# Initialize the MarkovDecisionProcess object for method 2 of the reward
mdp2_a = MarkovDecisionProcess(transition=Transitions, reward=Reward_2,
                               method=2, gamma=gamma,
def collect_entropy_policies(env, epochs, T, MODEL_DIR=''):
    video_dir = 'videos/' + args.exp_name

    direct = os.getcwd() + '/data/'
    experiment_directory = direct + args.exp_name
    print(experiment_directory)

    print(sys.argv)
    if not os.path.exists(experiment_directory):
        os.makedirs(experiment_directory)
    f = open(experiment_directory + '/args', 'w')
    f.write(' '.join(sys.argv))
    f.flush()

    indexes = [1, 5, 10, 15]
    states_visited_indexes = [0, 5, 10, 15]

    states_visited_cumulative = []
    states_visited_cumulative_baseline = []

    running_avg_p = np.zeros(shape=(tuple(ant_utils.num_states)))
    running_avg_p_xy = np.zeros(shape=(tuple(ant_utils.num_states_2d)))
    running_avg_ent = 0
    running_avg_ent_xy = 0

    running_avg_p_baseline = np.zeros(shape=(tuple(ant_utils.num_states)))
    running_avg_p_baseline_xy = np.zeros(shape=(tuple(ant_utils.num_states_2d)))
    running_avg_ent_baseline = 0
    running_avg_ent_baseline_xy = 0

    pct_visited = []
    pct_visited_baseline = []
    pct_visited_xy = []
    pct_visited_xy_baseline = []

    running_avg_entropies = []
    running_avg_entropies_xy = []
    running_avg_ps_xy = []
    avg_ps_xy = []

    running_avg_entropies_baseline = []
    running_avg_entropies_baseline_xy = []
    running_avg_ps_baseline_xy = []
    avg_ps_baseline_xy = []

    policies = []
    distributions = []
    initial_state = init_state(env)

    # Collect a buffer of random experience to estimate normalization factors.
    prebuf = ExperienceBuffer()
    env.reset()
    for t in range(10000):
        action = env.action_space.sample()
        obs, reward, done, _ = env.step(action)
        prebuf.store(get_state(env, obs))
        if done:
            env.reset()
            done = False

    prebuf.normalize()
    normalization_factors = prebuf.normalization_factors
    utils.log_statement(normalization_factors)
    prebuf = None
    if not args.gaussian:
        normalization_factors = []

    reward_fn = np.zeros(shape=(tuple(ant_utils.num_states)))

    for i in range(epochs):
        utils.log_statement("*** ------- EPOCH %d ------- ***" % i)

        # clear initial state if applicable.
        if not args.initial_state:
            initial_state = []
        else:
            utils.log_statement(initial_state)
        utils.log_statement("max reward: " + str(np.max(reward_fn)))

        logger_kwargs = setup_logger_kwargs("model%02d" % i,
                                            data_dir=experiment_directory)

        # Learn policy that maximizes current reward function.
        print("Learning new oracle...")
        seed = random.randint(1, 100000)
        sac = AntSoftActorCritic(lambda: gym.make(args.env), reward_fn=reward_fn,
                                 xid=i + 1, seed=seed, gamma=args.gamma,
                                 ac_kwargs=dict(hidden_sizes=[args.hid] * args.l),
                                 logger_kwargs=logger_kwargs,
                                 normalization_factors=normalization_factors)

        # The first policy is random
        if i == 0:
            sac.soft_actor_critic(epochs=0)
        else:
            sac.soft_actor_critic(epochs=args.episodes,
                                  initial_state=initial_state,
                                  start_steps=args.start_steps)
        policies.append(sac)

        p, _ = sac.test_agent(T, normalization_factors=normalization_factors)
        distributions.append(p)
        weights = utils.get_weights(distributions)

        epoch = 'epoch_%02d' % (i)
        if args.render:
            if i < 10:
                sac.record(T=args.record_steps, n=1,
                           video_dir=video_dir + '/baseline/' + epoch,
                           on_policy=False)
            sac.record(T=args.record_steps, n=1,
                       video_dir=video_dir + '/entropy/' + epoch,
                       on_policy=True)

        # Execute the cumulative average policy thus far.
        # Estimate distribution and entropy.
        print("Executing mixed policy...")
        average_p, average_p_xy, initial_state, states_visited, states_visited_xy = \
            execute_average_policy(env, policies, T, weights,
                                   reward_fn=reward_fn,
                                   norm=normalization_factors,
                                   initial_state=initial_state, n=args.n,
                                   render=args.render,
                                   video_dir=video_dir + '/mixed/' + epoch,
                                   epoch=i, record_steps=args.record_steps)

        print("Calculating maxEnt entropy...")
        round_entropy = entropy(average_p.ravel())
        round_entropy_xy = entropy(average_p_xy.ravel())

        # Update running averages for maxEnt.
        print("Updating maxEnt running averages...")
        running_avg_ent = running_avg_ent * (i) / float(i + 1) + round_entropy / float(i + 1)
        running_avg_ent_xy = running_avg_ent_xy * (i) / float(i + 1) + round_entropy_xy / float(i + 1)
        running_avg_p *= (i) / float(i + 1)
        running_avg_p += average_p / float(i + 1)
        running_avg_p_xy *= (i) / float(i + 1)
        running_avg_p_xy += average_p_xy / float(i + 1)

        # update reward function
        print("Update reward function")
        eps = 1 / np.sqrt(ant_utils.total_state_space)
        if args.cumulative:
            reward_fn = grad_ent(running_avg_p)
        else:
            reward_fn = 1.
            average_p += eps
            reward_fn /= average_p
        average_p = None  # delete big array

        # (save for plotting)
        running_avg_entropies.append(running_avg_ent)
        running_avg_entropies_xy.append(running_avg_ent_xy)
        if i in indexes:
            running_avg_ps_xy.append(np.copy(running_avg_p_xy))
            avg_ps_xy.append(np.copy(average_p_xy))

        print("Collecting baseline experience....")
        p_baseline, p_baseline_xy, states_visited_baseline, states_visited_xy_baseline = \
            sac.test_agent_random(T, normalization_factors=normalization_factors,
                                  n=args.n)

        plotting.states_visited_over_time(states_visited,
                                          states_visited_baseline, i)
        plotting.states_visited_over_time(states_visited_xy,
                                          states_visited_xy_baseline, i, ext='_xy')

        # save for cumulative plot.
        if i in states_visited_indexes:
            # average over a whole bunch of rollouts
            # slow: so only do this when needed.
            print("Averaging unique xy states visited....")
            states_visited_xy = compute_states_visited_xy(
                env, policies, norm=normalization_factors,
                T=T, n=args.n, N=args.avg_N)
            states_visited_xy_baseline = compute_states_visited_xy(
                env, policies, norm=normalization_factors,
                T=T, n=args.n, N=args.avg_N,
                initial_state=initial_state, baseline=True)
            states_visited_cumulative.append(states_visited_xy)
            states_visited_cumulative_baseline.append(states_visited_xy_baseline)

        print("Compute baseline entropy....")
        round_entropy_baseline = entropy(p_baseline.ravel())
        round_entropy_baseline_xy = entropy(p_baseline_xy.ravel())

        # Update baseline running averages.
        print("Updating baseline running averages...")
        running_avg_ent_baseline = running_avg_ent_baseline * (i) / float(i + 1) + round_entropy_baseline / float(i + 1)
        running_avg_ent_baseline_xy = running_avg_ent_baseline_xy * (i) / float(i + 1) + round_entropy_baseline_xy / float(i + 1)
        running_avg_p_baseline *= (i) / float(i + 1)
        running_avg_p_baseline += p_baseline / float(i + 1)
        running_avg_p_baseline_xy *= (i) / float(i + 1)
        running_avg_p_baseline_xy += p_baseline_xy / float(i + 1)

        p_baseline = None

        # (save for plotting)
        running_avg_entropies_baseline.append(running_avg_ent_baseline)
        running_avg_entropies_baseline_xy.append(running_avg_ent_baseline_xy)
        if i in indexes:
            running_avg_ps_baseline_xy.append(np.copy(running_avg_p_baseline_xy))
            avg_ps_baseline_xy.append(np.copy(p_baseline_xy))

        utils.log_statement(average_p_xy)
        utils.log_statement(p_baseline_xy)

        # Calculate percent of state space visited.
        pct = np.count_nonzero(running_avg_p) / float(running_avg_p.size)
        pct_visited.append(pct)
        pct_xy = np.count_nonzero(running_avg_p_xy) / float(running_avg_p_xy.size)
        pct_visited_xy.append(pct_xy)

        pct_baseline = np.count_nonzero(running_avg_p_baseline) / float(running_avg_p_baseline.size)
        pct_visited_baseline.append(pct_baseline)
        pct_xy_baseline = np.count_nonzero(running_avg_p_baseline_xy) / float(running_avg_p_baseline_xy.size)
        pct_visited_xy_baseline.append(pct_xy_baseline)

        # Print round summary.
        col_headers = ["", "baseline", "maxEnt"]
        col1 = ["round_entropy_xy", "running_avg_ent_xy",
                "round_entropy", "running_avg_ent",
                "% state space xy", "% total state space"]
        col2 = [round_entropy_baseline_xy, running_avg_ent_baseline_xy,
                round_entropy_baseline, running_avg_ent_baseline,
                pct_xy_baseline, pct_baseline]
        col3 = [round_entropy_xy, running_avg_ent_xy,
                round_entropy, running_avg_ent,
                pct_xy, pct]
        table = tabulate(np.transpose([col1, col2, col3]), col_headers,
                         tablefmt="fancy_grid", floatfmt=".4f")
        utils.log_statement(table)

        # Plot from round.
        plotting.heatmap(running_avg_p_xy, average_p_xy, i)
        plotting.heatmap1(running_avg_p_baseline_xy, i)

        if i == states_visited_indexes[3]:
            plotting.states_visited_over_time_multi(
                states_visited_cumulative,
                states_visited_cumulative_baseline,
                states_visited_indexes)

    # save final expert weights to use with the trained oracles.
    weights_file = experiment_directory + '/policy_weights'
    np.save(weights_file, weights)

    # cumulative plots.
    plotting.running_average_entropy(running_avg_entropies,
                                     running_avg_entropies_baseline)
    plotting.running_average_entropy(running_avg_entropies_xy,
                                     running_avg_entropies_baseline_xy, ext='_xy')
    plotting.heatmap4(running_avg_ps_xy, running_avg_ps_baseline_xy,
                      indexes, ext="cumulative")
    plotting.heatmap4(avg_ps_xy, avg_ps_baseline_xy, indexes, ext="epoch")
    plotting.percent_state_space_reached(pct_visited, pct_visited_baseline,
                                         ext='_total')
    plotting.percent_state_space_reached(pct_visited_xy,
                                         pct_visited_xy_baseline, ext="_xy")

    return policies
import numpy as np
from matplotlib import pyplot as plt

from generate_wavepacket import wavepacket
from plotting import heatmap
from potential import x, y
from config import GRID_SIZE, k, WAVELENGTH, k_step

NORM = 'ortho'

print(np.sum(np.absolute(wavepacket)**2))

fourier = np.fft.fftshift(np.fft.fft2(wavepacket, norm=NORM))
print(np.sum(np.absolute(fourier)**2))

inversed = np.fft.ifft2(fourier, norm=NORM).real
print(np.sum(np.absolute(inversed)**2))

k_step = 2 * np.pi / GRID_SIZE

fig, ax = plt.subplots()
heatmap(inversed, x * k_step, y * k_step, ax=ax, cbarlabel="s")
plt.show()
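# The three print statements above act as a norm-conservation check: with
# norm='ortho', NumPy's fft2/ifft2 are unitary, so the total probability
# sum(|psi|^2) should be preserved through the transform. A small self-contained
# sketch of the same check on a hypothetical random array (not part of the
# original script):
_demo = np.random.rand(8, 8) + 1j * np.random.rand(8, 8)
_demo_ft = np.fft.fftshift(np.fft.fft2(_demo, norm=NORM))
# fftshift only reorders entries, so the squared-magnitude sum is unchanged.
assert np.isclose(np.sum(np.abs(_demo)**2), np.sum(np.abs(_demo_ft)**2))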
# directory_to_save = "{}{}_potential_{}_x_{}_y_{}_{}_n_{}_cutoff_{}_grid_{}_wavelength_{}_timestep_{}_lasernum_{}_repeat_{}_retroreflective_{}/".format(
#     PLOT_SAVE_DIR_BASE, PATH, V_0_REL / NUMBER_OF_LASERS, WAVEPACKET_CENTER_X, WAVEPACKET_CENTER_Y, METHOD,
#     POTENTIAL_CHANGE_SPEED, CUTOFF, GRID_SIZE, WAVELENGTH, TIME_STEP_REL, NUMBER_OF_LASERS, REPEATS,
#     not NON_RETROREFLECTIVE)
#
# p = Path("{}otwell".format(directory_to_save))
#
# with p.open('rb') as f:
#     fsz = os.fstat(f.fileno()).st_size
#     out = np.load(f)
#     while f.tell() < fsz:
#         out = np.vstack((out, np.load(f)))
# print(out.reshape(out.shape[0] // 5, 5, 5))

wavef = np.load(
    "{}move_square_potential_0.1_x_929_y_1093_ssf_n_100_cutoff_800_grid_800_wavelength_80_timestep_0.2_lasernum_5_repeat_1_retroreflective_False/Modulation finished_wavefunction.npy"
    .format(PLOT_SAVE_DIR_BASE),
    allow_pickle=True)

fig, ax = plt.subplots()
# im_pot = heatmap(generate_potential(0) / v_rec, x / WAVELENGTH, y / WAVELENGTH,
#                  ax, cbarlabel="Potential / Recoil Energy", cmap=plt.cm.gray)
heatmap(np.abs(wavef)**2, x / WAVELENGTH, y / WAVELENGTH,
        cbarlabel="Probability Distribution")
annotate(fig, ax, "Probability distribution at the end of modulation",
         r"$x/\lambda$", r"$y/\lambda$")
# heatmap(np.abs(np.fft.fftshift(np.fft.fft2(wavef, norm=NORM))) ** 2, x, y)
# Put it all together and produce the final figure
variables = ['cconstitutive', 'q', 'p', 'pup']
fig, axesgrid = plt.subplots(nrows=2, ncols=2, figsize=(7, 5.0),
                             sharey=True, sharex=True)
ymin, ymax = 0.09, 20.0
axes = axesgrid.flatten()
boundarykwargs = dict(ylimmax=ymax, ylimmin=ymin, lw=7.5, color='w')
for counter, var in enumerate(variables):
    ax = axes[counter]
    cmap = cm.viridis if var != 'cconstitutive' else cm.viridis_r
    cmap.set_bad('darkmagenta', 1.)
    im, cbar = plotting.heatmap(dft.pivot(index='tauenv', columns='pienv', values=var),
                                imshow=True,
                                zlabel=evolimmune.varname_to_tex[var],
                                cmap=cmap, ax=ax, interpolation='bilinear')
    cbar.outline.set_linewidth(0.0)
    if var == 'cconstitutive':
        analysis.plot_interior_boundary(ax, phases['p'], **boundarykwargs)
        analysis.plot_interior_boundary(ax, phases['a'], **boundarykwargs)
    elif var in ['q', 'p']:
        analysis.plot_interior_boundary(ax, qpos, **boundarykwargs)
        if var == 'p':
            analysis.plot_interior_boundary(ax, phases['c'], **boundarykwargs)
    elif var == 'pup':
        analysis.plot_interior_boundary(ax, puppos, **boundarykwargs)
    ax.set_ylabel('')
    ax.set_xlabel('')
    ax.set_xlim(0.0, 1.0)
    ax.set_ylim(ymin, ymax)
def collect_entropy_policies(env, epochs, T, MODEL_DIR):
    video_dir = 'videos/' + args.exp_name

    reward_fn = np.zeros(shape=(tuple(base_utils.num_states)))
    online_reward_fn = np.zeros(shape=(tuple(base_utils.num_states)))

    # set initial state to base, motionless state.
    seed = []
    if args.env == "Pendulum-v0":
        env.env.state = [np.pi, 0]
        seed = env.env._get_obs()
    elif args.env == "MountainCarContinuous-v0":
        env.env.state = [-0.50, 0]
        seed = env.env.state

    running_avg_p = np.zeros(shape=(tuple(base_utils.num_states)))
    running_avg_ent = 0
    running_avg_entropies = []
    running_avg_ps = []

    running_avg_p_online = np.zeros(shape=(tuple(base_utils.num_states)))
    running_avg_ent_online = 0
    running_avg_entropies_online = []
    running_avg_ps_online = []

    running_avg_p_baseline = np.zeros(shape=(tuple(base_utils.num_states)))
    running_avg_ent_baseline = 0
    running_avg_entropies_baseline = []
    running_avg_ps_baseline = []

    online_average_ps = []

    policies = []
    initial_state = init_state(args.env)

    online_policies = []
    online_initial_state = init_state(args.env)

    for i in range(epochs):
        # Learn policy that maximizes current reward function.
        policy = Policy(env, args.gamma, args.lr,
                        base_utils.obs_dim, base_utils.action_dim)
        online_policy = Policy(env, args.gamma, args.lr,
                               base_utils.obs_dim, base_utils.action_dim)

        if i == 0:
            policy.learn_policy(reward_fn, episodes=0, train_steps=0)
            online_policy.learn_policy(online_reward_fn, episodes=0, train_steps=0)
        else:
            policy.learn_policy(reward_fn, initial_state=initial_state,
                                episodes=args.episodes,
                                train_steps=args.train_steps)
            online_policy.learn_policy(online_reward_fn,
                                       initial_state=online_initial_state,
                                       episodes=args.episodes,
                                       train_steps=args.train_steps)

        policies.append(policy)
        online_policies.append(online_policy)

        epoch = 'epoch_%02d/' % (i)

        a = 10  # average over this many rounds
        p_baseline = policy.execute_random(T, render=args.render,
                                           video_dir=video_dir + '/baseline/' + epoch)
        round_entropy_baseline = scipy.stats.entropy(p_baseline.flatten())
        for av in range(a - 1):
            next_p_baseline = policy.execute_random(T)
            p_baseline += next_p_baseline
            round_entropy_baseline += scipy.stats.entropy(next_p_baseline.flatten())
        p_baseline /= float(a)
        round_entropy_baseline /= float(a)  # running average of the entropy

        # Execute the cumulative average policy thus far.
        # Estimate distribution and entropy.
        average_p, round_avg_ent, initial_state = \
            curiosity.execute_average_policy(env, policies, T,
                                             initial_state=initial_state,
                                             avg_runs=a, render=False)
        online_average_p, online_round_avg_ent, online_initial_state = \
            curiosity.execute_average_policy(env, online_policies, T,
                                             initial_state=online_initial_state,
                                             avg_runs=a, render=False)

        # Get next distribution p by executing pi for T steps.
        # ALSO: Collect video of each policy
        p = policy.execute(T, initial_state=initial_state, render=args.render,
                           video_dir=video_dir + '/normal/' + epoch)
        p_online = online_policy.execute(T, initial_state=initial_state,
                                         render=args.render,
                                         video_dir=video_dir + '/online/' + epoch)

        # Force first round to be equal
        if i == 0:
            average_p = p_baseline
            round_avg_ent = round_entropy_baseline
            online_average_p = p_baseline
            online_round_avg_ent = round_entropy_baseline

        # If in pendulum, set velocity to 0 with some probability
        if args.env == "Pendulum-v0" and random.random() < 0.3:
            initial_state[1] = 0

        # goal: try online reward structure
        online_reward_fn = online_rewards(online_average_p, online_average_ps, epochs)
        online_average_ps.append(online_average_p)
        reward_fn = grad_ent(average_p)

        # Update experimental running averages.
        running_avg_ent = running_avg_ent * (i) / float(i + 1) + round_avg_ent / float(i + 1)
        running_avg_p = running_avg_p * (i) / float(i + 1) + average_p / float(i + 1)
        running_avg_entropies.append(running_avg_ent)
        running_avg_ps.append(running_avg_p)

        # Update online running averages.
        running_avg_ent_online = running_avg_ent_online * (i) / float(i + 1) + online_round_avg_ent / float(i + 1)
        running_avg_p_online = running_avg_p_online * (i) / float(i + 1) + online_average_p / float(i + 1)
        running_avg_entropies_online.append(running_avg_ent_online)
        running_avg_ps_online.append(running_avg_p_online)

        # Update baseline running averages.
        running_avg_ent_baseline = running_avg_ent_baseline * (i) / float(i + 1) + round_entropy_baseline / float(i + 1)
        running_avg_p_baseline = running_avg_p_baseline * (i) / float(i + 1) + p_baseline / float(i + 1)
        running_avg_entropies_baseline.append(running_avg_ent_baseline)
        running_avg_ps_baseline.append(running_avg_p_baseline)

        print("--------------------------------")
        print("p=")
        print(p)
        print("average_p =")
        print(average_p)
        print("online_average_p")
        print(online_average_p)
        print("---------------------")
        print("round_avg_ent[%d] = %f" % (i, round_avg_ent))
        print("running_avg_ent = %s" % running_avg_ent)
        print("..........")
        print("online_round_avg_ent[%d] = %f" % (i, online_round_avg_ent))
        print("running_avg_ent_online = %s" % running_avg_ent_online)
        print("..........")
        print("round_entropy_baseline[%d] = %f" % (i, round_entropy_baseline))
        print("running_avg_ent_baseline = %s" % running_avg_ent_baseline)
        print("--------------------------------")

        plotting.heatmap(running_avg_p, average_p, i, args.env)

    plotting.running_average_entropy(running_avg_entropies,
                                     running_avg_entropies_baseline)
    plotting.running_average_entropy3(running_avg_entropies,
                                      running_avg_entropies_baseline,
                                      running_avg_entropies_online)

    indexes = [1, 2, 5, 10]
    plotting.heatmap4(running_avg_ps, running_avg_ps_baseline, indexes)
    plotting.heatmap3x4(running_avg_ps, running_avg_ps_online,
                        running_avg_ps_baseline, indexes)

    return policies
def collect_entropy_policies(env, epochs, T, MODEL_DIR):
    reward_fn = np.zeros(shape=(tuple(utils.num_states)))

    # set initial state to base, motionless state.
    seed = []
    if args.env == "Pendulum-v0":
        env.env.state = [np.pi, 0]
        seed = env.env._get_obs()
    elif args.env == "MountainCarContinuous-v0":
        env.env.state = [-0.50, 0]
        seed = env.env.state
    reward_fn[tuple(utils.discretize_state(seed))] = 1

    running_avg_p = np.zeros(shape=(tuple(utils.num_states)))
    running_avg_ent = 0
    window_running_avg_p = np.zeros(shape=(tuple(utils.num_states)))
    window_running_avg_ent = 0

    running_avg_p_baseline = np.zeros(shape=(tuple(utils.num_states)))
    running_avg_ent_baseline = 0
    window_running_avg_p_baseline = np.zeros(shape=(tuple(utils.num_states)))
    window_running_avg_ent_baseline = 0

    baseline_entropies = []
    baseline_ps = []
    entropies = []
    ps = []

    average_entropies = []
    average_ps = []

    running_avg_entropies = []
    running_avg_ps = []
    running_avg_entropies_baseline = []
    running_avg_ps_baseline = []

    window_running_avg_ents = []
    window_running_avg_ps = []
    window_running_avg_ents_baseline = []
    window_running_avg_ps_baseline = []

    policies = []
    initial_state = init_state(args.env)

    for i in range(epochs):
        # Learn policy that maximizes current reward function.
        policy = Policy(env, args.gamma, args.lr, utils.obs_dim, utils.action_dim)
        policy.learn_policy(reward_fn, initial_state, args.episodes, args.train_steps)
        policies.append(policy)

        if args.save_models:
            policy.save(MODEL_DIR + 'model_' + str(i) + '.pt')

        # Get next distribution p by executing pi for T steps.
        p_videos = 'cmp_videos/%sp_%d/' % (MODEL_DIR, i)
        p = policy.execute(T, initial_state, render=args.record, video_dir=p_videos)

        a = 10  # average over this many rounds
        baseline_videos = 'cmp_videos/%sbaseline_%d/' % (MODEL_DIR, i)  # note that MODEL_DIR has trailing slash
        entropy_videos = 'cmp_videos/%sentropy_%d/' % (MODEL_DIR, i)
        p_baseline = policy.execute_random(T, render=False,
                                           video_dir=baseline_videos)  # args.episodes?
        round_entropy_baseline = scipy.stats.entropy(p_baseline.flatten())
        for av in range(a - 1):
            next_p_baseline = policy.execute_random(T)
            p_baseline += next_p_baseline
            # print(scipy.stats.entropy(next_p_baseline.flatten()))
            round_entropy_baseline += scipy.stats.entropy(next_p_baseline.flatten())
        p_baseline /= float(a)
        round_entropy_baseline /= float(a)  # running average of the entropy
        # note: the entropy of p_baseline is not the same as the computed avg entropy
        # print("baseline compare:")
        # print(round_entropy_baseline)                    # running average
        # print(scipy.stats.entropy(p_baseline.flatten())) # entropy of final

        # reward_fn = grad_ent(p)

        round_entropy = scipy.stats.entropy(p.flatten())
        entropies.append(round_entropy)
        baseline_entropies.append(round_entropy_baseline)
        ps.append(p)
        baseline_ps.append(p_baseline)

        # Execute the cumulative average policy thus far.
        # Estimate distribution and entropy.
        average_p, round_avg_ent, initial_state = \
            curiosity.execute_average_policy(env, policies, T,
                                             initial_state=initial_state,
                                             avg_runs=a, render=False,
                                             video_dir=entropy_videos)

        # If in pendulum, set velocity to 0 with some probability
        if args.env == "Pendulum-v0" and random.random() < 0.3:
            initial_state[1] = 0

        reward_fn = grad_ent(average_p)

        print(average_p)
        print("! -------- !")
        print(reward_fn)

        average_ps.append(average_p)
        average_entropies.append(round_avg_ent)

        # Update running average.
        window = 5
        if (i < window):
            # add normally
            window_running_avg_ent = window_running_avg_ent * (i) / float(i + 1) + round_avg_ent / float(i + 1)
            window_running_avg_p = window_running_avg_p * (i) / float(i + 1) + average_p / float(i + 1)
            window_running_avg_ent_baseline = window_running_avg_ent_baseline * (i) / float(i + 1) + round_entropy_baseline / float(i + 1)
            window_running_avg_p_baseline = window_running_avg_p_baseline * (i) / float(i + 1) + p_baseline / float(i + 1)
        else:
            window_running_avg_ent = window_running_avg_ent + round_avg_ent / float(window) - average_entropies[i - 5] / float(window)
            window_running_avg_p = window_running_avg_p + average_p / float(window) - average_ps[i - 5] / float(window)
            window_running_avg_ent_baseline = window_running_avg_ent_baseline + round_entropy_baseline / float(window) - baseline_entropies[i - 5] / float(window)
            window_running_avg_p_baseline = window_running_avg_p_baseline + p_baseline / float(window) - baseline_ps[i - 5] / float(window)

        running_avg_ent = running_avg_ent * (i) / float(i + 1) + round_avg_ent / float(i + 1)
        running_avg_p = running_avg_p * (i) / float(i + 1) + average_p / float(i + 1)
        running_avg_entropies.append(running_avg_ent)
        running_avg_ps.append(running_avg_p)

        # Update baseline running averages.
        running_avg_ent_baseline = running_avg_ent_baseline * (i) / float(i + 1) + round_entropy_baseline / float(i + 1)
        running_avg_p_baseline = running_avg_p_baseline * (i) / float(i + 1) + p_baseline / float(i + 1)
        running_avg_entropies_baseline.append(running_avg_ent_baseline)
        running_avg_ps_baseline.append(running_avg_p_baseline)

        window_running_avg_ents.append(window_running_avg_ent)
        window_running_avg_ps.append(window_running_avg_p)
        window_running_avg_ents_baseline.append(window_running_avg_ent_baseline)
        window_running_avg_ps_baseline.append(window_running_avg_p_baseline)

        print("p=")
        print(p)
        print("..........")
        print("round_entropy = %f" % (round_entropy))
        print("---------------------")
        print("average_p =")
        print(average_p)
        print("..........")
        print("round_avg_ent[%d] = %f" % (i, round_avg_ent))
        print("running_avg_ent = %s" % running_avg_ent)
        print("window_running_avg_ent = %s" % window_running_avg_ent)
        print("..........")
        print("round_entropy_baseline[%d] = %f" % (i, round_entropy_baseline))
        print("running_avg_ent_baseline = %s" % running_avg_ent_baseline)
        print("window_running_avg_ent_baseline = %s" % window_running_avg_ent_baseline)
        # print("running_avg_p_baseline =")
        # print(running_avg_p_baseline)
        print("----------------------")

        plotting.heatmap(running_avg_p, average_p, i)

    # plotting.smear_lines(running_avg_ps, running_avg_ps_baseline)
    plotting.running_average_entropy(running_avg_entropies,
                                     running_avg_entropies_baseline)
    plotting.running_average_entropy_window(window_running_avg_ents,
                                            window_running_avg_ents_baseline,
                                            window)
    # plotting.difference_heatmap(running_avg_ps, running_avg_ps_baseline)

    indexes = []
    print('which indexes?')
    for i in range(4):
        idx = input("index :")
        indexes.append(int(idx))
    plotting.heatmap4(running_avg_ps, running_avg_ps_baseline, indexes)

    return policies