def plot_roc(self):
    plotter = Plotter()
    plotter.set_title('Receiver Operating Characteristic')
    # Assumed standard ROC axis labels; the original stub passed an empty string.
    plotter.set_axis_labels('False Positive Rate', 'True Positive Rate')
    plotter.set_output_filename('roc.png')  # per TODO: save the image as "roc.png"
    # TODO: compute and load the ROC curve data (e.g. via plotter.load) before saving.
    plotter.close()
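# A minimal sketch of the (FPR, TPR) points such a plot needs, assuming binary
# labels and real-valued scores with both classes present; this helper and its
# argument names are hypothetical, not part of the module.
def roc_points(labels, scores, thresholds):
    '''Return (FPR, TPR) pairs, one per threshold.'''
    pos = sum(labels)
    neg = len(labels) - pos
    points = []
    for t in thresholds:
        # Count predictions at or above the threshold against the true labels.
        tp = sum(1 for l, s in zip(labels, scores) if s >= t and l == 1)
        fp = sum(1 for l, s in zip(labels, scores) if s >= t and l == 0)
        points.append((fp / neg, tp / pos))
    return points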
# g.modeller.gen_least_squares(x, y)
# g.analyzer.f_dist(LinearModel, 100)
# NOTE: assumes image_manager is already defined at this point
# (this call appears to be the tail of gen_plot()).
image_manager.scale(g.files['plot'], g.files['plot'], g.image_height)

if len(sys.argv) != 2:
    print(f'Usage: {sys.argv[0]} <debug_level>')
    print('debug_level -->')
    print('\t0 - Suppress all messages.')
    print('\t1 - Show errors and warnings.')
    print('\t2 - Show log of relevant actions.')
    print('\t3 - Show all messages.')
    quit()

g.debug_level = int(sys.argv[1])
plotter = Plotter()
init_globals()
g.output_file_formatter.format_folder('imgs')
image_manager = ImageManager()
gen_plot()
#sg.theme('Dark Red 5')
g.gui.standard()
g.gui.compile()
g.gui.loop()
g.gui.close()
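# Usage sketch, per the argv check above (the script name is whatever this
# module is saved as):
#
#   python <script>.py 2
#
# runs with debug_level 2, logging relevant actions.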
def main():
    import random
    import gym_micropolis
    import game_of_life

    args = get_args()
    args.log_dir = args.save_dir + '/logs'
    assert args.algo in ['a2c', 'ppo', 'acktr']
    if args.recurrent_policy:
        assert args.algo in ['a2c', 'ppo'], \
            'Recurrent policy is not implemented for ACKTR'

    num_updates = int(args.num_frames) // args.num_steps // args.num_processes

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
    graph_name = args.save_dir.split('trained_models/')[1].replace('/', ' ')

    actor_critic = False
    agent = False
    past_steps = 0
    try:
        os.makedirs(args.log_dir)
    except OSError:
        files = glob.glob(os.path.join(args.log_dir, '*.monitor.csv'))
        for f in files:
            if args.overwrite:
                os.remove(f)
            else:
                pass

    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")

    if args.vis:
        from visdom import Visdom
        viz = Visdom(port=args.port)
        win = None
        win_eval = None
    if 'GameOfLife' in args.env_name:
        print('env name: {}'.format(args.env_name))
        num_actions = 1
    envs = make_vec_envs(args.env_name, args.seed, args.num_processes,
                         args.gamma, args.log_dir, args.add_timestep, device,
                         False, None, args=args)

    if isinstance(envs.observation_space, gym.spaces.Discrete):
        num_inputs = envs.observation_space.n
    elif isinstance(envs.observation_space, gym.spaces.Box):
        if len(envs.observation_space.shape) == 3:
            in_w = envs.observation_space.shape[1]
            in_h = envs.observation_space.shape[2]
        else:
            in_w = 1
            in_h = 1
        num_inputs = envs.observation_space.shape[0]
    if isinstance(envs.action_space, gym.spaces.Discrete):
        out_w = 1
        out_h = 1
        if 'Micropolis' in args.env_name:  # otherwise it's set
            if args.power_puzzle:
                num_actions = 1
            else:
                num_actions = 19  # TODO: have this already from env
        elif 'GameOfLife' in args.env_name:
            num_actions = 1
        else:
            num_actions = envs.action_space.n
    elif isinstance(envs.action_space, gym.spaces.Box):
        if len(envs.action_space.shape) == 3:
            out_w = envs.action_space.shape[1]
            out_h = envs.action_space.shape[2]
        elif len(envs.action_space.shape) == 1:
            out_w = 1
            out_h = 1
        num_actions = envs.action_space.shape[-1]
    print('num actions {}'.format(num_actions))

    if args.auto_expand:
        args.n_recs -= 1
    actor_critic = Policy(envs.observation_space.shape, envs.action_space,
                          base_kwargs={'map_width': args.map_width,
                                       'num_actions': num_actions,
                                       'recurrent': args.recurrent_policy,
                                       'in_w': in_w, 'in_h': in_h,
                                       'num_inputs': num_inputs,
                                       'out_w': out_w, 'out_h': out_h},
                          curiosity=args.curiosity, algo=args.algo,
                          model=args.model, args=args)
    if args.auto_expand:
        args.n_recs += 1

    evaluator = None

    if not agent:
        agent = init_agent(actor_critic, args)
    #saved_model = os.path.join(args.save_dir, args.env_name + '.pt')
    if args.load_dir:
        saved_model = os.path.join(args.load_dir, args.env_name + '.tar')
    else:
        saved_model = os.path.join(args.save_dir, args.env_name + '.tar')
    vec_norm = get_vec_normalize(envs)
    if os.path.exists(saved_model) and not args.overwrite:
        checkpoint = torch.load(saved_model)
        saved_args = checkpoint['args']
        actor_critic.load_state_dict(checkpoint['model_state_dict'])
        #for o, l in zip(agent.optimizer.state_dict, checkpoint['optimizer_state_dict']):
        #    print(o, l)
        #print(agent.optimizer.state_dict()['param_groups'])
        #print('\n')
        #print(checkpoint['model_state_dict'])
        actor_critic.to(device)
        actor_critic.cuda()
        #agent = init_agent(actor_critic, saved_args)
        agent.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        if args.auto_expand:
            if not args.n_recs - saved_args.n_recs == 1:
                print('can expand by 1 rec only from saved model, not {}'.format(
                    args.n_recs - saved_args.n_recs))
                raise Exception
            actor_critic.base.auto_expand()
            print('expanded net: \n{}'.format(actor_critic.base))
        past_steps = checkpoint['past_steps']
        ob_rms = checkpoint['ob_rms']
        past_steps = next(iter(agent.optimizer.state_dict()['state'].values()))['step']
        print('Resuming from step {}'.format(past_steps))
        #print(type(next(iter((torch.load(saved_model))))))
        #actor_critic, ob_rms = \
        #        torch.load(saved_model)
        #agent = \
        #        torch.load(os.path.join(args.save_dir, args.env_name + '_agent.pt'))
        #if not agent.optimizer.state_dict()['state'].values():
        #    past_steps = 0
        #else:
        #    raise Exception
        if vec_norm is not None:
            vec_norm.eval()
            vec_norm.ob_rms = ob_rms
        saved_args.num_frames = args.num_frames
        saved_args.vis_interval = args.vis_interval
        saved_args.eval_interval = args.eval_interval
        saved_args.overwrite = args.overwrite
        saved_args.n_recs = args.n_recs
        saved_args.intra_shr = args.intra_shr
        saved_args.inter_shr = args.inter_shr
        saved_args.map_width = args.map_width
        saved_args.render = args.render
        saved_args.print_map = args.print_map
        saved_args.load_dir = args.load_dir
        saved_args.experiment_name = args.experiment_name
        saved_args.log_dir = args.log_dir
        saved_args.save_dir = args.save_dir
        args = saved_args
    actor_critic.to(device)

    if 'LSTM' in args.model:
        recurrent_hidden_state_size = actor_critic.base.get_recurrent_state_size()
    else:
        recurrent_hidden_state_size = actor_critic.recurrent_hidden_state_size
    if args.curiosity:
        rollouts = CuriosityRolloutStorage(args.num_steps, args.num_processes,
                                           envs.observation_space.shape,
                                           envs.action_space,
                                           recurrent_hidden_state_size,
                                           actor_critic.base.feature_state_size(),
                                           args=args)
    else:
        rollouts = RolloutStorage(args.num_steps, args.num_processes,
                                  envs.observation_space.shape,
                                  envs.action_space,
                                  recurrent_hidden_state_size, args=args)

    obs = envs.reset()
    rollouts.obs[0].copy_(obs)
    rollouts.to(device)

    episode_rewards = deque(maxlen=10)

    start = time.time()
    model = actor_critic.base
    reset_eval = False
    plotter = None
    if args.model == 'FractalNet' or args.model == 'fractal':
        n_cols = model.n_cols
        if args.rule == 'wide1' and args.n_recs > 3:
            col_step = 3
        else:
            col_step = 1
    else:
        n_cols = 0
        col_step = 1
    for j in range(past_steps, num_updates):
        if reset_eval:
            print('post eval reset')
            obs = envs.reset()
            rollouts.obs[0].copy_(obs)
            rollouts.to(device)
            reset_eval = False
        #if np.random.rand(1) < 0.1:
        #    envs.venv.venv.remotes[1].send(('setRewardWeights', None))
        if args.model == 'FractalNet' and args.drop_path:
            #if args.intra_shr and args.inter_shr:
            #    n_recs = np.randint
            #    model.set_n_recs()
            model.set_drop_path()
        if args.model == 'fixed' and model.RAND:
            model.num_recursions = random.randint(1, model.map_width * 2)
        player_act = None
        for step in range(args.num_steps):
            # Sample actions
            with torch.no_grad():
                if args.render:
                    if args.num_processes == 1:
                        if not ('Micropolis' in args.env_name or 'GameOfLife' in args.env_name):
                            envs.venv.venv.render()
                        else:
                            pass
                    else:
                        if not ('Micropolis' in args.env_name or 'GameOfLife' in args.env_name):
                            envs.render()
                            envs.venv.venv.render()
                        else:
                            pass
                            #envs.venv.venv.remotes[0].send(('render', None))
                            #envs.venv.venv.remotes[0].recv()
                value, action, action_log_probs, recurrent_hidden_states = actor_critic.act(
                    rollouts.obs[step], rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step],
                    player_act=player_act,
                    icm_enabled=args.curiosity,
                    deterministic=False)

            # Observe reward and next obs
            obs, reward, done, infos = envs.step(action)

            player_act = None
            if args.render:
                if infos[0]:
                    if 'player_move' in infos[0].keys():
                        player_act = infos[0]['player_move']

            if args.curiosity:
                # run icm
                # NOTE: action_bin is assumed to be bound when args.curiosity is
                # set; it is not defined in this scope (apparent upstream bug).
                with torch.no_grad():
                    feature_state, feature_state_pred, action_dist_pred = actor_critic.icm_act(
                        (rollouts.obs[step], obs, action_bin))
                intrinsic_reward = args.eta * ((feature_state - feature_state_pred).pow(2)).sum() / 2.
                if args.no_reward:
                    reward = 0
                reward += intrinsic_reward.cpu()
            for info in infos:
                if 'episode' in info.keys():
                    episode_rewards.append(info['episode']['r'])

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            if args.curiosity:
                rollouts.insert(obs, recurrent_hidden_states, action,
                                action_log_probs, value, reward, masks,
                                feature_state, feature_state_pred,
                                action_bin, action_dist_pred)
            else:
                rollouts.insert(obs, recurrent_hidden_states, action,
                                action_log_probs, value, reward, masks)

        with torch.no_grad():
            next_value = actor_critic.get_value(rollouts.obs[-1],
                                                rollouts.recurrent_hidden_states[-1],
                                                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma, args.tau)

        if args.curiosity:
            value_loss, action_loss, dist_entropy, fwd_loss, inv_loss = agent.update(rollouts)
        else:
            value_loss, action_loss, dist_entropy = agent.update(rollouts)

        rollouts.after_update()

        total_num_steps = (j + 1) * args.num_processes * args.num_steps
        if not dist_entropy:
            dist_entropy = 0
        if j % args.log_interval == 0 and len(episode_rewards) > 1:
            end = time.time()
            print("Updates {}, num timesteps {}, FPS {} \n Last {} training episodes: "
                  "mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}\n "
                  "dist entropy {:.1f}, val/act loss {:.1f}/{:.1f},".format(
                      j, total_num_steps,
                      int((total_num_steps - past_steps * args.num_processes * args.num_steps) / (end - start)),
                      len(episode_rewards),
                      np.mean(episode_rewards),
                      np.median(episode_rewards),
                      np.min(episode_rewards),
                      np.max(episode_rewards),
                      dist_entropy, value_loss, action_loss))
            if args.curiosity:
                print("fwd/inv icm loss {:.1f}/{:.1f}\n".format(fwd_loss, inv_loss))

        if (args.eval_interval is not None and len(episode_rewards) > 1
                and j % args.eval_interval == 0):
            if evaluator is None:
                evaluator = Evaluator(args, actor_critic, device,
                                      envs=envs, vec_norm=vec_norm)
                model = evaluator.actor_critic.base

            # making sure the evaluator plots the '-1'st column (the overall net)
            col_idx = [-1, *range(0, n_cols, col_step)]
            for i in col_idx:
                evaluator.evaluate(column=i)
            #num_eval_frames = (args.num_frames // (args.num_steps * args.eval_interval * args.num_processes)) * args.num_processes * args.max_step
            if args.vis:  # and j % args.vis_interval == 0:
                try:
                    # Sometimes monitor doesn't properly flush the outputs
                    win_eval = evaluator.plotter.visdom_plot(
                        viz, win_eval, evaluator.eval_log_dir, graph_name,
                        args.algo, args.num_frames, n_graphs=col_idx)
                except IOError:
                    pass
            #elif args.model == 'fixed' and model.RAND:
            #    for i in model.eval_recs:
            #        evaluator.evaluate(num_recursions=i)
            #    win_eval = visdom_plot(viz, win_eval, evaluator.eval_log_dir, graph_name,
            #                           args.algo, args.num_frames, n_graphs=model.eval_recs)
            #else:
            #    evaluator.evaluate(column=-1)
            #    win_eval = visdom_plot(viz, win_eval, evaluator.eval_log_dir, graph_name,
            #                           args.algo, args.num_frames)
            reset_eval = True

        if j % args.save_interval == 0 and args.save_dir != "":
            save_path = os.path.join(args.save_dir)
            try:
                os.makedirs(save_path)
            except OSError:
                pass

            # A really ugly way to save a model to CPU
            save_model = actor_critic
            ob_rms = getattr(get_vec_normalize(envs), 'ob_rms', None)
            save_model = copy.deepcopy(actor_critic)
            save_agent = copy.deepcopy(agent)
            if args.cuda:
                save_model.cpu()
            optim_save = save_agent.optimizer.state_dict()

            # experimental:
            torch.save({
                'past_steps': next(iter(agent.optimizer.state_dict()['state'].values()))['step'],
                'model_state_dict': save_model.state_dict(),
                'optimizer_state_dict': optim_save,
                'ob_rms': ob_rms,
                'args': args,
            }, os.path.join(save_path, args.env_name + ".tar"))
            #save_model = [save_model,
            #              getattr(get_vec_normalize(envs), 'ob_rms', None)]
            #torch.save(save_model, os.path.join(save_path, args.env_name + ".pt"))
            #save_agent = copy.deepcopy(agent)
            #torch.save(save_agent, os.path.join(save_path, args.env_name + '_agent.pt'))
            #torch.save(actor_critic.state_dict(), os.path.join(save_path, args.env_name + "_weights.pt"))

        if args.vis and j % args.vis_interval == 0:
            if plotter is None:
                plotter = Plotter(n_cols, args.log_dir, args.num_processes)
            try:
                # Sometimes monitor doesn't properly flush the outputs
                win = plotter.visdom_plot(viz, win, args.log_dir, graph_name,
                                          args.algo, args.num_frames)
            except IOError:
                pass
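# The curiosity bonus used in the rollout loop above is the forward-model
# error: intrinsic_reward = eta * ||feature_state - feature_state_pred||^2 / 2.
# For example, eta = 0.1 and a squared feature error of 4.0 give a bonus of 0.2.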
class Evaluator(object):
    ''' Manages environments used for evaluation during main training loop.'''
    def __init__(self, args, actor_critic, device, envs=None, vec_norm=None,
                 frozen=False):
        ''' frozen: we are not in the main training loop, but evaluating a
        frozen model separately'''
        if frozen:
            self.win_eval = None
        past_steps = args.past_steps
        self.frozen = frozen
        #eval_args.render = True
        self.device = device
        #if args.model == 'fractal':
        #    for i in range(-1, args.n_recs):
        #        eval_log_dir = args.log_dir + "_eval_col_{}".format(i)
        #        try:
        #            os.makedirs(eval_log_dir)
        #        except OSError:
        #            files = glob.glob(os.path.join(eval_log_dir, '*.monitor.csv'))
        #            for f in files:
        #                os.remove(f)
        #        setattr(self, 'eval_log_dir_col_{}'.format(i), eval_log_dir)
        if frozen:
            if 'GameOfLife' in args.env_name:
                self.eval_log_dir = args.log_dir + "/eval_{}-steps_w{}_{}rec_{}s_{}pl".format(
                    past_steps, args.map_width, args.n_recs, args.max_step,
                    args.prob_life, '.1f')
            else:
                self.eval_log_dir = args.log_dir + "/eval_{}-steps_w{}_{}rec_{}s".format(
                    past_steps, args.map_width, args.n_recs, args.max_step, '.1f')
            merge_col_logs = True
        else:
            self.eval_log_dir = args.log_dir + "_eval"
            merge_col_logs = False
        try:
            os.makedirs(self.eval_log_dir)
        except OSError:
            files = glob.glob(os.path.join(self.eval_log_dir, '*.monitor.csv'))
            files += glob.glob(os.path.join(self.eval_log_dir, '*_eval.csv'))
            if args.overwrite:
                for f in files:
                    os.remove(f)
            elif files:
                merge_col_logs = True

        self.args = args
        self.actor_critic = actor_critic
        self.num_eval_processes = args.num_processes
        if envs:
            self.eval_envs = envs
            self.vec_norm = vec_norm
        else:
            #print('making envs in Evaluator: ', self.args.env_name,
            #      self.args.seed + self.num_eval_processes, self.num_eval_processes,
            #      self.args.gamma, self.eval_log_dir, self.args.add_timestep,
            #      self.device, True, self.args)
            self.eval_envs = make_vec_envs(
                self.args.env_name, self.args.seed + self.num_eval_processes,
                self.num_eval_processes, self.args.gamma, self.eval_log_dir,
                self.args.add_timestep, self.device, False, args=self.args)
            self.vec_norm = get_vec_normalize(self.eval_envs)

        if self.vec_norm is not None:
            self.vec_norm.eval()
            self.vec_norm.ob_rms = get_vec_normalize(self.eval_envs).ob_rms
        self.tstart = time.time()
        fieldnames = ['r', 'l', 't']
        model = actor_critic.base
        if args.model == 'FractalNet' or args.model == 'fractal':
            n_cols = model.n_cols
        else:
            n_cols = 0
        self.plotter = Plotter(n_cols, self.eval_log_dir, self.num_eval_processes,
                               max_steps=self.args.max_step)
        eval_cols = range(-1, n_cols)
        if args.model == 'fixed' and model.RAND:
            eval_cols = model.eval_recs
        if eval_cols is not None:
            for i in eval_cols:
                log_file = '{}/col_{}_eval.csv'.format(self.eval_log_dir, i)
                if merge_col_logs and os.path.exists(log_file):
                    merge_col_log = True
                else:
                    merge_col_log = False
                if merge_col_log:
                    if len(eval_cols) > 1 and i == eval_cols[-2] and self.args.auto_expand:
                        # problem if we saved model after auto-expanding,
                        # without first evaluating!
                        # for the newly added column, we duplicate the last col.'s records
                        new_col_log_file = '{}/col_{}_eval.csv'.format(
                            self.eval_log_dir, i + 1)
                        copyfile(log_file, new_col_log_file)
                    old_log = '{}_old'.format(log_file)
                    os.rename(log_file, old_log)
                log_file_col = open(log_file, mode='w')
                setattr(self, 'log_file_col_{}'.format(i), log_file_col)
                writer_col = csv.DictWriter(log_file_col, fieldnames=fieldnames)
                setattr(self, 'writer_col_{}'.format(i), writer_col)
                if merge_col_log:
                    with open(old_log, newline='') as old:
                        reader = csv.DictReader(old, fieldnames=('r', 'l', 't'))
                        h = 0
                        try:
                            # in case of null bytes resulting from interrupted logging
                            for row in reader:
                                if h > 1:
                                    row['t'] = 0.0001 * h  # HACK: false times for past logs to maintain order
                                    writer_col.writerow(row)
                                h += 1
                        except csv.Error:
                            h_i = 0
                            for row in reader:
                                if h_i > h:
                                    row['t'] = 0.0001 * h_i  # HACK: false times for past logs to maintain order
                                    writer_col.writerow(row)
                                h_i += 1
                    os.remove(old_log)
                else:
                    writer_col.writeheader()
                    log_file_col.flush()

    def evaluate(self, column=None, num_recursions=None):
        model = self.actor_critic.base
        if num_recursions is not None:
            model.num_recursions = num_recursions
        if column is not None and self.args.model == 'FractalNet':
            model.set_active_column(column)
        self.actor_critic.visualize_net()
        eval_episode_rewards = []
        obs = self.eval_envs.reset()
        if 'LSTM' in self.args.model:
            recurrent_hidden_state_size = self.actor_critic.base.get_recurrent_state_size()
            eval_recurrent_hidden_states = torch.zeros(
                2, self.num_eval_processes, *recurrent_hidden_state_size,
                device=self.device)
        else:
            recurrent_hidden_state_size = self.actor_critic.recurrent_hidden_state_size
            eval_recurrent_hidden_states = torch.zeros(
                self.num_eval_processes, recurrent_hidden_state_size,
                device=self.device)
        eval_masks = torch.zeros(self.num_eval_processes, 1, device=self.device)
        i = 0
        while len(eval_episode_rewards) < self.num_eval_processes:
        #while i < self.args.max_step:
            with torch.no_grad():
                _, action, eval_recurrent_hidden_states, _ = self.actor_critic.act(
                    obs, eval_recurrent_hidden_states, eval_masks,
                    deterministic=True)

            # Observe reward and next obs
            obs, reward, done, infos = self.eval_envs.step(action)
            if self.args.render:
                if self.args.num_processes == 1:
                    if not ('Micropolis' in self.args.env_name or 'GameOfLife' in self.args.env_name):
                        self.eval_envs.venv.venv.render()
                    else:
                        pass
                        #self.eval_envs.venv.venv.envs[0].render()
                else:
                    if not ('Micropolis' in self.args.env_name or 'GameOfLife' in self.args.env_name):
                        self.eval_envs.venv.venv.render()
                    else:
                        pass
                        #self.eval_envs.venv.venv.remotes[0].send(('render', None))
                        #self.eval_envs.venv.venv.remotes[0].recv()
            eval_masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                            for done_ in done])
            for info in infos:
                if 'episode' in info.keys():
                    eval_episode_rewards.append(info['episode']['r'])
            i += 1
        self.eval_envs.reset()
        #self.eval_envs.close()
        eprew = np.mean(eval_episode_rewards)
        args = self.args
        if not self.frozen:
            # note: eval interval given in terms of updates consisting of num_steps each
            n_frame = args.num_steps * args.num_processes * args.eval_interval  # relative to training session
        else:
            n_frame = args.max_step * args.num_processes
        if num_recursions is not None:
            column = num_recursions
        if column is not None:
            print(" Column {}".format(column))
            log_info = {'r': round(eprew, 6), 'l': n_frame,
                        't': round(time.time() - self.tstart, 6)}
            writer, log_file = getattr(self, 'writer_col_{}'.format(column)), \
                getattr(self, 'log_file_col_{}'.format(column))
            writer.writerow(log_info)
            log_file.flush()
        print(" Evaluation using {} episodes: mean reward {:.5f}\n".format(
            len(eval_episode_rewards), eprew))
        if self.frozen:
            if args.vis:
                from visdom import Visdom
                viz = Visdom(port=args.port)
                self.win_eval = self.plotter.bar_plot(
                    viz, self.win_eval, self.eval_log_dir,
                    self.eval_log_dir.split('/')[-1], args.algo,
                    args.num_frames, n_cols=model.n_cols)
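# A minimal usage sketch, mirroring how main() drives this class:
#
#   evaluator = Evaluator(args, actor_critic, device, envs=envs, vec_norm=vec_norm)
#   for i in [-1, *range(0, n_cols, col_step)]:
#       evaluator.evaluate(column=i)   # column -1 is the overall net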
def plot(self):
    plotter = Plotter()
    image_manager = ImageManager()
    sketches = []
    plotter.set_output_filename(g.files['least-squares'])
    plotter.set_title('Least Squares')
    # min_x = min(self.training.get_output()[x_index])
    # max_x = max(self.training.get_output()[x_index])
    min_x = min(self.training_x)
    max_x = max(self.training_x)
    x_vals = []
    y_vals = []
    # range() needs ints; sample the fitted line at 0.01 steps.
    for x in range(int(min_x * 100), int(max_x * 100)):
        x_vals.append([x / 100])
        y_vals.append([self.f(x / 100)])
    sketches.append(SmoothSketch())
    sketches[-1].add_x(x_vals)
    sketches[-1].add_y(y_vals)
    sketches.append(ScatterSketch())
    sketches[-1].add_x(list(self.training_x))
    sketches[-1].add_y(list(self.training_y))
    for i in range(len(self.training_x)):
        # Each vertical line spans from the observed y to the fitted f(x),
        # visualizing one residual.
        if self.f(self.training_x[i]) > self.training_y[i]:
            y_max = self.f(self.training_x[i])
            y_min = self.training_y[i]
        else:
            y_min = self.f(self.training_x[i])
            y_max = self.training_y[i]
        sketches.append(VerticalLineSketch())
        sketches[-1].set_y_max(y_max)
        sketches[-1].set_y_min(y_min)
        sketches[-1].set_x(self.training_x[i])
    plotter.load(sketches)
    plotter.save()
    plotter.close()
    image_manager.scale(g.files['least-squares'], g.files['least-squares'], 250)
    del plotter
    del image_manager
def f_dist(self, model_type, trials):
    plotter = Plotter()
    image_manager = ImageManager()
    plotter.set_title('F Distribution')
    plotter.set_axis_labels('Frequency', 'F Score')
    plotter.set_output_filename(g.files['least-squares-f'])
    histogram = HistogramSketch()
    for i in range(trials):
        x_vals = g.randomizer.random_list(g.points_to_gen, g.lower_x_bound, g.upper_x_bound)
        y_vals = g.randomizer.random_list(g.points_to_gen, g.lower_y_bound, g.upper_y_bound)
        if model_type == LinearModel:
            slope, yint = self.least_squares_slope_yint_eqn(x_vals, y_vals)
            func = lambda x: slope * x + yint
        else:
            g.debug.prn(self, 'Incompatible model type.', 1)
            break
        ss_fit = self.get_ss_res(zip(x_vals, y_vals), func)
        # The mean model predicts mean(y) for every x (was np.mean(x_vals),
        # an apparent typo).
        ss_mean = self.get_ss_res(zip(x_vals, y_vals), lambda x: np.mean(y_vals))
        p_fit = 2   # TODO: Update for Dataframe
        p_mean = 1  # ""
        n = len(x_vals)
        if ss_fit == 0 or (n - p_fit) == 0 or (p_fit - p_mean) == 0:
            g.debug.prn(self, 'F distribution cannot divide by zero.', 1)
            continue
        numerator = (ss_mean - ss_fit) / (p_fit - p_mean)
        denominator = ss_fit / (n - p_fit)
        histogram.add_x(numerator / denominator)
    histogram.set_bins()
    plotter.load(histogram)
    plotter.save()
    plotter.close()
    image_manager.scale(g.files['least-squares-f'], g.files['least-squares-f'], 250)
    g.debug.prn(self, 'F distribution created.')
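# The statistic accumulated above is the standard regression F score:
#
#   F = ((ss_mean - ss_fit) / (p_fit - p_mean)) / (ss_fit / (n - p_fit))
#
# e.g. ss_mean = 10, ss_fit = 4, p_fit = 2, p_mean = 1, n = 12 gives
# F = (6 / 1) / (4 / 10) = 15. Repeating this over random data builds the
# null F distribution drawn by the histogram.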
def ssr_curve(self, x, y, slopes=[0.1, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 1.5,
                                  2.0, 2.5, 3.0, 4.0, 5.0, 7.5, 10.0]):
    ssrs = []
    for slope in slopes:
        # For a fixed slope, this is the least-squares intercept.
        yint = np.mean(y) - slope * np.mean(x)
        ssrs.append(self.get_ss_res(zip(x, y), lambda val: slope * val + yint))
    image_manager = ImageManager()
    plotter = Plotter()
    plotter.set_title('Sum of Squared Residuals')
    plotter.set_axis_labels('Slope Selected', 'Sum of Squared Residual')
    plotter.set_output_filename(g.files['ls-ssr'])
    ssr_plot = ScatterSketch()
    ssr_plot.add_x(slopes)
    ssr_plot.add_y(ssrs)
    plotter.load(ssr_plot)
    plotter.save()
    plotter.close()
    g.debug.prn(self, 'Drawn Sum of Squared Residuals Plot')
    image_manager.scale(g.files['ls-ssr'], g.files['ls-ssr'], 250)
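# Sanity-check sketch (hypothetical data and receiver): perfectly linear data
# with unit slope should bottom the curve out near zero at slope = 1.0.
#
#   model.ssr_curve([0, 1, 2, 3], [0, 1, 2, 3])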
def train(self):
    evaluator = self.evaluator
    episode_rewards = self.episode_rewards
    args = self.args
    actor_critic = self.actor_critic
    rollouts = self.rollouts
    agent = self.agent
    envs = self.envs
    plotter = self.plotter
    n_train = self.n_train
    start = self.start
    n_cols = self.n_cols
    model = self.model
    device = self.device
    vec_norm = self.vec_norm
    n_frames = self.n_frames
    if self.reset_eval:
        obs = envs.reset()
        rollouts.obs[0].copy_(obs)
        rollouts.to(device)
        self.reset_eval = False
    if args.model == 'FractalNet' and args.drop_path:
        model.set_drop_path()
    if args.model == 'fixed' and model.RAND:
        model.num_recursions = random.randint(1, model.map_width * 2)
    self.player_act = None
    for self.n_step in range(args.num_steps):
        # Sample actions
        self.step()
    with torch.no_grad():
        next_value = actor_critic.get_value(
            rollouts.obs[-1], rollouts.recurrent_hidden_states[-1],
            rollouts.masks[-1]).detach()

    rollouts.compute_returns(next_value, args.use_gae, args.gamma, args.tau)
    if args.curiosity:
        value_loss, action_loss, dist_entropy, fwd_loss, inv_loss = agent.update(rollouts)
    else:
        value_loss, action_loss, dist_entropy = agent.update(rollouts)
    envs.dist_entropy = dist_entropy
    rollouts.after_update()

    total_num_steps = (n_train + 1) * args.num_processes * args.num_steps
    if not dist_entropy:
        dist_entropy = 0
    #print(episode_rewards)
    #if torch.max(rollouts.rewards) > 0:
    #    print(rollouts.rewards)
    if args.log and n_train % args.log_interval == 0 and len(episode_rewards) > 1:
        end = time.time()
        print("Updates {}, num timesteps {}, FPS {} \n Last {} training episodes: "
              "mean/median reward {:.6f}/{:.6f}, min/max reward {:.6f}/{:.6f}\n "
              "dist entropy {:.6f}, val/act loss {:.6f}/{:.6f},".format(
                  n_train, total_num_steps,
                  int((self.n_frames - self.past_frames) / (end - start)),
                  len(episode_rewards),
                  round(np.mean(episode_rewards), 6),
                  round(np.median(episode_rewards), 6),
                  round(np.min(episode_rewards), 6),
                  round(np.max(episode_rewards), 6),
                  round(dist_entropy, 6),
                  round(value_loss, 6),
                  round(action_loss, 6)))
        if args.curiosity:
            print("fwd/inv icm loss {:.1f}/{:.1f}\n".format(fwd_loss, inv_loss))
    if (args.eval_interval is not None and len(episode_rewards) > 1
            and n_train % args.eval_interval == 0):
        if evaluator is None:
            evaluator = Evaluator(args, actor_critic, device,
                                  envs=envs, vec_norm=vec_norm,
                                  fieldnames=self.fieldnames)
            self.evaluator = evaluator

        # making sure the evaluator plots the '-1'st column (the overall net)
        col_idx = [-1, *range(0, n_cols, self.col_step)]
        for i in col_idx:
            evaluator.evaluate(column=i)
        #num_eval_frames = (args.num_frames // (args.num_steps * args.eval_interval * args.num_processes)) * args.num_processes * args.max_step
        viz = self.viz
        win_eval = self.win_eval
        graph_name = self.graph_name
        if args.vis:  # and n_train % args.vis_interval == 0:
            try:
                # Sometimes monitor doesn't properly flush the outputs
                win_eval = evaluator.plotter.visdom_plot(
                    viz, win_eval, evaluator.eval_log_dir, graph_name,
                    args.algo, args.num_frames, n_graphs=col_idx)
            except IOError:
                pass
        #elif args.model == 'fixed' and model.RAND:
        #    for i in model.eval_recs:
        #        evaluator.evaluate(num_recursions=i)
        #    win_eval = visdom_plot(viz, win_eval, evaluator.eval_log_dir, graph_name,
        #                           args.algo, args.num_frames, n_graphs=model.eval_recs)
        #else:
        #    evaluator.evaluate(column=-1)
        #    win_eval = visdom_plot(viz, win_eval, evaluator.eval_log_dir, graph_name,
        #                           args.algo, args.num_frames)
        self.reset_eval = True
    if args.save and n_train % args.save_interval == 0 and args.save_dir != "":
        save_path = os.path.join(args.save_dir)
        try:
            os.makedirs(save_path)
        except OSError:
            pass

        # A really ugly way to save a model to CPU
        save_model = actor_critic
        ob_rms = getattr(get_vec_normalize(envs), 'ob_rms', None)
        save_model = copy.deepcopy(actor_critic)
        save_agent = copy.deepcopy(agent)
        if args.cuda:
            save_model.cpu()
        optim_save = save_agent.optimizer.state_dict()
        self.agent = agent
        self.save_model = save_model
        self.optim_save = optim_save
        self.args = args
        self.ob_rms = ob_rms
        torch.save(self.get_save_dict(),
                   os.path.join(save_path, args.env_name + ".tar"))
        #save_model = [save_model,
        #              getattr(get_vec_normalize(envs), 'ob_rms', None)]
        #torch.save(save_model, os.path.join(save_path, args.env_name + ".pt"))
        #save_agent = copy.deepcopy(agent)
        #torch.save(save_agent, os.path.join(save_path, args.env_name + '_agent.pt'))
        #torch.save(actor_critic.state_dict(), os.path.join(save_path, args.env_name + "_weights.pt"))
        print('model saved at {}'.format(save_path))
    if args.vis and n_train % args.vis_interval == 0:
        if plotter is None:
            plotter = Plotter(n_cols, args.log_dir, args.num_processes)
        try:
            # Sometimes monitor doesn't properly flush the outputs
            viz = self.viz
            win = self.win
            graph_name = self.graph_name
            win = plotter.visdom_plot(viz, win, args.log_dir, graph_name,
                                      args.algo, args.num_frames)
        except IOError:
            pass
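# A minimal sketch of resuming from the ".tar" written above, assuming
# get_save_dict() mirrors the checkpoint dict saved in main()
# ('past_steps', 'model_state_dict', 'optimizer_state_dict', 'ob_rms', 'args'):
#
#   checkpoint = torch.load(os.path.join(args.save_dir, args.env_name + '.tar'))
#   actor_critic.load_state_dict(checkpoint['model_state_dict'])
#   agent.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
#   past_steps = checkpoint['past_steps']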
def plot(self):
    plotter = Plotter()
    image_manager = ImageManager()
    sketches = []
    plotter.set_output_filename(g.files['ridge-regression'])
    plotter.set_title('Ridge Regression')
    g.debug.prn(self, 'Plot basics set.')
    min_x = min(self.training_x)
    max_x = max(self.training_x)
    x_vals = []
    y_vals = []
    # range() needs ints; sample the fitted line at 0.01 steps.
    for x in range(int(min_x * 100), int(max_x * 100)):
        x_vals.append([x / 100])
        y_vals.append([self.f(x / 100)])
    sketches.append(SmoothSketch())
    sketches[-1].add_x(x_vals)
    sketches[-1].add_y(y_vals)
    g.debug.prn(self, 'Linear curve saved as SmoothSketch.')
    sketches.append(ScatterSketch())
    sketches[-1].add_x(list(self.training_x))
    sketches[-1].add_y(list(self.training_y))
    g.debug.prn(self, 'Points saved as ScatterSketch.')
    for i in range(len(self.training_x)):
        # Each vertical line spans from the observed y to the fitted f(x),
        # visualizing one residual.
        if self.f(self.training_x[i]) > self.training_y[i]:
            y_max = self.f(self.training_x[i])
            y_min = self.training_y[i]
        else:
            y_min = self.f(self.training_x[i])
            y_max = self.training_y[i]
        sketches.append(VerticalLineSketch())
        sketches[-1].set_y_max(y_max)
        sketches[-1].set_y_min(y_min)
        sketches[-1].set_x(self.training_x[i])
        g.debug.prn(self, 'Vertical line appended.', 3)
    g.debug.prn(self, 'SSR lines drawn as VerticalLineSketch(s).')
    plotter.load(sketches)
    plotter.save()
    plotter.close()
    g.debug.prn(self, 'All sketches loaded and saved.')
    image_manager.scale(g.files['ridge-regression'], g.files['ridge-regression'], 250)
    del plotter
    del image_manager
    g.debug.prn(self, 'Plotter and ImageManager objects deleted', 3)
def plot(self):
    plotter = Plotter()
    image_manager = ImageManager()
    sketches = []
    plotter.set_output_filename(g.files['logistic-regression'])
    plotter.set_title('Logistic Regression')
    g.debug.prn(self, 'Plot basics set.')
    min_x = min(self.training_x)
    max_x = max(self.training_x)
    min_y = min(self.training_y)
    max_y = max(self.training_y)
    x_vals = []
    y_vals = []
    # range() needs ints; sample the fitted curve at 0.01 steps.
    for x in range(int(min_x * 100), int(max_x * 100)):
        # Rescale the sigmoid output onto the data's y-range.
        y_adjust = self.f(x / 100) * (max_y - min_y) + min_y
        x_vals.append([x / 100])
        y_vals.append([y_adjust])
    sketches.append(SmoothSketch())
    sketches[-1].add_x(x_vals)
    sketches[-1].add_y(y_vals)
    g.debug.prn(self, 'Curve added to sketches list.')
    sketches.append(ScatterSketch())
    sketches[-1].add_x(list(self.training_x))
    sketches[-1].add_y(list(self.training_y))
    g.debug.prn(self, 'Scatter of points added to sketches list.')
    plotter.load(sketches)
    plotter.save()
    plotter.close()
    g.debug.prn(self, 'All sketches loaded and saved.')
    image_manager.scale(g.files['logistic-regression'], g.files['logistic-regression'], 250)
    del plotter
    del image_manager
    g.debug.prn(self, 'Plotter and ImageManager objects deleted', 3)
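# The rescaling above maps the sigmoid's [0, 1] output onto the data's
# y-range: y_adjust = f(x) * (max_y - min_y) + min_y. For example, f(x) = 0.5
# with min_y = 2 and max_y = 6 plots at y = 4.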