def load_agent(path):
    """Create the turn/pitch DQN agent and restore weights from *path*.

    The original notes list the possible actions as move[-1, 1],
    strafe[-1, 1], pitch[-1, 1], turn[-1, 1] and jump 0/1.  This agent only
    uses a discrete set of ``turn``/``pitch`` commands, wrapped in a single
    ``network.CategoricalAction`` ("move -0.5" and "jump_forward" were noted
    as further candidates but are not enabled).

    Args:
        path: Checkpoint file.  If it exists, its contents are handed to
            ``load_state_dict(..., strict=False)``; otherwise the agent keeps
            its freshly constructed weights.

    Returns:
        The result of ``agent.to('cuda')`` when CUDA is available, else
        ``agent.to('cpu')``.
    """
    # NOTE(review): "turn 0.01" is listed twice; the second entry may have
    # been intended as "turn -0.01".  Left untouched because the action order
    # is baked into any trained checkpoint -- confirm before changing.
    commands = [
        "turn 0.15", "turn -0.15",
        "turn 0.01", "turn 0.01",
        'pitch 0.1', 'pitch -0.1',
        'pitch 0.01', 'pitch -0.01',
    ]
    action_space = [network.CategoricalAction(commands)]

    def make_q_net():
        # Both networks are built with identical hyper-parameters.
        return network.QVisualNetwork(action_space, 2, 20, n_channels=3,
                                      activation=nn.ReLU(), batchnorm=True)

    q_policy = make_q_net()
    q_target = make_q_net()

    minibatch = 20
    agent = network.DQN(q_policy, q_target, 0.9, minibatch, 450,
                        capacity=2000)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if os.path.exists(path):
        logging.info('loading model from %s', path)
        state = torch.load(path, map_location=device)
        # strict=False: tolerate key mismatches between checkpoint and agent.
        agent.load_state_dict(state, strict=False)

    return agent.to(device)
def load_agent(path):
    """Create the navigation DQN agent and restore weights from *path*.

    The original notes list the possible actions as move[-1, 1],
    strafe[-1, 1], pitch[-1, 1], turn[-1, 1] and jump 0/1, and show that a
    continuous action set could be declared as, for example::

        actionSet = [network.ContiniousAction('move', -1, 1),
                     network.ContiniousAction('strafe', -1, 1),
                     network.ContiniousAction('pitch', -1, 1),
                     network.ContiniousAction('turn', -1, 1),
                     network.BinaryAction('jump')]

    This function uses four discrete commands instead, wrapped in a single
    ``network.CategoricalAction``.

    Args:
        path: Checkpoint file.  If it exists, its contents are handed to
            ``load_state_dict(..., strict=False)``; otherwise the agent keeps
            its freshly constructed weights.

    Returns:
        The result of ``agent.to('cuda')`` when CUDA is available, else
        ``agent.to('cpu')``.
    """
    commands = ["turn 0.1", "turn -0.1", "move 0.9", "jump_forward"]
    action_space = [network.CategoricalAction(commands)]

    # Keyword settings shared by the policy and the target network.
    shared_kwargs = dict(n_channels=3, batchnorm=False, num=256)
    q_policy = QVisualNetworkV2(3, action_space, 0, 41,
                                activation=nn.LeakyReLU(), **shared_kwargs)
    q_target = QVisualNetworkV2(3, action_space, 0, 41,
                                activation=nn.LeakyReLU(), **shared_kwargs)

    minibatch = 18
    noisy_transform = common.make_noisy_transformers()
    agent = network.DQN(q_policy, q_target, 0.99, minibatch, 450,
                        capacity=7000, transform=noisy_transform)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if os.path.exists(path):
        logging.info('loading model from %s', path)
        state = torch.load(path, map_location=device)
        # strict=False: tolerate key mismatches between checkpoint and agent.
        agent.load_state_dict(state, strict=False)

    return agent.to(device)
def load_agent(path):
    """Create the tree-search DQN agent and restore its weights.

    The original notes list the possible actions as move[-1, 1],
    strafe[-1, 1], pitch[-1, 1], turn[-1, 1] and jump 0/1.  This agent only
    uses a discrete set of ``turn``/``pitch`` commands, wrapped in a single
    ``network.CategoricalAction``.

    Previously the *path* argument was ignored and ``'agent_tree.pth'`` was
    always used.  Now *path* is honoured when it names an existing file, and
    ``'agent_tree.pth'`` remains the fallback so existing setups keep working.

    Args:
        path: Preferred checkpoint file.  Values that are not path-like, or
            that do not exist, fall back to ``'agent_tree.pth'``.

    Returns:
        The ``network.DQN`` agent.  It is not moved to another device here.
    """
    # NOTE(review): "turn 0.01" is listed twice; the second entry may have
    # been intended as "turn -0.01".  Left untouched because the action order
    # is baked into any trained checkpoint -- confirm before changing.
    action_names = ["turn 0.15", "turn -0.15", "turn 0.01", "turn 0.01",
                    'pitch 0.1', 'pitch -0.1', 'pitch 0.01', 'pitch -0.01']
    actionSet = [network.CategoricalAction(action_names)]

    transformer = common.make_noisy_transformers()

    policy_net = QVisualNetworkTree(1, actionSet, 0, 34, n_channels=3,
                                    activation=nn.LeakyReLU(),
                                    batchnorm=False, num=256)
    target_net = QVisualNetworkTree(1, actionSet, 0, 34, n_channels=3,
                                    activation=nn.LeakyReLU(),
                                    batchnorm=False, num=256)
    batch_size = 20

    my_simple_agent = network.DQN(policy_net, target_net, 0.9, batch_size,
                                  450, capacity=2000, transform=transformer)

    # Callers used to be able to pass anything (the argument was unused), so
    # only treat real path-like values as candidates.
    legacy_checkpoint = 'agent_tree.pth'
    path_is_usable = (isinstance(path, (str, bytes, os.PathLike))
                      and os.path.exists(path))
    checkpoint = path if path_is_usable else legacy_checkpoint

    if os.path.exists(checkpoint):
        location = 'cuda' if torch.cuda.is_available() else 'cpu'
        logging.info('loading model from %s', checkpoint)
        data = torch.load(checkpoint, map_location=location)
        # strict=False: tolerate key mismatches between checkpoint and agent.
        my_simple_agent.load_state_dict(data, strict=False)

    return my_simple_agent
def load_agent(path):
    """Create the SearchTree DQN agent and restore weights from *path*.

    The original notes list the possible actions as move[-1, 1],
    strafe[-1, 1], pitch[-1, 1], turn[-1, 1] and jump 0/1.  This agent only
    uses a discrete set of ``turn``/``pitch`` commands, wrapped in a single
    ``network.CategoricalAction``.

    A ``GoodPoint`` network is loaded from ``'goodpoint.pt'`` (relative path,
    read unconditionally) and the same instance is passed as ``block_net`` to
    both the policy and the target ``SearchTree``.

    Args:
        path: Agent checkpoint file.  If it exists, its contents are handed
            to ``load_state_dict(..., strict=False)``; otherwise the agent
            keeps its freshly constructed weights.

    Returns:
        The ``network.DQN`` agent.  Only the ``GoodPoint`` network is moved
        to the selected device here; the agent itself is not.

    Raises:
        FileNotFoundError: if ``'goodpoint.pt'`` is missing (from
            ``torch.load``).
    """
    # NOTE(review): "turn 0.01" is listed twice; the second entry may have
    # been intended as "turn -0.01".  Left untouched because the action order
    # is baked into any trained checkpoint -- confirm before changing.
    action_names = [
        "turn 0.20", "turn -0.20", "turn 0.01", "turn 0.01",
        'pitch 0.1', 'pitch -0.1', 'pitch 0.01', 'pitch -0.01',
    ]
    actionSet = [network.CategoricalAction(action_names)]

    n_out = len(common.visible_blocks) + 1
    location = 'cuda' if torch.cuda.is_available() else 'cpu'

    net = GoodPoint(8, n_out, n_channels=3, depth=False)
    # The checkpoint is a mapping; the weights live under its 'model' key.
    model_weights = torch.load('goodpoint.pt', map_location=location)['model']
    net.load_checkpoint(model_weights)
    net.to(location)

    policy_net = SearchTree(actionSet, 2, n_channels=3,
                            activation=nn.LeakyReLU(), block_net=net)
    target_net = SearchTree(actionSet, 2, n_channels=3,
                            activation=nn.LeakyReLU(), block_net=net)
    batch_size = 20

    my_simple_agent = network.DQN(policy_net, target_net, 0.9, batch_size,
                                  450, capacity=2000)
    if os.path.exists(path):
        # Lazy %-formatting instead of ``'...' + path``: concatenation raised
        # TypeError for os.PathLike paths that os.path.exists() accepts.
        logging.info('loading model from %s', path)
        data = torch.load(path, map_location=location)
        # strict=False: tolerate key mismatches between checkpoint and agent.
        my_simple_agent.load_state_dict(data, strict=False)

    return my_simple_agent
def load_agent(path):
    """Create the grid-based DQN agent and restore weights from *path*.

    The original notes list the possible actions as move[-1, 1],
    strafe[-1, 1], pitch[-1, 1], turn[-1, 1] and jump 0/1, and show that a
    continuous action set could be declared as, for example::

        actionSet = [network.ContiniousAction('move', -1, 1),
                     network.ContiniousAction('strafe', -1, 1),
                     network.ContiniousAction('pitch', -1, 1),
                     network.ContiniousAction('turn', -1, 1),
                     network.BinaryAction('jump')]

    This function uses four discrete commands instead, wrapped in a single
    ``network.CategoricalAction``.

    Args:
        path: Checkpoint file.  If it exists, its contents are handed to
            ``load_state_dict(..., strict=False)``; otherwise the agent keeps
            its freshly constructed weights.

    Returns:
        The ``network.DQN`` agent.  It is not moved to another device here.
    """
    action_names = ["turn 0.15", "turn -0.15", "move 0.5", "jump_forward"]
    actionSet = [network.CategoricalAction(action_names)]

    policy_net = network.QNetwork(actionSet, grid_len=27, grid_w=5,
                                  target_enc_len=3, pos_enc_len=5)
    target_net = network.QNetwork(actionSet, grid_len=27, grid_w=5,
                                  target_enc_len=3, pos_enc_len=5)

    my_simple_agent = network.DQN(policy_net, target_net, 0.9, 70, 450,
                                  capacity=2000)

    if os.path.exists(path):
        # Without map_location, a checkpoint holding CUDA tensors cannot be
        # deserialized on a CPU-only host.  Remap to CPU only in that case;
        # None keeps torch.load's default behaviour when CUDA is available.
        location = None if torch.cuda.is_available() else 'cpu'
        data = torch.load(path, map_location=location)
        # strict=False: tolerate key mismatches between checkpoint and agent.
        my_simple_agent.load_state_dict(data, strict=False)

    return my_simple_agent
transform=TRANSFORM, batch_size=BATCH_SIZE, shuffle=True) loader_test, idx_to_class_test = f.loader(root=DATA_PATH_TEST, transform=TRANSFORM, batch_size=BATCH_SIZE, shuffle=False) loader_test2, idx_to_class_test2 = f.loader(root=DATA_PATH_TEST2, transform=TRANSFORM, batch_size=BATCH_SIZE, shuffle=False) n_batches = len(loader_train) n_batches_test = len(loader_test) # Networks m = network.DQN(RESOLUTION, RESOLUTION, N_ACTIONS) m = m.to(DEVICE) o = optim.Adam(m.parameters(), lr=1e-5) start_epoch = 0 run = f.Run(CHECKPOINT_DIR) start_epoch, m, o = f.load_checkpoint(run.get_checkpoint('32'), m, o) # validate_model(m, loader_test, idx_to_class_test) for epoch in range(N_EPOCHS): print('\n Epoch {}'.format(start_epoch + epoch)) train_model(m, o, loader_train, idx_to_class_train) checkpoint = { 'epoch': start_epoch + epoch + 1,
def __init__(self, game, settings):
    """Set up folders, a TensorFlow session, two DQNs and restore a checkpoint.

    Args:
        game: Environment object; its ``observation_space.shape[0]`` and
            ``action_space.n`` determine the network input/output sizes.
        settings: Object exposing ``save_folder_file`` and
            ``load_folder_file``; element 0 of each is appended to
            ``"models/"`` as a folder and element 1 as a file name.

    Side effects: prints the network sizes, creates ``"models/"`` and the
    save folder via ``createFolder``, opens a ``tf.Session`` and either
    restores the checkpoint found in the load folder or prints
    ``"First learning."``.
    """
    self.env = game
    self.settings = settings

    # Network dimensions are taken directly from the environment.
    self.input_size = self.env.observation_space.shape[0]
    self.output_size = self.env.action_space.n

    # Per the original notes the inputs are [position of cart, velocity of
    # cart, angle of pole, rotation rate of pole] and the outputs are
    # Left, Right.
    print('input_size : ', self.input_size)
    print('output_size : ', self.output_size)

    self.transition = namedtuple(
        'Transition',
        ('state', 'action', 'reward', 'next_state', 'terminal'),
    )

    # Presumably the epsilon-greedy schedule; not used inside this method.
    self.EPS_START = 0.9
    self.EPS_END = 0.05
    self.EPS_DECAY = 200
    self.steps_done = 0

    # --- save locations -------------------------------------------------
    models_root = "models/"
    createFolder(models_root)

    self.save_folder_path = models_root + self.settings.save_folder_file[0]
    createFolder(self.save_folder_path)

    self.checkpoint_state = "checkpoint_state"
    self.save_model_path = (self.save_folder_path
                            + self.settings.save_folder_file[1])
    self.optimal_model_path = self.save_folder_path + "optimal"

    # --- load locations -------------------------------------------------
    self.load_folder_path = models_root + self.settings.load_folder_file[0]
    self.load_model_path = (self.load_folder_path
                            + self.settings.load_folder_file[1]
                            + ".meta")

    # A plain session; an earlier variant limited GPU memory via
    # tf.GPUOptions(per_process_gpu_memory_fraction=0.9) with
    # allow_soft_placement=True.
    self.sess = tf.Session()

    # Policy and target networks share the session and the dimensions.
    self.policyNet = network.DQN(self.sess, self.input_size,
                                 self.output_size, name="policy")
    self.targetNet = network.DQN(self.sess, self.input_size,
                                 self.output_size, name="target")

    # The saver is created after both networks have been declared.
    self.saver = tf.train.Saver()
    found = tf.train.get_checkpoint_state(
        self.load_folder_path, latest_filename=self.checkpoint_state)

    # Initialise all variables first; a restore then overwrites them.
    self.sess.run(tf.global_variables_initializer())

    restorable = found and found.model_checkpoint_path
    if not restorable:
        print("First learning.")
    else:
        print(found)
        print(found.model_checkpoint_path)
        self.saver.restore(self.sess, found.model_checkpoint_path)
        print("%s has been loaded." % found.model_checkpoint_path)
# Disabled scatter-plot code, kept from the original for reference:
# fig = plt.figure(dpi=200, facecolor='w', edgecolor='k')
# plt.plot(df['predicted'], 'ro', markersize=3, fillstyle='none')
# plt.plot(df['random'], 'go', markersize=3, fillstyle='none')
# plt.plot(df['target'], 'bo', markersize=3)
# plt.ylabel('cross-entropy loss')
# plt.xlabel('test images')
# plt.legend(['predicted', 'random', 'target'])
# plt.show()
# fig.savefig("testdatascatterplot", bbox_inches='tight')


if __name__ == '__main__':
    # Usage: the first command-line argument is the saved state-dict file.
    NETWORK_PATH = sys.argv[1]
    # DATA_PATH = sys.argv[2]

    model = net.DQN(RESOLUTION, RESOLUTION, N_ACTIONS)
    # map_location=DEVICE lets a checkpoint saved on a GPU be loaded on a
    # host where that GPU is absent; without it torch.load tries to restore
    # tensors onto the device they were saved from.
    model.load_state_dict(torch.load(NETWORK_PATH, map_location=DEVICE))
    # if gpu is to be used
    model.to(DEVICE)
    model.eval()

    loader_test, idx_to_class = f.loader(DATA_PATH_TEST,
                                         transform=TRANSFORM,
                                         batch_size=BATCH_SIZE,
                                         shuffle=False)
    random_losses = generate_random(idx_to_class, loader_test)
    predicted_losses, target_losses, center_locations = generate_predictions(
        idx_to_class, loader_test, model)

    # One row per loss list, transposed so each list becomes a column, then
    # ordered by the target loss with a fresh 0..n-1 index.
    losses = pd.DataFrame([np.array(target_losses),
                           np.array(predicted_losses),
                           np.array(random_losses)]).transpose()
    losses.columns = ['target', 'predicted', 'random']
    losses = losses.sort_values('target')
    losses = losses.reset_index(drop=True)