def __init__(self, model, action_space, meta_data_util, config, constants):
    self.max_epoch = constants["max_epochs"]
    self.model = model
    self.action_space = action_space
    self.meta_data_util = meta_data_util
    self.config = config
    self.constants = constants
    self.tensorboard = Tensorboard()
    self.entropy_coef = constants["entropy_coefficient"]
    self.optimizer = optim.Adam(model.get_parameters(),
                                lr=constants["learning_rate"])
    AbstractLearning.__init__(self, self.model, self.calc_loss,
                              self.optimizer, self.config, self.constants)
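For reference, a minimal sketch (the helper name is hypothetical) of how a learner wired up this way typically reports per-update metrics through the Tensorboard wrapper; it assumes only the log_scalar(name, value) method that the training loops later in this listing already call.

def log_update(tensorboard, loss_val, entropy_sum, total_reward, num_actions):
    # Worker processes other than rank 0 may have no tensorboard server.
    if tensorboard is None:
        return
    tensorboard.log_scalar("loss", loss_val)
    # Per-step entropy, matching the normalization used in the loops below.
    tensorboard.log_scalar("entropy", entropy_sum / float(num_actions + 1))
    tensorboard.log_scalar("total_reward", total_reward)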
def __init__(self, args):
    self.args = args
    self.model = None
    self.optimizer = None
    self.scheduler = None
    self.epoch = 0

    # s = State(args)
    set_seed(self.args.seed, self.args.cudnn_behavoir)

    self.log = Log(self.args.log_path)
    self.writer = Tensorboard(self.args.tensorboard_path)
    self.stati = Statistic(self.args.expernameid, self.args.experid_path,
                           self.args.root_path)
    self.stati.add('hparam', self.args.dict())
    # s.writer.add_hparams(hparam_dict=s.args.dict(), metric_dict={})
    self.record = Record()
def train_from_learned_homing_policies(self, env, load_folder, train_episodes,
                                       experiment_name, logger, use_pushover, trial=1):

    horizon = self.config["horizon"]
    actions = self.config["actions"]
    num_state_budget = self.constants["num_homing_policy"]

    logger.log("Training episodes %d" % train_episodes)
    tensorboard = Tensorboard(log_dir=self.config["save_path"])

    homing_policies = dict()  # Contains a set of homing policies for every time step

    # Load homing policy from folder
    logger.log("Loading Homing policies...")

    for step in range(1, horizon + 1):

        homing_policies[step] = []

        for i in range(0, num_state_budget):

            # TODO can fail if the policy doesn't exist. Add checks to prevent that.
            policy_folder_name = load_folder + "/trial_%d_horizon_%d_homing_policy_%d/" % (trial, step, i)

            if not os.path.exists(policy_folder_name):
                logger.log("Did not find %s" % policy_folder_name)
                continue

            previous_step_homing_policy = None if step == 1 else homing_policies[step - 1]

            policy = self.reward_free_planner.read_policy(
                policy_folder_name, step, previous_step_homing_policy)
            homing_policies[step].append(policy)

    logger.log("Loaded Homing policy.")
    logger.log("Reward Sensitive Learning: Computing the optimal policy for the given reward")

    # Compute the optimal policy
    psdp_start = time.time()
    approx_optimal_policy, _, info = self.reward_sensitive_planner.train(
        None, env, actions, horizon, None, homing_policies, logger, tensorboard,
        True, use_pushover)
    logger.log("PSDP Time %r" % (time.time() - psdp_start))

    train_episodes = train_episodes + info["total_episodes"]
    train_reward = info["sum_rewards"]

    # Evaluate the optimal policy
    return policy_evaluate.evaluate(env, approx_optimal_policy, horizon, logger,
                                    train_episodes, train_reward)
def handler(context):
    dataset_alias = context.datasets
    dataset_id = dataset_alias['train']  # set alias specified in console
    data = list(load_dataset_from_api(dataset_id))

    np.random.seed(0)
    data = np.random.permutation(data)
    nb_data = len(data)
    nb_train = int(7 * nb_data // 10)
    train_data_raw = data[:nb_train]
    test_data_raw = data[nb_train:]

    simple_net = SimpleNet(num_classes)
    model = L.Classifier(simple_net)
    if USE_GPU >= 0:
        chainer.cuda.get_device(USE_GPU).use()  # Make a specified GPU current
        model.to_gpu()

    def make_optimizer(model, alpha=0.001, beta1=0.9):
        optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1)
        optimizer.setup(model)
        return optimizer

    optimizer = make_optimizer(model)

    train_data = ImageDatasetFromAPI(train_data_raw)
    train_iter = chainer.iterators.SerialIterator(train_data, batch_size)
    test_data = ImageDatasetFromAPI(test_data_raw)
    test_iter = chainer.iterators.SerialIterator(test_data, batch_size,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=USE_GPU)
    trainer = training.Trainer(updater, (epochs, 'epoch'),
                               out=ABEJA_TRAINING_RESULT_DIR)

    trainer.extend(extensions.Evaluator(test_iter, model, device=USE_GPU))
    trainer.extend(extensions.snapshot_object(simple_net, 'simple_net.model'),
                   trigger=(epochs, 'epoch'))

    report_entries = [
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy'
    ]

    trainer.extend(extensions.LogReport())
    trainer.extend(Statistics(report_entries, epochs), trigger=(1, 'epoch'))
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(report_entries))

    trainer.run()
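The four reporting extensions at the end of this handler recur almost verbatim in the Chainer handlers below. A small helper (hypothetical name, assuming the same Statistics and Tensorboard extensions these scripts already import) that bundles the pattern:

def attach_reporting(trainer, report_entries, epochs, log_path):
    # Aggregate log -> summary statistics -> TensorBoard event files -> stdout table.
    trainer.extend(extensions.LogReport())
    trainer.extend(Statistics(report_entries, epochs), trigger=(1, 'epoch'))
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(report_entries))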
def do_test_(shared_model, config, action_space, meta_data_util, constants,
             test_dataset, experiment_name, rank, server, logger, model_type,
             use_pushover=False):

    server.initialize_server()

    # Test policy
    test_policy = gp.get_argmax_action

    # torch.manual_seed(args.seed + rank)

    if rank == 0:  # client 0 creates a tensorboard server
        tensorboard = Tensorboard(experiment_name)
    else:
        tensorboard = None

    if use_pushover:
        pushover_logger = PushoverLogger(experiment_name)
    else:
        pushover_logger = None

    # Create a local model for rollouts
    local_model = model_type(config, constants)
    # local_model.train()

    # Create the Agent
    logger.log("STARTING AGENT")
    agent = Agent(server=server,
                  model=local_model,
                  test_policy=test_policy,
                  action_space=action_space,
                  meta_data_util=meta_data_util,
                  config=config,
                  constants=constants)
    logger.log("Created Agent...")

    tune_dataset_size = len(test_dataset)

    local_model.load_from_state_dict(shared_model.get_state_dict())

    if tune_dataset_size > 0:
        # Test on tuning data
        agent.test(test_dataset, tensorboard=tensorboard,
                   logger=logger, pushover_logger=pushover_logger)
def do_test_(house_id, goal_prediction_model, navigation_model, action_type_model,
             config, action_space, meta_data_util, constants, test_dataset,
             experiment_name, rank, server, logger, vocab, goal_type, use_pushover=False):

    logger.log("In Testing...")
    launch_k_unity_builds([config["port"]], "./house_" + str(house_id) + "_elmer.x86_64",
                          arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json",
                          cwd="./simulators/house/")
    logger.log("Launched Builds.")
    server.initialize_server()
    logger.log("Server Initialized.")

    # Test policy
    test_policy = gp.get_argmax_action

    if rank == 0:  # client 0 creates a tensorboard server
        tensorboard = Tensorboard(experiment_name)
        logger.log('Created Tensorboard Server.')
    else:
        tensorboard = None

    if use_pushover:
        pushover_logger = None
    else:
        pushover_logger = None

    # Create the Agent
    tmp_agent = HouseDecoupledPredictorNavigatorAgent(server=server,
                                                      goal_prediction_model=goal_prediction_model,
                                                      navigation_model=navigation_model,
                                                      action_type_model=action_type_model,
                                                      test_policy=test_policy,
                                                      action_space=action_space,
                                                      meta_data_util=meta_data_util,
                                                      config=config,
                                                      constants=constants)
    logger.log("Created Agent.")

    tune_dataset_size = len(test_dataset)

    if tune_dataset_size > 0:
        # Test on tuning data
        # tmp_agent.test_single_step(test_dataset, vocab, goal_type=goal_type, tensorboard=tensorboard,
        #                            logger=logger, pushover_logger=pushover_logger)
        # tmp_agent.test_multi_step(test_dataset, vocab, num_outer_loop_steps=10, num_inner_loop_steps=4,
        #                           goal_type=goal_type, tensorboard=tensorboard, logger=logger,
        #                           pushover_logger=pushover_logger)
        # tmp_agent.test_multi_step_action_types(test_dataset, vocab, goal_type=goal_type, tensorboard=tensorboard,
        #                                        logger=logger, pushover_logger=pushover_logger)
        tmp_agent.test_goal_distance(house_id, test_dataset, vocab, goal_type=goal_type,
                                     tensorboard=tensorboard, logger=logger,
                                     pushover_logger=pushover_logger)
def do_test(house_id, chaplot_baseline, config, action_space, meta_data_util,
            constants, test_dataset, experiment_name, rank, server, logger):

    # torch.manual_seed(args.seed + rank)

    # Launch the Unity Build
    launch_k_unity_builds([config["port"]], "./house_" + str(house_id) + "_elmer.x86_64",
                          arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json",
                          cwd="./simulators/house/")

    # Initialize Server
    server.initialize_server()
    server.clear_metadata()
    logger.log("Server Initialized")

    # Test policy
    test_policy = gp.get_argmax_action

    # Create the Agent
    agent = TmpHouseAgent(server=server,
                          model=chaplot_baseline,
                          test_policy=test_policy,
                          action_space=action_space,
                          meta_data_util=meta_data_util,
                          config=config,
                          constants=constants)

    # Create tensorboard server
    if rank == 0:  # client 0 creates a tensorboard server
        tensorboard = Tensorboard(experiment_name)
        logger.log('Created Tensorboard Server...')
    else:
        tensorboard = None

    agent.test(test_dataset, vocab=None, tensorboard=tensorboard, logger=logger)
def handler(context):
    # Triggers
    log_trigger = (50, 'iteration')
    validation_trigger = (2000, 'iteration')
    end_trigger = (nb_iterations, 'iteration')

    # Dataset
    dataset_alias = context.datasets
    train_dataset_id = dataset_alias['train']
    val_dataset_id = dataset_alias['val']
    train = SegmentationDatasetFromAPI(train_dataset_id)
    val = SegmentationDatasetFromAPI(val_dataset_id)
    class_weight = calc_weight(train)

    print(class_weight)

    train = TransformDataset(train, transform)

    # Iterator
    train_iter = iterators.SerialIterator(train, BATCHSIZE)
    val_iter = iterators.SerialIterator(val, BATCHSIZE, shuffle=False, repeat=False)

    # Model
    model = SegNetBasic(n_class=len(camvid_label_names))
    model = PixelwiseSoftmaxClassifier(model, class_weight=class_weight)

    if USE_GPU >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()  # Copy the model to the GPU

    # Optimizer
    optimizer = optimizers.MomentumSGD(lr=0.1, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(rate=0.0005))

    # Updater
    updater = training.updaters.StandardUpdater(train_iter, optimizer, device=USE_GPU)

    # Trainer
    trainer = training.Trainer(updater, end_trigger, out=ABEJA_TRAINING_RESULT_DIR)

    trainer.extend(extensions.LogReport(trigger=log_trigger))
    trainer.extend(extensions.observe_lr(), trigger=log_trigger)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot_object(
        model.predictor, filename='model_iteration-{.updater.iteration}'),
        trigger=end_trigger)

    print_entries = [
        'iteration', 'main/loss', 'validation/main/miou',
        'validation/main/mean_class_accuracy', 'validation/main/pixel_accuracy'
    ]

    report_entries = [
        'epoch', 'iteration', 'lr', 'main/loss', 'validation/main/miou',
        'validation/main/mean_class_accuracy', 'validation/main/pixel_accuracy'
    ]

    trainer.extend(Statistics(report_entries, nb_iterations, obs_key='iteration'),
                   trigger=log_trigger)
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(print_entries), trigger=log_trigger)

    trainer.extend(SemanticSegmentationEvaluator(val_iter, model.predictor,
                                                 camvid_label_names),
                   trigger=validation_trigger)

    trainer.run()
def do_train_(simulator_file, shared_model, config, action_space, meta_data_util,
              constants, train_dataset, tune_dataset, experiment, experiment_name,
              rank, server, logger, model_type, use_pushover=False):

    # Launch unity
    launch_k_unity_builds([config["port"]], simulator_file)

    server.initialize_server()

    # Test policy
    test_policy = gp.get_argmax_action

    # torch.manual_seed(args.seed + rank)

    if rank == 0:  # client 0 creates a tensorboard server
        tensorboard = Tensorboard(experiment_name)
    else:
        tensorboard = None

    if use_pushover:
        pushover_logger = PushoverLogger(experiment_name)
    else:
        pushover_logger = None

    # Create a local model for rollouts
    local_model = model_type(config, constants)
    # local_model.train()

    # Create the Agent
    logger.log("STARTING AGENT")
    agent = Agent(server=server,
                  model=local_model,
                  test_policy=test_policy,
                  action_space=action_space,
                  meta_data_util=meta_data_util,
                  config=config,
                  constants=constants)
    logger.log("Created Agent...")

    action_counts = [0] * action_space.num_actions()
    max_epochs = constants["max_epochs"]
    dataset_size = len(train_dataset)
    tune_dataset_size = len(tune_dataset)

    # Create the learner to compute the loss
    learner = AsynchronousContextualBandit(shared_model, local_model, action_space,
                                           meta_data_util, config, constants, tensorboard)

    for epoch in range(1, max_epochs + 1):

        for data_point_ix, data_point in enumerate(train_dataset):

            # Sync with the shared model
            local_model.load_from_state_dict(shared_model.get_state_dict())

            if (data_point_ix + 1) % 100 == 0:
                logger.log("Done %d out of %d" % (data_point_ix, dataset_size))
                logger.log("Training data action counts %r" % action_counts)

            num_actions = 0
            max_num_actions = constants["horizon"] + constants["max_extra_horizon"]

            image, metadata = agent.server.reset_receive_feedback(data_point)

            state = AgentObservedState(instruction=data_point.instruction,
                                       config=config,
                                       constants=constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)
            meta_data_util.start_state_update_metadata(state, metadata)

            model_state = None
            batch_replay_items = []
            total_reward = 0
            forced_stop = True

            while num_actions < max_num_actions:

                # Sample action using the policy
                log_probabilities, model_state, image_emb_seq, volatile = \
                    local_model.get_probs(state, model_state)
                probabilities = list(torch.exp(log_probabilities.data))[0]

                # Sample action from the probability
                action = gp.sample_action_from_prob(probabilities)
                action_counts[action] += 1

                if action == action_space.get_stop_action_index():
                    forced_stop = False
                    break

                # Send the action and get feedback
                image, reward, metadata = agent.server.send_action_receive_feedback(action)

                # Store it in the replay memory list
                replay_item = ReplayMemoryItem(state, action, reward,
                                               log_prob=log_probabilities, volatile=volatile)
                batch_replay_items.append(replay_item)

                # Update the agent state
                state = state.update(image, action, data_point=data_point)
                meta_data_util.state_update_metadata(state, metadata)

                num_actions += 1
                total_reward += reward

            # Send final STOP action and get feedback
            image, reward, metadata = agent.server.halt_and_receive_feedback()
            total_reward += reward

            if tensorboard is not None:
                meta_data_util.state_update_metadata(tensorboard, metadata)

            # Store it in the replay memory list
            if not forced_stop:
                replay_item = ReplayMemoryItem(state, action_space.get_stop_action_index(),
                                               reward, log_prob=log_probabilities,
                                               volatile=volatile)
                batch_replay_items.append(replay_item)

            # Perform update
            if len(batch_replay_items) > 0:
                loss_val = learner.do_update(batch_replay_items)

                if tensorboard is not None:
                    entropy = float(learner.entropy.data[0]) / float(num_actions + 1)
                    tensorboard.log_scalar("loss", loss_val)
                    tensorboard.log_scalar("entropy", entropy)
                    tensorboard.log_scalar("total_reward", total_reward)

        # Save the model
        local_model.save_model(experiment + "/contextual_bandit_" +
                               str(rank) + "_epoch_" + str(epoch))
        logger.log("Training data action counts %r" % action_counts)

        if tune_dataset_size > 0:
            # Test on tuning data
            agent.test(tune_dataset, tensorboard=tensorboard,
                       logger=logger, pushover_logger=pushover_logger)
logging.log(logging.DEBUG, "CREATING MODEL") model = IncrementalModelChaplot(config, constants) model.load_saved_model( "./results/model-folder-name/contextual_bandit_5_epoch_4") logging.log(logging.DEBUG, "MODEL CREATED") # Create the agent logging.log(logging.DEBUG, "STARTING AGENT") agent = HumanDrivenAgent(server=server, model=model, test_policy=test_policy, action_space=action_space, meta_data_util=meta_data_util, config=config, constants=constants) # create tensorboard tensorboard = Tensorboard("Human-Driven-Agent") dev_dataset = DatasetParser.parse("data/nav_drone/dev_annotations_6000.json", config) agent.test(dev_dataset, tensorboard) server.kill() except Exception: server.kill() exc_info = sys.exc_info() traceback.print_exception(*exc_info) # raise e
def do_train_(shared_model, config, action_space, meta_data_util, args, constants, train_dataset, tune_dataset, experiment, experiment_name, rank, server, logger, model_type, use_pushover=False): server.initialize_server() # Test policy test_policy = gp.get_argmax_action # torch.manual_seed(args.seed + rank) if rank == 0: # client 0 creates a tensorboard server tensorboard = Tensorboard(experiment_name) else: tensorboard = None if use_pushover: pushover_logger = PushoverLogger(experiment_name) else: pushover_logger = None # Create a local model for rollouts local_model = model_type(args, config=config) if torch.cuda.is_available(): local_model.cuda() local_model.train() # Create the Agent logger.log("STARTING AGENT") agent = Agent(server=server, model=local_model, test_policy=test_policy, action_space=action_space, meta_data_util=meta_data_util, config=config, constants=constants) logger.log("Created Agent...") action_counts = [0] * action_space.num_actions() max_epochs = constants["max_epochs"] dataset_size = len(train_dataset) tune_dataset_size = len(tune_dataset) # Create the learner to compute the loss learner = AsynchronousContextualBandit(shared_model, local_model, action_space, meta_data_util, config, constants, tensorboard) # Launch unity launch_k_unity_builds([ config["port"] ], "/home/dipendra/Downloads/NavDroneLinuxBuild/NavDroneLinuxBuild.x86_64" ) for epoch in range(1, max_epochs + 1): if tune_dataset_size > 0: # Test on tuning data agent.test(tune_dataset, tensorboard=tensorboard, logger=logger, pushover_logger=pushover_logger) for data_point_ix, data_point in enumerate(train_dataset): # Sync with the shared model # local_model.load_state_dict(shared_model.state_dict()) local_model.load_from_state_dict(shared_model.get_state_dict()) if (data_point_ix + 1) % 100 == 0: logging.info("Done %d out of %d", data_point_ix, dataset_size) logging.info("Training data action counts %r", action_counts) num_actions = 0 # max_num_actions = len(data_point.get_trajectory()) # max_num_actions += self.constants["max_extra_horizon"] max_num_actions = constants["horizon"] image, metadata = agent.server.reset_receive_feedback( data_point) pose = int(metadata["y_angle"] / 15.0) position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"]) state = AgentObservedState( instruction=data_point.instruction, config=config, constants=constants, start_image=image, previous_action=None, pose=pose, position_orientation=position_orientation, data_point=data_point) model_state = None batch_replay_items = [] total_reward = 0 forced_stop = True while num_actions < max_num_actions: # Sample action using the policy log_probabilities, model_state, image_emb_seq, state_feature = \ local_model.get_probs(state, model_state) probabilities = list(torch.exp(log_probabilities.data))[0] # Sample action from the probability action = gp.sample_action_from_prob(probabilities) action_counts[action] += 1 if action == action_space.get_stop_action_index(): forced_stop = False break # Send the action and get feedback image, reward, metadata = agent.server.send_action_receive_feedback( action) # Store it in the replay memory list rewards = learner.get_all_rewards(metadata) replay_item = ReplayMemoryItem(state, action, reward, log_prob=log_probabilities, all_rewards=rewards) batch_replay_items.append(replay_item) # Update the agent state pose = int(metadata["y_angle"] / 15.0) position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"]) state = state.update( image, action, pose=pose, 
position_orientation=position_orientation, data_point=data_point) num_actions += 1 total_reward += reward # Send final STOP action and get feedback image, reward, metadata = agent.server.halt_and_receive_feedback( ) rewards = learner.get_all_rewards(metadata) total_reward += reward if tensorboard is not None: tensorboard.log_all_train_errors( metadata["edit_dist_error"], metadata["closest_dist_error"], metadata["stop_dist_error"]) # Store it in the replay memory list if not forced_stop: replay_item = ReplayMemoryItem( state, action_space.get_stop_action_index(), reward, log_prob=log_probabilities, all_rewards=rewards) batch_replay_items.append(replay_item) # Update the scores based on meta_data # self.meta_data_util.log_results(metadata) # Perform update if len(batch_replay_items) > 0: loss_val = learner.do_update(batch_replay_items) # self.action_prediction_loss_calculator.predict_action(batch_replay_items) del batch_replay_items[:] # in place list clear if tensorboard is not None: cross_entropy = float(learner.cross_entropy.data[0]) tensorboard.log(cross_entropy, loss_val, 0) entropy = float(learner.entropy.data[0]) tensorboard.log_scalar("entropy", entropy) ratio = float(learner.ratio.data[0]) tensorboard.log_scalar( "Abs_objective_to_entropy_ratio", ratio) if learner.action_prediction_loss is not None: action_prediction_loss = float( learner.action_prediction_loss.data[0]) learner.tensorboard.log_action_prediction_loss( action_prediction_loss) if learner.temporal_autoencoder_loss is not None: temporal_autoencoder_loss = float( learner.temporal_autoencoder_loss.data[0]) tensorboard.log_temporal_autoencoder_loss( temporal_autoencoder_loss) if learner.object_detection_loss is not None: object_detection_loss = float( learner.object_detection_loss.data[0]) tensorboard.log_object_detection_loss( object_detection_loss) if learner.symbolic_language_prediction_loss is not None: symbolic_language_prediction_loss = float( learner.symbolic_language_prediction_loss. data[0]) tensorboard.log_scalar( "sym_language_prediction_loss", symbolic_language_prediction_loss) if learner.goal_prediction_loss is not None: goal_prediction_loss = float( learner.goal_prediction_loss.data[0]) tensorboard.log_scalar("goal_prediction_loss", goal_prediction_loss) if learner.mean_factor_entropy is not None: mean_factor_entropy = float( learner.mean_factor_entropy.data[0]) tensorboard.log_factor_entropy_loss( mean_factor_entropy) # Save the model local_model.save_model(experiment + "/contextual_bandit_" + str(rank) + "_epoch_" + str(epoch)) logging.info("Training data action counts %r", action_counts)
def do_train_(shared_model, config, action_space, meta_data_util, constants, train_dataset, tune_dataset, experiment, experiment_name, rank, server, logger, model_type, use_pushover=False): server.initialize_server() # Test policy test_policy = gp.get_argmax_action # torch.manual_seed(args.seed + rank) if rank == 0: # client 0 creates a tensorboard server tensorboard = Tensorboard(experiment_name) else: tensorboard = None if use_pushover: pushover_logger = PushoverLogger(experiment_name) else: pushover_logger = None # Create a local model for rollouts local_model = model_type(config, constants) # Create the Agent logger.log("STARTING AGENT") agent = Agent(server=server, model=local_model, test_policy=test_policy, action_space=action_space, meta_data_util=meta_data_util, config=config, constants=constants) logger.log("Created Agent...") action_counts = [0] * action_space.num_actions() max_epochs = constants["max_epochs"] dataset_size = len(train_dataset) tune_dataset_size = len(tune_dataset) # Create the learner to compute the loss learner = AsynchronousSupervisedLearning(shared_model, local_model, action_space, meta_data_util, config, constants, tensorboard) # Launch unity launch_k_unity_builds([config["port"]], "./simulators/NavDroneLinuxBuild.x86_64") for epoch in range(1, max_epochs + 1): learner.epoch = epoch for data_point_ix, data_point in enumerate(train_dataset): # Sync with the shared model # local_model.load_state_dict(shared_model.state_dict()) local_model.load_from_state_dict(shared_model.get_state_dict()) if (data_point_ix + 1) % 100 == 0: logger.log("Done %d out of %d" % (data_point_ix, dataset_size)) logger.log("Training data action counts %r" % action_counts) num_actions = 0 trajectory = data_point.get_trajectory() image, metadata = agent.server.reset_receive_feedback( data_point) pose = int(metadata["y_angle"] / 15.0) position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"]) state = AgentObservedState( instruction=data_point.instruction, config=config, constants=constants, start_image=image, previous_action=None, pose=pose, position_orientation=position_orientation, data_point=data_point) model_state = None batch_replay_items = [] total_reward = 0 for action in trajectory: # Sample action using the policy log_probabilities, model_state, image_emb_seq, volatile = \ local_model.get_probs(state, model_state) action_counts[action] += 1 # Generate goal if config["do_goal_prediction"]: goal = learner.goal_prediction_calculator.get_goal_location( metadata, data_point, 8, 8) # learner.goal_prediction_calculator.save_attention_prob(image, volatile) # time.sleep(5) else: goal = None # Send the action and get feedback image, reward, metadata = agent.server.send_action_receive_feedback( action) # Store it in the replay memory list replay_item = ReplayMemoryItem(state, action, reward, log_prob=log_probabilities, volatile=volatile, goal=goal) batch_replay_items.append(replay_item) # Update the agent state pose = int(metadata["y_angle"] / 15.0) position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"]) state = state.update( image, action, pose=pose, position_orientation=position_orientation, data_point=data_point) num_actions += 1 total_reward += reward # Sample action using the policy log_probabilities, model_state, image_emb_seq, volatile = \ local_model.get_probs(state, model_state) # Generate goal if config["do_goal_prediction"]: goal = learner.goal_prediction_calculator.get_goal_location( metadata, data_point, 8, 8) # 
learner.goal_prediction_calculator.save_attention_prob(image, volatile) # time.sleep(5) else: goal = None # Send final STOP action and get feedback image, reward, metadata = agent.server.halt_and_receive_feedback( ) total_reward += reward if tensorboard is not None: tensorboard.log_all_train_errors( metadata["edit_dist_error"], metadata["closest_dist_error"], metadata["stop_dist_error"]) # Store it in the replay memory list replay_item = ReplayMemoryItem( state, action_space.get_stop_action_index(), reward, log_prob=log_probabilities, volatile=volatile, goal=goal) batch_replay_items.append(replay_item) ###########################################3 AsynchronousSupervisedLearning.save_goal( batch_replay_items, data_point_ix, trajectory) ###########################################3 # Update the scores based on meta_data # self.meta_data_util.log_results(metadata) # Perform update if len(batch_replay_items) > 0: # 32: loss_val = learner.do_update(batch_replay_items) # self.action_prediction_loss_calculator.predict_action(batch_replay_items) # del batch_replay_items[:] # in place list clear if tensorboard is not None: cross_entropy = float(learner.cross_entropy.data[0]) tensorboard.log(cross_entropy, loss_val, 0) entropy = float( learner.entropy.data[0]) / float(num_actions + 1) tensorboard.log_scalar("entropy", entropy) tensorboard.log_scalar("total_reward", total_reward) ratio = float(learner.ratio.data[0]) tensorboard.log_scalar( "Abs_objective_to_entropy_ratio", ratio) if learner.action_prediction_loss is not None: action_prediction_loss = float( learner.action_prediction_loss.data[0]) learner.tensorboard.log_action_prediction_loss( action_prediction_loss) if learner.temporal_autoencoder_loss is not None: temporal_autoencoder_loss = float( learner.temporal_autoencoder_loss.data[0]) tensorboard.log_temporal_autoencoder_loss( temporal_autoencoder_loss) if learner.object_detection_loss is not None: object_detection_loss = float( learner.object_detection_loss.data[0]) tensorboard.log_object_detection_loss( object_detection_loss) if learner.symbolic_language_prediction_loss is not None: symbolic_language_prediction_loss = float( learner.symbolic_language_prediction_loss. data[0]) tensorboard.log_scalar( "sym_language_prediction_loss", symbolic_language_prediction_loss) if learner.goal_prediction_loss is not None: goal_prediction_loss = float( learner.goal_prediction_loss.data[0]) tensorboard.log_scalar("goal_prediction_loss", goal_prediction_loss) if learner.goal_prob is not None: goal_prob = float(learner.goal_prob.data[0]) tensorboard.log_scalar("goal_prob", goal_prob) if learner.mean_factor_entropy is not None: mean_factor_entropy = float( learner.mean_factor_entropy.data[0]) tensorboard.log_factor_entropy_loss( mean_factor_entropy) # Save the model local_model.save_model(experiment + "/supervised_learning_" + str(rank) + "_epoch_" + str(epoch)) logger.log("Training data action counts %r" % action_counts) if tune_dataset_size > 0: # Test on tuning data agent.test_goal_prediction(tune_dataset, tensorboard=tensorboard, logger=logger, pushover_logger=pushover_logger)
# Load settings
if args.conf_file:
    cfg_from_file(args.conf_file)
# For train and test, usually we do not need cache; unless overridden by amend
cfg.TEST.NO_CACHE = True
if args.set_cfgs:
    cfg_from_list(args.set_cfgs)

# Record logs into cfg
cfg.LOG.CMD = ' '.join(sys.argv)
cfg.LOG.TIME = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')

np.random.seed(int(cfg.RNG_SEED))

if cfg.TENSORBOARD.ENABLE:
    tb.client = Tensorboard(hostname=cfg.TENSORBOARD.HOSTNAME,
                            port=cfg.TENSORBOARD.PORT)
    tb.sess = tb.client.create_experiment(cfg.NAME + '_' + cfg.LOG.TIME)

if args.train == 'true' or args.train == 'True':
    # the training entrance

    # Get training imdb
    imdb = get_imdb(cfg.TRAIN.DB)
    roidb = get_training_roidb(imdb)

    # Redirect stderr
    output_dir = get_output_dir(imdb.name, cfg.NAME + '_' + cfg.LOG.TIME)
    f = open(osp.join(output_dir, 'stderr.log'), 'w', 0)
    os.dup2(f.fileno(), sys.stderr.fileno())
    os.dup2(sys.stderr.fileno(), sys.stderr.fileno())

    # Edit solver and train prototxts
    target_sw = osp.join(output_dir, 'solver.prototxt')
def train(self, experiment, env, env_name, num_processes, experiment_name, logger, use_pushover, debug, homing_policy_validation_fn, trial=1, do_reward_sensitive_learning=False): """ Execute HOMER algorithm on an environment using :param experiment: :param env: :param env_name: :param num_processes: :param experiment_name: :param logger: :param use_pushover: True/False based on whether pushover is used :param debug: :param homing_policy_validation_fn: :param trial: :param do_reward_sensitive_learning: :return: """ horizon = self.config["horizon"] actions = self.config["actions"] num_samples = self.constants["encoder_training_num_samples"] tensorboard = Tensorboard(log_dir=self.config["save_path"]) homing_policies = dict( ) # Contains a set of homing policies for every time step encoding_function = None # Learned encoding function for the current time step dataset = [] # Dataset of samples collected for training the encoder replay_memory = dict( ) # Replay memory of *all* deviation transitions indexed by time step for step in range(1, horizon + 1): logger.log("Running Homer: Step %r out of %r " % (step, horizon)) homing_policies[step] = [] # Homing policies for this time step replay_memory[step] = [] # Replay memory for this time step # Step 1: Create dataset for learning the encoding function. A single datapoint consists of a transition # (x, a, x') and a 0-1 label y. If y=1 then transition was observed and y=0 otherwise. time_collection_start = time.time() dataset = self.encoder_sampler.gather_samples( env, actions, step, homing_policies, num_samples, dataset) replay_memory[step] = [ dp for dp in dataset if dp.is_valid() == 1 and dp.get_timestep() == step ] logger.log("Encoder: %r samples collected in %r sec" % (num_samples, time.time() - time_collection_start)) # Step 2: Perform binary classification on the dataset. The classifier f(x, a, x') is trained to predict # the probability that a transition (x, a, x') was observed. There are two type of classifiers that we # support. The first classifier has an internal bottleneck feature that allows for recovering state # abstraction function while other performs clustering on top of a train model without discretization. time_encoder_start = time.time() encoding_function, num_state_budget = self.train_encoding_function.do_train( dataset, logger, tensorboard, debug, bootstrap_model=encoding_function, undiscretized_initialization=True, category="backward") self.util.save_encoder_model(encoding_function, experiment, trial, step, "backward") logger.log("Encoder: Training time %r" % (time.time() - time_encoder_start)) # Step 3: Find which abstract states should be explored. This is basically done based on which # abstract states have a non-zero count. Example, one can specify a really high budget for abstract # states but most of them are never used. This is not a problem when using the clustering oracle. count_stats, observation_samples = self.util.get_abstract_state_counts( encoding_function, dataset) abstract_states_to_explore = self.find_abstract_states_to_explore( count_stats, num_state_budget, step) logger.log("Abstract State by Counts: %r" % count_stats) logger.debug("Abstract States to explore %r" % abstract_states_to_explore) # Step 4: Learn homing policies by planning to reach different abstract states if num_processes == 1: # Single process needed. Run it on the current process. 
self.single_process_ps(env, actions, step, replay_memory, homing_policies, abstract_states_to_explore, tensorboard, encoding_function, logger, use_pushover) else: self.multi_processing_ps(experiment, env, env_name, actions, step, replay_memory, homing_policies, abstract_states_to_explore, num_processes, encoding_function, logger, use_pushover, trial) logger.log("Homer step %r took time %r" % (step, time.time() - time_collection_start)) # Step 5 (Optional): Automatic evaluation of homing policies if possible. A validation function can # check if homing policy has good coverage over the underline state. if homing_policy_validation_fn is not None: state_dist, _ = self.util.evaluate_homing_policy( env, homing_policies, step, logger) if not homing_policy_validation_fn(state_dist, step): logger.log( "Didn't find a useful policy cover for step %r" % step) return policy_evaluate.generate_failure_result( env, env.num_eps) else: logger.log("Found useful policy cover for step %r " % step) # Step 6 (Optional): Performing debugging based on learned state abstraction and # policy cover for this time step. if debug: # Log the environment reward received by the policy self.util.log_homing_policy_reward(env, homing_policies, step, logger) if self.config["feature_type"] == "image": # For environments generating image, it is often not possible to get access to the underline state # therefore we save images for debugging. self.util.save_homing_policy_figures( env, env_name, homing_policies, step) # Save the abstract state and an image if observation_samples is not None: self.util.save_abstract_state_figures( env_name, observation_samples, step) # Save newly explored states self.util.save_newly_explored_states( env_name, dataset, step) if not do_reward_sensitive_learning: return dict() else: logger.log( "Reward Sensitive Learning: Computing the optimal policy for the environment reward function" ) # Compute the optimal policy reward_planning_start_time = time.time() approx_optimal_policy, _, info = self.reward_sensitive_planner.train( replay_memory=replay_memory, env=env, actions=actions, horizon=horizon, reward_func=None, homing_policies=homing_policies, logger=logger, tensorboard=tensorboard, debug=True, use_pushover=use_pushover) logger.log("Reward Sensitive Learning: Time %r" % (time.time() - reward_planning_start_time)) logger.log( "Actual: Total number of episodes used %d. Total return %f." % (env.num_eps, env.sum_total_reward)) # Evaluate the optimal policy return policy_evaluate.evaluate(env, approx_optimal_policy, horizon, logger, env.num_eps, env.sum_total_reward)
def do_train_(shared_model, config, action_space, meta_data_util, constants, train_dataset, tune_dataset, experiment, experiment_name, rank, server, logger, model_type, use_pushover=False): server.initialize_server() # Test policy test_policy = gp.get_argmax_action # torch.manual_seed(args.seed + rank) if rank == 0: # client 0 creates a tensorboard server tensorboard = Tensorboard(experiment_name) else: tensorboard = None if use_pushover: pushover_logger = PushoverLogger(experiment_name) else: pushover_logger = None # Create a local model for rollouts local_model = model_type(config, constants) # local_model.train() # Create the Agent logger.log("STARTING AGENT") agent = Agent(server=server, model=local_model, test_policy=test_policy, action_space=action_space, meta_data_util=meta_data_util, config=config, constants=constants) logger.log("Created Agent...") action_counts = [0] * action_space.num_actions() max_epochs = constants["max_epochs"] dataset_size = len(train_dataset) tune_dataset_size = len(tune_dataset) # Create the learner to compute the loss learner = AsynchronousAdvantageActorGAECritic(shared_model, local_model, action_space, meta_data_util, config, constants, tensorboard) # Launch unity launch_k_unity_builds([config["port"]], "./simulators/NavDroneLinuxBuild.x86_64") for epoch in range(1, max_epochs + 1): learner.epoch = epoch task_completion_accuracy = 0 mean_stop_dist_error = 0 stop_dist_errors = [] for data_point_ix, data_point in enumerate(train_dataset): # Sync with the shared model # local_model.load_state_dict(shared_model.state_dict()) local_model.load_from_state_dict(shared_model.get_state_dict()) if (data_point_ix + 1) % 100 == 0: logger.log("Done %d out of %d" % (data_point_ix, dataset_size)) logger.log("Training data action counts %r" % action_counts) num_actions = 0 max_num_actions = constants["horizon"] + constants[ "max_extra_horizon"] image, metadata = agent.server.reset_receive_feedback( data_point) pose = int(metadata["y_angle"] / 15.0) position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"]) state = AgentObservedState( instruction=data_point.instruction, config=config, constants=constants, start_image=image, previous_action=None, pose=pose, position_orientation=position_orientation, data_point=data_point) state.goal = GoalPrediction.get_goal_location( metadata, data_point, learner.image_height, learner.image_width) model_state = None batch_replay_items = [] total_reward = 0 forced_stop = True while num_actions < max_num_actions: # Sample action using the policy log_probabilities, model_state, image_emb_seq, volatile = \ local_model.get_probs(state, model_state) probabilities = list(torch.exp(log_probabilities.data))[0] # Sample action from the probability action = gp.sample_action_from_prob(probabilities) action_counts[action] += 1 # Generate goal if config["do_goal_prediction"]: goal = learner.goal_prediction_calculator.get_goal_location( metadata, data_point, learner.image_height, learner.image_width) else: goal = None if action == action_space.get_stop_action_index(): forced_stop = False break # Send the action and get feedback image, reward, metadata = agent.server.send_action_receive_feedback( action) # Store it in the replay memory list replay_item = ReplayMemoryItem(state, action, reward, log_prob=log_probabilities, volatile=volatile, goal=goal) batch_replay_items.append(replay_item) # Update the agent state pose = int(metadata["y_angle"] / 15.0) position_orientation = (metadata["x_pos"], metadata["z_pos"], 
metadata["y_angle"]) state = state.update( image, action, pose=pose, position_orientation=position_orientation, data_point=data_point) state.goal = GoalPrediction.get_goal_location( metadata, data_point, learner.image_height, learner.image_width) num_actions += 1 total_reward += reward # Send final STOP action and get feedback image, reward, metadata = agent.server.halt_and_receive_feedback( ) total_reward += reward if metadata["stop_dist_error"] < 5.0: task_completion_accuracy += 1 mean_stop_dist_error += metadata["stop_dist_error"] stop_dist_errors.append(metadata["stop_dist_error"]) if tensorboard is not None: tensorboard.log_all_train_errors( metadata["edit_dist_error"], metadata["closest_dist_error"], metadata["stop_dist_error"]) # Store it in the replay memory list if not forced_stop: replay_item = ReplayMemoryItem( state, action_space.get_stop_action_index(), reward, log_prob=log_probabilities, volatile=volatile, goal=goal) batch_replay_items.append(replay_item) # Update the scores based on meta_data # self.meta_data_util.log_results(metadata) # Perform update if len(batch_replay_items) > 0: # 32: loss_val = learner.do_update(batch_replay_items) # self.action_prediction_loss_calculator.predict_action(batch_replay_items) # del batch_replay_items[:] # in place list clear if tensorboard is not None: cross_entropy = float(learner.cross_entropy.data[0]) tensorboard.log(cross_entropy, loss_val, 0) entropy = float( learner.entropy.data[0]) / float(num_actions + 1) v_value_loss_per_step = float( learner.value_loss.data[0]) / float(num_actions + 1) tensorboard.log_scalar("entropy", entropy) tensorboard.log_scalar("total_reward", total_reward) tensorboard.log_scalar("v_value_loss_per_step", v_value_loss_per_step) ratio = float(learner.ratio.data[0]) tensorboard.log_scalar( "Abs_objective_to_entropy_ratio", ratio) if learner.action_prediction_loss is not None: action_prediction_loss = float( learner.action_prediction_loss.data[0]) learner.tensorboard.log_action_prediction_loss( action_prediction_loss) if learner.temporal_autoencoder_loss is not None: temporal_autoencoder_loss = float( learner.temporal_autoencoder_loss.data[0]) tensorboard.log_temporal_autoencoder_loss( temporal_autoencoder_loss) if learner.object_detection_loss is not None: object_detection_loss = float( learner.object_detection_loss.data[0]) tensorboard.log_object_detection_loss( object_detection_loss) if learner.symbolic_language_prediction_loss is not None: symbolic_language_prediction_loss = float( learner.symbolic_language_prediction_loss. 
data[0]) tensorboard.log_scalar( "sym_language_prediction_loss", symbolic_language_prediction_loss) if learner.goal_prediction_loss is not None: goal_prediction_loss = float( learner.goal_prediction_loss.data[0]) tensorboard.log_scalar("goal_prediction_loss", goal_prediction_loss) # Save the model local_model.save_model(experiment + "/contextual_bandit_" + str(rank) + "_epoch_" + str(epoch)) logger.log("Training data action counts %r" % action_counts) mean_stop_dist_error = mean_stop_dist_error / float( len(train_dataset)) task_completion_accuracy = (task_completion_accuracy * 100.0) / float(len(train_dataset)) logger.log("Training: Mean stop distance error %r" % mean_stop_dist_error) logger.log("Training: Task completion accuracy %r " % task_completion_accuracy) bins = range(0, 80, 3) # range of distance histogram, _ = np.histogram(stop_dist_errors, bins) logger.log("Histogram of train errors %r " % histogram) if tune_dataset_size > 0: # Test on tuning data agent.test(tune_dataset, tensorboard=tensorboard, logger=logger, pushover_logger=pushover_logger)
def main():
    tensorboard_directory = './tmp/tensorboard/001'
    tensorboard_paths = [
        r'C:\Users\parth\Documents\GitHub\Kaggle-Santander-Value-Prediction-Challenge\tmp\tensorboard\001'
    ]
    tensorboard_names = ['rmse']

    # Model Parameters
    # --------------------------------------------------------------------------
    use_dropout = False
    use_batch_norm = False

    # Dropout inputs
    # use  : to use dropout in this layer
    # rate : dropout rate
    dropout_parameters = [{'use': True, 'rate': 0.5},
                          {'use': True, 'rate': 0.5},
                          {'use': True, 'rate': 0.5},
                          {'use': True, 'rate': 0.5}]

    # Fully Connected Layers unit size
    fc_parameters = [{'units': 5000}, {'units': 5000},
                     {'units': 5000}, {'units': 5000}]
    num_dense = len(fc_parameters)

    data_shape = [None, 4990]
    batch_size = 500
    val_size = 5000
    epochs = 100000
    learning_rate = 0.001

    session = tf.Session()

    Tensorboard.make(paths=tensorboard_paths, names=tensorboard_names,
                     host='127.0.0.1', port='6006', output=True, start=False)

    dropout_parameters = []
    model = Model(sess=session,
                  data_shape=data_shape,
                  num_classes=1,
                  num_dense=2,
                  learning_rate=learning_rate,
                  use_batch_norm=use_batch_norm,
                  use_dropout=use_dropout,
                  dropout_parameters=dropout_parameters,
                  fc_parameters=fc_parameters,
                  tensorboard_directory=tensorboard_directory)

    train_data, train_labels = get_data()
    train_data, val_data, train_labels, val_labels = train_test_split(
        train_data, train_labels, test_size=0.30)

    print('> Training Data: {} {}'.format(train_data.shape, train_labels.shape))
    print('> Val Data: {} {}'.format(val_data.shape, val_labels.shape))
    # print('> Test Data: {} {}'.format(test_data.shape, test_labels.shape))

    model.train_data(data=train_data, labels=train_labels)
    model.val_data(data=val_data, labels=val_labels)
    model.train(batch_size=batch_size, epochs=epochs)
def handler(context):
    class_labels = 10
    dataset_alias = context.datasets
    train_dataset_id = dataset_alias['train']
    test_dataset_id = dataset_alias['test']
    train_data = list(load_dataset_from_api(train_dataset_id))
    test_data = list(load_dataset_from_api(test_dataset_id))

    train = ImageDatasetFromAPI(train_data, train=True)
    test = ImageDatasetFromAPI(test_data)

    net = utils.VGG.VGG(class_labels)
    model = L.Classifier(net)

    if USE_GPU >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(test, batchsize,
                                                 repeat=False, shuffle=False)

    stop_trigger = (epochs, 'epoch')
    # Early stopping option
    if early_stopping:
        stop_trigger = triggers.EarlyStoppingTrigger(monitor=early_stopping,
                                                     verbose=True,
                                                     max_trigger=(epochs, 'epoch'))

    # Set up a trainer
    updater = training.updaters.StandardUpdater(train_iter, optimizer, device=USE_GPU)
    trainer = training.Trainer(updater, stop_trigger, out=ABEJA_TRAINING_RESULT_DIR)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=USE_GPU))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5), trigger=(25, 'epoch'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot_object(net, 'net.model'),
                   trigger=(epochs, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    report_entries = [
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy'
    ]

    trainer.extend(Statistics(report_entries, epochs), trigger=(1, 'epoch'))
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(report_entries))

    trainer.run()
def main():
    experiment_name = "blocks_experiments"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/blocks/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = BlocksSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config)
    meta_data_util = MetaDataUtil()

    # Create vocabulary
    vocab = dict()
    vocab_list = open("./Assets/vocab_both").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        vocab[token] = i
    vocab["$UNK$"] = len(vocab_list)
    config["vocab_size"] = len(vocab_list) + 1

    # Test policy
    test_policy = gp.get_argmax_action

    # Create tensorboard
    tensorboard = Tensorboard("Agent Test")

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = IncrementalModelEmnlp
        shared_model = model_type(config, constants)
        shared_model.load_saved_model("./results/model-folder-name/model-file-name")

        # Read the dataset
        test_data = DatasetParser.parse("devset.json", config)
        master_logger.log("Created test dataset of size %d " % len(test_data))

        # Create server and launch a client
        simulator_file = "./simulators/blocks/retro_linux_build.x86_64"
        config["port"] = find_k_ports(1)[0]
        server = BlocksServer(config, action_space, vocab=vocab)

        # Launch unity
        launch_k_unity_builds([config["port"]], simulator_file)
        server.initialize_server()

        # Create the agent
        master_logger.log("CREATING AGENT")
        agent = Agent(server=server,
                      model=shared_model,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)

        agent.test(test_data, tensorboard)

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
def handler(context):
    dataset_alias = context.datasets
    data = list(load_dataset_from_api(dataset_alias['train']))

    np.random.seed(0)
    data = np.random.permutation(data)
    nb_data = len(data)
    nb_train = int(7 * nb_data // 10)
    train_data_raw = data[:nb_train]
    test_data_raw = data[nb_train:]

    premodel = SSD300(n_fg_class=20, pretrained_model='voc0712')
    model = SSD300(n_fg_class=1)

    copy_ssd(model, premodel)

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if USE_GPU >= 0:
        chainer.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    fix_ssd(train_chain)

    train_data = DetectionDatasetFromAPI(train_data_raw)
    test_data = DetectionDatasetFromAPI(test_data_raw,
                                        use_difficult=True, return_difficult=True)

    train_data = TransformDataset(
        train_data, Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train_data, BATCHSIZE)
    test_iter = chainer.iterators.SerialIterator(test_data, BATCHSIZE,
                                                 repeat=False, shuffle=False)

    updater = training.updaters.StandardUpdater(train_iter, optimizer, device=USE_GPU)
    trainer = training.Trainer(updater, (nb_epochs, 'epoch'),
                               out=ABEJA_TRAINING_RESULT_DIR)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([1200, 1600], 'epoch'))

    trainer.extend(DetectionVOCEvaluator(test_iter, model, use_07_metric=True,
                                         label_names=['cup']),
                   trigger=(1, 'epoch'))

    log_interval = 1, 'epoch'
    trainer.extend(extensions.LogReport(trigger=log_interval))

    print_entries = [
        'epoch', 'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]

    report_entries = [
        'epoch', 'lr', 'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]

    trainer.extend(Statistics(report_entries, nb_epochs), trigger=log_interval)
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(print_entries), trigger=log_interval)

    trainer.extend(extensions.snapshot_object(model, 'model_epoch_{.updater.epoch}'),
                   trigger=(nb_epochs, 'epoch'))

    trainer.run()
logging.log(logging.DEBUG, "CREATING MODEL") model = PolicyNetwork(128, 4) logging.log(logging.DEBUG, "MODEL CREATED") # Create the agent logging.log(logging.DEBUG, "STARTING AGENT") agent = Agent(server=server, model=model, test_policy=test_policy, action_space=action_space, meta_data_util=meta_data_util, config=config, constants=constants) # create tensorboard tensorboard = Tensorboard() # Read the dataset train_dataset = DatasetParser.parse( "data/nav_drone/train_annotations.json", config) # train_dataset = train_dataset[0:10] logging.info("Created train dataset of size %d ", len(train_dataset)) test_dataset = DatasetParser.parse("data/nav_drone/test_annotations.json", config) tune_dataset = test_dataset[0:int(0.05 * len(test_dataset))] # tune_dataset = test_dataset[0:10] logging.info("Created tuning dataset of size %d ", len(tune_dataset)) # Train on this dataset learning_alg = ContextualBandit(model=model, action_space=action_space,
"./results/oracle_gold_prob_cb_6000/contextual_bandit_5_epoch_17") logging.log(logging.DEBUG, "MODEL CREATED") # Create the agent logging.log(logging.DEBUG, "STARTING AGENT") agent = Agent(server=server, model=model, test_policy=test_policy, action_space=action_space, meta_data_util=meta_data_util, config=config, constants=constants) # create tensorboard tensorboard = Tensorboard("dummy") # Launch Unity Build launch_k_unity_builds([config["port"]], "./simulators/NavDroneLinuxBuild.x86_64") test_data = DatasetParser.parse("data/nav_drone/dev_annotations_6000.json", config) agent.test(test_data, tensorboard) server.kill() except Exception: server.kill() exc_info = sys.exc_info() traceback.print_exception(*exc_info)
def handler(context):
    dataset_alias = context.datasets
    trainval_2007_dataset_id = dataset_alias['trainval2007']
    trainval_2012_dataset_id = dataset_alias['trainval2012']
    test_2007_dataset_id = dataset_alias['test2007']

    trainval_2007_dataset = list(load_dataset_from_api(trainval_2007_dataset_id))
    trainval_2012_dataset = list(load_dataset_from_api(trainval_2012_dataset_id))
    test_2007_dataset = list(load_dataset_from_api(test_2007_dataset_id))

    if network_model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif network_model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if USE_GPU >= 0:
        chainer.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()

    trainval_2007 = DetectionDatasetFromAPI(trainval_2007_dataset)
    trainval_2012 = DetectionDatasetFromAPI(trainval_2012_dataset)
    test_2007 = DetectionDatasetFromAPI(test_2007_dataset,
                                        use_difficult=True, return_difficult=True)

    train = TransformDataset(ConcatenatedDataset(trainval_2007, trainval_2012),
                             Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE)

    test_iter = chainer.iterators.SerialIterator(test_2007, BATCHSIZE,
                                                 repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter, optimizer, device=USE_GPU)
    trainer = training.Trainer(updater, (nb_iterations, 'iteration'),
                               out=ABEJA_TRAINING_RESULT_DIR)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000], 'iteration'))

    trainer.extend(DetectionVOCEvaluator(test_iter, model, use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=(10000, 'iteration'))

    log_interval = 100, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)

    print_entries = [
        'iteration', 'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]

    report_entries = [
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]

    trainer.extend(Statistics(report_entries, nb_iterations, obs_key='iteration'),
                   trigger=log_interval)
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(print_entries), trigger=log_interval)

    trainer.extend(extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
                   trigger=(nb_iterations, 'iteration'))

    trainer.run()
def do_train_(shared_model, config, action_space, meta_data_util, constants, train_dataset, tune_dataset, experiment, experiment_name, rank, server, logger, model_type, vocab, use_pushover=False): print("In training...") launch_k_unity_builds([config["port"]], "./simulators/house_3_elmer.x86_64") server.initialize_server() print("launched builds") # Test policy test_policy = gp.get_argmax_action # torch.manual_seed(args.seed + rank) if rank == 0: # client 0 creates a tensorboard server tensorboard = Tensorboard(experiment_name) else: tensorboard = None if use_pushover: # pushover_logger = PushoverLogger(experiment_name) pushover_logger = None else: pushover_logger = None # Create a local model for rollouts local_model = model_type(config, constants) # local_model.train() # Create the Agent logger.log("STARTING AGENT") tmp_agent = TmpHouseAgent(server=server, model=local_model, test_policy=test_policy, action_space=action_space, meta_data_util=meta_data_util, config=config, constants=constants) logger.log("Created Agent...") action_counts = [0] * action_space.num_actions() max_epochs = constants["max_epochs"] dataset_size = len(train_dataset) tune_dataset_size = len(tune_dataset) # Create the learner to compute the loss learner = TmpSupervisedLearning(shared_model, local_model, action_space, meta_data_util, config, constants, tensorboard) # TODO change 2 --- unity launch moved up for epoch in range(1, max_epochs + 1): for data_point_ix, data_point in enumerate(train_dataset): # Sync with the shared model # local_model.load_state_dict(shared_model.state_dict()) local_model.load_from_state_dict(shared_model.get_state_dict()) if (data_point_ix + 1) % 100 == 0: logger.log("Done %d out of %d" % (data_point_ix, dataset_size)) logger.log("Training data action counts %r" % action_counts) image, metadata = tmp_agent.server.reset_receive_feedback( data_point) # instruction = TmpSupervisedLearning.convert_text_to_indices(metadata["instruction"], vocab) instruction = data_point.get_instruction() # Pose and Orientation gone TODO change 3 state = AgentObservedState(instruction=instruction, config=config, constants=constants, start_image=image, previous_action=None, data_point=data_point) model_state = None batch_replay_items = [] total_reward = 0 # trajectory = metadata["trajectory"] trajectory = data_point.get_trajectory()[0:300] for action in trajectory: # Sample action using the policy log_probabilities, model_state, image_emb_seq, state_feature = \ local_model.get_probs(state, model_state) # Sample action from the probability action_counts[action] += 1 # Send the action and get feedback image, reward, metadata = tmp_agent.server.send_action_receive_feedback( action) # Store it in the replay memory list replay_item = ReplayMemoryItem(state, action, reward, log_prob=log_probabilities) batch_replay_items.append(replay_item) # Update the agent state # Pose and orientation gone, TODO change 4 state = state.update(image, action, data_point=data_point) total_reward += reward # Send final STOP action and get feedback # Sample action using the policy log_probabilities, model_state, image_emb_seq, state_feature = \ local_model.get_probs(state, model_state) image, reward, metadata = tmp_agent.server.halt_and_receive_feedback( ) total_reward += reward # if tensorboard is not None: # tensorboard.log_all_train_errors( # metadata["edit_dist_error"], metadata["closest_dist_error"], metadata["stop_dist_error"]) # Store it in the replay memory list replay_item = ReplayMemoryItem( state, 
action_space.get_stop_action_index(), reward, log_prob=log_probabilities) batch_replay_items.append(replay_item) # Update the scores based on meta_data # self.meta_data_util.log_results(metadata) # Perform update if len(batch_replay_items) > 0: # 32 loss_val = learner.do_update(batch_replay_items) # self.action_prediction_loss_calculator.predict_action(batch_replay_items) # del batch_replay_items[:] # in place list clear if tensorboard is not None: # cross_entropy = float(learner.cross_entropy.data[0]) # tensorboard.log(cross_entropy, loss_val, 0) num_actions = len(trajectory) + 1 tensorboard.log_scalar( "loss_val", loss_val) # /float(num_actions)) entropy = float( learner.entropy.data[0]) # /float(num_actions) tensorboard.log_scalar("entropy", entropy) ratio = float(learner.ratio.data[0]) tensorboard.log_scalar( "Abs_objective_to_entropy_ratio", ratio) if learner.action_prediction_loss is not None: action_prediction_loss = float( learner.action_prediction_loss.data[0]) learner.tensorboard.log_action_prediction_loss( action_prediction_loss) if learner.temporal_autoencoder_loss is not None: temporal_autoencoder_loss = float( learner.temporal_autoencoder_loss.data[0]) tensorboard.log_temporal_autoencoder_loss( temporal_autoencoder_loss) if learner.object_detection_loss is not None: object_detection_loss = float( learner.object_detection_loss.data[0]) tensorboard.log_object_detection_loss( object_detection_loss) if learner.symbolic_language_prediction_loss is not None: symbolic_language_prediction_loss = float( learner.symbolic_language_prediction_loss. data[0]) tensorboard.log_scalar( "sym_language_prediction_loss", symbolic_language_prediction_loss) if learner.goal_prediction_loss is not None: goal_prediction_loss = float( learner.goal_prediction_loss.data[0]) tensorboard.log_scalar("goal_prediction_loss", goal_prediction_loss) if learner.mean_factor_entropy is not None: mean_factor_entropy = float( learner.mean_factor_entropy.data[0]) tensorboard.log_factor_entropy_loss( mean_factor_entropy) # Save the model local_model.save_model(experiment + "/contextual_bandit_" + str(rank) + "_epoch_" + str(epoch)) logger.log("Training data action counts %r" % action_counts) if tune_dataset_size > 0: # Test on tuning data print("Going for testing") tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard, logger=logger, pushover_logger=pushover_logger) print("Done testing")
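# The rollout loops above and below buffer transitions in ReplayMemoryItem objects
# before calling learner.do_update. A minimal stand-in is sketched here as an
# assumption; the project's class likely carries extra fields consumed by the
# auxiliary losses (action prediction, temporal autoencoding, etc.).
class ReplayMemoryItemSketch(object):

    def __init__(self, agent_observed_state, action, reward, log_prob=None):
        self.agent_observed_state = agent_observed_state
        self.action = action
        self.reward = reward
        self.log_prob = log_prob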
model = ModelSymbolicTextPrediction(config, constants)
# model.load_saved_model("./results/train_symbolic_text_prediction_1clock/ml_learning_symbolic_text_prediction_epoch_3")
logging.log(logging.DEBUG, "MODEL CREATED")

# Create the agent
logging.log(logging.DEBUG, "STARTING AGENT")
agent = Agent(server=server,
              model=model,
              test_policy=test_policy,
              action_space=action_space,
              meta_data_util=meta_data_util,
              config=config,
              constants=constants)

# Create tensorboard
tensorboard = Tensorboard("synthetic_easy_text_prediction")

# Read the dataset
all_train_data = DatasetParser.parse("data/nav_drone/train_annotations_4000.json", config)
num_train = (len(all_train_data) * 19) // 20
while (all_train_data[num_train].get_scene_name().split("_")[1] ==
       all_train_data[num_train - 1].get_scene_name().split("_")[1]):
    num_train += 1
train_split = all_train_data[:num_train]
tune_split = all_train_data[num_train:]
logging.info("Created train dataset of size %d ", len(train_split))
logging.info("Created tuning dataset of size %d ", len(tune_split))
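# Toy illustration (made-up scene names) of the scene-aware split above: the 95%
# cut is pushed forward until it lands on a scene boundary, so no scene is shared
# between the train and tune splits.
scenes = ["house_1"] * 30 + ["house_2"] * 9 + ["house_3"]
num_train = (len(scenes) * 19) // 20                # 38, falls inside house_2
while scenes[num_train].split("_")[1] == scenes[num_train - 1].split("_")[1]:
    num_train += 1                                  # stops at 39, the house_3 boundary
print(len(scenes[:num_train]), scenes[num_train:])  # 39 ['house_3']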
def do_train_(house_id, shared_model, config, action_space, meta_data_util, constants, train_dataset, tune_dataset, experiment, experiment_name, rank, server, logger, model_type, vocab, use_pushover=False): logger.log("In Training...") launch_k_unity_builds([config["port"]], "./house_" + str(house_id) + "_elmer.x86_64", arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json", cwd="./simulators/house/") logger.log("Launched Builds.") server.initialize_server() logger.log("Server Initialized.") # Test policy test_policy = gp.get_argmax_action if rank == 0: # client 0 creates a tensorboard server tensorboard = Tensorboard(experiment_name) logger.log('Created Tensorboard Server.') else: tensorboard = None if use_pushover: pushover_logger = None else: pushover_logger = None # Create a local model for rollouts local_model = model_type(config, constants) # local_model.train() # Create the Agent tmp_agent = TmpHouseAgent(server=server, model=local_model, test_policy=test_policy, action_space=action_space, meta_data_util=meta_data_util, config=config, constants=constants) logger.log("Created Agent.") action_counts = [0] * action_space.num_actions() max_epochs = 100000 # constants["max_epochs"] dataset_size = len(train_dataset) tune_dataset_size = len(tune_dataset) if tune_dataset_size > 0: # Test on tuning data tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard, logger=logger, pushover_logger=pushover_logger) # Create the learner to compute the loss learner = TmpAsynchronousContextualBandit(shared_model, local_model, action_space, meta_data_util, config, constants, tensorboard) # TODO change 2 --- unity launch moved up learner.logger = logger for epoch in range(1, max_epochs + 1): for data_point_ix, data_point in enumerate(train_dataset): # Sync with the shared model # local_model.load_state_dict(shared_model.state_dict()) local_model.load_from_state_dict(shared_model.get_state_dict()) if (data_point_ix + 1) % 100 == 0: logger.log("Done %d out of %d" %(data_point_ix, dataset_size)) logger.log("Training data action counts %r" % action_counts) num_actions = 0 max_num_actions = constants["horizon"] max_num_actions += constants["max_extra_horizon"] image, metadata = tmp_agent.server.reset_receive_feedback(data_point) instruction = data_point.get_instruction() # instruction_str = TmpAsynchronousContextualBandit.convert_indices_to_text(instruction, vocab) # print("Instruction str is ", instruction_str) # Pose and Orientation gone TODO change 3 state = AgentObservedState(instruction=instruction, config=config, constants=constants, start_image=image, previous_action=None, data_point=data_point) state.goal = learner.get_goal(metadata) model_state = None batch_replay_items = [] total_reward = 0 forced_stop = True while num_actions < max_num_actions: # logger.log("Training: Meta Data %r " % metadata) # Sample action using the policy log_probabilities, model_state, image_emb_seq, state_feature = \ local_model.get_probs(state, model_state) probabilities = list(torch.exp(log_probabilities.data))[0] # Sample action from the probability action = gp.sample_action_from_prob(probabilities) action_counts[action] += 1 if action == action_space.get_stop_action_index(): forced_stop = False break # Send the action and get feedback image, reward, metadata = tmp_agent.server.send_action_receive_feedback(action) # logger.log("Action is %r, Reward is %r Probability is %r " % (action, reward, probabilities)) # Store it in the replay memory list replay_item = ReplayMemoryItem(state, action, reward, 
log_prob=log_probabilities) batch_replay_items.append(replay_item) # Update the agent state # Pose and orientation gone, TODO change 4 state = state.update(image, action, data_point=data_point) state.goal = learner.get_goal(metadata) num_actions += 1 total_reward += reward # Send final STOP action and get feedback image, reward, metadata = tmp_agent.server.halt_and_receive_feedback() total_reward += reward # Store it in the replay memory list if not forced_stop: # logger.log("Action is Stop, Reward is %r Probability is %r " % (reward, probabilities)) replay_item = ReplayMemoryItem(state, action_space.get_stop_action_index(), reward, log_prob=log_probabilities) batch_replay_items.append(replay_item) # Update the scores based on meta_data # self.meta_data_util.log_results(metadata) # Perform update if len(batch_replay_items) > 0: # 32 loss_val = learner.do_update(batch_replay_items) if tensorboard is not None: # cross_entropy = float(learner.cross_entropy.data[0]) # tensorboard.log(cross_entropy, loss_val, 0) tensorboard.log_scalar("loss", loss_val) entropy = float(learner.entropy.data[0])/float(num_actions + 1) tensorboard.log_scalar("entropy", entropy) ratio = float(learner.ratio.data[0]) tensorboard.log_scalar("Abs_objective_to_entropy_ratio", ratio) tensorboard.log_scalar("total_reward", total_reward) tensorboard.log_scalar("mean navigation error", metadata['mean-navigation-error']) if learner.action_prediction_loss is not None: action_prediction_loss = float(learner.action_prediction_loss.data[0]) learner.tensorboard.log_action_prediction_loss(action_prediction_loss) if learner.temporal_autoencoder_loss is not None: temporal_autoencoder_loss = float(learner.temporal_autoencoder_loss.data[0]) tensorboard.log_temporal_autoencoder_loss(temporal_autoencoder_loss) if learner.object_detection_loss is not None: object_detection_loss = float(learner.object_detection_loss.data[0]) tensorboard.log_object_detection_loss(object_detection_loss) if learner.symbolic_language_prediction_loss is not None: symbolic_language_prediction_loss = float(learner.symbolic_language_prediction_loss.data[0]) tensorboard.log_scalar("sym_language_prediction_loss", symbolic_language_prediction_loss) if learner.goal_prediction_loss is not None: goal_prediction_loss = float(learner.goal_prediction_loss.data[0]) tensorboard.log_scalar("goal_prediction_loss", goal_prediction_loss) # Save the model local_model.save_model(experiment + "/contextual_bandit_" + str(rank) + "_epoch_" + str(epoch)) logger.log("Training data action counts %r" % action_counts) if tune_dataset_size > 0: # Test on tuning data tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard, logger=logger, pushover_logger=pushover_logger)
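# The sampling step above delegates to gp.sample_action_from_prob. A minimal
# stand-in (an assumption, not the project's implementation) that draws an action
# index from the per-action probabilities:
import numpy as np


def sample_action_from_prob_sketch(probabilities):
    probabilities = np.asarray(probabilities, dtype=np.float64)
    probabilities = probabilities / probabilities.sum()  # guard against rounding drift
    return int(np.random.choice(len(probabilities), p=probabilities))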
def learn_model(self, env, load_folder, experiment_name, experiment, logger, use_pushover, trial=1): horizon = self.config["horizon"] actions = self.config["actions"] num_samples = self.constants["encoder_training_num_samples"] num_state_budget = self.constants["num_homing_policy"] tensorboard = Tensorboard(log_dir=self.config["save_path"]) homing_policies = dict( ) # Contains a set of homing policies for every time step # Load homing policy from folder logger.log("Loading Homing policies...") for step in range(1, horizon + 1): homing_policies[step] = [] for i in range(0, num_state_budget): # TODO can fail if the policy doesn't exist. Add checks to prevent that. policy_folder_name = load_folder + "/trial_%d_horizon_%d_homing_policy_%d/" % ( trial, step, i) if not os.path.exists(policy_folder_name): logger.log("Did not find %s" % policy_folder_name) continue previous_step_homing_policy = None if step == 1 else homing_policies[ step - 1] policy = self.reward_free_planner.read_policy( policy_folder_name, step, previous_step_homing_policy) homing_policies[step].append(policy) logger.log("Loaded Homing policy.") # Load the encoder models backward_models = dict() backward_models[1] = None for step in range(1, horizon + 1): backward_model = EncoderModelWrapper.get_encoder_model( self.constants["model_type"], self.config, self.constants) backward_model.load( load_folder + "/trial_%d_encoder_model/" % trial, "encoder_model_%d" % step) backward_models[step + 1] = backward_model encoding_function = None # Learned encoding function for the current time step dataset = [] # Dataset of samples collected for training the encoder selection_weights = None # A distribution over homing policies from the previous time step (can be None) # Learn Forward Model and Estimate the Model forward_models = dict() forward_models[horizon + 1] = None prev_dataset = None for step in range(1, horizon + 1): logger.log("Step %r out of %r " % (step, horizon)) # Step 1: Create dataset for learning the encoding function. A single datapoint consists of a transition # (x, a, x') and a 0-1 label y. If y=1 then transition was observed and y=0 otherwise. time_collection_start = time.time() dataset = self.encoder_sampler.gather_samples( env, actions, step, homing_policies, num_samples, dataset, selection_weights) logger.log("Encoder: %r sample collected in %r sec" % (num_samples, time.time() - time_collection_start)) # Step 2: Train a binary classifier on this dataset. The classifier f(x, a, x') is trained to predict # the probability that the transition (x, a, x') was observed. Importantly, the classifier has a special # structure f(x, a, x') = p(x, a, \phi(x')) where \phi maps x' to a set of discrete values. time_encoder_start = time.time() if not self.constants["bootstrap_encoder_model"]: encoding_function = None encoding_function, _ = self.train_encoding_function.do_train_with_discretized_models( dataset, logger, tensorboard, False, bootstrap_model=encoding_function, undiscretized_initialization=True, category="forward") self.util.save_encoder_model(encoding_function, experiment, trial, step, "forward") forward_models[step] = encoding_function logger.log("Encoder: Training time %r" % (time.time() - time_encoder_start)) if step > 1: self._estimate_and_save_transition_dynamics( env, experiment, prev_dataset, step, forward_models[step - 1], backward_models[step - 1], forward_models[step], backward_models[step], logger, trial) prev_dataset = dataset
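# The classifier trained above has the factored form f(x, a, x') = p(x, a, phi(x')),
# where phi maps the next observation to a small set of discrete codes. The module
# below is an illustrative sketch of that structure (not the project's
# EncoderModelWrapper), using a Gumbel-softmax bottleneck for the discretization.
import torch
import torch.nn as nn
import torch.nn.functional as F


class FactoredTransitionClassifierSketch(nn.Module):

    def __init__(self, obs_dim, num_actions, num_abstract_states, hidden_dim=64):
        super(FactoredTransitionClassifierSketch, self).__init__()
        self.num_actions = num_actions
        self.phi = nn.Linear(obs_dim, num_abstract_states)  # encoder phi(x')
        self.scorer = nn.Sequential(
            nn.Linear(obs_dim + num_actions + num_abstract_states, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1))

    def forward(self, x, action, x_next, temperature=1.0):
        # Discretize x' into a one-hot abstract state, then score the transition.
        code = F.gumbel_softmax(self.phi(x_next), tau=temperature, hard=True)
        action_one_hot = F.one_hot(action, self.num_actions).float()
        logit = self.scorer(torch.cat([x, action_one_hot, code], dim=1))
        return torch.sigmoid(logit)  # probability that (x, a, x') was observed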
def do_train(chaplot_baseline, shared_model, config, action_space, meta_data_util,
             args, constants, train_dataset, tune_dataset, experiment,
             experiment_name, rank, server, logger, model_type, contextual_bandit,
             use_pushover=False):
    sys.stderr = sys.stdout
    server.initialize_server()

    # Local Config Variables
    lstm_size = 256

    # Test policy
    test_policy = gp.get_argmax_action
    # torch.manual_seed(args.seed + rank)

    if rank == 0:  # client 0 creates a tensorboard server
        tensorboard = Tensorboard(experiment_name)
    else:
        tensorboard = None

    # Create the Agent
    logger.log("STARTING AGENT")
    agent = Agent(server=server,
                  model=chaplot_baseline,
                  test_policy=test_policy,
                  action_space=action_space,
                  meta_data_util=meta_data_util,
                  config=config,
                  constants=constants)
    logger.log("Created Agent...")

    # Create a local model for rollouts
    local_model = model_type(args, config=config)
    if torch.cuda.is_available():
        local_model.cuda()
    chaplot_baseline.shared_model = local_model
    local_model.train()

    # Our Environment Interface
    env = NavDroneServerInterface(agent, local_model, experiment, config,
                                  constants, None, train_dataset, tune_dataset,
                                  rank, logger, use_pushover)
    env.game_init()
    # logging.info("Contextual bandit is %r and horizon is %r", self.contextual_bandit, args.max_episode_length)
    logger.log("Created NavDroneServerInterface")

    # optimizer = optim.SGD(self.shared_model.parameters(), lr=self.args.lr) --- changed Chaplot's optimizer
    optimizer = optim.Adam(shared_model.parameters(), lr=0.00025)
    p_losses = []
    v_losses = []

    launch_k_unity_builds([config["port"]], "./simulators/NavDroneLinuxBuild.x86_64")

    (image, instr), _, _ = env.reset()
    curr_instr, prev_instr, next_instr = instr
    curr_instruction_idx = np.array(curr_instr)
    prev_instruction_idx = np.array(prev_instr)
    next_instruction_idx = np.array(next_instr)

    image = torch.from_numpy(image).float()
    curr_instruction_idx = torch.from_numpy(curr_instruction_idx).view(1, -1)
    prev_instruction_idx = torch.from_numpy(prev_instruction_idx).view(1, -1)
    next_instruction_idx = torch.from_numpy(next_instruction_idx).view(1, -1)

    done = True
    episode_length = 0
    num_iters = 0

    while True:
        # Sync with the shared model
        local_model.load_state_dict(shared_model.state_dict())

        if done:
            episode_length = 0
            cx = Variable(torch.zeros(1, lstm_size).cuda())
            hx = Variable(torch.zeros(1, lstm_size).cuda())
        else:
            # assert False, "Assertion put by Max and Dipendra. Code shouldn't reach here."
            cx = Variable(cx.data.cuda())
            hx = Variable(hx.data.cuda())

        values = []
        log_probs = []
        rewards = []
        entropies = []
        cached_information = None

        for step in range(args.num_steps):
            episode_length += 1
            tx = Variable(torch.from_numpy(np.array([episode_length])).long().cuda())

            value, logit, (hx, cx), cached_information = local_model(
                (Variable(image.unsqueeze(0).cuda()),
                 Variable(curr_instruction_idx.cuda()),
                 Variable(prev_instruction_idx.cuda()),
                 Variable(next_instruction_idx.cuda()),
                 (tx, hx, cx)), cached_information)

            prob = F.softmax(logit, dim=1)
            log_prob = F.log_softmax(logit, dim=1)
            entropy = -(log_prob * prob).sum(1)
            entropies.append(entropy)

            action = prob.multinomial().data
            log_prob = log_prob.gather(1, Variable(action.cuda()))
            action = action.cpu().numpy()[0, 0]

            (image, _), reward, done, _ = env.step(action)

            # done = done or (episode_length >= self.args.max_episode_length)
            if not done and (episode_length >= args.max_episode_length):
                # If the agent has not stopped within the horizon, force the stop action.
                _, _, done, _ = env.step(env.client.agent.action_space.get_stop_action_index())
                done = True

            if done:
                (image, instr), _, _ = env.reset()
                curr_instr, prev_instr, next_instr = instr
                curr_instruction_idx = np.array(curr_instr)
                prev_instruction_idx = np.array(prev_instr)
                next_instruction_idx = np.array(next_instr)
                curr_instruction_idx = torch.from_numpy(curr_instruction_idx).view(1, -1)
                prev_instruction_idx = torch.from_numpy(prev_instruction_idx).view(1, -1)
                next_instruction_idx = torch.from_numpy(next_instruction_idx).view(1, -1)

            image = torch.from_numpy(image).float()

            values.append(value)
            log_probs.append(log_prob)
            rewards.append(reward)

            if done:
                break

        if rank == 0 and tensorboard is not None:
            # Log total reward and entropy
            tensorboard.log_scalar("Total_Reward", sum(rewards))
            mean_entropy = sum(entropies).data[0] / float(max(episode_length, 1))
            tensorboard.log_scalar("Chaplot_Baseline_Entropy", mean_entropy)

        R = torch.zeros(1, 1)
        if not done:
            tx = Variable(torch.from_numpy(np.array([episode_length])).long().cuda())
            value, _, _, _ = local_model(
                (Variable(image.unsqueeze(0).cuda()),
                 Variable(curr_instruction_idx.cuda()),
                 Variable(prev_instruction_idx.cuda()),
                 Variable(next_instruction_idx.cuda()),
                 (tx, hx, cx)))
            R = value.data

        values.append(Variable(R.cuda()))
        policy_loss = 0
        value_loss = 0
        R = Variable(R.cuda())
        gae = torch.zeros(1, 1).cuda()
        for i in reversed(range(len(rewards))):
            R = args.gamma * R + rewards[i]
            advantage = R - values[i]
            value_loss = value_loss + 0.5 * advantage.pow(2)

            if contextual_bandit:
                # Just focus on immediate reward
                gae = torch.from_numpy(np.array([[rewards[i]]])).float()
            else:
                # Generalized Advantage Estimation
                delta_t = rewards[i] + args.gamma * values[i + 1].data - values[i].data
                gae = gae * args.gamma * args.tau + delta_t

            policy_loss = policy_loss - log_probs[i] * Variable(gae.cuda()) - 0.02 * entropies[i]

        optimizer.zero_grad()

        p_losses.append(policy_loss.data[0, 0])
        v_losses.append(value_loss.data[0, 0])

        if len(p_losses) > 1000:
            num_iters += 1
            logger.log(" ".join([
                # "Training thread: {}".format(rank),
                "Num iters: {}K".format(num_iters),
                "Avg policy loss: {}".format(np.mean(p_losses)),
                "Avg value loss: {}".format(np.mean(v_losses))
            ]))
            p_losses = []
            v_losses = []

        (policy_loss + 0.5 * value_loss).backward()
        torch.nn.utils.clip_grad_norm(local_model.parameters(), 40)

        ChaplotBaseline.ensure_shared_grads(local_model, shared_model)
        optimizer.step()
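# Standalone sketch of the advantage computation in the loop above: generalized
# advantage estimation, with the contextual-bandit variant that keeps only the
# immediate reward. Written with plain floats for clarity; the training code above
# operates on tensors.
def compute_advantages_sketch(rewards, values, gamma, tau, contextual_bandit=False):
    # `values` holds len(rewards) + 1 entries; the last one is the bootstrap value.
    advantages = []
    gae = 0.0
    for i in reversed(range(len(rewards))):
        if contextual_bandit:
            gae = rewards[i]
        else:
            delta_t = rewards[i] + gamma * values[i + 1] - values[i]
            gae = gae * gamma * tau + delta_t
        advantages.insert(0, gae)
    return advantages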
def do_supervsed_train(chaplot_baseline, shared_model, config, action_space, meta_data_util, args, constants, train_dataset, tune_dataset, experiment, experiment_name, rank, server, logger, model_type, use_pushover=False): raise NotImplementedError() sys.stderr = sys.stdout server.initialize_server() # Local Config Variables lstm_size = 256 # Test policy test_policy = gp.get_argmax_action # torch.manual_seed(args.seed + rank) if rank == 0: # client 0 creates a tensorboard server tensorboard = Tensorboard(experiment_name) else: tensorboard = None # Create the Agent logger.log("STARTING AGENT") agent = Agent(server=server, model=chaplot_baseline, test_policy=test_policy, action_space=action_space, meta_data_util=meta_data_util, config=config, constants=constants) logger.log("Created Agent...") # Create a local model for rollouts local_model = model_type(args, config=config) if torch.cuda.is_available(): local_model.cuda() chaplot_baseline.shared_model = local_model local_model.train() env = StreetViewServerInterface(agent, local_model, experiment, config, constants, None, train_dataset, tune_dataset, rank, logger, use_pushover) env.game_init() shared_model.train() # optimizer = optim.SGD(self.shared_model.parameters(), lr=self.args.lr) optimizer = optim.Adam(shared_model.parameters(), lr=0.00025) p_losses = [] v_losses = [] num_iters = 0 while True: # Get datapoint (image, instr), _, _ = env.reset() instruction_idx = np.array(instr) image = torch.from_numpy(image).float() instruction_idx = torch.from_numpy(instruction_idx).view(1, -1) # Sync with the shared model # model.load_state_dict(shared_model.state_dict()) episode_length = 0 cx = Variable(torch.zeros(1, lstm_size).cuda()) hx = Variable(torch.zeros(1, lstm_size).cuda()) log_probs = [] rewards = [] entropies = [] trajectory = env.get_trajectory() min_length = min(len(trajectory), args.max_episode_length - 1) trajectory = trajectory[0:min_length] trajectory.append(agent.action_space.get_stop_action_index()) for action in trajectory: episode_length += 1 tx = Variable( torch.from_numpy(np.array([episode_length])).long().cuda()) value, logit, (hx, cx) = shared_model( (Variable(image.unsqueeze(0).cuda()), Variable(instruction_idx.cuda()), None, None, (tx, hx, cx))) prob = F.softmax(logit) log_prob = F.log_softmax(logit) entropy = -(log_prob * prob).sum(1) entropies.append(entropy) action_tensor = torch.from_numpy(np.array([[action]])) log_prob = log_prob.gather(1, Variable(action_tensor.cuda())) (image, _), reward, done, _ = env.step(action) image = torch.from_numpy(image).float() log_probs.append(log_prob) rewards.append(reward) if done: break policy_loss = 0 for i in range(0, len(rewards)): policy_loss = policy_loss - log_probs[i] - 0.01 * entropies[i] # Log total reward and entropy if tensorboard is not None: tensorboard.log_scalar("Total_Reward", sum(rewards)) mean_entropy = sum(entropies) / float(max(episode_length, 1)) tensorboard.log_scalar("Chaplot_Baseline_Entropy", mean_entropy) tensorboard.log_scalar("Policy_Loss", policy_loss) optimizer.zero_grad() p_losses.append(policy_loss.data[0, 0]) if len(p_losses) > 1000: num_iters += 1 logger.log(" ".join([ # "Training thread: {}".format(rank), "Num iters: {}K".format(num_iters), "Avg policy loss: {}".format(np.mean(p_losses)), "Avg value loss: {}".format(np.mean(v_losses)) ])) p_losses = [] v_losses = [] policy_loss.backward() torch.nn.utils.clip_grad_norm(shared_model.parameters(), 40) # ensure_shared_grads(model, shared_model) optimizer.step()
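# The A3C-style loop above calls ChaplotBaseline.ensure_shared_grads before
# optimizer.step(), while the supervised variant leaves the call commented out.
# The helper itself is not shown here; the sketch below is the common A3C recipe
# and is an assumption, not the project's implementation.
def ensure_shared_grads_sketch(model, shared_model):
    for param, shared_param in zip(model.parameters(), shared_model.parameters()):
        if shared_param.grad is not None:
            return  # gradients are already hooked up (same-process case)
        shared_param._grad = param.grad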
for line in f:
    logging.info(">>> " + line.strip())
logging.info("END SCRIPT CONTENTS")

action_space = ActionSpace(config["action_names"], config["stop_action"])
meta_data_util = MetaDataUtil()

# Create the server
logging.log(logging.DEBUG, "STARTING SERVER")
server = NavDroneServer(config, action_space)
logging.log(logging.DEBUG, "STARTED SERVER")
print("Launched Server...")

try:
    # Create tensorboard
    tensorboard = Tensorboard(experiment_name)

    # Create the model
    logging.log(logging.DEBUG, "CREATING MODEL")
    # shared_model = a3c_lstm_ga_concat_instructions(args, config=config)
    shared_model = a3c_lstm_ga_concat_gavector(args, config=config)
    # shared_model = a3c_lstm_ga_attention_multigru(args, config=config)

    lstm_size = 256
    if isinstance(shared_model, a3c_lstm_ga_concat_gavector):
        lstm_size *= 3
    # if isinstance(shared_model, A3C_LSTM_GA):
    #     args.input_size -= 2

    model = ChaplotBaseline(args, shared_model, config, constants,
class State(object):

    def __init__(self, args):
        self.args = args
        self.model = None
        self.optimizer = None
        self.scheduler = None
        self.epoch = 0

        # s = State(args)
        set_seed(self.args.seed, self.args.cudnn_behavoir)
        self.log = Log(self.args.log_path)
        self.writer = Tensorboard(self.args.tensorboard_path)
        self.stati = Statistic(self.args.expernameid, self.args.experid_path, self.args.root_path)
        self.stati.add('hparam', self.args.dict())
        # s.writer.add_hparams(hparam_dict=s.args.dict(), metric_dict={})
        self.record = Record()

    def show_args(self):
        print('----------------------------------------------------------------------------------------------')
        print('args:')
        print(self.args)
        print('----------------------------------------------------------------------------------------------')

    def close(self):
        self.stati.close()
        self.log.close()

    def exit(self):
        self.writer.close()

    def save(self, dir_path, filename, last_epoch=None, best_epoch=None):
        checkpoint = {
            "model": self.model.state_dict(),
            "optimizer": self.optimizer.state_dict(),
            'scheduler': self.scheduler.state_dict(),
            'record': self.record,
            'epoch': self.epoch
        }
        torch.save(checkpoint, os.path.join(dir_path, filename))
        if last_epoch:
            symlink_force('epoch_' + str(last_epoch) + '.pth',
                          os.path.join(dir_path, 'epoch_last.pth'))
        if best_epoch:
            symlink_force('epoch_' + str(best_epoch) + '.pth',
                          os.path.join(dir_path, 'epoch_best.pth'))

    def load(self, path):
        if os.path.isfile(path):
            checkpoint = torch.load(path, map_location=self.args.device)
            assert self.model, 'self.model is not defined before loading a checkpoint'
            self.model.load_state_dict(checkpoint['model'])
            if self.optimizer:
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            if self.scheduler:
                self.scheduler.load_state_dict(checkpoint['scheduler'])
            self.record = checkpoint['record']
            # checkpoint['epoch']
        else:
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)
            # warnings.warn('checkpoint path ' + path + ' not exist; go on without load it.')

    def deploy(self):
        self.model = nn.DataParallel(self.model)
        self.model.to(self.args.device)
        if self.optimizer:
            self.optimizer.to(self.args.device)
        # if self.scheduler: self.scheduler.to(self.args.device)

    def show_para(self):
        # Print model's state_dict
        print("Net's state_dict:")
        for param_tensor in self.model.state_dict():
            print(param_tensor, "\t", self.model.state_dict()[param_tensor].size())

        # Print optimizer's state_dict
        print("Optimizer's state_dict:")
        for var_name in self.optimizer.state_dict():
            print(var_name, "\t", self.optimizer.state_dict()[var_name])
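# State.save relies on a symlink_force helper that is not shown above. A minimal
# sketch of its assumed behaviour: create the symlink, replacing any existing link
# of the same name.
import errno
import os


def symlink_force_sketch(target, link_name):
    try:
        os.symlink(target, link_name)
    except OSError as e:
        if e.errno == errno.EEXIST:
            os.remove(link_name)
            os.symlink(target, link_name)
        else:
            raise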