# Train the Chaplot gated-attention A3C baseline on StreetView navigation,
# splitting the train and tune data across asynchronous worker processes.
def main():
    experiment_name = "train_a3c_ga_chaplot_baseline_streetview"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    parser = argparse.ArgumentParser(description='Gated-Attention for Grounding')

    # Environment arguments
    parser.add_argument('-l', '--max-episode-length', type=int, default=50,
                        help='maximum length of an episode (default: 50)')
    parser.add_argument('-d', '--difficulty', type=str, default="hard",
                        help='Difficulty of the environment, "easy", "medium" or "hard" (default: hard)')
    parser.add_argument('--living-reward', type=float, default=0,
                        help='Default reward at each time step (default: 0, '
                             'change to -0.005 to encourage shorter paths)')
    parser.add_argument('--frame-width', type=int, default=300,
                        help='Frame width (default: 300)')
    parser.add_argument('--frame-height', type=int, default=168,
                        help='Frame height (default: 168)')
    parser.add_argument('-v', '--visualize', type=int, default=0,
                        help='Visualize the environment (default: 0, use 0 for faster training)')
    parser.add_argument('--sleep', type=float, default=0,
                        help='Sleep between frames for better visualization (default: 0)')
    parser.add_argument('--scenario-path', type=str, default="maps/room.wad",
                        help='Doom scenario file to load (default: maps/room.wad)')
    parser.add_argument('--interactive', type=int, default=0,
                        help='Interactive mode enables human to play (default: 0)')
    parser.add_argument('--all-instr-file', type=str, default="data/instructions_all.json",
                        help='All instructions file (default: data/instructions_all.json)')
    parser.add_argument('--train-instr-file', type=str, default="data/instructions_train.json",
                        help='Train instructions file (default: data/instructions_train.json)')
    parser.add_argument('--test-instr-file', type=str, default="data/instructions_test.json",
                        help='Test instructions file (default: data/instructions_test.json)')
    parser.add_argument('--object-size-file', type=str, default="data/object_sizes.txt",
                        help='Object size file (default: data/object_sizes.txt)')

    # A3C arguments
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--gamma', type=float, default=0.99, metavar='G',
                        help='discount factor for rewards (default: 0.99)')
    parser.add_argument('--tau', type=float, default=1.00, metavar='T',
                        help='parameter for GAE (default: 1.00)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('-n', '--num-processes', type=int, default=6, metavar='N',
                        help='how many training processes to use (default: 6)')
    parser.add_argument('--num-steps', type=int, default=20, metavar='NS',
                        help='number of forward steps in A3C (default: 20)')
    parser.add_argument('--load', type=str, default="0",
                        help='model path to load, 0 to not reload (default: 0)')
    parser.add_argument('-e', '--evaluate', type=int, default=0,
                        help='0: Train, 1: Evaluate MultiTask Generalization, '
                             '2: Evaluate Zero-shot Generalization (default: 0)')
    parser.add_argument('--dump-location', type=str, default="./saved/",
                        help='path to dump models and log (default: ./saved/)')
    args = parser.parse_args()

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))

    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)

    args.input_size = config['vocab_size'] + 2

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])
    meta_data_util = MetaDataUtil()

    try:
        # Create tensorboard
        tensorboard = None  # Tensorboard(experiment_name)

        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = a3c_lstm_ga_default
        shared_model = model_type(args, config=config, final_image_height=3, final_image_width=3)

        # Make the shared model use shared memory
        shared_model.share_memory()

        lstm_size = 256
        if isinstance(shared_model, a3c_lstm_ga_concat_gavector):
            lstm_size *= 3
        contextual_bandit = False
        model = ChaplotBaselineStreetView(args, shared_model, config, constants, tensorboard,
                                          use_contextual_bandit=contextual_bandit, lstm_size=lstm_size)

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        train_split = DatasetParser.parse("data/streetview/navigation_train.json", config)
        tune_split = DatasetParser.parse("data/streetview/navigation_dev.json", config)
        master_logger.log("Created train dataset of size %d " % len(train_split))
        master_logger.log("Created tuning dataset of size %d " % len(tune_split))

        processes = []

        # Split the train and tune data between processes
        train_split_process_chunks = []
        tune_split_process_chunks = []
        train_chunk_size = int(len(train_split) / args.num_processes)
        tune_chunk_size = int(len(tune_split) / args.num_processes)
        train_pad = 0
        tune_pad = 0
        for i in range(0, args.num_processes):
            train_split_process_chunks.append(train_split[train_pad: train_pad + train_chunk_size])
            tune_split_process_chunks.append(tune_split[tune_pad: tune_pad + tune_chunk_size])
            train_pad += train_chunk_size
            tune_pad += tune_chunk_size

        # Start the training thread(s)
        for i in range(args.num_processes):
            train_chunk = train_split_process_chunks[i]
            tune_chunk = tune_split_process_chunks[i]
            print("Client " + str(i) + " receives train-split of size %d and tune-split of size %d "
                  % (len(train_chunk), len(tune_chunk)))
            server = StreetViewServer(config, action_space, forward_setting_strict=False)
            client_logger = multiprocess_logging_manager.get_logger(i)
            p = mp.Process(target=ChaplotBaselineStreetView.do_train,
                           args=(model, shared_model, config, action_space, meta_data_util,
                                 args, constants, train_chunk, tune_chunk, experiment,
                                 experiment_name, i, server, client_logger, model_type,
                                 contextual_bandit))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
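
# Train the Chaplot gated-attention A3C baseline on the House environment,
# one asynchronous worker process per house (1-5). Expects the parsed
# argparse namespace as `args`.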
def main(args):
    experiment_name = "train_house_chaplot_house_baseline_postbugfix"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)
    data_filename = "simulators/house/AssetsHouse"
    supervised = False

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    # Test policy
    test_policy = gp.get_argmax_action

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    setup_validator = HouseSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"],
                               config["use_manipulation"],
                               config["num_manipulation_row"],
                               config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # Create vocabulary (index -> token)
    vocab = dict()
    vocab_list = open(data_filename + "/house_all_vocab.txt").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        # vocab[token] = i
        vocab[i] = token
    # vocab["$UNK$"] = len(vocab_list)
    vocab[len(vocab_list)] = "$UNK$"

    args.input_size = config['vocab_size'] + 2

    # Number of processes
    house_ids = [1, 2, 3, 4, 5]
    num_processes = len(house_ids)
    args.num_processes = num_processes

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = a3c_lstm_ga_default
        shared_model = model_type(args, action_space=action_space, config=config)
        # shared_model = model_type(config, constants)

        # Make the shared model use shared memory
        shared_model.share_memory()

        lstm_size = 256
        if isinstance(shared_model, a3c_lstm_ga_concat_gavector):
            lstm_size *= 3
        model = ChaplotBaselineHouse(args, shared_model, config, constants,
                                     tensorboard=None, use_contextual_bandit=False,
                                     lstm_size=lstm_size)

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        tune_split, train_split = [], []
        for hid in house_ids:
            all_train_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) + "_discrete_train.json", config)
            all_dev_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) + "_discrete_dev.json", config)
            train_split.append(all_train_data)
            tune_split.append(all_dev_data)
            master_logger.log("Created train dataset of size {} ".format(len(all_train_data)))
            master_logger.log("Created tuning dataset of size {} ".format(len(all_dev_data)))

        # Start the training thread(s)
        ports = find_k_ports(num_processes)
        master_logger.log("Found K Ports")
        processes = []
        for i, port in enumerate(ports):
            train_chunk = train_split[i]
            print("Size of training data: {}".format(len(train_chunk)))
            tmp_config = {k: v for k, v in config.items()}
            tmp_config["port"] = port
            tmp_tune_split = tune_split[i]
            print("Client " + str(house_ids[i]) + " getting a validation set of size ",
                  len(tmp_tune_split))
            server = HouseServer(tmp_config, action_space, port)
            client_logger = multiprocess_logging_manager.get_logger(i)

            # Run the training
            p = mp.Process(target=ChaplotBaselineHouse.do_train,
                           args=(house_ids[i], model, shared_model, tmp_config, action_space,
                                 meta_data_util, constants, train_chunk, tmp_tune_split,
                                 experiment, experiment_name, i, server, client_logger,
                                 model_type, vocab, args, False, lstm_size))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        # server.kill()
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
def main():
    experiment_name = "debug_oracle_trajectory"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))

    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])

    try:
        # Read the dataset
        train_split = DatasetParser.parse("data/streetview/navigation_train.json", config)
        server = StreetViewServer(config, action_space, forward_setting_strict=False)

        for data_point_ix, data_point in enumerate(train_split):
            _, metadata = server.reset_receive_feedback(data_point)
            trajectory = server.get_trajectory_exact(data_point.trajectory)
            trajectory = trajectory[:min(len(trajectory), constants["horizon"])]
            traj_node_ids = [server.fsa.panorama_to_node_dict[pano_id]
                             for pano_id in data_point.trajectory]
            total_reward = 0
            master_logger.log("Route ID: %r " % traj_node_ids)

            node_ix = 0
            for action in trajectory:
                route_id = traj_node_ids[node_ix]
                _, reward, metadata = server.send_action_receive_feedback(action)
                total_reward += reward
                master_logger.log("Reward %r, Action %r, Metadata %r" % (reward, action, metadata))

                # The current node id should be either the same or the next one
                if route_id != metadata["panorama_id"]:  # hopefully updated
                    if node_ix >= len(traj_node_ids) - 1:
                        master_logger.log("Failed. Went to a node beyond the trajectory")
                        raise AssertionError()
                    elif traj_node_ids[node_ix + 1] != metadata["panorama_id"]:
                        master_logger.log("Supposed to go to %r but went to %r " %
                                          (traj_node_ids[node_ix + 1], metadata["panorama_id"]))
                        raise AssertionError()
                    else:
                        node_ix += 1

            _, reward, metadata = server.halt_and_receive_feedback()
            total_reward += reward
            master_logger.log("Reward %r, Action stop, Metadata %r" % (reward, metadata))
            master_logger.log("Total reward %r, Nav Error %r " %
                              (total_reward, metadata["navigation_error"]))

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
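
# Asynchronous contextual-bandit training of the oracle-gold-probability model
# on the LANI drone environment, with one simulator client per process; only
# the last (master) client holds the tuning set.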
def main():
    experiment_name = "lani-asynchronous-training"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/nav_drone/config_localmoves_6000.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))

    setup_validator = NavDroneSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])
    meta_data_util = MetaDataUtil()

    # Number of processes
    num_processes = 6

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = IncrementalModelOracleGoldProb
        shared_model = model_type(config, constants)

        # Initialize the model using random weights or from a file
        shared_model.init_weights()
        # shared_model.load_saved_model(
        #     "./results/model-folder-name/contextual_bandit_5_epoch_19")

        # Make the shared model use shared memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset and take a ~95:5 train/tune split, advancing the
        # boundary so that no scene is split across the two sets
        all_train_data = DatasetParser.parse("data/nav_drone/train_annotations_6000.json", config)
        num_train = (len(all_train_data) * 19) // 20
        while all_train_data[num_train].get_scene_name().split("_")[1] \
                == all_train_data[num_train - 1].get_scene_name().split("_")[1]:
            num_train += 1
        train_split = all_train_data[:num_train]
        tune_split = all_train_data[num_train:]

        master_logger.log("Created train dataset of size %d " % len(train_split))
        master_logger.log("Created tuning dataset of size %d " % len(tune_split))

        processes = []

        # The simulator file is used to launch the client
        simulator_file = "./simulators/NavDroneLinuxBuild.x86_64"

        # Split the train data between processes
        train_split_process_chunks = []
        chunk_size = int(len(train_split) / num_processes)
        pad = 0
        for i in range(0, num_processes):
            chunk = train_split[pad:pad + chunk_size]
            pad += chunk_size
            train_split_process_chunks.append(chunk)

        # Start the training thread(s)
        ports = find_k_ports(num_processes)
        for i, port in enumerate(ports):
            train_chunk = train_split_process_chunks[i]
            tmp_config = {k: v for k, v in config.items()}
            tmp_config["port"] = port
            if i == num_processes - 1:
                # Master client which does testing. Don't want each client to do testing.
                tmp_tune_split = tune_split
            else:
                tmp_tune_split = []
            print("Client " + str(i) + " getting a validation set of size ", len(tmp_tune_split))
            server = NavDroneServerPy3(tmp_config, action_space, multi_client=True)
            client_logger = multiprocess_logging_manager.get_logger(i)
            p = mp.Process(target=AsynchronousContextualBandit.do_train,
                           args=(simulator_file, shared_model, tmp_config, action_space,
                                 meta_data_util, constants, train_chunk, tmp_tune_split,
                                 experiment, experiment_name, i, server, client_logger,
                                 model_type))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
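
# Utility script for the Blocks environment: launch a single simulator client
# and dump numpy images for the dataset via TmpBlockAgent.save_numpy_image.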
def main():
    experiment_name = "blocks_save_image-test"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/blocks/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))

    setup_validator = BlocksSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config)
    meta_data_util = MetaDataUtil()

    # Create vocabulary (token -> index)
    vocab = dict()
    vocab_list = open("./Assets/vocab_both").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        vocab[token] = i
    vocab["$UNK$"] = len(vocab_list)
    config["vocab_size"] = len(vocab_list) + 1

    # Number of processes
    num_processes = 6

    try:
        # Create tensorboard
        tensorboard = None  # Tensorboard(experiment_name)

        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = IncrementalModelEmnlp
        shared_model = model_type(config, constants)

        # Make the shared model use shared memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        all_train_data = DatasetParser.parse("testset.json", config)
        tune_split = []  # all_train_data[:num_tune]
        train_split = list(all_train_data[:])
        master_logger.log("Created train dataset of size %d " % len(train_split))
        master_logger.log("Created tuning dataset of size %d " % len(tune_split))

        # Find ports and launch the simulator
        ports = find_k_ports(num_processes)
        tmp_config = {k: v for k, v in config.items()}
        tmp_config["port"] = ports[0]
        server = BlocksServer(tmp_config, action_space)
        launch_k_unity_builds([ports[0]], "./simulators/blocks/retro_linux_build.x86_64")
        server.initialize_server()

        # Create a local model for rollouts
        local_model = model_type(config, constants)

        # Create the agent
        tmp_agent = TmpBlockAgent(server=server,
                                  model=local_model,
                                  test_policy=None,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=config,
                                  constants=constants)
        tmp_agent.save_numpy_image(all_train_data, vocab, "test")

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
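
# Asynchronous contextual-bandit training of the Misra baseline (no image,
# gold goal, no RNN, going by the experiment name) on the House environment,
# one worker process per house (1-5).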
def main():
    data_filename = "simulators/house/AssetsHouse"
    experiment_name = "house_unet_cb_navigation_gold_goal_no_RNN"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    # Validate the setting
    setup_validator = HouseSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"],
                               config["use_manipulation"],
                               config["num_manipulation_row"],
                               config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # TODO: Create vocabulary
    vocab = dict()
    vocab_list = open(data_filename + "/house_all_vocab.txt").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        # vocab[token] = i
        vocab[i] = token
    # vocab["$UNK$"] = len(vocab_list)
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    # Number of processes
    house_ids = [1, 2, 3, 4, 5]
    num_processes = len(house_ids)

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = TmpHouseMisraBaseline  # TmpHouseIncrementalModelOracleGoldProb
        shared_model = model_type(config, constants, use_image=False)
        # model.load_saved_model("./results/paragraph_chaplot_attention/chaplot_model_epoch_3")

        # Make the shared model use shared memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        tune_split, train_split = [], []
        for hid in house_ids:
            all_train_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) + "_discrete_train.json", config)
            all_dev_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) + "_discrete_dev.json", config)
            # num_tune = int(len(all_train_data) * 0.1)
            # train_split.append(list(all_train_data[num_tune:]))
            # tune_split.append(list(all_train_data[:num_tune]))

            # Extract type of the dataset
            # lines = open("./simulators/house/datapoint_type_house" + str(hid) + "_v5_110.txt").readlines()
            # datapoint_id_type = {}
            # for line in lines:
            #     datapoint_id, datapoint_type = line.split()
            #     datapoint_id_type[int(datapoint_id)] = datapoint_type.strip()

            # Filter manipulation type
            # all_train_data = list(filter(
            #     lambda datapoint: datapoint_id_type[datapoint.get_id()] == "navigation", all_train_data))

            train_split.append(all_train_data)
            tune_split.append(all_dev_data)

        processes = []

        # Start the training thread(s)
        ports = find_k_ports(num_processes)
        master_logger.log("Found K Ports")
        for i, port in enumerate(ports):
            train_chunk = train_split[i]
            tmp_config = {k: v for k, v in config.items()}
            tmp_config["port"] = port
            tmp_tune_split = tune_split[i]
            print("Client " + str(i) + " getting a validation set of size ", len(tmp_tune_split))
            server = HouseServer(tmp_config, action_space, port)
            master_logger.log("Server Initialized")
            client_logger = multiprocess_logging_manager.get_logger(i)
            p = mp.Process(target=TmpAsynchronousContextualBandit.do_train,
                           args=(house_ids[i], shared_model, tmp_config, action_space,
                                 meta_data_util, constants, train_chunk, tmp_tune_split,
                                 experiment, experiment_name, i, server, client_logger,
                                 model_type, vocab))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
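
# PPO baseline for the HOMER experiments: run ppo_feature five times on a
# generated environment and report the median of the returned
# num_samples_half_regret values.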
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", default='stochcombolock', help="name of the environment e.g., montezuma")
    parser.add_argument("--num_processes", default=6, type=int,
                        help="number of policy search (PS) processes to be launched at a given time")
    parser.add_argument("--forwardmodel", default='forwardmodel', help="Model for training the forward abstraction")
    parser.add_argument("--backwardmodel", default='backwardmodel', help="Model for learning the backward abstraction")
    parser.add_argument("--discretization", default="True", help="Train with discretized/undiscretized model")
    parser.add_argument("--policy_type", default="linear", type=str, help="Type of policy (linear, non-linear)")
    parser.add_argument("--name", default="neurips", help="Name of the experiment")
    parser.add_argument("--horizon", default=1, type=int, help="Horizon")
    parser.add_argument("--save_path", default="./results/", type=str, help="Folder where to save results")
    args = parser.parse_args()

    env_name = args.env
    num_processes = args.num_processes
    exp_name = args.name

    # NOTE: the parser defines --backwardmodel rather than --model; the original
    # code read args.model here, which would crash, so the backward-abstraction
    # model name is used instead (assumption about the intended argument).
    experiment_name = "ppo-%s-model-%s-horizon-%d-%s" % (exp_name, args.backwardmodel, args.horizon, env_name)
    experiment = "./%s/%s" % (args.save_path, experiment_name)
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_homer.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("Environment Name %r. Experiment Name %r" % (env_name, exp_name))

    with open("data/%s/config.json" % env_name) as f:
        config = json.load(f)
    # Add command line arguments. Command line arguments supersede file settings.
    config["horizon"] = args.horizon
    GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)

    with open("data/%s/constants.json" % env_name) as f:
        constants = json.load(f)
    constants["model_type"] = args.backwardmodel
    print(json.dumps(config, indent=2))

    # Validate the keys
    validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    # Create the environment
    env = GenerateEnvironmentWrapper(env_name, config)
    master_logger.log("Environment Created")
    print("Created Environment...")

    homing_policy_validation_fn = env.generate_homing_policy_validation_fn()

    performance = []
    for attempt in range(1, 6):
        master_logger.log("========= STARTING EXPERIMENT %d ======== " % attempt)
        num_samples_half_regret = ppo_feature(experiment, env, config, constants,
                                              master_logger, use_pushover=False, debug=False)
        performance.append(num_samples_half_regret)

    master_logger.log("Median Performance %r. All performance %r" %
                      (statistics.median(performance), performance))
    print("All performance ", performance)
    print("Median performance ", statistics.median(performance))
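
# Evaluate a saved EMNLP model on the Blocks dev set against a single
# simulator client, using the argmax test policy.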
def main():
    experiment_name = "blocks_experiments"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/blocks/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))

    setup_validator = BlocksSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config)
    meta_data_util = MetaDataUtil()

    # Create vocabulary (token -> index)
    vocab = dict()
    vocab_list = open("./Assets/vocab_both").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        vocab[token] = i
    vocab["$UNK$"] = len(vocab_list)
    config["vocab_size"] = len(vocab_list) + 1

    # Test policy
    test_policy = gp.get_argmax_action

    # Create tensorboard
    tensorboard = Tensorboard("Agent Test")

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = IncrementalModelEmnlp
        shared_model = model_type(config, constants)
        shared_model.load_saved_model("./results/model-folder-name/model-file-name")

        # Read the dataset
        test_data = DatasetParser.parse("devset.json", config)
        master_logger.log("Created test dataset of size %d " % len(test_data))

        # Create server and launch a client
        simulator_file = "./simulators/blocks/retro_linux_build.x86_64"
        config["port"] = find_k_ports(1)[0]
        server = BlocksServer(config, action_space, vocab=vocab)

        # Launch unity
        launch_k_unity_builds([config["port"]], simulator_file)
        server.initialize_server()

        # Create the agent
        master_logger.log("CREATING AGENT")
        agent = Agent(server=server,
                      model=shared_model,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)
        agent.test(test_data, tensorboard)

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
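
# Evaluate the decoupled pipeline (goal-prediction model + navigator +
# action-type classifier) on the House dev sets, one test process per house.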
def main():
    data_filename = "simulators/house/AssetsHouse"
    experiment_name = "emnlp-camera-ready-figure-plot"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline_inferred_oos.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    # Validate the setting
    setup_validator = HouseSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"],
                               config["use_manipulation"],
                               config["num_manipulation_row"],
                               config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # TODO: Create vocabulary
    vocab = dict()
    vocab_list = open(data_filename + "/house_all_vocab.txt").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        # vocab[token] = i
        vocab[i] = token
    # vocab["$UNK$"] = len(vocab_list)
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    # Number of processes
    house_ids = [1, 2, 3, 4, 5]
    num_processes = len(house_ids)

    try:
        master_logger.log("CREATING MODEL")

        # Create the goal prediction model
        # shared_goal_prediction_model = IncrementalModelAttentionChaplotResNet(
        #     config, constants, final_model_type="m4jksum1", final_dimension=(64, 32, 32 * 6))
        shared_goal_prediction_model = IncrementalModelAttentionChaplotResNet(
            config, constants, final_model_type="unet-positional-encoding",
            final_dimension=(64, 32, 32 * 6))
        shared_goal_prediction_model.load_saved_model(
            "./results/house_goal_prediction/goal_prediction_single_supervised_epoch_4")
        # shared_goal_prediction_model.load_saved_model(
        #     "./results/train_house_goal_prediction_m4jksum1_repeat/goal_prediction_single_supervised_epoch_4")
        # shared_goal_prediction_model.load_saved_model(
        #     "./results/train_house_two_stage_model/predictor_contextual_bandit_2_epoch_2")
        # shared_goal_prediction_model.load_saved_model(
        #     "./results/train_house_goal_prediction_dummy_token/goal_prediction_single_supervised_epoch_9")

        # Create the navigation model
        model_type = TmpHouseIncrementalModelOracleGoldProb  # TmpHouseIncrementalModelChaplot
        shared_navigator_model = model_type(config, constants, use_image=False)
        # shared_navigator_model.load_saved_model(
        #     "./results/train_house_two_stage_model/navigator_contextual_bandit_2_epoch_2")
        shared_navigator_model.load_saved_model(
            "./results/house_unet_cb_navigation_gold_goal/contextual_bandit_0_epoch_5")
        # shared_navigator_model.load_saved_model(
        #     "./results/house_unet_cb_navigation_gold_goal_no_RNN/contextual_bandit_0_epoch_10")

        # Create the action type model
        shared_action_type_model = ActionTypeModule()
        shared_action_type_model.cuda()
        shared_action_type_model.load_state_dict(
            torch.load("./results/train_house_action_types/goal_prediction_single_supervised_epoch_7"))

        # Make the shared models use shared memory
        shared_goal_prediction_model.share_memory()
        shared_navigator_model.share_memory()
        shared_action_type_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        test_split = []
        for hid in house_ids:
            all_test_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) + "_discrete_dev.json", config)

            # # Extract type of the dataset
            # lines = open("./simulators/house/datapoint_type_house" + str(hid) + ".txt").readlines()
            # datapoint_id_type = {}
            # for line in lines:
            #     words = line.split()
            #     datapoint_id, datapoint_type = words[0], words[1:]
            #     datapoint_id_type[int(datapoint_id)] = datapoint_type  # .strip()
            #
            # # Filter manipulation type
            # all_test_data = list(filter(
            #     lambda datapoint: "manipulation" not in datapoint_id_type[datapoint.get_id()],
            #     all_test_data))

            test_split.append(all_test_data)

        processes = []

        # Start the testing thread(s)
        ports = find_k_ports(num_processes)
        master_logger.log("Found K Ports")
        for i, port in enumerate(ports):
            test_chunk = test_split[i]
            tmp_config = {k: v for k, v in config.items()}
            tmp_config["port"] = port
            print("Client " + str(i) + " getting a test set of size ", len(test_chunk))
            server = HouseServer(tmp_config, action_space, port)
            master_logger.log("Server Initialized")
            client_logger = multiprocess_logging_manager.get_logger(i)
            p = mp.Process(target=HouseDecoupledPredictorNavigatorAgent.do_test,
                           args=(house_ids[i], shared_goal_prediction_model,
                                 shared_navigator_model, shared_action_type_model,
                                 tmp_config, action_space, meta_data_util, constants,
                                 test_chunk, experiment_name, i, server,
                                 client_logger, vocab, "inferred"))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
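
# Measure human performance on a slice of the house-3 dev set through an
# interactive agent session.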
def main():
    data_filename = "./simulators/house/AssetsHouse"
    experiment_name = "emnlp_camera_ready_test_human_performance"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # House to evaluate on
    house_id = 3

    # Define log settings
    log_path = experiment + '/test_baseline_%d.log' % house_id
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    # TODO: HouseSetupValidator()
    # setup_validator = BlocksSetupValidator()
    # setup_validator.validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    config["use_manipulation"] = True  # debug manipulation
    action_space = ActionSpace(config["action_names"], config["stop_action"],
                               config["use_manipulation"],
                               config["num_manipulation_row"],
                               config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # TODO: Create vocabulary
    vocab = dict()
    vocab_list = open(data_filename + "/house_all_vocab.txt").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        vocab[i] = token
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = TmpHouseIncrementalModelChaplot
        shared_model = model_type(config, constants)
        # model.load_saved_model("./results/paragraph_chaplot_attention/chaplot_model_epoch_3")

        # Make the shared model use shared memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        test_split = DatasetParser.parse(
            data_filename + "/tokenized_house" + str(house_id) + "_discrete_dev.json", config)
        test_split = test_split[2:20]

        # Launch the server
        ports = find_k_ports(1)
        port = ports[0]
        tmp_config = {k: v for k, v in config.items()}
        tmp_config["port"] = port
        print("Client " + str(0) + " getting a validation set of size ", len(test_split))
        server = HouseServer(tmp_config, action_space, port)
        launch_k_unity_builds([tmp_config["port"]],
                              "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json",
                              cwd="./simulators/house/")
        server.initialize_server()

        # Create a local model for rollouts
        local_model = model_type(tmp_config, constants)
        # local_model.train()

        # Create the agent
        print("STARTING AGENT")
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=None,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=tmp_config,
                                  constants=constants)
        print("Created Agent...")
        tmp_agent.test_human_performance(test_split, vocab, master_logger)

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
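
# Evaluate trivial Blocks baselines (stop / random walk / most-frequent
# action) on the dataset split chosen via --split.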
def main():
    experiment_name = "test_block_baselines"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/blocks/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))

    setup_validator = BlocksSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config)
    meta_data_util = MetaDataUtil()

    # Create vocabulary (token -> index)
    vocab = dict()
    vocab_list = open("./Assets/vocab_both").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        vocab[token] = i
    vocab["$UNK$"] = len(vocab_list)
    config["vocab_size"] = len(vocab_list) + 1

    try:
        # Read the dataset (`args` is assumed to be parsed at module scope
        # in the original script)
        if args.split == "train":
            test_data = DatasetParser.parse("trainset.json", config)
        elif args.split == "dev":
            test_data = DatasetParser.parse("devset.json", config)
        elif args.split == "test":
            test_data = DatasetParser.parse("testset.json", config)
        else:
            raise AssertionError("Unhandled dataset split %r. Only support train, dev and test." % args.split)
        master_logger.log("Created test dataset of size %d " % len(test_data))

        # Create server and launch a client
        simulator_file = "./simulators/blocks/retro_linux_build.x86_64"
        config["port"] = find_k_ports(1)[0]
        server = BlocksServer(config, action_space, vocab=vocab)

        # Launch unity
        launch_k_unity_builds([config["port"]], simulator_file)
        server.initialize_server()

        # Create the agent
        master_logger.log("CREATING AGENT")
        if args.baseline == "stop":
            agent_type = Agent.STOP
        elif args.baseline == "random":
            agent_type = Agent.RANDOM_WALK
        elif args.baseline == "frequent":
            agent_type = Agent.MOST_FREQUENT  # TODO compute most frequent action from the dataset
        else:
            raise AssertionError("Unhandled agent type %r. Only support stop, random and frequent." % args.baseline)
        agent = Agent(agent_type=agent_type,
                      server=server,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      constants=constants)
        agent.test(test_data)

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
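
# Debugging script: repeatedly train the state-encoding (abstraction)
# function, each run in its own process.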
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", default='stochcombolock', help="name of the environment e.g., montezuma")
    parser.add_argument("--model", default='gumbeldouble', help="Model for training the encoding function")
    parser.add_argument("--name", default="debug-encoder", help="Name of the experiment")
    parser.add_argument("--horizon", default=-1, type=int, help="Horizon")
    parser.add_argument("--samples", default=-1, type=int, help="Samples")
    parser.add_argument("--learn_type", default="vanilla", type=str, help="Either vanilla, coordinate, transfer")
    # NOTE: --save_path is referenced below but was missing from the original
    # parser; it is added here with the default used by the sibling scripts.
    parser.add_argument("--save_path", default="./results/", type=str, help="Folder where to save results")
    args = parser.parse_args()

    env_name = args.env
    exp_name = args.name

    with open("data/%s/config.json" % env_name) as f:
        config = json.load(f)
    # Add command line arguments. Command line arguments supersede file settings.
    if args.horizon != -1:
        config["horizon"] = args.horizon
    config["encoder_training_type"] = args.learn_type
    GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)

    with open("data/%s/constants.json" % env_name) as f:
        constants = json.load(f)
    if args.samples != -1:
        constants["encoder_training_num_samples"] = args.samples
    constants["model_type"] = args.model
    print(json.dumps(config, indent=2))

    # Validate the keys
    validate(config, constants)

    # Create file
    experiment_name = "%s-model-%s-horizon-%d-samples-%d-%s" % (
        exp_name, args.model, config["horizon"],
        constants["encoder_training_num_samples"], env_name)
    experiment = "./%s/%s" % (args.save_path, experiment_name)
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_homer.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("Environment Name %r. Experiment Name %r" % (env_name, exp_name))

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    # performance = []
    num_runs = 100
    for attempt in range(1, num_runs + 1):
        master_logger.log("========= STARTING EXPERIMENT %d ======== " % attempt)
        p = mp.Process(target=DebugTrainEncodingFunction.do_train,
                       args=(config, constants, env_name, experiment_name,
                             master_logger, False, True))
        p.daemon = False
        p.start()
        p.join()

    # for key in performance[0]:  # Assumes the keys are the same across all runs
    #     results = [result[key] for result in performance]
    #     master_logger.log("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
    #                       (key, statistics.mean(results), statistics.median(results),
    #                        statistics.stdev(results), num_runs, results))
    #     print("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
    #           (key, statistics.mean(results), statistics.median(results),
    #            statistics.stdev(results), num_runs, results))

    # Cleanup
    multiprocess_logging_manager.cleanup()
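
# Run PSDP on top of homing policies learned by a previous HOMER run, loading
# the saved environment for each of five trials and aggregating the results.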
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", default='stochcombolock', help="name of the environment e.g., montezuma")
    parser.add_argument("--name", default="run-psdp", help="Name of the experiment")
    parser.add_argument("--forwardmodel", default='forwardmodel', help="Model for training the forward abstraction")
    parser.add_argument("--backwardmodel", default='backwardmodel', help="Model for learning the backward abstraction")
    parser.add_argument("--discretization", default="True", help="Train with discretized/undiscretized model")
    parser.add_argument("--policy_type", default="linear", type=str, help="Type of policy (linear, non-linear)")
    parser.add_argument("--load", help="Name of the result folder containing homing policies and environment")
    parser.add_argument("--train_eps", type=int, help="Number of training episodes used for learning the policy set")
    parser.add_argument("--noise", default=None, type=str, help="Noise")
    parser.add_argument("--save_trace", default="False", help="Save traces")
    parser.add_argument("--trace_sample_rate", default=500, type=int, help="How often to save traces")
    parser.add_argument("--save_path", default="./results/", type=str, help="Folder where to save results")
    args = parser.parse_args()

    env_name = args.env
    exp_name = args.name
    load_folder = args.load

    # NOTE: the parser defines --backwardmodel rather than --model; the original
    # code read args.model here, which would crash, so the backward-abstraction
    # model name is used instead (assumption about the intended argument).
    experiment_name = "%s-%s-model-%s-noise-%s" % (exp_name, env_name, args.backwardmodel, args.noise)
    experiment = "./%s/%s" % (args.save_path, experiment_name)
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_homer.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("Environment Name %r. Experiment Name %r" % (env_name, exp_name))

    with open("data/%s/config.json" % env_name) as f:
        config = json.load(f)
    # Add command line arguments. Command line arguments supersede file settings.
    if args.noise is not None:
        config["noise"] = args.noise
    config["save_trace"] = args.save_trace == "True"
    config["trace_sample_rate"] = args.trace_sample_rate
    config["save_path"] = args.save_path
    config["exp_name"] = experiment_name
    config["policy_type"] = args.policy_type
    GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)

    with open("data/%s/constants.json" % env_name) as f:
        constants = json.load(f)
    constants["model_type"] = args.backwardmodel
    print(json.dumps(config, indent=2))

    # Validate the keys
    validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    performance = []
    num_runs = 5
    for trial in range(1, num_runs + 1):
        master_logger.log("========= STARTING EXPERIMENT %d ======== " % trial)

        # Create a new environment
        print("Created Environment...")
        env = GenerateEnvironmentWrapper(env_name, config)
        master_logger.log("Environment Created")

        # Load the environment
        env_folder = load_folder + "/trial_%d_env" % trial
        env_folders = [join(env_folder, f) for f in listdir(env_folder)
                       if isdir(join(env_folder, f))]
        assert len(env_folders) == 1, \
            "Found more than one environment. Specify the folder manually %r" % env_folders
        env.load_environment_from_folder(env_folders[0])
        master_logger.log("Loaded Environment from %r" % env_folders[0])

        # Fix config to match the env.
        # TODO implement the next block of code in a scalable manner
        config["horizon"] = env.env.horizon
        config["obs_dim"] = -1
        GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)
        master_logger.log("Environment horizon %r, Observation dimension %r" %
                          (config["horizon"], config["obs_dim"]))

        learning_alg = DistributedHomerAlgorithm(config, constants)
        policy_result = learning_alg.train_from_learned_homing_policies(
            env=env,
            load_folder=load_folder,
            train_episodes=args.train_eps,
            experiment_name=experiment_name,
            logger=master_logger,
            use_pushover=False,
            trial=trial)
        performance.append(policy_result)

    for key in performance[0]:  # Assumes the keys are the same across all runs
        results = [result[key] for result in performance]
        master_logger.log("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
                          (key, statistics.mean(results), statistics.median(results),
                           statistics.stdev(results), num_runs, results))
        print("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
              (key, statistics.mean(results), statistics.median(results),
               statistics.stdev(results), num_runs, results))

    # Cleanup
    multiprocess_logging_manager.cleanup()
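
# Evaluate saved StreetView models ("chaplot" or "concat", trained with
# cb/sup/mix) on a chosen split, partitioning the test data across worker
# processes.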
def main():
    # Parse command-line arguments
    parser = argparse.ArgumentParser(description='Parser Values')
    parser.add_argument('--name', type=str, help='name of the experiment')
    parser.add_argument('--num_processes', type=int, default=6, help='number of processes')
    parser.add_argument('--split', type=str, help='dataset split ("train", "dev", "test")')
    parser.add_argument('--model', type=str, help='model ("chaplot", "concat")')
    parser.add_argument('--learning_alg', type=str, help='learning alg ("cb", "sup", "mix")')
    args = parser.parse_args()

    experiment_name = args.name
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline_%s.log' % args.split
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))

    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])
    meta_data_util = MetaDataUtil()

    num_processes = args.num_processes
    model_name = args.model
    data_split = args.split
    learning_alg = args.learning_alg

    master_logger.log("Num processes %r, Model %r, Alg %r, Split %r " %
                      (num_processes, model_name, learning_alg, data_split))

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        if model_name == "concat":
            model_type = TmpStreetviewIncrementalModelDeepMindPolicyNetwork
        elif model_name == "chaplot":
            model_type = IncrementalModelChaplot
        else:
            raise AssertionError("Model name not known. %r " % model_name)

        shared_model = model_type(config, constants)
        shared_model.init_weights()

        if model_name == "concat":
            if learning_alg == "sup":
                shared_model.load_saved_model(
                    "./results/train-streetview-deepmind-supervised-learning/supervised_learning0_epoch_13")
            elif learning_alg == "cb":
                shared_model.load_saved_model(
                    "./results/train-streetview-deepmind-cb/contextual_bandit_0_epoch_38")
            elif learning_alg == "mix":
                shared_model.load_saved_model(
                    "./results/train-streetview-deepmind-mixture-algorithm/supervised_learning5_epoch_54")
            else:
                raise AssertionError("Unregistered learning algorithm %r " % learning_alg)
        elif model_name == "chaplot":
            if learning_alg == "sup":
                shared_model.load_saved_model(
                    "./results/train-streetview-chaplot-supervised-learning/supervised_learning0_epoch_36")
            elif learning_alg == "cb":
                shared_model.load_saved_model(
                    "./results/train-streetview-chaplot-cb/contextual_bandit_0_epoch_66")
            elif learning_alg == "mix":
                shared_model.load_saved_model(
                    "./results/train-streetview-chaplot-mixture-repeat2/contextual_bandit_0_epoch_34")
            else:
                raise AssertionError("Unregistered learning algorithm %r " % learning_alg)
        else:
            raise AssertionError("Unregistered model %r " % model_name)

        # Make the shared model use shared memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        test_split = DatasetParser.parse("data/streetview/navigation_%s.json" % data_split, config)
        master_logger.log("Created test dataset of size %d " % len(test_split))

        processes = []

        # Split the test data between processes
        test_split_process_chunks = []
        tune_chunk_size = int(len(test_split) / num_processes)
        tune_pad = 0
        for i in range(0, num_processes):
            if i < num_processes - 1:
                test_split_process_chunks.append(test_split[tune_pad:tune_pad + tune_chunk_size])
            else:
                test_split_process_chunks.append(test_split[tune_pad:])
            tune_pad += tune_chunk_size
        assert sum([len(chunk) for chunk in test_split_process_chunks]) == len(test_split), \
            "Test dataset not properly partitioned."

        # Start the testing thread(s)
        for i in range(0, num_processes):
            test_chunk = test_split_process_chunks[i]
            print("Client " + str(i) + " getting a test set of size ", len(test_chunk))
            server = StreetViewServer(config, action_space, forward_setting_strict=False)
            client_logger = multiprocess_logging_manager.get_logger(i)
            p = mp.Process(target=TmpStreetViewAsynchronousContextualBandit.do_test,
                           args=(shared_model, config, action_space, meta_data_util,
                                 constants, test_chunk, experiment_name, i, server,
                                 client_logger, model_type))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
def main():
    data_filename = "./simulators/house/AssetsHouse"
    experiment_name = "tmp_house_1_debug_manual_control"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                     ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    # TODO: replace with HouseSetupValidator()
    # setup_validator = BlocksSetupValidator()
    # setup_validator.validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    config["use_manipulation"] = True  # debug manipulation
    action_space = ActionSpace(config["action_names"], config["stop_action"],
                               config["use_manipulation"],
                               config["num_manipulation_row"],
                               config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # Create the vocabulary (index -> token)
    vocab = dict()
    vocab_list = open(data_filename + "/house_all_vocab.txt").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        # vocab[token] = i
        vocab[i] = token
    # vocab["$UNK$"] = len(vocab_list)
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    # Number of processes
    house_ids = [1]  # [1, 2, 3]
    num_processes = len(house_ids)

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = TmpHouseIncrementalModelChaplot
        shared_model = model_type(config, constants)
        # shared_model.load_saved_model("./results/paragraph_chaplot_attention/chaplot_model_epoch_3")

        # Make the shared model use shared memory
        shared_model.share_memory()
        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        tune_split, train_split = [], []
        for hid in house_ids:
            all_train_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) + "_discrete.json", config)
            all_dev_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) + "_discrete_dev.json", config)
            # num_tune = int(len(all_train_data) * 0.1)
            # train_split.append(list(all_train_data[num_tune:]))
            # tune_split.append(list(all_train_data[:num_tune]))

            # Extract type of the dataset
            # lines = open("./simulators/house/datapoint_type_house" + str(hid) + "_v5_110.txt").readlines()
            # datapoint_id_type = {}
            # for line in lines:
            #     datapoint_id, datapoint_type = line.split()
            #     datapoint_id_type[int(datapoint_id)] = datapoint_type.strip()
            #
            # # Filter manipulation type
            # all_train_data = list(
            #     filter(lambda datapoint: datapoint_id_type[datapoint.get_id()] == "navigation", all_train_data))

            # Debug on a small subset of the training data
            all_train_data = all_train_data[0:50]
            train_split.append(all_train_data)
            tune_split.append(all_train_data)
            # train_split.append(all_train_data)
            # tune_split.append(all_dev_data)

        # Launch the server
        ports = find_k_ports(1)
        port = ports[0]
        tmp_config = {k: v for k, v in config.items()}
        tmp_config["port"] = port
        tmp_tune_split = tune_split[0]
        print("Client " + str(0) + " getting a validation set of size ", len(tmp_tune_split))
        server = HouseServer(tmp_config, action_space, port)

        house_id = house_ids[0]
        launch_k_unity_builds([tmp_config["port"]],
                              "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json",
                              cwd="./simulators/house/")
        server.initialize_server()

        # Create a local model for rollouts
        local_model = model_type(tmp_config, constants)
        # local_model.train()

        # Create the agent
        print("STARTING AGENT")
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=None,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=tmp_config,
                                  constants=constants)
        print("Created Agent...")

        # Cycle through the datapoints under manual control
        index = 0
        while True:
            print("Giving another data %r " % len(train_split[0]))
            # index = random.randint(0, len(train_split[0]) - 1)
            index = (index + 1) % len(train_split[0])
            print("Dataset id is " + str(train_split[0][index].get_id()))
            tmp_agent.debug_manual_control(train_split[0][index], vocab)
            # tmp_agent.debug_tracking(train_split[0][index], vocab)

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
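# --- Illustrative sketch (not part of the original script) ---
# The house scripts build an index-to-token vocabulary with a trailing $UNK$
# entry for out-of-vocabulary words. A standalone version, assuming one token
# per line in the vocab file; `load_index_vocab` is a hypothetical helper name.
def load_index_vocab(vocab_file):
    """Return ({index: token}, vocab_size) with $UNK$ as the last index."""
    with open(vocab_file) as f:
        tokens = [line.strip().lower() for line in f]
    vocab = {i: token for i, token in enumerate(tokens)}
    vocab[len(tokens)] = "$UNK$"  # id reserved for out-of-vocabulary tokens
    return vocab, len(tokens) + 1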
def main():
    data_filename = "./simulators/house/AssetsHouse"
    experiment_name = "house_test_most_frequent_test_dataset"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                     ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    setup_validator = HouseSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"],
                               config["use_manipulation"],
                               config["num_manipulation_row"],
                               config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # Create the vocabulary (index -> token)
    vocab = dict()
    vocab_list = open(data_filename + "/house_all_vocab.txt").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        # vocab[token] = i
        vocab[i] = token
    # vocab["$UNK$"] = len(vocab_list)
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    # Number of processes: one per house
    house_ids = [1, 2, 3, 4, 5]
    num_processes = len(house_ids)

    try:
        # Read the dataset
        test_data = []
        for hid in house_ids:
            all_dev_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) + "_discrete_test.json", config)
            # Extract type of the dataset
            # lines = open("./datapoint_type_house" + str(hid) + ".txt").readlines()
            # datapoint_id_type = {}
            # for line in lines:
            #     datapoint_id, datapoint_type = line.split()
            #     datapoint_id_type[int(datapoint_id)] = datapoint_type.strip()

            # Filter manipulation type
            # all_dev_data = filter(lambda datapoint: datapoint_id_type[datapoint.get_id()] == "navigation",
            #                       all_dev_data)
            test_data.append(list(all_dev_data))

        processes = []

        # Start the testing thread(s)
        ports = find_k_ports(num_processes)
        master_logger.log("Found K Ports")
        for i, port in enumerate(ports):
            test_chunk = test_data[i]
            # Simulator i runs house i and uses the dataset for house i
            tmp_config = {k: v for k, v in config.items()}
            tmp_config["port"] = port
            print("Client " + str(i) + " getting a test set of size ", len(test_chunk))
            client_logger = multiprocess_logging_manager.get_logger(i)
            p = mp.Process(target=Agent.test_multiprocess,
                           args=(house_ids[i], test_chunk, tmp_config, action_space,
                                 port, Agent.MOST_FREQUENT, meta_data_util, constants,
                                 vocab, client_logger, None))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
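# --- Illustrative sketch (not part of the original script) ---
# Every train/test script in this collection follows the same fan-out/join
# pattern: start one non-daemon process per data chunk, then join them all.
# A minimal generic version; `run_workers` and `worker_fn` are hypothetical
# names, and each worker is assumed to take (worker_id, chunk).
import multiprocessing

def run_workers(worker_fn, chunks):
    processes = []
    for i, chunk in enumerate(chunks):
        p = multiprocessing.Process(target=worker_fn, args=(i, chunk))
        p.daemon = False  # workers may spawn children (e.g., simulator builds)
        p.start()
        processes.append(p)
    for p in processes:
        p.join()  # block until every worker finishes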
def main(): experiment_name = "train-streetview-deepmind-mixture-learning-repeat" experiment = "./results/" + experiment_name print("EXPERIMENT NAME: ", experiment_name) # Create the experiment folder if not os.path.exists(experiment): os.makedirs(experiment) # Define log settings log_path = experiment + '/train_baseline.log' multiprocess_logging_manager = MultiprocessingLoggerManager( file_path=log_path, logging_level=logging.INFO) master_logger = multiprocess_logging_manager.get_logger("Master") master_logger.log( "----------------------------------------------------------------") master_logger.log( " STARING NEW EXPERIMENT ") master_logger.log( "----------------------------------------------------------------") with open("data/streetview/config.json") as f: config = json.load(f) with open("data/shared/contextual_bandit_constants.json") as f: constants = json.load(f) print(json.dumps(config, indent=2)) setup_validator = StreetViewSetupValidator() setup_validator.validate(config, constants) # log core experiment details master_logger.log("CONFIG DETAILS") for k, v in sorted(config.items()): master_logger.log(" %s --- %r" % (k, v)) master_logger.log("CONSTANTS DETAILS") for k, v in sorted(constants.items()): master_logger.log(" %s --- %r" % (k, v)) master_logger.log("START SCRIPT CONTENTS") with open(__file__) as f: for line in f.readlines(): master_logger.log(">>> " + line.strip()) master_logger.log("END SCRIPT CONTENTS") action_space = ActionSpace(config["action_names"], config["stop_action"]) meta_data_util = MetaDataUtil() # Learning algorithm parser = argparse.ArgumentParser(description='Parser Values') parser.add_argument('--num_processes', type=int, default=6, help='num of process') parser.add_argument('--learning_alg', type=str, default="cb", help='learning alg ("cb", "sup", "mix"') args = parser.parse_args() num_processes = args.num_processes learning_alg = args.learning_alg master_logger.log("Num processes %r, Learning Algorithm %r " % (num_processes, learning_alg)) try: # Create the model master_logger.log("CREATING MODEL") model_type = TmpStreetviewIncrementalModelDeepMindPolicyNetwork # model_type = TmpStreetviewIncrementalModelConcatRecurrentPolicyNetwork # model_type = IncrementalModelChaplot shared_model = model_type(config, constants) shared_model.init_weights() # make the shared model use share memory shared_model.share_memory() master_logger.log("MODEL CREATED") print("Created Model...") # Read the dataset train_split = DatasetParser.parse( "data/streetview/navigation_train.json", config) tune_split = DatasetParser.parse("data/streetview/navigation_dev.json", config) master_logger.log("Created train dataset of size %d " % len(train_split)) master_logger.log("Created tuning dataset of size %d " % len(tune_split)) processes = [] # Split the train data between processes train_split_process_chunks = [] tune_split_process_chunks = [] train_chunk_size = int(len(train_split) / num_processes) tune_chunk_size = int(len(tune_split) / num_processes) train_pad = 0 tune_pad = 0 for i in range(0, num_processes): train_split_process_chunks.append(train_split[train_pad:train_pad + train_chunk_size]) tune_split_process_chunks.append(tune_split[tune_pad:tune_pad + tune_chunk_size]) train_pad += train_chunk_size tune_pad += tune_chunk_size # Start the training thread(s) for i in range(0, num_processes): train_chunk = train_split_process_chunks[i] if i == num_processes - 1: # Don't want each client to do testing. 
tmp_tune_split = tune_split_process_chunks[i] else: tmp_tune_split = tune_split_process_chunks[i] print("Client " + str(i) + " getting a validation set of size ", len(tmp_tune_split)) server = StreetViewServer(config, action_space, forward_setting_strict=False) client_logger = multiprocess_logging_manager.get_logger(i) if learning_alg == "cb" or (learning_alg == "mix" and i < num_processes - 2): p = mp.Process( target=TmpStreetViewAsynchronousContextualBandit.do_train, args=(shared_model, config, action_space, meta_data_util, constants, train_chunk, tmp_tune_split, experiment, experiment_name, i, server, client_logger, model_type)) elif learning_alg == "sup" or (learning_alg == "mix" and i >= num_processes - 2): p = mp.Process( target=TmpStreetViewAsynchronousSupervisedLearning. do_train, args=(shared_model, config, action_space, meta_data_util, constants, train_chunk, tmp_tune_split, experiment, experiment_name, i, server, client_logger, model_type)) else: raise NotImplementedError() p.daemon = False p.start() processes.append(p) for p in processes: p.join() except Exception: exc_info = sys.exc_info() traceback.print_exception(*exc_info)
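# --- Illustrative sketch (not part of the original script) ---
# The branching above encodes a simple worker-assignment rule for the mixture
# algorithm: the last two workers run supervised learning and the rest run the
# contextual bandit. The same rule extracted as a pure function for clarity;
# `assign_algorithm` is a hypothetical helper name.
def assign_algorithm(learning_alg, worker_id, num_workers):
    if learning_alg == "cb" or (learning_alg == "mix" and worker_id < num_workers - 2):
        return "contextual_bandit"
    if learning_alg == "sup" or (learning_alg == "mix" and worker_id >= num_workers - 2):
        return "supervised"
    raise NotImplementedError("Unknown learning algorithm %r" % learning_alg)

# Example: with 6 workers and "mix", workers 0-3 run the contextual bandit
# and workers 4-5 run supervised learning.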
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", default='diabcombolock',
                        help="name of the environment e.g., montezuma")
    parser.add_argument("--num_processes", default=6, type=int,
                        help="number of policy search (PS) processes to be launched at a given time")
    parser.add_argument("--forwardmodel", default='forwardmodel',
                        help="Model for training the forward abstraction")
    parser.add_argument("--backwardmodel", default='backwardmodel',
                        help="Model for learning the backward abstraction")
    parser.add_argument("--discretization", default="True",
                        help="Train with discretized/undiscretized model")
    parser.add_argument("--policy_type", default="linear", type=str,
                        help="Type of policy (linear, non-linear)")
    parser.add_argument("--name", default="run-homer", help="Name of the experiment")
    parser.add_argument("--horizon", default=-1, type=int, help="Horizon")
    parser.add_argument("--samples", default=-1, type=int, help="Samples")
    parser.add_argument("--env_seed", default=None, type=int, help="Environment Seed")
    parser.add_argument("--noise", default=None, type=str, help="Noise")
    parser.add_argument("--save_trace", default="False", help="Save traces")
    parser.add_argument("--trace_sample_rate", default=500, type=int,
                        help="How often to save traces")
    parser.add_argument("--save_path", default="./results/", type=str,
                        help="Folder where to save results")
    parser.add_argument("--debug", default="False", help="Debug the run")
    parser.add_argument("--pushover", default="False",
                        help="Use pushover to send results on phone")
    args = parser.parse_args()

    env_name = args.env
    num_processes = args.num_processes
    exp_name = args.name

    experiment_name = "%s-%s-model-%s-horizon-%d-samples-%d-noise-%s" % \
                      (exp_name, env_name, args.backwardmodel, args.horizon, args.samples, args.noise)
    experiment = "%s/%s" % (args.save_path, experiment_name)
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_homer.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                     ")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("Environment Name %r. Experiment Name %r" % (env_name, exp_name))

    # Read the configuration and constants files. The configuration contains environment
    # information and the constants file contains hyperparameters for the model and
    # learning algorithm.
    with open("data/%s/config.json" % env_name) as f:
        config = json.load(f)

    # Add command line arguments. Command line arguments supersede file settings.
    if args.horizon != -1:
        config["horizon"] = args.horizon
    if args.noise is not None:
        config["noise"] = args.noise
    config["save_trace"] = args.save_trace == "True"
    config["trace_sample_rate"] = args.trace_sample_rate
    config["save_path"] = experiment
    config["exp_name"] = experiment_name
    config["env_seed"] = args.env_seed
    config["policy_type"] = args.policy_type
    GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)

    with open("data/%s/constants.json" % env_name) as f:
        constants = json.load(f)
    if args.samples != -1:
        constants["encoder_training_num_samples"] = args.samples
    constants["forward_model_type"] = args.forwardmodel
    constants["backward_model_type"] = args.backwardmodel
    constants["discretization"] = args.discretization == "True"
    print(json.dumps(config, indent=2))

    # Validate the keys
    validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    performance = []
    num_runs = 1
    for trial in range(1, num_runs + 1):
        master_logger.log("========= STARTING EXPERIMENT %d ======== " % trial)

        # Create a new environment
        env = GenerateEnvironmentWrapper(env_name, config)
        master_logger.log("Environment Created")
        print("Created Environment...")

        # Save the environment for reproducibility
        env.save_environment(experiment, trial_name=trial)
        print("Saving Environment...")

        homing_policy_validation_fn = env.generate_homing_policy_validation_fn()

        learning_alg = DistributedHomerAlgorithm(config, constants)
        policy_result = learning_alg.train(
            experiment=experiment,
            env=env,
            env_name=env_name,
            num_processes=num_processes,
            experiment_name=experiment_name,
            logger=master_logger,
            use_pushover=args.pushover == "True",
            debug=args.debug == "True",
            homing_policy_validation_fn=homing_policy_validation_fn,
            trial=trial,
            do_reward_sensitive_learning=True)
        performance.append(policy_result)

    for key in performance[0]:  # Assumes the keys are the same across all runs
        results = [result[key] for result in performance]
        if len(results) <= 1:
            stdev = 0.0
        else:
            stdev = statistics.stdev(results)
        master_logger.log("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
                          (key, statistics.mean(results), statistics.median(results),
                           stdev, num_runs, results))
        print("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
              (key, statistics.mean(results), statistics.median(results),
               stdev, num_runs, results))

    # Cleanup
    multiprocess_logging_manager.cleanup()
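# --- Illustrative sketch (not part of the original script) ---
# The override block above implements a precedence rule: command line values
# supersede the config file, with -1/None serving as "not set" sentinels.
# A minimal standalone version; `apply_cli_overrides` is a hypothetical helper.
def apply_cli_overrides(config, overrides, sentinels=(None, -1)):
    for key, value in overrides.items():
        if value not in sentinels:  # only apply explicitly set values
            config[key] = value
    return config

# Example: apply_cli_overrides({"horizon": 100, "noise": "hard"},
#                              {"horizon": 25, "noise": None})
# replaces "horizon" with 25 but keeps "noise" from the file.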
def main(): experiment_name = "blocks_experiments" experiment = "./results/" + experiment_name print("EXPERIMENT NAME: ", experiment_name) # Create the experiment folder if not os.path.exists(experiment): os.makedirs(experiment) # Define log settings log_path = experiment + '/train_baseline.log' multiprocess_logging_manager = MultiprocessingLoggerManager( file_path=log_path, logging_level=logging.INFO) master_logger = multiprocess_logging_manager.get_logger("Master") master_logger.log( "----------------------------------------------------------------") master_logger.log( " STARING NEW EXPERIMENT ") master_logger.log( "----------------------------------------------------------------") with open("data/blocks/config.json") as f: config = json.load(f) with open("data/shared/contextual_bandit_constants.json") as f: constants = json.load(f) print(json.dumps(config, indent=2)) setup_validator = BlocksSetupValidator() setup_validator.validate(config, constants) # log core experiment details master_logger.log("CONFIG DETAILS") for k, v in sorted(config.items()): master_logger.log(" %s --- %r" % (k, v)) master_logger.log("CONSTANTS DETAILS") for k, v in sorted(constants.items()): master_logger.log(" %s --- %r" % (k, v)) master_logger.log("START SCRIPT CONTENTS") with open(__file__) as f: for line in f.readlines(): master_logger.log(">>> " + line.strip()) master_logger.log("END SCRIPT CONTENTS") action_space = ActionSpace(config) meta_data_util = MetaDataUtil() # Create vocabulary vocab = dict() vocab_list = open("./Assets/vocab_both").readlines() for i, tk in enumerate(vocab_list): token = tk.strip().lower() vocab[token] = i vocab["$UNK$"] = len(vocab_list) config["vocab_size"] = len(vocab_list) + 1 # Number of processes num_processes = 6 try: # Create the model master_logger.log("CREATING MODEL") model_type = IncrementalModelEmnlp shared_model = model_type(config, constants) # make the shared model use share memory shared_model.share_memory() master_logger.log("MODEL CREATED") print("Created Model...") # Read the dataset all_train_data = DatasetParser.parse("trainset.json", config) num_train = int(0.8 * len(all_train_data)) train_split = all_train_data[:num_train] tune_split = list(all_train_data[num_train:]) shuffle(train_split) # shuffle the split to break ties master_logger.log("Created train dataset of size %d " % len(train_split)) master_logger.log("Created tuning/validation dataset of size %d " % len(tune_split)) processes = [] # Split the train data between processes train_split_process_chunks = [] chunk_size = int(len(train_split) / num_processes) pad = 0 for i in range(0, num_processes): chunk = train_split[pad:pad + chunk_size] pad += chunk_size train_split_process_chunks.append(chunk) simulator_file = "./simulators/blocks/retro_linux_build.x86_64" # Start the training thread(s) ports = find_k_ports(num_processes) for i, port in enumerate(ports): train_chunk = train_split_process_chunks[i] tmp_config = {k: v for k, v in config.items()} tmp_config["port"] = port if i == num_processes - 1: # Master client which does testing. Don't want each client to do testing. 
tmp_tune_split = tune_split else: tmp_tune_split = [] print("Client " + str(i) + " getting a validation set of size ", len(tmp_tune_split)) server = BlocksServer(tmp_config, action_space, vocab=vocab) client_logger = multiprocess_logging_manager.get_logger(i) p = mp.Process(target=AsynchronousContextualBandit.do_train, args=(simulator_file, shared_model, tmp_config, action_space, meta_data_util, constants, train_chunk, tmp_tune_split, experiment, experiment_name, i, server, client_logger, model_type)) p.daemon = False p.start() processes.append(p) for p in processes: p.join() except Exception: exc_info = sys.exc_info() traceback.print_exception(*exc_info)
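# --- Illustrative sketch (not part of the original script) ---
# The blocks script above takes the first 80% of the file as training data and
# the tail as tuning data, shuffling only the training slice afterwards. A
# standalone version of that split; `split_dataset` is a hypothetical helper,
# and the optional pre-slice shuffle is an added alternative (not in the
# original) that would also randomize which datapoints land in the tune set.
from random import shuffle

def split_dataset(all_data, train_fraction=0.8, shuffle_first=False):
    data = list(all_data)
    if shuffle_first:
        shuffle(data)  # randomize tune-set membership too
    num_train = int(train_fraction * len(data))
    train, tune = data[:num_train], data[num_train:]
    shuffle(train)  # mirror the original: shuffle only the training slice
    return train, tune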
def main():
    parser = argparse.ArgumentParser(description='du_baselines Experiments')
    parser.add_argument("--env", default='diabcombolock',
                        help="name of the environment e.g., montezuma")
    parser.add_argument("--name", default="run-du-baselines", help="Name of the experiment")
    parser.add_argument("--horizon", default=-1, type=int, help="Horizon")
    parser.add_argument("--noise", default=None, type=str, help="Noise")
    parser.add_argument("--save_trace", default="False", help="Save traces")
    parser.add_argument("--trace_sample_rate", default=500, type=int,
                        help="How often to save traces")
    parser.add_argument("--save_path", default="./results/", type=str,
                        help="Folder where to save results")
    parser.add_argument("--debug", default="False", help="Debug the run")
    parser.add_argument("--pushover", default="False",
                        help="Use pushover to send results on phone")

    # Options for Du baselines
    parser.add_argument('--seed', type=int, default=367, metavar='N',
                        help='random seed (default: 367)')
    parser.add_argument('--episodes', type=int, default=10000000, help='Training Episodes')
    parser.add_argument('--alg', type=str, default='decoding', help='Learning Algorithm',
                        choices=["oracleq", "decoding", "qlearning"])
    parser.add_argument('--model_type', type=str, default='linear',
                        help='What model class for function approximation',
                        choices=['linear', 'nn'])
    parser.add_argument('--lr', type=float, default=3e-2,
                        help='Learning Rate for optimization-based algorithms')
    parser.add_argument('--epsfrac', type=float, default=0.1,
                        help='Exploration fraction for Baseline DQN.')
    parser.add_argument('--conf', type=float, default=3e-2,
                        help='Exploration Bonus Parameter for Oracle Q.')
    parser.add_argument('--n', type=int, default=200,
                        help="Data collection parameter for the decoding algorithm.")
    parser.add_argument('--num_cluster', type=int, default=3,
                        help="Number of hidden states parameter for the decoding algorithm.")
    args = parser.parse_args()

    env_name = args.env
    exp_name = args.name

    experiment_name = "%s-%s-model-%s-horizon-%d-samples-%d-noise-%s" % \
                      (exp_name, env_name, args.model_type, args.horizon, args.episodes, args.noise)
    experiment = "%s/%s" % (args.save_path, experiment_name)
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_homer.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                     ")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("Environment Name %r. Experiment Name %r" % (env_name, exp_name))

    # Read the configuration and constants files. The configuration contains environment
    # information and the constants file contains hyperparameters for the model and
    # learning algorithm.
    with open("data/%s/config.json" % env_name) as f:
        config = json.load(f)

    # Add command line arguments. Command line arguments supersede file settings.
    if args.horizon != -1:
        config["horizon"] = args.horizon
    if args.noise is not None:
        config["noise"] = args.noise
    config["save_trace"] = args.save_trace == "True"
    config["trace_sample_rate"] = args.trace_sample_rate
    config["save_path"] = args.save_path
    config["exp_name"] = experiment_name
    GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)

    with open("data/%s/constants.json" % env_name) as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))

    # Validate the keys
    validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    performance = []
    num_runs = 5
    for trial in range(1, num_runs + 1):
        master_logger.log("========= STARTING EXPERIMENT %d ======== " % trial)

        # Seed all RNG sources with trial-dependent offsets for reproducibility
        random.seed(args.seed + trial * 29)
        np.random.seed(args.seed + trial * 29)
        torch.manual_seed(args.seed + trial * 37)

        # Create a new environment
        env = GenerateEnvironmentWrapper(env_name, config)
        master_logger.log("Environment Created")
        print("Created Environment...")

        # Save the environment for reproducibility
        env.save_environment(experiment, trial_name=trial)
        print("Saving Environment...")

        learning_alg = du_baseline.get_alg(args, config)
        policy_result = du_baseline.train(env, learning_alg, args, master_logger)
        performance.append(policy_result)

    for key in performance[0]:  # Assumes the keys are the same across all runs
        results = [result[key] for result in performance]
        master_logger.log("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
                          (key, statistics.mean(results), statistics.median(results),
                           statistics.stdev(results), num_runs, results))
        print("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
              (key, statistics.mean(results), statistics.median(results),
               statistics.stdev(results), num_runs, results))

    # Cleanup
    multiprocess_logging_manager.cleanup()
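# --- Illustrative sketch (not part of the original script) ---
# The trial loop above derives distinct but reproducible seeds from a base
# seed using trial-dependent offsets, and seeds all three RNG sources that the
# baselines touch. A standalone version; `seed_trial` is a hypothetical helper.
import random
import numpy as np
import torch

def seed_trial(base_seed, trial):
    random.seed(base_seed + trial * 29)        # Python RNG
    np.random.seed(base_seed + trial * 29)     # NumPy RNG
    torch.manual_seed(base_seed + trial * 37)  # PyTorch RNG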