def main():
    """Launch multi-process training of ChaplotBaselineStreetView.

    Steps, in order:
      1. create ``./results/<experiment_name>`` and the multi-process logger;
      2. parse the command-line arguments (environment and A3C settings);
      3. load and validate the StreetView config and the shared constants,
         then log them together with the contents of this script;
      4. build the shared model, place it in shared memory and wrap it in
         ``ChaplotBaselineStreetView``;
      5. split the train and dev data into ``--num-processes`` contiguous
         chunks and start one ``ChaplotBaselineStreetView.do_train`` process
         per chunk, then wait for all of them.

    Exceptions raised from step 4 onwards are caught and their traceback is
    printed; failures in the earlier steps propagate to the caller.
    """

    experiment_name = "train_a3c_ga_chaplot_baseline_streetview"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder; exist_ok avoids the check-then-create race
    # when several launchers start at the same time.
    os.makedirs(experiment, exist_ok=True)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    parser = argparse.ArgumentParser(description='Gated-Attention for Grounding')

    # Environment arguments
    parser.add_argument('-l', '--max-episode-length', type=int, default=50,
                        help='maximum length of an episode (default: 50)')
    parser.add_argument('-d', '--difficulty', type=str, default="hard",
                        help="""Difficulty of the environment,
                        "easy", "medium" or "hard" (default: hard)""")
    parser.add_argument('--living-reward', type=float, default=0,
                        help="""Default reward at each time step (default: 0,
                        change to -0.005 to encourage shorter paths)""")
    parser.add_argument('--frame-width', type=int, default=300,
                        help='Frame width (default: 300)')
    parser.add_argument('--frame-height', type=int, default=168,
                        help='Frame height (default: 168)')
    parser.add_argument('-v', '--visualize', type=int, default=0,
                        help="""Visualize the envrionment (default: 0,
                        use 0 for faster training)""")
    parser.add_argument('--sleep', type=float, default=0,
                        help="""Sleep between frames for better
                        visualization (default: 0)""")
    parser.add_argument('--scenario-path', type=str, default="maps/room.wad",
                        help="""Doom scenario file to load
                        (default: maps/room.wad)""")
    parser.add_argument('--interactive', type=int, default=0,
                        help="""Interactive mode enables human to play
                        (default: 0)""")
    parser.add_argument('--all-instr-file', type=str,
                        default="data/instructions_all.json",
                        help="""All instructions file
                        (default: data/instructions_all.json)""")
    parser.add_argument('--train-instr-file', type=str,
                        default="data/instructions_train.json",
                        help="""Train instructions file
                        (default: data/instructions_train.json)""")
    parser.add_argument('--test-instr-file', type=str,
                        default="data/instructions_test.json",
                        help="""Test instructions file
                        (default: data/instructions_test.json)""")
    parser.add_argument('--object-size-file', type=str,
                        default="data/object_sizes.txt",
                        help='Object size file (default: data/object_sizes.txt)')

    # A3C arguments
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--gamma', type=float, default=0.99, metavar='G',
                        help='discount factor for rewards (default: 0.99)')
    parser.add_argument('--tau', type=float, default=1.00, metavar='T',
                        help='parameter for GAE (default: 1.00)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('-n', '--num-processes', type=int, default=6, metavar='N',
                        help='how many training processes to use (default: 6)')
    parser.add_argument('--num-steps', type=int, default=20, metavar='NS',
                        help='number of forward steps in A3C (default: 20)')
    parser.add_argument('--load', type=str, default="0",
                        help='model path to load, 0 to not reload (default: 0)')
    parser.add_argument('-e', '--evaluate', type=int, default=0,
                        help="""0:Train, 1:Evaluate MultiTask Generalization
                        2:Evaluate Zero-shot Generalization (default: 0)""")
    parser.add_argument('--dump-location', type=str, default="./saved/",
                        help='path to dump models and log (default: ./saved/)')

    args = parser.parse_args()

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)
    # NOTE(review): the "+ 2" presumably reserves extra token ids on top of
    # the vocabulary -- confirm against the model's embedding layer.
    args.input_size = config['vocab_size'] + 2

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    # Dump this script into the log so the run can be traced back to its code.
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])
    meta_data_util = MetaDataUtil()

    def split_evenly(dataset, num_chunks):
        """Cut ``dataset`` into ``num_chunks`` contiguous slices.

        Every item lands in exactly one slice; the first
        ``len(dataset) % num_chunks`` slices hold one extra item, so no
        example is dropped when the size is not a multiple of num_chunks.
        """
        base_size, extra = divmod(len(dataset), num_chunks)
        chunks = []
        start = 0
        for chunk_ix in range(num_chunks):
            stop = start + base_size + (1 if chunk_ix < extra else 0)
            chunks.append(dataset[start:stop])
            start = stop
        return chunks

    try:
        # create tensorboard
        tensorboard = None  # Tensorboard(experiment_name)

        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = a3c_lstm_ga_default
        shared_model = model_type(args, config=config, final_image_height=3, final_image_width=3)

        # make the shared model use share memory
        shared_model.share_memory()

        # The LSTM size is tripled for the concat-gavector model variant.
        lstm_size = 256
        if isinstance(shared_model, a3c_lstm_ga_concat_gavector):
            lstm_size *= 3
        contextual_bandit = False
        model = ChaplotBaselineStreetView(args, shared_model, config, constants, tensorboard,
                                          use_contextual_bandit=contextual_bandit, lstm_size=lstm_size)

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        train_split = DatasetParser.parse("data/streetview/navigation_train.json", config)
        tune_split = DatasetParser.parse("data/streetview/navigation_dev.json", config)
        master_logger.log("Created train dataset of size %d " % len(train_split))
        master_logger.log("Created tuning dataset of size %d " % len(tune_split))

        processes = []

        # Split the train and tune data between processes
        train_split_process_chunks = split_evenly(train_split, args.num_processes)
        tune_split_process_chunks = split_evenly(tune_split, args.num_processes)

        # Start the training process(es)
        for i in range(args.num_processes):
            train_chunk = train_split_process_chunks[i]
            tune_chunk = tune_split_process_chunks[i]
            print("Client " + str(i) + " receives train-split of size %d and tune-split of size %d " %
                  (len(train_chunk), len(tune_chunk)))
            server = StreetViewServer(config, action_space, forward_setting_strict=False)
            client_logger = multiprocess_logging_manager.get_logger(i)
            p = mp.Process(target=ChaplotBaselineStreetView.do_train, args=(model, shared_model, config,
                                                                            action_space, meta_data_util, args,
                                                                            constants, train_chunk, tune_chunk,
                                                                            experiment, experiment_name, i, server,
                                                                            client_logger, model_type,
                                                                            contextual_bandit))

            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        # Top-level boundary: report the failure instead of propagating it.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
예제 #2
0
def main(args):
    """Launch one ChaplotBaselineHouse training process per house.

    Creates ``./results/<experiment_name>`` and the multi-process logger,
    loads and validates the house config and shared constants (with the
    horizon overridden to 40), builds the index-to-token vocabulary and the
    shared model, then starts one ``ChaplotBaselineHouse.do_train`` process
    for each id in ``house_ids`` and waits for all of them.

    Args:
        args: argument namespace forwarded to the model and to ``do_train``.
            It is mutated here: ``input_size`` and ``num_processes`` are set.

    Exceptions raised while building the model, reading the datasets or
    launching the workers are caught and their traceback is printed; earlier
    failures propagate to the caller.
    """

    experiment_name = "train_house_chaplot_house_baseline_postbugfix"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    data_filename = "simulators/house/AssetsHouse"

    # Create the experiment folder; exist_ok avoids the check-then-create race.
    os.makedirs(experiment, exist_ok=True)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    setup_validator = HouseSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    # Dump this script into the log so the run can be traced back to its code.
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"], config["use_manipulation"],
                               config["num_manipulation_row"], config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # Create vocabulary: maps token index -> lower-cased token, with "$UNK$"
    # appended at index len(vocab_list). The file is closed as soon as it is read.
    with open(data_filename + "/house_all_vocab.txt") as vocab_file:
        vocab_list = vocab_file.readlines()
    vocab = {i: tk.strip().lower() for i, tk in enumerate(vocab_list)}
    vocab[len(vocab_list)] = "$UNK$"

    # NOTE(review): the "+ 2" presumably reserves extra token ids on top of
    # the vocabulary -- confirm against the model's embedding layer.
    args.input_size = config['vocab_size'] + 2

    # Number of processes: one per house
    house_ids = [1, 2, 3, 4, 5]
    num_processes = len(house_ids)
    args.num_processes = num_processes

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = a3c_lstm_ga_default
        shared_model = model_type(args, action_space=action_space, config=config)

        # make the shared model use share memory
        shared_model.share_memory()

        # The LSTM size is tripled for the concat-gavector model variant.
        lstm_size = 256
        if isinstance(shared_model, a3c_lstm_ga_concat_gavector):
            lstm_size *= 3
        model = ChaplotBaselineHouse(args, shared_model, config, constants, tensorboard=None,
                                     use_contextual_bandit=False, lstm_size=lstm_size)

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset: one train list and one dev list per house,
        # stored in the same order as house_ids.
        tune_split, train_split = [], []
        for hid in house_ids:
            all_train_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) + "_discrete_train.json", config)
            all_dev_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) + "_discrete_dev.json", config)

            train_split.append(all_train_data)
            tune_split.append(all_dev_data)

            master_logger.log("Created train dataset of size {} ".format(len(all_train_data)))
            master_logger.log("Created tuning dataset of size {} ".format(len(all_dev_data)))

        # Start the training process(es)
        ports = find_k_ports(num_processes)
        master_logger.log("Found K Ports")
        processes = []
        for i, port in enumerate(ports):
            train_chunk = train_split[i]
            print("Size of training data: {}".format(len(train_chunk)))
            # Each worker gets its own shallow copy of the config carrying its port.
            tmp_config = dict(config)
            tmp_config["port"] = port
            tmp_tune_split = tune_split[i]
            print("Client " + str(house_ids[i]) + " getting a validation set of size ", len(tmp_tune_split))
            server = HouseServer(tmp_config, action_space, port)
            client_logger = multiprocess_logging_manager.get_logger(i)

            # Run the Training
            p = mp.Process(target=ChaplotBaselineHouse.do_train, args=(house_ids[i], model, shared_model, tmp_config,
                                                                       action_space, meta_data_util,
                                                                       constants, train_chunk, tmp_tune_split,
                                                                       experiment, experiment_name, i, server,
                                                                       client_logger, model_type, vocab, args,
                                                                       False, lstm_size))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        # Top-level boundary: report the failure instead of propagating it.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
예제 #3
0
def main():
    """Replay the dataset trajectories on the StreetView server as a sanity check.

    For every training datapoint the server is reset, the exact trajectory
    (cut to at most ``constants["horizon"]`` actions) is executed action by
    action, and after each action the reported ``panorama_id`` is compared
    with the current or the next node of the datapoint's route. A mismatch is
    logged and raises ``AssertionError``. Rewards and the final navigation
    error are logged for each datapoint.

    Exceptions raised inside the replay loop (including that AssertionError)
    are caught and their traceback is printed.
    """

    experiment_name = "debug_oracle_trajectory"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Make sure the experiment folder is there
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Logging setup
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=experiment + '/train_baseline.log',
        logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    for banner_line in (
            "----------------------------------------------------------------",
            "                    STARING NEW EXPERIMENT                      ",
            "----------------------------------------------------------------"):
        master_logger.log(banner_line)

    with open("data/streetview/config.json") as config_file:
        config = json.load(config_file)
    with open("data/shared/contextual_bandit_constants.json") as constants_file:
        constants = json.load(constants_file)
    print(json.dumps(config, indent=2))
    StreetViewSetupValidator().validate(config, constants)

    # Record the core experiment details, config first and constants second
    for title, settings in (("CONFIG DETAILS", config),
                            ("CONSTANTS DETAILS", constants)):
        master_logger.log(title)
        for name, value in sorted(settings.items()):
            master_logger.log("    %s --- %r" % (name, value))

    # Copy this script into the log, one stripped line at a time
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as script:
        for script_line in script:
            master_logger.log(">>> " + script_line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])

    try:

        # Load the dataset and bring up the server
        train_split = DatasetParser.parse(
            "data/streetview/navigation_train.json", config)
        server = StreetViewServer(config,
                                  action_space,
                                  forward_setting_strict=False)

        for data_point in train_split:

            _, metadata = server.reset_receive_feedback(data_point)
            trajectory = server.get_trajectory_exact(data_point.trajectory)
            num_actions = min(len(trajectory), constants["horizon"])
            trajectory = trajectory[:num_actions]

            # Translate the route's panorama ids through the server's lookup table
            traj_node_ids = []
            for pano_id in data_point.trajectory:
                traj_node_ids.append(server.fsa.panorama_to_node_dict[pano_id])
            total_reward = 0

            master_logger.log("Route ID: %r " % traj_node_ids)
            node_ix = 0

            for action in trajectory:
                expected_id = traj_node_ids[node_ix]
                _, reward, metadata = server.send_action_receive_feedback(
                    action)
                total_reward += reward
                master_logger.log("Reward %r, Action %r, Metadata %r" %
                                  (reward, action, metadata))

                # After an action the agent must be on the same route node or the next one
                reached_id = metadata["panorama_id"]
                if expected_id == reached_id:
                    continue

                if node_ix >= len(traj_node_ids) - 1:
                    master_logger.log(
                        "Failed. Went to a node beyond the trajectory")
                    raise AssertionError()

                next_id = traj_node_ids[node_ix + 1]
                if next_id != reached_id:
                    master_logger.log(
                        "Supposed to go to %r but went to %r " %
                        (next_id, reached_id))
                    raise AssertionError()

                node_ix += 1

            # Stop the episode and report the totals
            _, reward, metadata = server.halt_and_receive_feedback()
            total_reward += reward
            master_logger.log("Reward %r, Action stop, Metadata %r" %
                              (reward, metadata))
            master_logger.log("Total reward %r, Nav Error %r " %
                              (total_reward, metadata["navigation_error"]))

    except Exception:
        traceback.print_exception(*sys.exc_info())
예제 #4
0
def main():
    """Launch asynchronous contextual-bandit training on the nav-drone data.

    Creates ``./results/<experiment_name>`` and the multi-process logger,
    loads and validates the nav-drone config and shared constants, builds the
    shared ``IncrementalModelOracleGoldProb`` model, splits the annotated
    data into train and tune parts, and starts ``num_processes`` workers
    running ``AsynchronousContextualBandit.do_train``. Only the last worker
    receives the tune split; the others get an empty one.

    Exceptions raised while building the model, reading the data or launching
    the workers are caught and their traceback is printed; earlier failures
    propagate to the caller.
    """

    experiment_name = "lani-asynchronous-training"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder; exist_ok avoids the check-then-create race.
    os.makedirs(experiment, exist_ok=True)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/nav_drone/config_localmoves_6000.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = NavDroneSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    # Dump this script into the log so the run can be traced back to its code.
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])
    meta_data_util = MetaDataUtil()

    # Number of processes
    num_processes = 6

    def scene_key(datapoint):
        """Return the second "_"-separated field of the datapoint's scene name."""
        return datapoint.get_scene_name().split("_")[1]

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = IncrementalModelOracleGoldProb
        shared_model = model_type(config, constants)

        # Initialize the model using random weights or from a file
        shared_model.init_weights()
        # shared_model.load_saved_model(
        #     "./results/model-folder-name/contextual_bandit_5_epoch_19")

        # Make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        all_train_data = DatasetParser.parse(
            "data/nav_drone/train_annotations_6000.json", config)
        # Start from a 95% / 5% cut, then move the cut forward while the
        # datapoints on both sides of it share the same scene key, so that
        # such a run is not divided between train and tune. The upper bound
        # stops the scan at the end of the data instead of raising IndexError
        # when the run extends to the last datapoint.
        num_train = (len(all_train_data) * 19) // 20
        while num_train < len(all_train_data) and \
                scene_key(all_train_data[num_train]) \
                == scene_key(all_train_data[num_train - 1]):
            num_train += 1
        train_split = all_train_data[:num_train]
        tune_split = all_train_data[num_train:]

        master_logger.log("Created train dataset of size %d " %
                          len(train_split))
        master_logger.log("Created tuning dataset of size %d " %
                          len(tune_split))

        processes = []

        # The simulator file is used to launch the client
        simulator_file = "./simulators/NavDroneLinuxBuild.x86_64"

        # Split the train data between processes. The first
        # len(train_split) % num_processes chunks take one extra datapoint so
        # that no example is dropped when the size is not an exact multiple.
        train_split_process_chunks = []
        chunk_size, extra = divmod(len(train_split), num_processes)
        pad = 0
        for i in range(num_processes):
            stop = pad + chunk_size + (1 if i < extra else 0)
            train_split_process_chunks.append(train_split[pad:stop])
            pad = stop

        # Start the training process(es)
        ports = find_k_ports(num_processes)
        for i, port in enumerate(ports):
            train_chunk = train_split_process_chunks[i]
            # Each worker gets its own shallow copy of the config carrying its port.
            tmp_config = dict(config)
            tmp_config["port"] = port
            if i == num_processes - 1:
                # Master client which does testing. Don't want each client to do testing.
                tmp_tune_split = tune_split
            else:
                tmp_tune_split = []

            print("Client " + str(i) + " getting a validation set of size ",
                  len(tmp_tune_split))
            server = NavDroneServerPy3(tmp_config,
                                       action_space,
                                       multi_client=True)
            client_logger = multiprocess_logging_manager.get_logger(i)

            p = mp.Process(target=AsynchronousContextualBandit.do_train,
                           args=(simulator_file, shared_model, tmp_config,
                                 action_space, meta_data_util, constants,
                                 train_chunk, tmp_tune_split, experiment,
                                 experiment_name, i, server, client_logger,
                                 model_type))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        # Top-level boundary: report the failure instead of propagating it.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
예제 #5
0
def main():
    """Dump numpy images for the blocks test set using a TmpBlockAgent.

    Creates ``./results/<experiment_name>`` and the multi-process logger,
    loads and validates the blocks config and shared constants, builds the
    token-to-index vocabulary (which also sets ``config["vocab_size"]``),
    launches one Unity build, connects a ``BlocksServer`` to it and calls
    ``TmpBlockAgent.save_numpy_image`` on the data parsed from
    ``testset.json``.

    Exceptions raised from model creation onwards are caught and their
    traceback is printed; earlier failures propagate to the caller.
    """

    experiment_name = "blocks_save_image-test"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder; exist_ok avoids the check-then-create race.
    os.makedirs(experiment, exist_ok=True)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/blocks/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = BlocksSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    # Dump this script into the log so the run can be traced back to its code.
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config)
    meta_data_util = MetaDataUtil()

    # Create vocabulary: maps lower-cased token -> line index, with "$UNK$"
    # mapped to len(vocab_list). The file is closed as soon as it is read.
    with open("./Assets/vocab_both") as vocab_file:
        vocab_list = vocab_file.readlines()
    vocab = {tk.strip().lower(): i for i, tk in enumerate(vocab_list)}
    vocab["$UNK$"] = len(vocab_list)
    config["vocab_size"] = len(vocab_list) + 1

    # Number of processes
    num_processes = 6

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = IncrementalModelEmnlp
        shared_model = model_type(config, constants)

        # make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset; everything goes to train, the tune split stays empty
        all_train_data = DatasetParser.parse("testset.json", config)
        tune_split = []  # all_train_data[:num_tune]
        train_split = list(all_train_data[:])

        master_logger.log("Created train dataset of size %d " % len(train_split))
        master_logger.log("Created tuning dataset of size %d " % len(tune_split))

        # Bring up a single simulator on the first of the discovered ports
        ports = find_k_ports(num_processes)
        tmp_config = dict(config)
        tmp_config["port"] = ports[0]

        server = BlocksServer(tmp_config, action_space)
        launch_k_unity_builds([ports[0]], "./simulators/blocks/retro_linux_build.x86_64")
        server.initialize_server()

        # Create a local model for rollouts
        local_model = model_type(config, constants)

        # Create the Agent
        tmp_agent = TmpBlockAgent(server=server,
                                  model=local_model,
                                  test_policy=None,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=config,
                                  constants=constants)
        tmp_agent.save_numpy_image(all_train_data, vocab, "test")

    except Exception:
        # Top-level boundary: report the failure instead of propagating it.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
예제 #6
0
def main():
    """Launch one TmpAsynchronousContextualBandit training process per house.

    Creates ``./results/<experiment_name>`` and the multi-process logger,
    loads and validates the house config and shared constants (with the
    horizon overridden to 40), builds the index-to-token vocabulary (which
    also sets ``config["vocab_size"]``) and the shared
    ``TmpHouseMisraBaseline`` model, then starts one
    ``TmpAsynchronousContextualBandit.do_train`` process for each id in
    ``house_ids`` and waits for all of them.

    Exceptions raised while building the model, reading the datasets or
    launching the workers are caught and their traceback is printed; earlier
    failures propagate to the caller.
    """

    data_filename = "simulators/house/AssetsHouse"
    experiment_name = "house_unet_cb_navigation_gold_goal_no_RNN"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder; exist_ok avoids the check-then-create race.
    os.makedirs(experiment, exist_ok=True)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    # Validate the setting
    setup_validator = HouseSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    # Dump this script into the log so the run can be traced back to its code.
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"],
                               config["use_manipulation"],
                               config["num_manipulation_row"],
                               config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # TODO: Create vocabulary
    # Maps token index -> lower-cased token, with "$UNK$" appended at index
    # len(vocab_list). The file is closed as soon as it is read.
    with open(data_filename + "/house_all_vocab.txt") as vocab_file:
        vocab_list = vocab_file.readlines()
    vocab = {i: tk.strip().lower() for i, tk in enumerate(vocab_list)}
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    # Number of processes: one per house
    house_ids = [1, 2, 3, 4, 5]
    num_processes = len(house_ids)

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = TmpHouseMisraBaseline  # TmpHouseIncrementalModelOracleGoldProb
        shared_model = model_type(config, constants, use_image=False)

        # make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset: one train list and one dev list per house, stored
        # in the same order as house_ids. The full parsed lists are used as-is
        # (no hold-out from train, no filtering by datapoint type).
        tune_split, train_split = [], []
        for hid in house_ids:
            all_train_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) +
                "_discrete_train.json", config)
            all_dev_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) +
                "_discrete_dev.json", config)

            train_split.append(all_train_data)
            tune_split.append(all_dev_data)

        processes = []

        # Start the training process(es)
        ports = find_k_ports(num_processes)
        master_logger.log("Found K Ports")
        for i, port in enumerate(ports):
            train_chunk = train_split[i]
            # Each worker gets its own shallow copy of the config carrying its port.
            tmp_config = dict(config)
            tmp_config["port"] = port
            tmp_tune_split = tune_split[i]
            print("Client " + str(i) + " getting a validation set of size ",
                  len(tmp_tune_split))
            server = HouseServer(tmp_config, action_space, port)
            master_logger.log("Server Initialized")
            client_logger = multiprocess_logging_manager.get_logger(i)
            p = mp.Process(target=TmpAsynchronousContextualBandit.do_train,
                           args=(house_ids[i], shared_model, tmp_config,
                                 action_space, meta_data_util, constants,
                                 train_chunk, tmp_tune_split, experiment,
                                 experiment_name, i, server, client_logger,
                                 model_type, vocab))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        # Top-level boundary: report the failure instead of propagating it.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
예제 #7
0
def main():
    """Run the PPO feature experiment five times and report the median result.

    Parses the command-line options, creates the experiment folder and log
    file, loads ``data/<env>/config.json`` and ``data/<env>/constants.json``,
    logs the settings together with this script's source, creates the
    environment and then calls ``ppo_feature`` five times. The value returned
    by each call is collected; the median over all calls is logged and printed.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("--env", default='stochcombolock', help="name of the environment e.g., montezuma")
    parser.add_argument("--num_processes", default=6, type=int,
                        help="number of policy search (PS) processes to be launched at a given time")
    parser.add_argument("--forwardmodel", default='forwardmodel', help="Model for training the forwad abstraction")
    parser.add_argument("--backwardmodel", default='backwardmodel', help="Model for learning the backward abstraction")
    # `args.model` is read below (experiment name and constants["model_type"]).
    # The option was never declared, so the script failed with AttributeError.
    # NOTE(review): the default value is an assumption -- confirm the intended model name.
    parser.add_argument("--model", default='gumbeldouble', help="Model for training the encoding function")
    parser.add_argument("--discretization", default="True", help="Train with discretized/undiscretized model")
    parser.add_argument("--policy_type", default="linear", type=str, help="Type of policy (linear, non-linear)")
    parser.add_argument("--name", default="neurips", help="Name of the experiment")
    parser.add_argument("--horizon", default=1, type=int, help="Horizon")
    parser.add_argument("--save_path", default="./results/", type=str, help="Folder where to save results")
    args = parser.parse_args()

    env_name = args.env
    num_processes = args.num_processes  # parsed but not used further in this function
    exp_name = args.name

    experiment_name = "ppo-%s-model-%s-horizon-%d-%s" % (exp_name, args.model, args.horizon, env_name)
    experiment = "./%s/%s" % (args.save_path, experiment_name)
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_homer.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("Environment Name %r. Experiment Name %r" % (env_name, exp_name))

    with open("data/%s/config.json" % env_name) as f:
        config = json.load(f)
    # Add command line arguments. Command line arguments supersede file settings.
    config["horizon"] = args.horizon
    GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)

    with open("data/%s/constants.json" % env_name) as f:
        constants = json.load(f)
    constants["model_type"] = args.model
    print(json.dumps(config, indent=2))

    # Validate the keys
    validate(config, constants)

    # Log core experiment details, including a verbatim copy of this script
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    # Create the environment
    env = GenerateEnvironmentWrapper(env_name, config)
    master_logger.log("Environment Created")
    print("Created Environment...")

    # The returned function is not used below; the call is kept because it
    # may have side effects on the environment -- TODO confirm and remove.
    homing_policy_validation_fn = env.generate_homing_policy_validation_fn()

    performance = []
    for attempt in range(1, 6):
        master_logger.log("========= STARTING EXPERIMENT %d ======== " % attempt)

        num_samples_half_regret = ppo_feature(experiment, env, config, constants, master_logger,
                                              use_pushover=False, debug=False)
        performance.append(num_samples_half_regret)

    median_performance = statistics.median(performance)
    master_logger.log("Median Performance %r. All performance %r" % (median_performance,
                                                                     performance))
    print("All performance ", performance)
    print("Median performance ", median_performance)
예제 #8
0
def main():
    """Test a saved model on the blocks dev set using the argmax test policy.

    Creates the experiment folder and log file, loads and validates the blocks
    config and the shared contextual-bandit constants, builds the vocabulary,
    loads a saved ``IncrementalModelEmnlp`` model, launches one simulator
    build and runs ``Agent.test`` on ``devset.json``. Any exception raised
    after the setup phase is caught and its traceback is printed.
    """

    experiment_name = "blocks_experiments"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/blocks/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = BlocksSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details, including a verbatim copy of this script
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config)
    meta_data_util = MetaDataUtil()

    # Create vocabulary: lower-cased token -> line index, plus a trailing
    # "$UNK$" entry. The file is read inside `with` so the handle is closed.
    with open("./Assets/vocab_both") as vocab_file:
        vocab_list = vocab_file.readlines()
    vocab = {tk.strip().lower(): i for i, tk in enumerate(vocab_list)}
    vocab["$UNK$"] = len(vocab_list)
    config["vocab_size"] = len(vocab_list) + 1

    # Test policy
    test_policy = gp.get_argmax_action

    # Create tensorboard
    tensorboard = Tensorboard("Agent Test")

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = IncrementalModelEmnlp
        shared_model = model_type(config, constants)
        shared_model.load_saved_model(
            "./results/model-folder-name/model-file-name")

        # Read the dataset
        test_data = DatasetParser.parse("devset.json", config)
        master_logger.log("Created test dataset of size %d " % len(test_data))

        # Create server and launch a client
        simulator_file = "./simulators/blocks/retro_linux_build.x86_64"
        config["port"] = find_k_ports(1)[0]
        server = BlocksServer(config, action_space, vocab=vocab)

        # Launch unity
        launch_k_unity_builds([config["port"]], simulator_file)
        server.initialize_server()

        # Create the agent
        master_logger.log("CREATING AGENT")
        agent = Agent(server=server,
                      model=shared_model,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)

        agent.test(test_data, tensorboard)

    except Exception:
        # Top-level boundary: report the failure instead of propagating it.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
예제 #9
0
def main():
    """Test the decoupled goal-predictor / navigator agent on five houses.

    Creates the experiment folder and log file, loads and validates the house
    config and the shared contextual-bandit constants, builds the vocabulary,
    loads the saved goal-prediction, navigation and action-type models, parses
    one dev dataset per house id and starts one process per house running
    ``HouseDecoupledPredictorNavigatorAgent.do_test``. All processes are
    joined before returning. Any exception raised in the model/dataset/launch
    phase is caught and its traceback is printed.
    """

    data_filename = "simulators/house/AssetsHouse"
    experiment_name = "emnlp-camera-ready-figure-plot"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline_inferred_oos.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    # Validate the setting
    setup_validator = HouseSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details, including a verbatim copy of this script
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"], config["use_manipulation"],
                               config["num_manipulation_row"], config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # Create vocabulary. Here the mapping goes from line index -> lower-cased
    # token (not token -> index), with "$UNK$" stored under the next free index.
    # The file is read inside `with` so the handle is closed.
    with open(data_filename + "/house_all_vocab.txt") as vocab_file:
        vocab_list = vocab_file.readlines()
    vocab = {i: tk.strip().lower() for i, tk in enumerate(vocab_list)}
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    # One test process is started per house id
    house_ids = [1, 2, 3, 4, 5]
    num_processes = len(house_ids)

    try:
        master_logger.log("CREATING MODEL")

        # Create the goal prediction model
        # shared_goal_prediction_model = IncrementalModelAttentionChaplotResNet(
        #     config, constants, final_model_type="m4jksum1", final_dimension=(64, 32, 32 * 6))
        shared_goal_prediction_model = IncrementalModelAttentionChaplotResNet(
            config, constants, final_model_type="unet-positional-encoding", final_dimension=(64, 32, 32 * 6))
        shared_goal_prediction_model.load_saved_model(
            "./results/house_goal_prediction/goal_prediction_single_supervised_epoch_4")
        # shared_goal_prediction_model.load_saved_model(
        #     "./results/train_house_goal_prediction_m4jksum1_repeat/goal_prediction_single_supervised_epoch_4")
        # shared_goal_prediction_model.load_saved_model(
        #     "./results/train_house_two_stage_model/predictor_contextual_bandit_2_epoch_2")
        # shared_goal_prediction_model.load_saved_model(
        #     "./results/train_house_goal_prediction_dummy_token/goal_prediction_single_supervised_epoch_9")

        # Create the navigation model
        model_type = TmpHouseIncrementalModelOracleGoldProb  # TmpHouseIncrementalModelChaplot
        shared_navigator_model = model_type(config, constants, use_image=False)
        # shared_navigator_model.load_saved_model(
        #     "./results/train_house_two_stage_model/navigator_contextual_bandit_2_epoch_2")
        shared_navigator_model.load_saved_model(
            "./results/house_unet_cb_navigation_gold_goal/contextual_bandit_0_epoch_5")
        # shared_navigator_model.load_saved_model(
        #     "./results/house_unet_cb_navigation_gold_goal_no_RNN/contextual_bandit_0_epoch_10")

        # Create the action type model
        shared_action_type_model = ActionTypeModule()
        shared_action_type_model.cuda()
        shared_action_type_model.load_state_dict(
            torch.load("./results/train_house_action_types/goal_prediction_single_supervised_epoch_7"))

        # Make the shared models use shared memory
        shared_goal_prediction_model.share_memory()
        shared_navigator_model.share_memory()
        shared_action_type_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset: one dev split per house, in house_ids order
        test_split = []
        for hid in house_ids:
            all_test_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) + "_discrete_dev.json", config)

            # # Extract type of the dataset
            # lines = open("./simulators/house/datapoint_type_house" + str(hid) + ".txt").readlines()
            # datapoint_id_type = {}
            # for line in lines:
            #     words = line.split()
            #     datapoint_id, datapoint_type = words[0], words[1:]
            #     datapoint_id_type[int(datapoint_id)] = datapoint_type  # .strip()
            #
            # # Filter manipulation type
            # all_test_data = list(filter(lambda datapoint: "manipulation" not in datapoint_id_type[datapoint.get_id()],
            #                       all_test_data))

            test_split.append(all_test_data)

        processes = []

        # Start the test process(es), one per port / house
        ports = find_k_ports(num_processes)
        master_logger.log("Found K Ports")
        for i, port in enumerate(ports):
            test_chunk = test_split[i]
            # Shallow per-process copy so that each client gets its own port
            tmp_config = dict(config)
            tmp_config["port"] = port
            print("Client " + str(i) + " getting a test set of size ", len(test_chunk))
            server = HouseServer(tmp_config, action_space, port)
            master_logger.log("Server Initialized")
            client_logger = multiprocess_logging_manager.get_logger(i)
            p = mp.Process(target=HouseDecoupledPredictorNavigatorAgent.do_test,
                           args=(house_ids[i],
                                 shared_goal_prediction_model,
                                 shared_navigator_model,
                                 shared_action_type_model,
                                 tmp_config, action_space,
                                 meta_data_util, constants,
                                 test_chunk, experiment_name,
                                 i, server,
                                 client_logger, vocab, "inferred"))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        # Top-level boundary: report the failure instead of propagating it.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
예제 #10
0
def main():
    """Run the human-performance test for a single house (house 3).

    Creates the experiment folder and log file, loads the house config and the
    shared contextual-bandit constants, forces ``use_manipulation`` on, builds
    the vocabulary, creates the models, parses the dev dataset of the house
    (keeping examples 2..19), launches one simulator build and calls
    ``TmpHouseAgent.test_human_performance``. Any exception raised in the
    model/dataset/launch phase is caught and its traceback is printed.
    """

    data_filename = "./simulators/house/AssetsHouse"
    experiment_name = "emnlp_camera_ready_test_human_performance"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Id of the house that is tested
    house_id = 3

    # Define log settings
    log_path = experiment + '/test_baseline_%d.log' % house_id
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    # TODO: HouseSetupValidator()
    # setup_validator = BlocksSetupValidator()
    # setup_validator.validate(config, constants)

    # Log core experiment details, including a verbatim copy of this script
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    config["use_manipulation"] = True  # debug manipulation
    action_space = ActionSpace(config["action_names"], config["stop_action"],
                               config["use_manipulation"],
                               config["num_manipulation_row"],
                               config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # Create vocabulary: line index -> lower-cased token, with "$UNK$" stored
    # under the next free index. The file is read inside `with` so the handle
    # is closed.
    with open(data_filename + "/house_all_vocab.txt") as vocab_file:
        vocab_list = vocab_file.readlines()
    vocab = {i: tk.strip().lower() for i, tk in enumerate(vocab_list)}
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = TmpHouseIncrementalModelChaplot
        shared_model = model_type(config, constants)
        # model.load_saved_model("./results/paragraph_chaplot_attention/chaplot_model_epoch_3")

        # Make the shared model use shared memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset and keep only a fixed slice of it
        test_split = DatasetParser.parse(
            data_filename + "/tokenized_house" + str(house_id) +
            "_discrete_dev.json", config)
        test_split = test_split[2:20]

        # Launch the server
        ports = find_k_ports(1)
        port = ports[0]
        # Shallow copy so the port is not written into the shared config
        tmp_config = dict(config)
        tmp_config["port"] = port
        print("Client " + str(0) + " getting a validation set of size ",
              len(test_split))
        server = HouseServer(tmp_config, action_space, port)

        launch_k_unity_builds([tmp_config["port"]],
                              "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" +
                              str(house_id) + ".json",
                              cwd="./simulators/house/")

        server.initialize_server()

        # Create a local model for rollouts
        local_model = model_type(tmp_config, constants)
        # local_model.train()

        # Create the Agent
        print("STARTING AGENT")
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=None,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=tmp_config,
                                  constants=constants)
        print("Created Agent...")
        tmp_agent.test_human_performance(test_split, vocab, master_logger)

    except Exception:
        # Top-level boundary: report the failure instead of propagating it.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
예제 #11
0
def main():
    """Test a simple baseline agent (stop / random / most frequent) on blocks.

    Creates the experiment folder and log file, loads and validates the blocks
    config and the shared contextual-bandit constants, builds the vocabulary,
    parses the dataset split selected by ``args.split``, launches one
    simulator build and runs ``Agent.test`` with the agent type selected by
    ``args.baseline``. Any exception raised in the dataset/launch/test phase
    is caught and its traceback is printed.
    """

    experiment_name = "test_block_baselines"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/blocks/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = BlocksSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details, including a verbatim copy of this script
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config)
    meta_data_util = MetaDataUtil()

    # Create vocabulary: lower-cased token -> line index, plus a trailing
    # "$UNK$" entry. The file is read inside `with` so the handle is closed.
    with open("./Assets/vocab_both") as vocab_file:
        vocab_list = vocab_file.readlines()
    vocab = {tk.strip().lower(): i for i, tk in enumerate(vocab_list)}
    vocab["$UNK$"] = len(vocab_list)
    config["vocab_size"] = len(vocab_list) + 1

    try:
        # NOTE(review): `args` is not defined inside this function; it has to
        # come from module scope with `split` and `baseline` attributes.
        # If it does not exist, the NameError is swallowed by the handler
        # below and only a traceback is printed -- confirm.

        # Read the dataset
        if args.split == "train":
            test_data = DatasetParser.parse("trainset.json", config)
        elif args.split == "dev":
            test_data = DatasetParser.parse("devset.json", config)
        elif args.split == "test":
            test_data = DatasetParser.parse("testset.json", config)
        else:
            raise AssertionError("Unhandled dataset split %r. Only support train, dev and test." % args.split)
        master_logger.log("Created test dataset of size %d " % len(test_data))

        # Create server and launch a client
        simulator_file = "./simulators/blocks/retro_linux_build.x86_64"
        config["port"] = find_k_ports(1)[0]
        server = BlocksServer(config, action_space, vocab=vocab)

        # Launch unity
        launch_k_unity_builds([config["port"]], simulator_file)
        server.initialize_server()

        # Create the agent
        master_logger.log("CREATING AGENT")
        if args.baseline == "stop":
            agent_type = Agent.STOP
        elif args.baseline == "random":
            agent_type = Agent.RANDOM_WALK
        elif args.baseline == "frequent":
            agent_type = Agent.MOST_FREQUENT
            # TODO compute most frequent action from the dataset
        else:
            raise AssertionError("Unhandled agent type %r. Only support stop, random and frequent." % args.baseline)

        agent = Agent(agent_type=agent_type,
                      server=server,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      constants=constants)

        agent.test(test_data)

    except Exception:
        # Top-level boundary: report the failure instead of propagating it.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
예제 #12
0
def main():
    """Repeatedly run the debug encoder-training job in a child process.

    Parses the command-line options, loads ``data/<env>/config.json`` and
    ``data/<env>/constants.json`` (command-line values override file values
    when given), validates them, creates the experiment folder and log file,
    logs the settings together with this script's source and then runs
    ``DebugTrainEncodingFunction.do_train`` 100 times, each time in a fresh
    process that is joined before the next one starts. Finally the logging
    manager is cleaned up.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("--env",
                        default='stochcombolock',
                        help="name of the environment e.g., montezuma")
    parser.add_argument("--model",
                        default='gumbeldouble',
                        help="Model for training the encoding function")
    parser.add_argument("--name",
                        default="debug-encoder",
                        help="Name of the experiment")
    parser.add_argument("--horizon", default=-1, type=int, help="Horizon")
    parser.add_argument("--samples", default=-1, type=int, help="Samples")
    parser.add_argument("--learn_type",
                        default="vanilla",
                        type=str,
                        help="Either vanilla, coordinate, transfer")
    # `args.save_path` is read below to build the experiment folder. The
    # option was never declared, so the script failed with AttributeError.
    parser.add_argument("--save_path",
                        default="./results/",
                        type=str,
                        help="Folder where to save results")
    args = parser.parse_args()

    env_name = args.env
    exp_name = args.name

    with open("data/%s/config.json" % env_name) as f:
        config = json.load(f)
    # Add command line arguments. Command line arguments supersede file settings.
    # -1 is the "not given" sentinel for --horizon and --samples.
    if args.horizon != -1:
        config["horizon"] = args.horizon
    config["encoder_training_type"] = args.learn_type
    GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)

    with open("data/%s/constants.json" % env_name) as f:
        constants = json.load(f)
    if args.samples != -1:
        constants["encoder_training_num_samples"] = args.samples
    constants["model_type"] = args.model
    print(json.dumps(config, indent=2))

    # Validate the keys
    validate(config, constants)

    # Create file
    experiment_name = "%s-model-%s-horizon-%d-samples-%d-%s" % (
        exp_name, args.model, config["horizon"],
        constants["encoder_training_num_samples"], env_name)
    experiment = "./%s/%s" % (args.save_path, experiment_name)
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_homer.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log("Environment Name %r. Experiment Name %r" %
                      (env_name, exp_name))

    # Log core experiment details, including a verbatim copy of this script
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    # performance = []
    num_runs = 100
    for attempt in range(1, num_runs + 1):
        master_logger.log("========= STARTING EXPERIMENT %d ======== " %
                          attempt)

        # Each attempt runs in its own process and is joined right away, so
        # the attempts execute one after another, not in parallel.
        p = mp.Process(target=DebugTrainEncodingFunction.do_train,
                       args=(config, constants, env_name, experiment_name,
                             master_logger, False, True))
        p.daemon = False
        p.start()
        p.join()

    # for key in performance[0]:  # Assumes the keys are same across all runes
    #     results = [result[key] for result in performance]
    #     master_logger.log("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
    #                       (key, statistics.mean(results), statistics.median(results), statistics.stdev(results),
    #                        num_runs, results))
    #     print("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
    #                       (key, statistics.mean(results), statistics.median(results), statistics.stdev(results),
    #                        num_runs, results))

    # Cleanup
    multiprocess_logging_manager.cleanup()
예제 #13
0
def main():
    """Train policies from previously learned homing policies over five trials.

    Parses the command-line options, creates the experiment folder and log
    file, loads ``data/<env>/config.json`` and ``data/<env>/constants.json``
    (command-line values override file values), validates them and logs the
    settings together with this script's source. For each of the five trials
    a new environment is created and restored from the single sub-folder of
    ``<load>/trial_<n>_env``, the config is adapted to it, and
    ``DistributedHomerAlgorithm.train_from_learned_homing_policies`` is run.
    Mean, median and standard deviation of every result key are logged and
    printed, and the logging manager is cleaned up.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("--env", default='stochcombolock', help="name of the environment e.g., montezuma")
    parser.add_argument("--name", default="run-psdp", help="Name of the experiment")
    parser.add_argument("--forwardmodel", default='forwardmodel', help="Model for training the forwad abstraction")
    parser.add_argument("--backwardmodel", default='backwardmodel', help="Model for learning the backward abstraction")
    # `args.model` is read below (experiment name and constants["model_type"]).
    # The option was never declared, so the script failed with AttributeError.
    # NOTE(review): the default value is an assumption -- confirm the intended model name.
    parser.add_argument("--model", default='gumbeldouble', help="Model for training the encoding function")
    parser.add_argument("--discretization", default="True", help="Train with discretized/undiscretized model")
    parser.add_argument("--policy_type", default="linear", type=str, help="Type of policy (linear, non-linear)")
    parser.add_argument("--load", help="Name of the result folder containing homing policies and environment")
    parser.add_argument("--train_eps", type=int, help="Number of training episodes used for learning the policy set")
    parser.add_argument("--noise", default=None, type=str, help="Noise")
    parser.add_argument("--save_trace", default="False", help="Save traces")
    parser.add_argument("--trace_sample_rate", default=500, type=int, help="How often to save traces")
    parser.add_argument("--save_path", default="./results/", type=str, help="Folder where to save results")
    args = parser.parse_args()

    env_name = args.env
    exp_name = args.name
    load_folder = args.load

    # The load folder is concatenated with a string for every trial; fail
    # fast with a usage error instead of a late TypeError on None.
    if load_folder is None:
        parser.error("--load is required: folder containing homing policies and environment")

    experiment_name = "%s-%s-model-%s-noise-%s" % (exp_name, env_name, args.model, args.noise)
    experiment = "./%s/%s" % (args.save_path, experiment_name)
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_homer.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("Environment Name %r. Experiment Name %r" % (env_name, exp_name))

    with open("data/%s/config.json" % env_name) as f:
        config = json.load(f)
    # Add command line arguments. Command line arguments supersede file settings.
    if args.noise is not None:
        config["noise"] = args.noise

    config["save_trace"] = args.save_trace == "True"
    config["trace_sample_rate"] = args.trace_sample_rate
    config["save_path"] = args.save_path
    config["exp_name"] = experiment_name
    config["policy_type"] = args.policy_type

    GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)

    with open("data/%s/constants.json" % env_name) as f:
        constants = json.load(f)
    constants["model_type"] = args.model
    print(json.dumps(config, indent=2))

    # Validate the keys
    validate(config, constants)

    # Log core experiment details, including a verbatim copy of this script
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    performance = []
    num_runs = 5
    for trial in range(1, num_runs + 1):

        master_logger.log("========= STARTING EXPERIMENT %d ======== " % trial)

        # Create a new environment
        print("Created Environment...")
        env = GenerateEnvironmentWrapper(env_name, config)
        master_logger.log("Environment Created")

        # Load the environment: exactly one sub-folder is expected per trial
        env_folder = load_folder + "/trial_%d_env" % trial
        env_folders = [join(env_folder, f) for f in listdir(env_folder) if isdir(join(env_folder, f))]
        assert len(env_folders) == 1, "Found more than environment. Specify the folder manually %r" % env_folders
        env.load_environment_from_folder(env_folders[0])
        master_logger.log("Loaded Environment from %r" % env_folders[0])

        # Fix config to match the env.
        # TODO implement the next block of code in a scalable manner
        config["horizon"] = env.env.horizon
        config["obs_dim"] = -1
        GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)
        master_logger.log("Environment horizon %r, Observation dimension %r" % (config["horizon"], config["obs_dim"]))

        learning_alg = DistributedHomerAlgorithm(config, constants)

        policy_result = learning_alg.train_from_learned_homing_policies(env=env,
                                                                        load_folder=load_folder,
                                                                        train_episodes=args.train_eps,
                                                                        experiment_name=experiment_name,
                                                                        logger=master_logger,
                                                                        use_pushover=False,
                                                                        trial=trial)

        performance.append(policy_result)

    for key in performance[0]:  # Assumes the keys are same across all runs
        results = [result[key] for result in performance]
        # Build the summary once; it is both logged and printed.
        summary = "%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" % (
            key, statistics.mean(results), statistics.median(results), statistics.stdev(results),
            num_runs, results)
        master_logger.log(summary)
        print(summary)

    # Cleanup
    multiprocess_logging_manager.cleanup()
예제 #14
0
def main():
    """Evaluate a saved StreetView model on one dataset split using worker processes.

    Command-line arguments select the experiment name, the number of worker
    processes, the dataset split, the model architecture and the learning
    algorithm whose saved checkpoint is loaded. The parsed dataset is cut into
    contiguous chunks (the last chunk also receives the remainder) and one
    process per chunk runs ``TmpStreetViewAsynchronousContextualBandit.do_test``.

    Any exception raised while building the model or running the workers is
    printed with its traceback and not re-raised.
    """
    # Saved checkpoint to evaluate for every (model, learning algorithm) pair.
    saved_model_paths = {
        ("concat", "sup"):
            "./results/train-streetview-deepmind-supervised-learning/supervised_learning0_epoch_13",
        ("concat", "cb"):
            "./results/train-streetview-deepmind-cb/contextual_bandit_0_epoch_38",
        ("concat", "mix"):
            "./results/train-streetview-deepmind-mixture-algorithm/supervised_learning5_epoch_54",
        ("chaplot", "sup"):
            "./results/train-streetview-chaplot-supervised-learning/supervised_learning0_epoch_36",
        ("chaplot", "cb"):
            "./results/train-streetview-chaplot-cb/contextual_bandit_0_epoch_66",
        ("chaplot", "mix"):
            "./results/train-streetview-chaplot-mixture-repeat2/contextual_bandit_0_epoch_34",
    }

    # Every option below except --num_processes is needed to build paths or
    # pick a checkpoint, so they are required: a missing value used to surface
    # later as a TypeError or an AssertionError.
    parser = argparse.ArgumentParser(description='Parser Values')
    parser.add_argument('--name',
                        type=str,
                        required=True,
                        help='name of the experiment')
    parser.add_argument('--num_processes',
                        type=int,
                        default=6,
                        help='num of process')
    parser.add_argument('--split',
                        type=str,
                        required=True,
                        help='dataset split ("train", "dev", "test")')
    parser.add_argument('--model',
                        type=str,
                        required=True,
                        choices=("chaplot", "concat"),
                        help='model ("chaplot", "concat")')
    parser.add_argument('--learning_alg',
                        type=str,
                        required=True,
                        choices=("cb", "sup", "mix"),
                        help='learning alg ("cb", "sup", "mix")')
    args = parser.parse_args()

    experiment_name = args.name
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline_%s.log' % args.split
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details, including a copy of this script.
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])
    meta_data_util = MetaDataUtil()

    num_processes = args.num_processes
    model_name = args.model
    data_split = args.split
    learning_alg = args.learning_alg

    master_logger.log("Num processes %r, Model %r, Alg %r, Split %r " %
                      (num_processes, model_name, learning_alg, data_split))

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        if model_name == "concat":
            model_type = TmpStreetviewIncrementalModelDeepMindPolicyNetwork
        elif model_name == "chaplot":
            model_type = IncrementalModelChaplot
        else:
            raise AssertionError("Model name not known. %r " % model_name)

        shared_model = model_type(config, constants)
        shared_model.init_weights()

        # Load the checkpoint registered for this model/algorithm pair.
        checkpoint = saved_model_paths.get((model_name, learning_alg))
        if checkpoint is None:
            raise AssertionError("Unregistered learning algorithm %r " %
                                 learning_alg)
        shared_model.load_saved_model(checkpoint)

        # make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        test_split = DatasetParser.parse(
            "data/streetview/navigation_%s.json" % data_split, config)

        master_logger.log("Created tuning dataset of size %d " %
                          len(test_split))

        processes = []

        # Split the data between processes: equal contiguous chunks, with the
        # last process also taking whatever remains after integer division.
        chunk_size = len(test_split) // num_processes
        test_split_process_chunks = [
            test_split[i * chunk_size:(i + 1) * chunk_size]
            for i in range(num_processes - 1)
        ]
        test_split_process_chunks.append(
            test_split[(num_processes - 1) * chunk_size:])

        assert sum([
            len(chunk) for chunk in test_split_process_chunks
        ]) == len(test_split), "Test dataset not properly partitioned."

        # Start one testing process per chunk
        for i, test_chunk in enumerate(test_split_process_chunks):
            print("Client " + str(i) + " getting a test set of size ",
                  len(test_chunk))
            server = StreetViewServer(config,
                                      action_space,
                                      forward_setting_strict=False)
            client_logger = multiprocess_logging_manager.get_logger(i)
            p = mp.Process(
                target=TmpStreetViewAsynchronousContextualBandit.do_test,
                args=(shared_model, config, action_space, meta_data_util,
                      constants, test_chunk, experiment_name, i, server,
                      client_logger, model_type))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        # Top-level boundary of the script: report the failure and return.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
예제 #15
0
def main():
    """Run an endless manual-control debugging session in the house simulator.

    Sets up logging under ``./results/tmp_house_1_debug_manual_control``, loads
    the house configuration and shared constants, builds an index-to-token
    vocabulary, launches a Unity build of house 1 together with a
    ``HouseServer``, and then cycles forever through the first 50 training
    datapoints, handing each one to ``TmpHouseAgent.debug_manual_control``.

    Any ``Exception`` raised after setup is printed with its traceback and not
    re-raised.
    """
    data_filename = "./simulators/house/AssetsHouse"
    experiment_name = "tmp_house_1_debug_manual_control"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    # TODO: validate config/constants with a HouseSetupValidator.

    # Log core experiment details, including a copy of this script.
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    config["use_manipulation"] = True  # debug manipulation
    action_space = ActionSpace(config["action_names"], config["stop_action"],
                               config["use_manipulation"],
                               config["num_manipulation_row"],
                               config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # Vocabulary maps token index -> lower-cased token; the extra last index
    # is reserved for the unknown-word symbol.
    with open(data_filename + "/house_all_vocab.txt") as f:
        vocab_list = f.readlines()
    vocab = {i: tk.strip().lower() for i, tk in enumerate(vocab_list)}
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    # Houses to debug; only the first one is actually launched below.
    house_ids = [1]  # [1,2,3]

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = TmpHouseIncrementalModelChaplot
        shared_model = model_type(config, constants)

        # make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        tune_split, train_split = [], []
        for hid in house_ids:
            all_train_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) +
                "_discrete.json", config)
            # NOTE(review): the dev data is parsed but never used afterwards;
            # the call is kept so a missing dev file still fails here --
            # confirm whether it can be dropped.
            all_dev_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) +
                "_discrete_dev.json", config)

            # Debug on a small prefix; the same data serves as train and tune.
            all_train_data = all_train_data[0:50]
            train_split.append(all_train_data)
            tune_split.append(all_train_data)

        # Launch the server
        ports = find_k_ports(1)
        port = ports[0]
        tmp_config = {k: v for k, v in config.items()}
        tmp_config["port"] = port
        tmp_tune_split = tune_split[0]
        print("Client " + str(0) + " getting a validation set of size ",
              len(tmp_tune_split))
        server = HouseServer(tmp_config, action_space, port)

        house_id = house_ids[0]
        launch_k_unity_builds([tmp_config["port"]],
                              "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" +
                              str(house_id) + ".json",
                              cwd="./simulators/house/")

        server.initialize_server()

        # Create a local model for rollouts
        local_model = model_type(tmp_config, constants)

        # Create the Agent
        print("STARTING AGENT")
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=None,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=tmp_config,
                                  constants=constants)
        print("Created Agent...")

        # Cycle through the datapoints forever. The index is advanced before
        # use, so the first datapoint shown is index 1 (wrapping to 0 later).
        index = 0
        while True:
            # The size is interpolated into the message; previously the "%r"
            # placeholder was printed literally.
            print("Giving another data %r " % len(train_split[0]))
            index = (index + 1) % len(train_split[0])
            print("Dataset id is " + str(train_split[0][index].get_id()))
            tmp_agent.debug_manual_control(train_split[0][index], vocab)

    except Exception:
        # Top-level boundary of the script: report the failure and return.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
예제 #16
0
def main():
    """Evaluate the most-frequent-action baseline on the house test sets.

    Sets up logging under ``./results/house_test_most_frequent_test_dataset``,
    loads and validates the house configuration and shared constants, builds
    an index-to-token vocabulary, parses the test dataset of every house in
    ``house_ids`` and starts one process per house running
    ``Agent.test_multiprocess`` with ``Agent.MOST_FREQUENT``.

    Any ``Exception`` raised while reading data or running the workers is
    printed with its traceback and not re-raised.
    """
    data_filename = "./simulators/house/AssetsHouse"
    experiment_name = "house_test_most_frequent_test_dataset"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    setup_validator = HouseSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details, including a copy of this script.
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"], config["use_manipulation"],
                               config["num_manipulation_row"], config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # Vocabulary maps token index -> lower-cased token; the extra last index
    # is reserved for the unknown-word symbol. The file is read inside a
    # context manager so its handle is closed deterministically.
    with open(data_filename + "/house_all_vocab.txt") as f:
        vocab_list = f.readlines()
    vocab = {i: tk.strip().lower() for i, tk in enumerate(vocab_list)}
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    # One worker process per house
    house_ids = [1, 2, 3, 4, 5]
    num_processes = len(house_ids)

    try:
        # Read the test dataset of every house
        test_data = []
        for hid in house_ids:
            all_dev_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) + "_discrete_test.json", config)
            test_data.append(list(all_dev_data))

        processes = []

        # Start the testing process(es)
        ports = find_k_ports(num_processes)
        master_logger.log("Found K Ports")
        for i, port in enumerate(ports):
            test_chunk = test_data[i]  # Simulator i runs house i and uses the dataset for house i
            tmp_config = {k: v for k, v in config.items()}
            tmp_config["port"] = port
            print("Client " + str(i) + " getting test set of size ", len(test_chunk))
            client_logger = multiprocess_logging_manager.get_logger(i)
            p = mp.Process(target=Agent.test_multiprocess, args=(house_ids[i], test_chunk, tmp_config, action_space, port,
                                                                 Agent.MOST_FREQUENT, meta_data_util, constants, vocab,
                                                                 client_logger, None))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        # Top-level boundary of the script: report the failure and return.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
예제 #17
0
def main():
    """Train a StreetView model with contextual bandit and/or supervised workers.

    Sets up logging under
    ``./results/train-streetview-deepmind-mixture-learning-repeat``, loads and
    validates the StreetView configuration and shared constants, creates a
    shared model, and partitions the train and dev datasets into one
    contiguous chunk per process (the last chunk also receives the remainder).

    ``--learning_alg`` chooses the worker type: ``cb`` starts only contextual
    bandit workers, ``sup`` only supervised learning workers, and ``mix`` runs
    supervised learning in the last two processes and contextual bandit in the
    others.

    Any ``Exception`` raised while building the model or running the workers
    is printed with its traceback and not re-raised.
    """
    experiment_name = "train-streetview-deepmind-mixture-learning-repeat"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/streetview/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = StreetViewSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details, including a copy of this script.
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config["action_names"], config["stop_action"])
    meta_data_util = MetaDataUtil()

    # Learning algorithm. Unknown values are rejected by argparse up front
    # instead of failing after the model has been built.
    parser = argparse.ArgumentParser(description='Parser Values')
    parser.add_argument('--num_processes',
                        type=int,
                        default=6,
                        help='num of process')
    parser.add_argument('--learning_alg',
                        type=str,
                        default="cb",
                        choices=("cb", "sup", "mix"),
                        help='learning alg ("cb", "sup", "mix")')
    args = parser.parse_args()

    num_processes = args.num_processes
    learning_alg = args.learning_alg
    master_logger.log("Num processes %r, Learning Algorithm %r " %
                      (num_processes, learning_alg))

    def split_into_chunks(dataset, num_chunks):
        """Cut dataset into num_chunks contiguous slices; the last takes the remainder."""
        size = len(dataset) // num_chunks
        chunks = [
            dataset[k * size:(k + 1) * size] for k in range(num_chunks - 1)
        ]
        chunks.append(dataset[(num_chunks - 1) * size:])
        return chunks

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = TmpStreetviewIncrementalModelDeepMindPolicyNetwork
        shared_model = model_type(config, constants)
        shared_model.init_weights()

        # make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        train_split = DatasetParser.parse(
            "data/streetview/navigation_train.json", config)
        tune_split = DatasetParser.parse("data/streetview/navigation_dev.json",
                                         config)
        master_logger.log("Created train dataset of size %d " %
                          len(train_split))
        master_logger.log("Created tuning dataset of size %d " %
                          len(tune_split))

        processes = []

        # Split the data between processes. Previously the trailing
        # len % num_processes datapoints were silently dropped; they now go
        # to the last process.
        train_split_process_chunks = split_into_chunks(train_split,
                                                       num_processes)
        tune_split_process_chunks = split_into_chunks(tune_split,
                                                      num_processes)

        # Start the training process(es)
        for i in range(0, num_processes):
            train_chunk = train_split_process_chunks[i]
            # Every client validates on its own dev chunk (the former
            # if/else assigned the same value in both branches).
            tmp_tune_split = tune_split_process_chunks[i]
            print("Client " + str(i) + " getting a validation set of size ",
                  len(tmp_tune_split))
            server = StreetViewServer(config,
                                      action_space,
                                      forward_setting_strict=False)
            client_logger = multiprocess_logging_manager.get_logger(i)

            # In "mix" mode the last two processes do supervised learning.
            if learning_alg == "cb" or (learning_alg == "mix"
                                        and i < num_processes - 2):
                target = TmpStreetViewAsynchronousContextualBandit.do_train
            elif learning_alg == "sup" or (learning_alg == "mix"
                                           and i >= num_processes - 2):
                target = TmpStreetViewAsynchronousSupervisedLearning.do_train
            else:
                raise NotImplementedError()

            p = mp.Process(
                target=target,
                args=(shared_model, config, action_space, meta_data_util,
                      constants, train_chunk, tmp_tune_split, experiment,
                      experiment_name, i, server, client_logger, model_type))
            p.daemon = False
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except Exception:
        # Top-level boundary of the script: report the failure and return.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
예제 #18
0
def main():
    """Train HOMER on the selected environment and report per-metric statistics.

    Reads the run settings from the command line, prepares the experiment
    folder and logger, loads ``data/<env>/config.json`` and
    ``data/<env>/constants.json`` (command-line values override file values),
    then runs ``DistributedHomerAlgorithm.train`` once per trial. For every key
    of the returned results it logs and prints the mean, median and standard
    deviation across trials (0.0 when there is a single trial).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--env", default='diabcombolock',
                     help="name of the environment e.g., montezuma")
    cli.add_argument("--num_processes", default=6, type=int,
                     help="number of policy search (PS) processes to be launched at a given time")
    cli.add_argument("--forwardmodel", default='forwardmodel',
                     help="Model for training the forwad abstraction")
    cli.add_argument("--backwardmodel", default='backwardmodel',
                     help="Model for learning the backward abstraction")
    cli.add_argument("--discretization", default="True",
                     help="Train with discretized/undiscretized model")
    cli.add_argument("--policy_type", default="linear", type=str,
                     help="Type of policy (linear, non-linear)")
    cli.add_argument("--name", default="run-homer",
                     help="Name of the experiment")
    cli.add_argument("--horizon", default=-1, type=int, help="Horizon")
    cli.add_argument("--samples", default=-1, type=int, help="Samples")
    cli.add_argument("--env_seed", default=None, type=int,
                     help="Environment Seed")
    cli.add_argument("--noise", default=None, type=str, help="Noise")
    cli.add_argument("--save_trace", default="False", help="Save traces")
    cli.add_argument("--trace_sample_rate", default=500, type=int,
                     help="How often to save traces")
    cli.add_argument("--save_path", default="./results/", type=str,
                     help="Folder where to save results")
    cli.add_argument("--debug", default="False", help="Debug the run")
    cli.add_argument("--pushover", default="False",
                     help="Use pushover to send results on phone")
    args = cli.parse_args()

    env_name = args.env
    num_processes = args.num_processes
    exp_name = args.name

    name_fields = (exp_name, env_name, args.backwardmodel, args.horizon,
                   args.samples, args.noise)
    experiment_name = "%s-%s-model-%s-horizon-%d-samples-%d-noise-%s" % name_fields
    experiment = "%s/%s" % (args.save_path, experiment_name)
    print("EXPERIMENT NAME: ", experiment_name)

    # Make sure the experiment folder exists
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Logger writing to <experiment>/train_homer.log
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=experiment + '/train_homer.log',
        logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    for banner_line in (
            "----------------------------------------------------------------",
            "                    STARING NEW EXPERIMENT                      ",
            "----------------------------------------------------------------"):
        master_logger.log(banner_line)
    master_logger.log("Environment Name %r. Experiment Name %r" %
                      (env_name, exp_name))

    # The configuration file holds the environment information; values given
    # on the command line take precedence over the ones stored in the file.
    with open("data/%s/config.json" % env_name) as config_file:
        config = json.load(config_file)

        if args.horizon != -1:
            config["horizon"] = args.horizon
        if args.noise is not None:
            config["noise"] = args.noise

        config.update({
            "save_trace": args.save_trace == "True",
            "trace_sample_rate": args.trace_sample_rate,
            "save_path": experiment,
            "exp_name": experiment_name,
            "env_seed": args.env_seed,
            "policy_type": args.policy_type,
        })

        GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)

    # The constants file holds hyperparameters for the model and the
    # learning algorithm.
    with open("data/%s/constants.json" % env_name) as constants_file:
        constants = json.load(constants_file)
        if args.samples != -1:
            constants["encoder_training_num_samples"] = args.samples
        constants["forward_model_type"] = args.forwardmodel
        constants["backward_model_type"] = args.backwardmodel
        constants["discretization"] = args.discretization == "True"
    print(json.dumps(config, indent=2))

    # Validate the keys
    validate(config, constants)

    # Record the settings and a copy of this script in the log
    for title, settings in (("CONFIG DETAILS", config),
                            ("CONSTANTS DETAILS", constants)):
        master_logger.log(title)
        for name, value in sorted(settings.items()):
            master_logger.log("    %s --- %r" % (name, value))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as script:
        for script_line in script:
            master_logger.log(">>> " + script_line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    num_runs = 1
    performance = []
    for trial in range(1, num_runs + 1):

        master_logger.log("========= STARTING EXPERIMENT %d ======== " % trial)

        # Fresh environment for every trial
        env = GenerateEnvironmentWrapper(env_name, config)
        master_logger.log("Environment Created")
        print("Created Environment...")

        # Persist the environment so the trial can be reproduced
        env.save_environment(experiment, trial_name=trial)
        print("Saving Environment...")

        validation_fn = env.generate_homing_policy_validation_fn()

        homer = DistributedHomerAlgorithm(config, constants)
        trial_result = homer.train(
            experiment=experiment,
            env=env,
            env_name=env_name,
            num_processes=num_processes,
            experiment_name=experiment_name,
            logger=master_logger,
            use_pushover=args.pushover == "True",
            debug=args.debug == "True",
            homing_policy_validation_fn=validation_fn,
            trial=trial,
            do_reward_sensitive_learning=True)

        performance.append(trial_result)

    # Summarise every metric; the first trial's keys are assumed to be
    # present in all trials.
    for metric in performance[0]:
        values = [trial_result[metric] for trial_result in performance]

        # The standard deviation is reported as 0.0 for a single run
        spread = statistics.stdev(values) if len(values) > 1 else 0.0

        summary = (
            "%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
            (metric, statistics.mean(values), statistics.median(values),
             spread, num_runs, values))
        master_logger.log(summary)
        print(summary)

    # Cleanup
    multiprocess_logging_manager.cleanup()
예제 #19
0
def main():
    """Launch the multi-process "blocks_experiments" training run.

    Steps, in order:
      1. Create ``./results/blocks_experiments`` and a multiprocess logger
         writing to ``train_baseline.log`` inside it.
      2. Load ``data/blocks/config.json`` and
         ``data/shared/contextual_bandit_constants.json``, validate them with
         ``BlocksSetupValidator``, and log both together with the contents of
         this script.
      3. Build the token -> index vocabulary from ``./Assets/vocab_both`` and
         record its size in ``config["vocab_size"]``.
      4. Create one shared ``IncrementalModelEmnlp`` model, split
         ``trainset.json`` 80/20 into train/tune data, and start
         ``num_processes`` worker processes that each run
         ``AsynchronousContextualBandit.do_train`` on their own chunk of the
         training data. Only the last worker receives the tune split.

    Any exception raised in this process during step 4 is printed with its
    traceback instead of being propagated.
    """

    experiment_name = "blocks_experiments"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder (no-op when it already exists)
    os.makedirs(experiment, exist_ok=True)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/blocks/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = BlocksSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    # Dump this script into the log so the run can be traced back to its code
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config)
    meta_data_util = MetaDataUtil()

    # Create vocabulary: one token per line, indexed by its line position.
    # The file is read inside a context manager so the handle is closed
    # instead of being leaked.
    with open("./Assets/vocab_both") as f:
        vocab_list = f.readlines()
    vocab = {tk.strip().lower(): i for i, tk in enumerate(vocab_list)}
    # The index right after the last line is reserved for unknown tokens
    vocab["$UNK$"] = len(vocab_list)
    config["vocab_size"] = len(vocab_list) + 1

    # Number of processes
    num_processes = 6

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = IncrementalModelEmnlp
        shared_model = model_type(config, constants)

        # make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset: first 80% for training, remainder for tuning
        all_train_data = DatasetParser.parse("trainset.json", config)
        num_train = int(0.8 * len(all_train_data))
        train_split = all_train_data[:num_train]
        tune_split = list(all_train_data[num_train:])
        shuffle(train_split)  # shuffle the split to break ties

        master_logger.log("Created train dataset of size %d " %
                          len(train_split))
        master_logger.log("Created tuning/validation dataset of size %d " %
                          len(tune_split))

        processes = []

        # Split the train data between processes. Every worker gets
        # `chunk_size` examples; the last worker additionally takes the
        # remainder so that no training example is silently dropped when the
        # split size is not a multiple of `num_processes`.
        chunk_size = len(train_split) // num_processes
        train_split_process_chunks = []
        for i in range(num_processes):
            start = i * chunk_size
            if i < num_processes - 1:
                end = start + chunk_size
            else:
                end = len(train_split)
            train_split_process_chunks.append(train_split[start:end])

        simulator_file = "./simulators/blocks/retro_linux_build.x86_64"

        # Start the training thread(s)
        ports = find_k_ports(num_processes)
        for i, port in enumerate(ports):

            train_chunk = train_split_process_chunks[i]
            # Shallow copy so each worker gets its own "port" entry
            tmp_config = dict(config)
            tmp_config["port"] = port
            if i == num_processes - 1:
                # Master client which does testing. Don't want each client to do testing.
                tmp_tune_split = tune_split
            else:
                tmp_tune_split = []
            print("Client " + str(i) + " getting a validation set of size ",
                  len(tmp_tune_split))
            server = BlocksServer(tmp_config, action_space, vocab=vocab)
            client_logger = multiprocess_logging_manager.get_logger(i)

            p = mp.Process(target=AsynchronousContextualBandit.do_train,
                           args=(simulator_file, shared_model, tmp_config,
                                 action_space, meta_data_util, constants,
                                 train_chunk, tmp_tune_split, experiment,
                                 experiment_name, i, server, client_logger,
                                 model_type))
            p.daemon = False
            p.start()
            processes.append(p)

        # Block until every worker has finished
        for p in processes:
            p.join()

    except Exception:
        # Top-level boundary: report the failure instead of crashing silently
        traceback.print_exc()
예제 #20
0
def main():
    """Run the Du-baselines experiment for several seeded trials.

    Parses the command line, creates the experiment folder and a multiprocess
    logger (``train_homer.log``), loads ``data/<env>/config.json`` and
    ``data/<env>/constants.json`` (command-line values for horizon and noise
    supersede the config file), validates and logs them, then runs
    ``num_runs`` trials. Each trial re-seeds ``random``, NumPy and torch,
    creates and saves a fresh environment, and trains the algorithm returned
    by ``du_baseline.get_alg``. Finally the mean, median and standard
    deviation of every reported metric across trials are logged and printed.
    """

    parser = argparse.ArgumentParser(description='du_baselines Experiments')

    parser.add_argument("--env",
                        default='diabcombolock',
                        help="name of the environment e.g., montezuma")
    parser.add_argument("--name",
                        default="run-du-baselines",
                        help="Name of the experiment")
    parser.add_argument("--horizon", default=-1, type=int, help="Horizon")
    parser.add_argument("--noise", default=None, type=str, help="Noise")
    parser.add_argument("--save_trace", default="False", help="Save traces")
    parser.add_argument("--trace_sample_rate",
                        default=500,
                        type=int,
                        help="How often to save traces")
    parser.add_argument("--save_path",
                        default="./results/",
                        type=str,
                        help="Folder where to save results")
    parser.add_argument("--debug", default="False", help="Debug the run")
    parser.add_argument("--pushover",
                        default="False",
                        help="Use pushover to send results on phone")

    # Options for Du Baselines
    parser.add_argument('--seed',
                        type=int,
                        default=367,
                        metavar='N',
                        help='random seed (default: 367)')
    parser.add_argument('--episodes',
                        type=int,
                        default=10000000,
                        help='Training Episodes')
    parser.add_argument('--alg',
                        type=str,
                        default='decoding',
                        help='Learning Algorithm',
                        choices=["oracleq", "decoding", "qlearning"])
    parser.add_argument('--model_type',
                        type=str,
                        default='linear',
                        help='What model class for function approximation',
                        choices=['linear', 'nn'])
    parser.add_argument('--lr',
                        type=float,
                        help='Learning Rate for optimization-based algorithms',
                        default=3e-2)
    parser.add_argument('--epsfrac',
                        type=float,
                        help='Exploration fraction for Baseline DQN.',
                        default=0.1)
    parser.add_argument('--conf',
                        type=float,
                        help='Exploration Bonus Parameter for Oracle Q.',
                        default=3e-2)
    parser.add_argument(
        '--n',
        type=int,
        default=200,
        help="Data collection parameter for decoding algoithm.")
    parser.add_argument(
        '--num_cluster',
        type=int,
        default=3,
        help="Num of hidden state parameter for decoding algoithm.")

    args = parser.parse_args()

    env_name = args.env
    exp_name = args.name

    experiment_name = "%s-%s-model-%s-horizon-%d-samples-%d-noise-%s" % \
                      (exp_name, env_name, args.model_type, args.horizon, args.episodes, args.noise)
    # os.path.join keeps an absolute --save_path absolute; the previous
    # "./%s/%s" formatting silently turned it into a path relative to the
    # current directory.
    experiment = os.path.join(args.save_path, experiment_name)
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder (no-op when it already exists)
    os.makedirs(experiment, exist_ok=True)

    # Define log settings
    log_path = experiment + '/train_homer.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log("Environment Name %r. Experiment Name %r" %
                      (env_name, exp_name))

    # Read configuration and constant files. Configuration contain environment information and
    # constant file contains hyperparameters for the model and learning algorithm.
    with open("data/%s/config.json" % env_name) as f:
        config = json.load(f)

    # Add command line arguments. Command line arguments supersede file settings.
    if args.horizon != -1:
        config["horizon"] = args.horizon
    if args.noise is not None:
        config["noise"] = args.noise

    # Flags arrive as the strings "True"/"False"; convert to a real boolean
    config["save_trace"] = args.save_trace == "True"
    config["trace_sample_rate"] = args.trace_sample_rate
    config["save_path"] = args.save_path
    config["exp_name"] = experiment_name

    GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)

    with open("data/%s/constants.json" % env_name) as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))

    # Validate the keys
    validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    # Dump this script into the log so the run can be traced back to its code
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    performance = []
    num_runs = 5
    for trial in range(1, num_runs + 1):

        master_logger.log("========= STARTING EXPERIMENT %d ======== " % trial)

        # Derive a distinct seed per trial from the base seed. Note that torch
        # uses a different multiplier (37) than random/NumPy (29).
        random.seed(args.seed + trial * 29)
        np.random.seed(args.seed + trial * 29)
        torch.manual_seed(args.seed + trial * 37)

        # Create a new environment
        env = GenerateEnvironmentWrapper(env_name, config)
        master_logger.log("Environment Created")
        print("Created Environment...")

        # Save the environment for reproducibility
        env.save_environment(experiment, trial_name=trial)
        print("Saving Environment...")

        learning_alg = du_baseline.get_alg(args, config)
        policy_result = du_baseline.train(env, learning_alg, args,
                                          master_logger)

        performance.append(policy_result)

    for key in performance[0]:  # Assumes the keys are same across all runs
        results = [result[key] for result in performance]

        # statistics.stdev raises StatisticsError for fewer than two data
        # points, so report 0.0 when only a single run was performed.
        if len(results) <= 1:
            stdev = 0.0
        else:
            stdev = statistics.stdev(results)

        # Build the summary once and send the same text to the log and stdout
        summary = (
            "%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
            (key, statistics.mean(results), statistics.median(results), stdev,
             num_runs, results))
        master_logger.log(summary)
        print(summary)

    # Cleanup
    multiprocess_logging_manager.cleanup()