Example #1
def test_save_and_load(self):
    # Round-trip the dataset through pickle and compare it field by field.
    self.dataset.save_dataset('tmp.pickle')
    new_dataset = Dataset_Counts.load_dataset('tmp.pickle')
    self.assertSequenceAlmostEqual(self.dataset.a[0:self.dataset.size],
                                   new_dataset.a[0:self.dataset.size])
    self.assertSequenceAlmostEqual(self.dataset.t[0:self.dataset.size],
                                   new_dataset.t[0:self.dataset.size])
    self.assertSequenceAlmostEqual(self.dataset.r[0:self.dataset.size],
                                   new_dataset.r[0:self.dataset.size])
    for i in range(self.dataset.size):
        # states and counts are stored per sample, so compare row by row
        self.assertSequenceAlmostEqual(self.dataset.s[i], new_dataset.s[i])
        self.assertSequenceAlmostEqual(self.dataset.c[i], new_dataset.c[i])
    # the loaded dataset should still accept new transitions
    new_dataset.add(*transitions[0])
    # clean up the temporary file
    import os
    os.remove('tmp.pickle')
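
A minimal usage sketch of the save/load round trip exercised above. The import path `dataset` is an assumption, the constructor keywords mirror the Dataset_Counts call in Example #3, and all argument values are placeholders.

from dataset import Dataset_Counts  # hypothetical module path

# build a small counting dataset (placeholder hyperparameters)
ds = Dataset_Counts(count_param=0.1, state_shape=(4,), nb_actions=2,
                    replay_max_size=1000, is_counting=True)
ds.save_dataset('tmp.pickle')                         # pickle to disk
restored = Dataset_Counts.load_dataset('tmp.pickle')  # classmethod loader
assert restored.size == ds.size                       # same number of samples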
Example #2
    def __init__(self, training_steps, validation_steps, validation_size,
                 mini_batch_size, learning_rate, number_of_epochs,
                 network_size, folder_location, dataset_file,
                 cloned_network_path, sample_from_env, entropy_coefficient,
                 device, seed, experiment_name, config_file,
                 update_learning_rate):

        self.sample_from_env = sample_from_env
        self.smaller_validation_loss = None
        self.seed = seed
        try:
            # the config file is required when sampling from the environment
            with open(config_file, 'r') as f:
                self.params = yaml.safe_load(f)
        except FileNotFoundError as e:
            print("Configuration file not found; define a config_file "
                  "to be able to sample from the environment")
            raise e

        # initialize seeds for reproducibility
        np.random.seed(seed)
        torch.manual_seed(seed)

        # set paths for logs, data, and output
        log_path = os.path.join('./logs', experiment_name)
        data_dir = folder_location
        dataset_path = dataset_file
        self.output_folder = os.path.dirname(dataset_path)
        self.cloned_network_path = os.path.join(os.path.dirname(dataset_path),
                                                cloned_network_path)

        # start
        self.logger = SummaryWriter(log_path)

        # import data
        full_dataset = Dataset_Counts.load_dataset(dataset_path)
        self.dataset_train, self.dataset_validation = full_dataset.train_validation_split(
            test_size=validation_size)

        # set training parameters
        self.mini_batch_size = mini_batch_size
        self.number_of_epochs = number_of_epochs
        self.network_size = network_size
        self.entropy_coefficient = entropy_coefficient
        self.device = device
        self.learning_rate = learning_rate
        self.update_learning_rate = update_learning_rate

        # when step counts are not given, default to one pass over the data
        if training_steps != 0:
            self.training_steps = training_steps
        else:
            self.training_steps = self.dataset_train.size // self.mini_batch_size
        if validation_steps != 0:
            self.validation_steps = validation_steps
        else:
            self.validation_steps = (self.dataset_validation.size //
                                     self.mini_batch_size)
        # avoid a zero logging frequency for very short runs
        self.log_frequency = max(1, self.training_steps // 10)
        print("Training with {} training steps and {} validation steps".format(
            self.training_steps, self.validation_steps))

        # create model
        self.cloned_baseline_policy = ClonedBaseline(
            network_size=network_size,
            network_path=None,
            state_shape=self.params['state_shape'],
            nb_actions=self.params['nb_actions'],
            device=device,
            seed=seed,
            temperature=0)
        self.best_policy = ClonedBaseline(
            network_size=network_size,
            network_path=None,
            state_shape=self.params['state_shape'],
            nb_actions=self.params['nb_actions'],
            device=device,
            seed=seed,
            temperature=0,
            results_folder=self.output_folder)
        # initialize the best policy with the cloned policy's current weights
        # (copying best_policy's own state_dict into itself was a no-op)
        self.best_policy._copy_weight_from(
            self.cloned_baseline_policy.network.state_dict())

        # define loss and optimizer
        self.nll_loss_function = nn.NLLLoss()
        self.optimizer = torch.optim.SGD(
            self.cloned_baseline_policy.network.parameters(), lr=learning_rate)
        # optimizer = torch.optim.RMSprop(network.parameters(), lr=learning_rate, alpha=0.95, eps=1e-07)

        # instantiate environment for policy evaluation
        self.env = environment.Environment(self.params['domain'], self.params)

        if sample_from_env:
            print("sampling from environment")
            baseline_network_path = os.path.join(data_dir,
                                                 self.params["network_path"])
            self.baseline = Baseline(self.params['network_size'],
                                     network_path=baseline_network_path,
                                     state_shape=self.params['state_shape'],
                                     nb_actions=self.params['nb_actions'],
                                     device=device,
                                     seed=seed,
                                     temperature=self.params.get(
                                         "baseline_temp", 0.1),
                                     normalize=self.params['normalize'])
        else:
            self.baseline = None
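
A hedged construction sketch for the trainer whose __init__ is shown above. The class name BehaviorCloningTrainer is an assumption (the snippet does not name its class), and every argument value below is a placeholder; passing 0 for training_steps or validation_steps derives the step counts from the dataset and batch sizes, as in the code above.

trainer = BehaviorCloningTrainer(  # hypothetical class name
    training_steps=0,              # 0 => one pass: dataset size // batch size
    validation_steps=0,
    validation_size=0.1,
    mini_batch_size=32,
    learning_rate=1e-3,
    number_of_epochs=10,
    network_size='small',
    folder_location='./data',
    dataset_file='./data/run0/dataset.pickle',
    cloned_network_path='cloned_network_weights.pt',
    sample_from_env=False,
    entropy_coefficient=0.0,
    device='cpu',
    seed=123,
    experiment_name='bc_run0',
    config_file='./data/config.yaml',
    update_learning_rate=False)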
Example #3
def run(config_file, options):
    try:
        with open(config_file, 'r') as f:
            params = yaml.safe_load(f)
    except FileNotFoundError as e:
        print("Configuration file not found")
        raise e

    # replace params with command-line options
    for opt in options:
        assert opt[0] in params, "unknown parameter: {}".format(opt[0])
        dtype = type(params[opt[0]])
        if dtype == bool:
            # bool('False') is truthy, so parse the string explicitly
            new_opt = opt[1] == 'True'
        else:
            new_opt = dtype(opt[1])
        params[opt[0]] = new_opt

    print('\nParameters')
    for key in params:
        print(key, params[key])
    print('\n')

    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    random_state = np.random.RandomState(params['seed'])
    device = torch.device(params["device"])

    DATA_DIR = os.path.join(params['folder_location'], params['folder_name'])

    env = environment.Environment(params["domain"], params, random_state)

    if params['batch']:
        dataset_path = params['dataset_path']
        print("\nLoading dataset from file {}".format(dataset_path), flush=True)
        if not os.path.exists(dataset_path):
            raise ValueError("The dataset file does not exist")
        dataset = Dataset_Counts.load_dataset(dataset_path)

        baseline_path = os.path.join(DATA_DIR, params['baseline_path'])
        if 'behavior_cloning' in params['learning_type']:
            baseline_path = os.path.join(os.path.dirname(dataset_path), 'cloned_network_weights.pt')
            baseline = ClonedBaseline(
                params['network_size'], network_path=baseline_path, state_shape=params['state_shape'],
                nb_actions=params['nb_actions'], device=device, seed=params['seed'],
                temperature=params['baseline_temp'], normalize=params['normalize'])
        elif params['learning_type'] in ['pi_b', 'soft_sort']:
            baseline = Baseline(params['network_size'], network_path=baseline_path, state_shape=params['state_shape'],
                                nb_actions=params['nb_actions'], device=device, seed=params['seed'],
                                temperature=params['baseline_temp'], normalize=params['normalize'])
        elif 'count_based' in params['learning_type']:
            baseline = SimilarityBaseline(dataset=dataset, seed=params['seed'], nb_actions=params['nb_actions'],
                                          results_folder=os.path.dirname(dataset_path))
            baseline.evaluate_baseline(env, number_of_steps=100000, number_of_epochs=1,
                                       verbose=True, save_results=True)
        else:
            # no baseline, should use counters to estimate policy
            baseline = None

        folder_name = os.path.dirname(dataset_path)
        print("Data with counts loaded: {} samples".format(dataset.size), flush=True)
        expt = BatchExperiment(dataset=dataset, env=env, folder_name=folder_name, episode_max_len=params['episode_max_len'],
                               minimum_count=params['minimum_count'], extra_stochasticity=params['extra_stochasticity'],
                               history_len=params['history_len'], max_start_nullops=params['max_start_nullops'],
                               keep_all_logs=False)
    else:
        # Create experiment folder
        if not os.path.exists(DATA_DIR):
            os.makedirs(DATA_DIR)

        folder_name = DATA_DIR
        baseline = None
        expt = DQNExperiment(env=env, ai=None, episode_max_len=params['episode_max_len'], annealing=params['annealing'],
                             history_len=params['history_len'], max_start_nullops=params['max_start_nullops'],
                             replay_min_size=params['replay_min_size'], test_epsilon=params['test_epsilon'],
                             folder_name=folder_name, network_path=params['network_path'],
                             extra_stochasticity=params['extra_stochasticity'], score_window_size=100,
                             keep_all_logs=False)

    for ex in range(params['num_experiments']):
        print('\n')
        print('>>>>> Experiment ', ex, ' >>>>> ',
              params['learning_type'], ' >>>>> Epsilon >>>>> ',
              params['epsilon_soft'], ' >>>>> Minimum Count >>>>> ',
              params['minimum_count'], ' >>>>> Kappa >>>>> ',
              params['kappa'], ' >>>>> ', flush=True)
        print('\n')
        print("\nPROGRESS: {0:02.2f}%\n".format(ex / params['num_experiments'] * 100), flush=True)
        ai = AI(baseline, state_shape=env.state_shape, nb_actions=env.nb_actions, action_dim=params['action_dim'],
                reward_dim=params['reward_dim'], history_len=params['history_len'], gamma=params['gamma'],
                learning_rate=params['learning_rate'], epsilon=params['epsilon'], final_epsilon=params['final_epsilon'],
                test_epsilon=params['test_epsilon'], annealing_steps=params['annealing_steps'], minibatch_size=params['minibatch_size'],
                replay_max_size=params['replay_max_size'], update_freq=params['update_freq'],
                learning_frequency=params['learning_frequency'], ddqn=params['ddqn'], learning_type=params['learning_type'],
                network_size=params['network_size'], normalize=params['normalize'], device=device,
                kappa=params['kappa'], minimum_count=params['minimum_count'], epsilon_soft=params['epsilon_soft'])
        expt.ai = ai
        if not params['batch']:
            # resets dataset for online experiment
            expt.dataset_counter = Dataset_Counts(count_param=params['count_param'],
                                                  state_shape=env.state_shape,
                                                  nb_actions=env.nb_actions,
                                                  replay_max_size=params['replay_max_size'],
                                                  is_counting=ai.needs_state_action_counter())

        env.reset()
        with open(os.path.join(expt.folder_name, 'config.yaml'), 'w') as y:
            yaml.safe_dump(params, y)  # saving params for reference
        expt.do_epochs(number_of_epochs=params['num_epochs'], is_learning=params['is_learning'],
                       steps_per_epoch=params['steps_per_epoch'], is_testing=params['is_testing'],
                       steps_per_test=params['steps_per_test'],
                       passes_on_dataset=params['passes_on_dataset'], exp_id=ex)
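
A hedged invocation sketch for run(). From the override loop at the top of the function, options is an iterable of (key, value-string) pairs whose keys must already exist in the YAML config; the file name and overrides below are placeholders.

# override the seed and switch to batch mode from the command-line layer
run('config.yaml', options=[('seed', '7'), ('batch', 'True')])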