def main():
    """Prototype of the thought-process model: build an agent, its
    thoughts, and the goal-to-thought links, printing each as we go."""
    world = mod_env.Environment('Earth')  # may implement later, but not needed now
    goals = [
        mod_goal.Goal('Be Happy'),
        mod_goal.Goal('Get Rich'),
        mod_goal.Goal('Learn to Juggle'),
    ]
    jenny = ThinkingAgent('Jenny', goals)
    print(jenny)

    # define the thoughts and actions that can occur
    t = Thoughts(jenny)
    t.add(Thought('Hang out with Friends', result_success=actions.fun,
                  result_fail=[], time_cost=1, cash_cost=0.0))
    t.add(Thought('Find a Job', result_success=actions.job,
                  result_fail=emotions.sadness, time_cost=3, cash_cost=0.2))
    t.add(Thought('Go to Work', result_success=objects.cash,
                  result_fail=emotions.sadness, time_cost=5, cash_cost=-10))
    t.add(Thought('Read a book', result_success=[actions.fun, actions.training],
                  result_fail=[], time_cost=3, cash_cost=0))
    t.add(Thought('Rob a bank', result_success=[objects.cash, actions.prison],
                  result_fail=[emotions.sadness, actions.prison],
                  time_cost=1, cash_cost=0.2))

    # wire each goal to the thought(s) that can serve it, in the same
    # order the original registered them
    all_links = Links(jenny)
    for goal_idx, thought_idx in ((0, 0), (1, 1), (1, 2), (0, 3)):
        all_links.add_link('goal', goals[goal_idx],
                           'thought', t.thoughts[thought_idx])
    print(all_links)
def run(args):
    """
    Either generates a dataset from a baseline and computes its associated
    counts, or evaluates a baseline.

    Expects exactly one ``.yaml`` config file inside ``args.baseline_dir``
    (the first one found is used).
    """
    # fix random seed for reproducibility
    np.random.seed(args.seed)

    # Fixed: `params` was referenced before assignment when no yaml file
    # existed, raising NameError instead of the intended ValueError.
    params = None
    for fname in os.listdir(args.baseline_dir):
        if fname.endswith(".yaml"):
            yaml_file = os.path.join(args.baseline_dir, fname)
            with open(yaml_file, 'r') as f:  # fixed: close the file handle
                params = yaml.safe_load(f)
            print('Loading config from {}'.format(yaml_file))
            break
    if not params:
        raise ValueError('We could not find the configuration file for the baseline, it should be a yaml file.')

    if args.extra_stochasticity > 0.0:
        params['extra_stochasticity'] = args.extra_stochasticity

    env = environment.Environment(params['domain'], params)
    baseline = Baseline(os.path.join(args.baseline_dir, args.baseline_name),
                        params['network_size'],
                        state_shape=params['state_shape'],
                        nb_actions=params['nb_actions'],
                        seed=args.seed,
                        temperature=args.temperature,
                        device=args.device,
                        normalize=params['normalize'])

    if args.evaluate_baseline:
        baseline.evaluate_baseline(env, params, args.eval_steps,
                                   args.eval_epochs, args.noise_factor)
        return

    print("Generating dataset with actual size {}...".format(args.dataset_size), flush=True)
    dataset = baseline.generate_dataset(
        env,
        os.path.join(args.baseline_dir, args.dataset_dir),
        params,
        dataset_size=args.dataset_size,
        overwrite=args.overwrite,
        noise_factor=args.noise_factor)
    compute_counts(dataset, overwrite=args.overwrite, param=args.param)
def run(args):
    """
    Either generates a dataset from a baseline and computes its associated
    counts, or evaluates a baseline.

    The first ``.yaml`` file found in ``args.baseline_dir`` is used as the
    config; a command-line seed overrides the yaml seed.
    """
    for fname in os.listdir(args.baseline_dir):
        if fname.endswith(".yaml"):
            yaml_file = os.path.join(args.baseline_dir, fname)
            with open(yaml_file, 'r') as f:  # fixed: close the file handle
                params = yaml.safe_load(f)
            print('Loading config from {}'.format(yaml_file))
            break
    else:  # no yaml file found
        raise ValueError('We could not find the configuration file for the baseline, it should be a yaml file.')

    if args.seed is not None:
        # if seed is given in the command line ignores seed from yaml file
        np.random.seed(args.seed)
        params['seed'] = args.seed
    elif 'seed' in params:
        # NOTE(review): np.random is intentionally NOT re-seeded here,
        # matching the original; only the Baseline receives the seed.
        args.seed = params['seed']
    else:
        print("no seed found, using 123")
        args.seed = 123
        params['seed'] = 123

    if args.extra_stochasticity > 0.0:
        params['extra_stochasticity'] = args.extra_stochasticity

    env = environment.Environment(params['domain'], params)
    baseline = Baseline(network_size=params['network_size'],
                        network_path=os.path.join(args.baseline_dir, args.baseline_name),
                        state_shape=params['state_shape'],
                        nb_actions=params['nb_actions'],
                        device=args.device,
                        seed=args.seed,
                        temperature=args.temperature,
                        normalize=params['normalize'],
                        results_folder=args.baseline_dir)

    if args.evaluate_baseline:
        baseline.evaluate_baseline(env, args.eval_steps, args.eval_epochs,
                                   args.noise_factor, save_results=True)

    if args.generate_dataset:
        print("Generating dataset with actual size {}...".format(args.dataset_size), flush=True)
        dataset = baseline.generate_dataset(
            env,
            args.dataset_dir,
            params,
            dataset_size=args.dataset_size,
            overwrite=args.overwrite,
            noise_factor=args.noise_factor)
        compute_counts(dataset, overwrite=args.overwrite, count_param=args.count_param)
def run(domain, config, options):
    """Run a batch (offline, from a counted dataset) or online DQN
    experiment configured by ``config_<domain>.yaml`` next to this file.

    ``options`` is a sequence of ``(key, value)`` string pairs that
    override yaml entries; values are coerced to the yaml entry's type.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    if not config:
        config = 'config_' + domain
    cfg_file = os.path.join(dir_path, config + '.yaml')
    with open(cfg_file, 'r') as f:  # fixed: close the config file handle
        params = yaml.safe_load(f)

    # replacing params with command line options
    for opt in options:
        assert opt[0] in params
        dtype = type(params[opt[0]])
        if dtype == bool:
            # bool('False') is truthy, so compare against the literal string
            new_opt = opt[1] == 'True'
        else:
            new_opt = dtype(opt[1])
        params[opt[0]] = new_opt

    print('\n')
    print('Parameters ')
    for key in params:
        print(key, params[key])
    print('\n')

    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    random_state = np.random.RandomState(params['seed'])
    device = torch.device(params["device"])
    DATA_DIR = os.path.join(params['folder_location'], params['folder_name'])
    env = environment.Environment(domain, params, random_state)

    if params['batch']:
        from baseline import Baseline
        baseline_path = os.path.join(DATA_DIR, params['baseline_path'])
        baseline = Baseline(baseline_path, params['network_size'],
                            state_shape=params['state_shape'],
                            nb_actions=params['nb_actions'],
                            seed=params['seed'],
                            temperature=params['baseline_temp'],
                            device=params['device'],
                            normalize=params['normalize'])
        dataset_path = os.path.join(DATA_DIR, params['dataset_path'])
        print("\nLoading dataset from file {}".format(dataset_path), flush=True)
        if not os.path.exists(dataset_path):
            raise ValueError("The dataset file does not exist")
        with open(dataset_path, "rb") as f:
            data = pickle.load(f)
        dataset = Dataset_Counts(data, params['count_param'])
        print("Data with counts loaded: {} samples".format(len(data['s'])), flush=True)
        folder_name = os.path.dirname(dataset_path)
        expt = BatchExperiment(
            dataset=dataset,
            env=env,
            folder_name=folder_name,
            episode_max_len=params['episode_max_len'],
            minimum_count=params['minimum_count'],
            extra_stochasticity=params['extra_stochasticity'],
            history_len=params['history_len'],
            max_start_nullops=params['max_start_nullops'])
    else:
        # Create experiment folder (fixed: race-free exists check)
        os.makedirs(DATA_DIR, exist_ok=True)
        baseline = None
        expt = DQNExperiment(env=env, ai=None,
                             episode_max_len=params['episode_max_len'],
                             annealing=params['annealing'],
                             history_len=params['history_len'],
                             max_start_nullops=params['max_start_nullops'],
                             replay_min_size=params['replay_min_size'],
                             test_epsilon=params['test_epsilon'],
                             folder_name=DATA_DIR,
                             network_path=params['network_path'],
                             extra_stochasticity=params['extra_stochasticity'],
                             score_window_size=100)

    for ex in range(params['num_experiments']):
        print('\n')
        print('>>>>> Experiment ', ex, ' >>>>> ', params['learning_type'],
              ' >>>>> Epsilon >>>>> ', params['epsilon_soft'],
              ' >>>>> Minimum Count >>>>> ', params['minimum_count'],
              ' >>>>> Kappa >>>>> ', params['kappa'], ' >>>>> ', flush=True)
        print('\n')
        # a fresh AI per experiment so each run starts from scratch
        ai = AI(baseline, state_shape=env.state_shape, nb_actions=env.nb_actions,
                action_dim=params['action_dim'], reward_dim=params['reward_dim'],
                history_len=params['history_len'], gamma=params['gamma'],
                learning_rate=params['learning_rate'], epsilon=params['epsilon'],
                final_epsilon=params['final_epsilon'], test_epsilon=params['test_epsilon'],
                annealing_steps=params['annealing_steps'],
                minibatch_size=params['minibatch_size'],
                replay_max_size=params['replay_max_size'],
                update_freq=params['update_freq'],
                learning_frequency=params['learning_frequency'],
                ddqn=params['ddqn'], learning_type=params['learning_type'],
                network_size=params['network_size'], normalize=params['normalize'],
                device=device, kappa=params['kappa'],
                minimum_count=params['minimum_count'],
                epsilon_soft=params['epsilon_soft'])
        expt.ai = ai
        env.reset()
        with open(expt.folder_name + '/config.yaml', 'w') as y:
            yaml.safe_dump(params, y)  # saving params for reference
        expt.do_epochs(number_of_epochs=params['num_epochs'],
                       is_learning=params['is_learning'],
                       steps_per_epoch=params['steps_per_epoch'],
                       is_testing=params['is_testing'],
                       steps_per_test=params['steps_per_test'],
                       passes_on_dataset=params['passes_on_dataset'],
                       exp_id=ex)
def __init__(self, training_steps, validation_steps, validation_size,
             mini_batch_size, learning_rate, number_of_epochs, network_size,
             folder_location, dataset_file, cloned_network_path,
             sample_from_env, entropy_coefficient, device, seed,
             experiment_name, config_file, update_learning_rate):
    """Set up a behavior-cloning training run.

    Loads the yaml config, seeds numpy/torch, loads and splits the
    dataset, builds the cloned policy and its "best so far" copy, the
    NLL loss + SGD optimizer, the environment, and (optionally) the
    baseline policy used to sample extra data from the environment.

    Args:
        training_steps: steps per epoch; 0 means one pass over the
            training split (dataset size / mini-batch size).
        validation_steps: analogous for the validation split.
        validation_size: fraction/size passed to train_validation_split.
        config_file: yaml file; required (raises FileNotFoundError).
    """
    self.sample_from_env = sample_from_env
    self.smaller_validation_loss = None
    self.seed = seed
    try:
        self.params = yaml.safe_load(open(config_file, 'r'))
    except FileNotFoundError as e:
        print(
            "Configuration file not found; Define a config_file to be able to sample from environment"
        )
        raise e

    # initialize seeds for reproducibility
    np.random.seed(seed)
    torch.manual_seed(seed)

    # set paths for data and output path
    log_path = os.path.join('./logs/' + experiment_name)
    data_dir = folder_location
    dataset_path = dataset_file
    # outputs (cloned weights, results) live next to the dataset file
    self.output_folder = os.path.dirname(dataset_path)
    self.cloned_network_path = os.path.join(os.path.dirname(dataset_path),
                                            cloned_network_path)

    # start tensorboard logger
    self.logger = SummaryWriter(log_path)

    # import data and split into train/validation
    full_dataset = Dataset_Counts.load_dataset(dataset_path)
    self.dataset_train, self.dataset_validation = full_dataset.train_validation_split(
        test_size=validation_size)

    # set training parameters
    self.mini_batch_size = mini_batch_size
    self.number_of_epochs = number_of_epochs
    self.network_size = network_size
    self.entropy_coefficient = entropy_coefficient
    self.device = device
    self.learning_rate = learning_rate
    self.update_learning_rate = update_learning_rate
    # 0 means "derive the step count from the split size"
    if training_steps != 0:
        self.training_steps = training_steps
    else:
        self.training_steps = int(self.dataset_train.size / self.mini_batch_size)
    if validation_steps != 0:
        self.validation_steps = validation_steps
    else:
        self.validation_steps = int(self.dataset_validation.size / self.mini_batch_size)
    # log ~10 times per epoch
    self.log_frequency = int(self.training_steps / 10)
    print(
        "Training with {} training steps and {} validation steps ".format(
            self.training_steps, self.validation_steps))

    # create model being trained and a snapshot holding the best weights
    self.cloned_baseline_policy = ClonedBaseline(
        network_size=network_size,
        network_path=None,
        state_shape=self.params['state_shape'],
        nb_actions=self.params['nb_actions'],
        device=device,
        seed=seed,
        temperature=0)
    self.best_policy = ClonedBaseline(
        network_size=network_size,
        network_path=None,
        state_shape=self.params['state_shape'],
        nb_actions=self.params['nb_actions'],
        device=device,
        seed=seed,
        temperature=0,
        results_folder=self.output_folder)
    # NOTE(review): best_policy copies weights from ITSELF here, which is a
    # no-op; presumably this was meant to sync from
    # self.cloned_baseline_policy.network.state_dict() — confirm before changing.
    self.best_policy._copy_weight_from(
        self.best_policy.network.state_dict())

    # define loss and optimizer
    self.nll_loss_function = nn.NLLLoss()
    self.optimizer = torch.optim.SGD(
        self.cloned_baseline_policy.network.parameters(), lr=learning_rate)
    # optimizer = torch.optim.RMSprop(network.parameters(), lr=learning_rate, alpha=0.95, eps=1e-07)

    # instantiate environment for policy evaluation
    self.env = environment.Environment(self.params['domain'], self.params)
    if sample_from_env:
        print("sampling from environment")
        baseline_network_path = os.path.join(data_dir,
                                             self.params["network_path"])
        self.baseline = Baseline(self.params['network_size'],
                                 network_path=baseline_network_path,
                                 state_shape=self.params['state_shape'],
                                 nb_actions=self.params['nb_actions'],
                                 device=device,
                                 seed=seed,
                                 temperature=self.params.get(
                                     "baseline_temp", 0.1),
                                 normalize=self.params['normalize'])
    else:
        self.baseline = None
def run(config_file, options):
    """Run a batch (offline) or online DQN experiment from an explicit
    yaml ``config_file``, choosing the baseline by ``learning_type``.

    ``options`` is a sequence of ``(key, value)`` string pairs that
    override yaml entries; values are coerced to the yaml entry's type.
    """
    try:
        with open(config_file, 'r') as f:  # fixed: close the config file handle
            params = yaml.safe_load(f)
    except FileNotFoundError as e:
        print("Configuration file not found")
        raise e

    # replacing params with command line options
    for opt in options:
        assert opt[0] in params
        dtype = type(params[opt[0]])
        if dtype == bool:
            # bool('False') is truthy, so compare against the literal string
            new_opt = opt[1] == 'True'
        else:
            new_opt = dtype(opt[1])
        params[opt[0]] = new_opt

    print('\n')
    print('Parameters ')
    for key in params:
        print(key, params[key])
    print('\n')

    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    random_state = np.random.RandomState(params['seed'])
    device = torch.device(params["device"])
    DATA_DIR = os.path.join(params['folder_location'], params['folder_name'])
    env = environment.Environment(params["domain"], params, random_state)

    if params['batch']:
        dataset_path = params['dataset_path']
        print("\nLoading dataset from file {}".format(dataset_path), flush=True)
        if not os.path.exists(dataset_path):
            raise ValueError("The dataset file does not exist")
        dataset = Dataset_Counts.load_dataset(dataset_path)

        # pick the baseline implementation that matches the learning type
        baseline_path = os.path.join(DATA_DIR, params['baseline_path'])
        if 'behavior_cloning' in params['learning_type']:
            baseline_path = os.path.join(os.path.dirname(dataset_path),
                                         'cloned_network_weights.pt')
            baseline = ClonedBaseline(params['network_size'],
                                      network_path=baseline_path,
                                      state_shape=params['state_shape'],
                                      nb_actions=params['nb_actions'],
                                      device=device,
                                      seed=params['seed'],
                                      temperature=params['baseline_temp'],
                                      normalize=params['normalize'])
        elif params['learning_type'] in ['pi_b', 'soft_sort']:
            baseline = Baseline(params['network_size'],
                                network_path=baseline_path,
                                state_shape=params['state_shape'],
                                nb_actions=params['nb_actions'],
                                device=device,
                                seed=params['seed'],
                                temperature=params['baseline_temp'],
                                normalize=params['normalize'])
        elif 'count_based' in params['learning_type']:
            baseline = SimilarityBaseline(dataset=dataset,
                                          seed=params['seed'],
                                          nb_actions=params['nb_actions'],
                                          results_folder=os.path.dirname(dataset_path))
            baseline.evaluate_baseline(env, number_of_steps=100000,
                                       number_of_epochs=1, verbose=True,
                                       save_results=True)
        else:
            # no baseline, should use counters to estimate policy
            baseline = None

        folder_name = os.path.dirname(dataset_path)
        print("Data with counts loaded: {} samples".format(dataset.size), flush=True)
        expt = BatchExperiment(dataset=dataset,
                               env=env,
                               folder_name=folder_name,
                               episode_max_len=params['episode_max_len'],
                               minimum_count=params['minimum_count'],
                               extra_stochasticity=params['extra_stochasticity'],
                               history_len=params['history_len'],
                               max_start_nullops=params['max_start_nullops'],
                               keep_all_logs=False)
    else:
        # Create experiment folder (fixed: race-free exists check)
        os.makedirs(DATA_DIR, exist_ok=True)
        folder_name = DATA_DIR
        baseline = None
        expt = DQNExperiment(env=env, ai=None,
                             episode_max_len=params['episode_max_len'],
                             annealing=params['annealing'],
                             history_len=params['history_len'],
                             max_start_nullops=params['max_start_nullops'],
                             replay_min_size=params['replay_min_size'],
                             test_epsilon=params['test_epsilon'],
                             folder_name=folder_name,
                             network_path=params['network_path'],
                             extra_stochasticity=params['extra_stochasticity'],
                             score_window_size=100,
                             keep_all_logs=False)

    for ex in range(params['num_experiments']):
        print('\n')
        print('>>>>> Experiment ', ex, ' >>>>> ', params['learning_type'],
              ' >>>>> Epsilon >>>>> ', params['epsilon_soft'],
              ' >>>>> Minimum Count >>>>> ', params['minimum_count'],
              ' >>>>> Kappa >>>>> ', params['kappa'], ' >>>>> ', flush=True)
        print('\n')
        print("\nPROGRESS: {0:02.2f}%\n".format(ex / params['num_experiments'] * 100),
              flush=True)
        # a fresh AI per experiment so each run starts from scratch
        ai = AI(baseline, state_shape=env.state_shape, nb_actions=env.nb_actions,
                action_dim=params['action_dim'], reward_dim=params['reward_dim'],
                history_len=params['history_len'], gamma=params['gamma'],
                learning_rate=params['learning_rate'], epsilon=params['epsilon'],
                final_epsilon=params['final_epsilon'], test_epsilon=params['test_epsilon'],
                annealing_steps=params['annealing_steps'],
                minibatch_size=params['minibatch_size'],
                replay_max_size=params['replay_max_size'],
                update_freq=params['update_freq'],
                learning_frequency=params['learning_frequency'],
                ddqn=params['ddqn'], learning_type=params['learning_type'],
                network_size=params['network_size'], normalize=params['normalize'],
                device=device, kappa=params['kappa'],
                minimum_count=params['minimum_count'],
                epsilon_soft=params['epsilon_soft'])
        expt.ai = ai
        if not params['batch']:
            # resets dataset for online experiment
            expt.dataset_counter = Dataset_Counts(
                count_param=params['count_param'],
                state_shape=env.state_shape,
                nb_actions=env.nb_actions,
                replay_max_size=params['replay_max_size'],
                is_counting=ai.needs_state_action_counter())
        env.reset()
        with open(expt.folder_name + '/config.yaml', 'w') as y:
            yaml.safe_dump(params, y)  # saving params for reference
        expt.do_epochs(number_of_epochs=params['num_epochs'],
                       is_learning=params['is_learning'],
                       steps_per_epoch=params['steps_per_epoch'],
                       is_testing=params['is_testing'],
                       steps_per_test=params['steps_per_test'],
                       passes_on_dataset=params['passes_on_dataset'],
                       exp_id=ex)