def get_exploration_agent(exploration_config, exploration_env):
    """Returns the exploration agent named by exploration_config["type"]."""
    agent_type = exploration_config.get("type")
    if agent_type == "learned":
        return dqn.DQNAgent.from_config(exploration_config, exploration_env)
    elif agent_type == "random":
        return policy.RandomPolicy(exploration_env.action_space)
    elif agent_type == "none":
        return policy.ConstantActionPolicy(grid.Action.end_episode)
    raise ValueError("Invalid exploration agent: {}".format(agent_type))
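# Usage sketch (assumption: `dqn`, `policy`, and `grid` are the modules this
# file imports, and `env` stands in for an already-constructed exploration
# environment; the "type" strings mirror the branches above):
exploration_config = {"type": "random"}
explorer = get_exploration_agent(exploration_config, env)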
def __init__(self, alpha, epsilon, discount, environment):
    self.action_space = environment.action_space
    self.alpha = alpha        # learning rate
    self.epsilon = epsilon    # exploration rate
    self.discount = discount  # discount factor (gamma)
    # Tabular Q-values, one entry per (state, action) pair.
    self.qvalues = np.zeros(
        (environment.state_space, environment.action_space), np.float32)
    self.policy = policy.RandomPolicy(environment.state_space,
                                      environment.action_space)
    self.explore_policy = self.policy
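# A minimal sketch of the epsilon-greedy action selection these fields
# support (assumption: the class's real method is not shown in this excerpt):
def get_action(self, state):
    # Explore uniformly with probability epsilon, otherwise act greedily
    # with respect to the tabular Q-values.
    if np.random.random() < self.epsilon:
        return np.random.randint(self.action_space)
    return int(np.argmax(self.qvalues[state]))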
def __init__(self, alpha, discount, environment):
    self.alpha = alpha        # learning rate
    self.discount = discount  # discount factor (gamma)
    state_space = environment.state_space
    action_space = environment.action_space
    self.action_space = action_space
    # Tabular Q-values, one entry per (state, action) pair.
    self.qvalues = np.zeros((state_space, action_space), np.float32)
    self.optimal_policy = policy.RandomPolicy(state_space, action_space)
    self.explore_policy = self.optimal_policy
    self.draw_policy = policy.GreedyPolicy(state_space, action_space,
                                           self.qvalues)
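# A minimal sketch of the tabular Q-learning update implied by alpha,
# discount, and qvalues (assumption: the class's real update method is not
# shown in this excerpt):
def update(self, state, action, reward, next_state):
    # Q(s, a) += alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
    target = reward + self.discount * np.max(self.qvalues[next_state])
    self.qvalues[state, action] += self.alpha * (
        target - self.qvalues[state, action])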
def episode_test() -> bool:
    # `rng` is assumed to be a module-level random generator in this file.
    racetrack_ = environment.track.GridWorld(environment.track.TRACK_1, rng)
    environment_ = environment.Environment(racetrack_, verbose=True)
    policy_ = policy.RandomPolicy(environment_, rng)
    agent_ = agent.Agent(environment_, policy_, verbose=True)
    episode_: agent.Episode = agent_.generate_episode()
    print()
    for t, rsa in enumerate(episode_.trajectory):
        print(
            f"t={t}\treward={rsa.reward}\tstate={rsa.state}\taction={rsa.action}"
        )
    return True
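# Driver sketch (assumption: `rng` is created at module scope before
# episode_test runs; numpy's default_rng with a fixed seed is one plausible
# choice, though the repo may use a different generator):
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    assert episode_test()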
trace_path = "traces/sample_trace.csv"
config = cfg.Config.from_files_and_bindings(["spec_llc.json"], [])
env = environment.CacheReplacementEnv(config, trace_path, 0)

# Pick the replacement policy requested on the command line.
if args.policy_type == "belady":
    replacement_policy = belady.BeladyPolicy(env)
elif args.policy_type == "lru":
    replacement_policy = policy.LRU()
elif args.policy_type == "s4lru":
    replacement_policy = s4lru.S4LRU(config.get("associativity"))
elif args.policy_type == "belady_nearest_neighbors":
    train_env = environment.CacheReplacementEnv(config, trace_path, 0)
    replacement_policy = belady.BeladyNearestNeighborsPolicy(train_env)
elif args.policy_type == "random":
    replacement_policy = policy.RandomPolicy(np.random.RandomState(0))
else:
    raise ValueError(f"Unsupported policy type: {args.policy_type}")

# Roll the policy through the whole trace, accumulating reward and steps.
state = env.reset()
total_reward = 0
steps = 0
with tqdm.tqdm() as pbar:
    while True:
        action = replacement_policy.action(state)
        state, reward, done, info = env.step(action)
        total_reward += reward
        steps += 1
        pbar.update(1)
        if done:
            break
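# Reporting sketch (assumption: the excerpt shows nothing after the loop;
# every name used here is defined above):
print(f"policy={args.policy_type}  steps={steps}  total_reward={total_reward}")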
import time

import game
import policy

iteration = 1
numberWin = [0, 0]
numSteps = [[0 for _ in range(iteration)] for _ in range(2)]
# boardSize = 6

for i in range(iteration):
    newGame = game.Gomoku(1)
    # print("start a new Gomoku game with board size %dx%d" % (boardSize, boardSize))
    # baselinePolicy = game.BaselinePolicy()
    randomPolicy = policy.RandomPolicy()
    minimaxPolicy = policy.MinimaxPolicy()
    time_to_move = [[], []]
    while newGame.isEnd() < 0:
        nextPlayer = newGame.nextPlayer
        start = time.time()
        if nextPlayer == 1:
            # action = randomPolicy.getNextAction(newGame)
            action = minimaxPolicy.getNextAction(newGame)
            # print("player 1 selects", action)
        else:
            action = randomPolicy.getNextAction(newGame)
            # action = baselinePolicy.getNextAction(newGame)
            # print("player 2 selects", action)
        # print(time.time() - start)
        time_to_move[nextPlayer - 1].append(time.time() - start)
        # print("player %d places on (%d, %d)" % (nextPlayer, action[0], action[1]))
n_population = 10000

# Symptom names for easy reference.
from auxilliary import symptom_names

# Create the underlying population.
print("Generating population")
population = simulator.Population(n_genes, n_vaccines, n_treatments)
X = population.generate(n_population)
# Make sure that your policy appropriately filters the population if
# necessary; X is just a random sample of n_population people.

# Generate vaccination results.
print("Vaccination")
vaccine_policy = policy.RandomPolicy(
    n_vaccines, list(range(-1, n_vaccines)))  # include -1 for 'no vaccine'

print("With a for loop")
# The simplest way to work is to go through every individual in the population.
for t in range(n_population):
    a_t = vaccine_policy.get_action(X[t])
    # Then you can obtain results for that individual.
    y_t = population.vaccinate([t], a_t)
    # Feed the results back into your policy. This allows you to fit the
    # statistical model you have.
    vaccine_policy.observe(X[t], a_t, y_t)

print("Vaccinate'em all")
# Here you can get an action for everybody in the population at once.
A = vaccine_policy.get_action(X)
# Then you can obtain results for everybody.
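# Vectorized continuation sketch (assumption: vaccinate() and observe()
# accept whole-population arrays the same way the per-individual calls
# above accept single rows):
Y = population.vaccinate(list(range(n_population)), A)
vaccine_policy.observe(X, A, Y)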
def __init__(self, config=None):
    if config is None:
        config = {}
    self.env = wrap_dqn(gym.make(config.get('game', 'PongNoFrameskip-v4')))
    self.action_size = self.env.action_space.n
    self.to_vis = config.get('visualize', False)
    self.verbose = config.get('verbose', True)
    self.backup = config.get('backup', 25)
    self.episodes = config.get('episodes', 300)
    self.depth = config.get('depth', 4)
    self.state_size = config.get('space', (84, 84))
    self.model = None
    self._target_model = None

    # Replay memory: prioritized or uniform sampling.
    self.prioritized = config.get('prioritized', False)
    if self.prioritized:
        self.memory = PrioritizedMemory(
            max_len=config.get('mem_size', 100000))
    else:
        self.memory = SimpleMemory(max_len=config.get('mem_size', 100000))

    # Dueling vs. plain convolutional architecture.
    if config.get('duel', False):
        self.model = self._duel_conv()
    else:
        self.model = self._conv()
    self.model.compile(Adam(lr=config.get('lr', 1e-4)), loss=huber_loss)

    # Optional target network, synced every `target_update` steps.
    if config.get('target', True):
        self._target_model = clone_model(self.model)
        self._target_model.set_weights(self.model.get_weights())
    self._time = 0
    self.update_time = config.get('target_update', 1000)

    self.env._max_episode_steps = None
    self.batch_size = config.get('batch', 32 * 3)
    self.to_observe = config.get('to_observe', 10000)

    self.log_dir = config['log_dir']
    if not os.path.exists(self.log_dir):
        os.makedirs(self.log_dir)
    plot_model(self.model,
               to_file=os.path.join(self.log_dir, 'model.png'),
               show_shapes=True)

    attr = {
        'batch size': self.batch_size,
        'to observe': self.to_observe,
        'depth': self.depth
    }
    self.results = {'info': attr}

    load_prev = config.get('load', False)
    self.gamma = None
    pol = None
    if 'pol' in config:
        if config['pol'] == 'random':
            pol = policy.RandomPolicy()
        elif config['pol'] == 'eps':
            pol = policy.EpsPolicy(config.get('pol_eps', 0.1))
    self.pol = pol

    # Resume from the most recent numbered checkpoint directory, if any.
    if load_prev:
        path = sorted([
            int(x) for x in os.listdir(self.log_dir)
            if os.path.isdir(os.path.join(self.log_dir, x))
        ])
        if len(path) != 0:
            load_prev = self.load(os.path.join(self.log_dir, str(path[-1])))

    # Default policy: epsilon annealed from 1.0 down to `ex_min`.
    if self.pol is None:
        self.pol = policy.AnnealedPolicy(
            inner_policy=policy.EpsPolicy(1.0,
                                          other_pol=policy.GreedyPolicy()),
            attr='eps',
            value_max=1.0,
            value_min=config.get('ex_min', 0.02),
            value_test=0.5,
            nb_steps=config.get('ex_steps', 100000))
    if self.gamma is None:
        self.gamma = policy.EpsPolicy(float(config.get('gamma',
                                                       0.99))).get_value
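# Construction sketch (assumptions: the enclosing class name is not shown in
# this excerpt, so `DQNAgent` here is hypothetical, and 'log_dir' is the one
# config key without a default, so it must always be supplied):
agent = DQNAgent({
    'game': 'PongNoFrameskip-v4',
    'prioritized': True,     # prioritized replay memory
    'duel': True,            # dueling network architecture
    'log_dir': 'runs/pong',  # required: __init__ indexes config['log_dir']
})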
# main
if __name__ == "__main__":
    import pandas
    import policy

    n_symptoms = 10
    n_genes = 128
    n_vaccines = 3
    n_treatments = 4
    pop = Population(n_genes, n_vaccines, n_treatments)

    # Features for a purely observational cohort.
    n_observations = 1000
    X_observation = pop.generate(n_observations)
    pandas.DataFrame(X_observation).to_csv('observation_features.csv',
                                           header=False, index=False)

    # Features, actions, and outcomes for a treated cohort.
    n_treated = 1000
    X_treatment = pop.generate(n_treated)
    X_treatment = X_treatment[X_treatment[:, 1] == 1]
    print("Generating treatment outcomes")
    a, y = pop.treatment(
        X_treatment,
        policy.RandomPolicy(n_treatments, list(range(n_treatments))))
    pandas.DataFrame(X_treatment).to_csv('treatment_features.csv',
                                         header=False, index=False)
    pandas.DataFrame(a).to_csv('treatment_actions.csv',
                               header=False, index=False)
    pandas.DataFrame(y).to_csv('treatment_outcomes.csv',
                               header=False, index=False)