def setup(game, pim, pits, stones, config=None):
    """Entry Point: configure and start an Ayo game.

    Args:
        game: game type key ("hvh", "hvc", anything else = computer vs computer).
        pim: penalize-invalid-moves flag.
        pits: number of pits per player on the board.
        stones: initial number of stones per pit.
        config: optional path to a YAML file whose 'config' section may
            override game, pim, pits and stones.
    """
    # Purely cosmetic progress bar shown while "initializing".
    pbar = tqdm(range(10000))
    for char in pbar:
        pbar.set_description("INITIALIZING GAME %s" % char)
    print()

    if config:
        pbar = tqdm(range(10000))
        for _ in pbar:
            pbar.set_description("GETTING CONFIGURATION FROM {0}".format(config))
        print()
        import yaml
        with open(config, 'r') as stream:
            try:
                # BUG FIX: yaml.load() without an explicit Loader can execute
                # arbitrary tags from a user-supplied file (and is a TypeError
                # on PyYAML >= 6). safe_load parses plain data only.
                config_dict = yaml.safe_load(stream)['config']
                pim = config_dict.get('pim', pim)
                pits = config_dict.get('pits', pits)
                stones = config_dict.get('stones', stones)
                game = config_dict.get('game', game)
            except yaml.YAMLError as exc:
                print("Something went wrong with reading your config file. \n {}".format(exc))

    print(colored("Setting up game with the following config: \n Game Type: {0} "
                  "\n Board Pits: {1} \n Board Stones: {2} \n Penalize Invalid Moves: {3}"
                  .format(BoardConfig.GAME_TYPE_MAP.get(game), pits, stones, pim), "yellow"))
    print()

    # abort=True: answering "no" raises click.Abort and exits.
    if click.confirm('Do you want to continue with this configuration?', abort=True):
        print(text2art("AYO \t \t \t \t OLOPON"))
        board = Board(pim=pim, pits=pits, stones=stones)
        if game == "hvh":
            # Human vs human: prompt for both names.
            player_one_name = click.prompt("Enter a name for Player 1 ")
            player_two_name = click.prompt("Enter a name for Player 2 ")
            player_one = Human(name=player_one_name, pits=BoardConfig.PLAYER_ONE_PITS,
                               store=BoardConfig.PLAYER_ONE_STORE)
            player_two = Human(name=player_two_name, pits=BoardConfig.PLAYER_TWO_PITS,
                               store=BoardConfig.PLAYER_TWO_STORE)
            game = Game(players=[player_one, player_two], board=board)
        elif game == "hvc":
            # Human vs computer.
            player_one_name = click.prompt("Enter a name for the human player")
            player_one = Human(name=player_one_name, pits=BoardConfig.PLAYER_ONE_PITS,
                               store=BoardConfig.PLAYER_ONE_STORE)
            player_two = VectorPlayer(name=generate_name(), pits=BoardConfig.PLAYER_TWO_PITS,
                                      store=BoardConfig.PLAYER_TWO_STORE)
            game = Game(players=[player_one, player_two], board=board)
        else:
            # Computer vs computer.
            player_one = VectorPlayer(name=generate_name(), pits=BoardConfig.PLAYER_ONE_PITS,
                                      store=BoardConfig.PLAYER_ONE_STORE)
            player_two = VectorPlayer(name=generate_name(), pits=BoardConfig.PLAYER_TWO_PITS,
                                      store=BoardConfig.PLAYER_TWO_STORE)
            game = Game(players=[player_one, player_two], board=board)
        if click.confirm('\n\n{} vs {}. \n Start Game'.format(player_one.name.upper(),
                                                              player_two.name.upper()), abort=True):
            # Player one always opens the game.
            game.move(player_one)
def action_paste(self, resource, context, form):
    """Paste the resources referenced by the 'ikaaro_cp' cookie into *resource*."""
    cut, paths = context.get_cookie('ikaaro_cp', datatype=CopyCookie)
    # Empty clipboard: nothing to do
    if not paths:
        context.message = messages.MSG_NO_PASTE
        return

    target = resource
    pasted = []
    not_allowed = []
    for path in paths:
        # The source may have been deleted since it was cut/copied
        source = target.get_resource(path, soft=True)
        if source is None:
            continue
        # Cut & paste into the same folder is a no-op
        if cut is True and target == source.parent:
            pasted.append(source.name)
            continue
        new_name = generate_name(source.name, list(target.get_names()), '_copy_')
        # Move on cut, copy otherwise; either may be refused
        transfer = target.move_resource if cut is True else target.copy_resource
        try:
            transfer(path, new_name)
        except ConsistencyError:
            not_allowed.append(source.name)
        else:
            pasted.append(new_name)

    # A cut clipboard is one-shot: clear it once consumed
    if cut is True:
        context.del_cookie('ikaaro_cp')

    # Report what was pasted and what was refused
    message = []
    if pasted:
        message.append(
            messages.MSG_RESOURCES_PASTED.gettext(resources=', '.join(pasted)))
    if not_allowed:
        message.append(
            messages.MSG_RESOURCES_NOT_PASTED.gettext(
                resources=', '.join(not_allowed)))
    context.message = message
def action_paste(self, resource, context, form):
    """Paste the clipboard ('ikaaro_cp' cookie) contents into *resource*."""
    cut, paths = context.get_cookie('ikaaro_cp', datatype=CopyCookie)
    if not paths:
        # Nothing was cut or copied beforehand
        context.message = messages.MSG_NO_PASTE
        return

    target = resource
    pasted, not_allowed = [], []
    for path in paths:
        source = target.get_resource(path, soft=True)
        # Source resource may be gone by now
        if source is None:
            continue
        if cut is True and target == source.parent:
            # Cut & paste in place: nothing to move
            pasted.append(source.name)
            continue
        new_name = generate_name(source.name, target.get_names(), '_copy_')
        if cut is True:
            # Cut & paste
            try:
                target.move_resource(path, new_name)
            except ConsistencyError:
                not_allowed.append(source.name)
            else:
                pasted.append(new_name)
        else:
            # Copy & paste
            try:
                target.copy_resource(path, new_name)
            except ConsistencyError:
                not_allowed.append(source.name)
            else:
                pasted.append(new_name)

    if cut is True:
        # The clipboard is consumed by a cut & paste
        context.del_cookie('ikaaro_cp')

    # Build the user-facing summary
    message = []
    if pasted:
        message.append(
            messages.MSG_RESOURCES_PASTED(resources=', '.join(pasted)))
    if not_allowed:
        message.append(
            messages.MSG_RESOURCES_NOT_PASTED(resources=', '.join(not_allowed)))
    context.message = message
def start_experiment(exp_conf):
    """Run one multi-agent experiment and return (summed results, experiment name)."""
    n_agents = exp_conf['num_of_agents']
    n_episodes = exp_conf['iterations']
    # Shared queue every agent uses to send its action to the environment
    action_to_env = Queue(n_agents)

    # One private command queue and one result queue per agent
    sending_queues = {}
    results = {}
    agents = {}
    for aid in range(n_agents):
        sending_queues[aid] = Queue(1)
        results[aid] = Queue()
        agents[aid] = Agent(aid, action_to_env, sending_queues[aid],
                            n_episodes, exp_conf, results[aid])

    env = FleetEnv(action_to_env, sending_queues, n_agents, n_episodes,
                   exp_conf["DEBUG"])
    exp_name = generate_name(exp_conf)

    # One worker process per agent; the env runs in the current process
    # NOTE(review): worker processes are never joined -- TODO confirm intended
    for aid in range(n_agents):
        Process(target=start_agent, args=(env, agents[aid])).start()
    env.start()

    # Element-wise sum of every agent's result vector
    total = None
    for aid in range(n_agents):
        agent_res = np.array(results[aid].get())
        total = agent_res if total is None else total + agent_res
    return total, exp_name
def start_experiment(exp_conf):
    """Train a REINFORCE-style policy on FleetEnv.

    Args:
        exp_conf: dict with keys 'iterations', 'DEBUG', 'gamma', 'net', 'lr'.

    Returns:
        (reward_chart, name): list of per-episode total rewards, and the
        experiment name built by generate_name(exp_conf).
    """
    num_episodes = exp_conf['iterations']
    debug = exp_conf["DEBUG"]
    gamma = exp_conf['gamma']
    net = exp_conf['net']
    optimizer = optim.RMSprop(net.parameters(), lr=exp_conf['lr'])
    env = FleetEnv()

    score = []  # rolling window of the last 100 episode rewards
    times_trained = 0
    times_reach_goal = 0
    reward_chart = []

    for k in range(num_episodes):
        # ---- Roll out one episode -------------------------------------
        done = False
        observation = env.reset()
        episode_series = []
        reward_acum = []
        time_of_day = 0
        reward = 0.0  # defensive: defined even if the env starts done

        while not done:
            np_observation = np.expand_dims(get_state_repr(observation), axis=0)
            observation_tensor = torch.FloatTensor(np_observation)
            action_probs = net(observation_tensor)
            # Exploration: dropout is deliberately kept active at sampling time
            action_probs = F.dropout(action_probs, p=0.3, training=True)
            action_probs = F.softmax(action_probs, dim=1)
            m = Categorical(action_probs)
            action = m.sample()
            log_prob = m.log_prob(action)

            if k % 1000 == 0 and debug:
                print("Time of day=" + str(time_of_day) + ", on state=" +
                      str(get_state_as_pair(observation)) + ", selected action=" +
                      str(get_state_as_pair(get_state_from_int(action.item()))) + " ,")
            time_of_day += 1

            observation, reward, done, info = env.step(action.item())
            if k % 1000 == 0 and debug:
                print("new state=" + str(get_state_as_pair(observation)) +
                      ", rewards=" + str(reward) + ", done=" + str(done))

            episode_series.append(
                [get_state_repr(observation), action, log_prob, reward, done, info])
            reward_acum.append(reward)

        # ---- Book-keeping ---------------------------------------------
        episode_reward = np.sum(reward_acum)
        reward_chart.append(episode_reward)
        if len(score) < 100:
            score.append(episode_reward)
        else:
            score[k % 100] = episode_reward
        if debug:  # original condition was `k % 1 == 0 and DEBUG` (always k)
            print("Episode {} finished after {} timesteps with r={}. "
                  "Running score: {}. Times trained: {}. Times reached goal: {}."
                  .format(k, len(episode_series), episode_reward, np.mean(score),
                          times_trained, times_reach_goal))
            times_trained = 0
            times_reach_goal = 0

        # ---- REINFORCE update -----------------------------------------
        if episode_series:
            # Discounted return G_t computed right-to-left in O(n); the
            # original recomputed the tail sum per step, which was O(n^2)
            # (mathematically the same value).
            returns = [0.0] * len(episode_series)
            G = 0.0
            for i in range(len(episode_series) - 1, -1, -1):
                G = episode_series[i][3] + gamma * G  # index 3 = reward
                returns[i] = G

            policy_loss = [G_t * -step[2]  # index 2 = log_prob
                           for step, G_t in zip(episode_series, returns)]

            # BUG FIX: zero the gradients before backprop; without this the
            # gradients of every previous episode accumulate into each update
            # (zero_grad existed only in commented-out code).
            optimizer.zero_grad()
            loss = torch.cat(policy_loss).sum()
            loss.backward()
            optimizer.step()
            times_trained += 1

        # `reward` is the final reward of the episode: > 0 means goal reached
        if reward > 0.0:
            times_reach_goal += 1

    return reward_chart, generate_name(exp_conf)
# NOTE(review): this chunk starts mid-script -- `gammas`, `it`, `lr`,
# `experiments` and `split` must be defined above this view; indentation
# below is reconstructed. Verify against the full file.
for gamma in gammas:
    # One experiment configuration per gamma value
    cf = {
        'net': "pi_net",
        'iterations': it,
        'gamma': gamma,
        'lr': lr,
        'DEBUG': False,
        'num_of_agents': 20
    }
    experiments.append(cf)

results = []
names = []
for exp in experiments:
    # Timestamped banner before and after each experiment run
    print(
        str(datetime.datetime.now()) + " test starts \t" + generate_name(exp))
    result, name = start_experiment(exp)
    print(str(datetime.datetime.now()) + " test ends \t" + generate_name(exp))
    results.append(result)
    names.append(name)
#print("results")
#print(results)

#Plotting
pref = str(datetime.datetime.now())
res = split(results, 2)  #splits in sets of five
nam = split(names, 2)
i = 0
for results, names in zip(res, nam):
    i += 1
    # NOTE(review): loop body continues beyond this chunk (plotting,
    # presumably) -- this fragment is truncated here.
def create_file(content=''):
    """Create a .txt file with a generated name in the current directory.

    Args:
        content: text written into the new file (empty by default).

    Returns:
        The generated file name (with the '.txt' extension).
    """
    filename = utils.generate_name() + '.txt'
    with open(filename, 'w') as file:
        file.write(content)
    # BUG FIX: the message printed the literal placeholder "(unknown)"
    # instead of the file that was actually created; report the real name.
    print(f'File {filename} was created in {os.getcwd()} directory.')
    return filename