Example #1
def setup(game, pim, pits, stones, config=None):
    """Entry Point"""

    pbar = tqdm(range(10000))
    for i in pbar:
        pbar.set_description("INITIALIZING GAME %s" % i)
    print()

    if config:
        pbar = tqdm(range(10000))
        for _ in pbar:
            pbar.set_description("GETTING CONFIGURATION FROM {0}".format(config))
        print()
        import yaml
        with open(config, 'r') as stream:
            try:
                config_dict = yaml.safe_load(stream)['config']
                pim = config_dict.get('pim', pim)
                pits = config_dict.get('pits', pits)
                stones = config_dict.get('stones', stones)
                game = config_dict.get('game', game)
            except yaml.YAMLError as exc:
                print("Something went wrong with reading your config file. \n {}".format(exc))

    print(colored("Setting up game with the following config: \n Game Type: {0} "
                  "\n Board Pits: {1} \n Board Stones: {2} \n Penalize Invalid Moves: {3}"
                  .format(BoardConfig.GAME_TYPE_MAP.get(game), pits, stones, pim), "yellow"))
    print()
    if click.confirm('Do you want to continue with this configuration?', abort=True):
        print(text2art("AYO \t \t \t \t OLOPON"))

    board = Board(pim=pim, pits=pits, stones=stones)

    if game == "hvh":
        player_one_name = click.prompt("Enter a name for Player 1 ")
        player_two_name = click.prompt("Enter a name for Player 2 ")
        player_one = Human(name=player_one_name, pits=BoardConfig.PLAYER_ONE_PITS, store=BoardConfig.PLAYER_ONE_STORE)
        player_two = Human(name=player_two_name, pits=BoardConfig.PLAYER_TWO_PITS, store=BoardConfig.PLAYER_TWO_STORE)
        game = Game(players=[player_one, player_two], board=board)
    elif game == "hvc":
        player_one_name = click.prompt("Enter a name for the human player")
        player_one = Human(name=player_one_name, pits=BoardConfig.PLAYER_ONE_PITS, store=BoardConfig.PLAYER_ONE_STORE)
        player_two = VectorPlayer(name=generate_name(), pits=BoardConfig.PLAYER_TWO_PITS, store=BoardConfig.PLAYER_TWO_STORE)
        game = Game(players=[player_one, player_two], board=board)
    else:
        player_one = VectorPlayer(name=generate_name(), pits=BoardConfig.PLAYER_ONE_PITS, store=BoardConfig.PLAYER_ONE_STORE)
        player_two = VectorPlayer(name=generate_name(), pits=BoardConfig.PLAYER_TWO_PITS, store=BoardConfig.PLAYER_TWO_STORE)
        game = Game(players=[player_one, player_two], board=board)

    if click.confirm('\n\n{} vs {}. \n Start Game'.format(player_one.name.upper(), player_two.name.upper()), abort=True):
        game.move(player_one)
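The optional config argument points at a YAML file whose top-level config mapping supplies the game, pits, stones and pim keys read above. A minimal usage sketch, assuming the setup function above is importable and using a hypothetical file name:

# Hedged usage sketch: the YAML layout mirrors the keys the snippet reads
# (config -> game / pits / stones / pim); "ayo_config.yaml" is a hypothetical name.
sample_config = """
config:
  game: hvh
  pits: 6
  stones: 4
  pim: true
"""

with open("ayo_config.yaml", "w") as fh:
    fh.write(sample_config)

setup(game="hvh", pim=True, pits=6, stones=4, config="ayo_config.yaml")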
Example #2
    def action_paste(self, resource, context, form):
        # Check there is something to paste
        cut, paths = context.get_cookie('ikaaro_cp', datatype=CopyCookie)
        if len(paths) == 0:
            context.message = messages.MSG_NO_PASTE
            return

        # Paste
        target = resource
        pasted = []
        not_allowed = []
        for path in paths:
            # Check the source resource still exists
            source = target.get_resource(path, soft=True)
            if source is None:
                continue

            # If cut&paste in the same place, do nothing
            if cut is True:
                if target == source.parent:
                    pasted.append(source.name)
                    continue

            name = generate_name(source.name, list(target.get_names()),
                                 '_copy_')
            if cut is True:
                # Cut&Paste
                try:
                    target.move_resource(path, name)
                except ConsistencyError:
                    not_allowed.append(source.name)
                    continue
                else:
                    pasted.append(name)
            else:
                # Copy&Paste
                try:
                    target.copy_resource(path, name)
                except ConsistencyError:
                    not_allowed.append(source.name)
                    continue
                else:
                    pasted.append(name)

        # Cut, clean cookie
        if cut is True:
            context.del_cookie('ikaaro_cp')

        message = []
        if pasted:
            resources = ', '.join(pasted)
            message.append(
                messages.MSG_RESOURCES_PASTED.gettext(resources=resources))
        if not_allowed:
            resources = ', '.join(not_allowed)
            msg = messages.MSG_RESOURCES_NOT_PASTED.gettext(
                resources=resources)
            message.append(msg)

        context.message = message
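Both paste handlers lean on generate_name(name, used_names, suffix) to pick a name that does not collide with the target folder's existing resources. A minimal sketch of such a helper, inferred only from the call sites above and not taken from ikaaro itself:

def generate_name(name, used_names, suffix='_copy_'):
    # Hedged sketch: keep the original name when it is free, otherwise append
    # the suffix plus an increasing counter until the result is unused.
    used = set(used_names)
    if name not in used:
        return name
    index = 1
    while '%s%s%s' % (name, suffix, index) in used:
        index += 1
    return '%s%s%s' % (name, suffix, index)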
Example #3
    def action_paste(self, resource, context, form):
        # Check there is something to paste
        cut, paths = context.get_cookie('ikaaro_cp', datatype=CopyCookie)
        if len(paths) == 0:
            context.message = messages.MSG_NO_PASTE
            return

        # Paste
        target = resource
        pasted = []
        not_allowed = []
        for path in paths:
            # Check the source resource still exists
            source = target.get_resource(path, soft=True)
            if source is None:
                continue

            # If cut&paste in the same place, do nothing
            if cut is True:
                if target == source.parent:
                    pasted.append(source.name)
                    continue

            name = generate_name(source.name, target.get_names(), '_copy_')
            if cut is True:
                # Cut&Paste
                try:
                    target.move_resource(path, name)
                except ConsistencyError:
                    not_allowed.append(source.name)
                    continue
                else:
                    pasted.append(name)
            else:
                # Copy&Paste
                try:
                    target.copy_resource(path, name)
                except ConsistencyError:
                    not_allowed.append(source.name)
                    continue
                else:
                    pasted.append(name)

        # Cut, clean cookie
        if cut is True:
            context.del_cookie('ikaaro_cp')

        message = []
        if pasted:
            resources = ', '.join(pasted)
            message.append(messages.MSG_RESOURCES_PASTED(resources=resources))
        if not_allowed:
            resources = ', '.join(not_allowed)
            msg = messages.MSG_RESOURCES_NOT_PASTED(resources=resources)
            message.append(msg)

        context.message = message
def start_experiment(exp_conf):

    number_of_agents = exp_conf['num_of_agents']
    episodes = exp_conf['iterations']
    action_to_env = Queue(number_of_agents)
    agents = {}
    # Start Queues
    sending_queues = {}
    results = {}
    for agent_id in range(number_of_agents):
        agent_id_sending_queue = Queue(1)
        sending_queues[agent_id] = agent_id_sending_queue
        results[agent_id] = Queue()
        agents[agent_id] = Agent(agent_id, action_to_env,
                                 sending_queues[agent_id], episodes, exp_conf,
                                 results[agent_id])

    # Start Env
    env = FleetEnv(action_to_env, sending_queues, number_of_agents, episodes,
                   exp_conf["DEBUG"])

    exp_name = generate_name(exp_conf)

    for agent_id in range(number_of_agents):
        p = Process(target=start_agent, args=(env, agents[agent_id]))
        p.start()

    env.start()

    # Aggregate results
    res = None
    for agent_id in range(number_of_agents):
        if res is None:
            res = np.array(results[agent_id].get())
        else:
            res += np.array(results[agent_id].get())
        #print("res")
        #print(res)

    return res, exp_name
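A hedged usage sketch for the multi-process start_experiment above. Only num_of_agents, iterations and DEBUG are keys this snippet reads directly; whatever else Agent, FleetEnv or generate_name expect from exp_conf is not shown here.

if __name__ == '__main__':
    # Hypothetical configuration; real experiments will likely need more keys.
    conf = {'num_of_agents': 4, 'iterations': 100, 'DEBUG': False}
    aggregated, name = start_experiment(conf)
    print(name)
    print(aggregated)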
def start_experiment(exp_conf):
    NUM_EPISODES = exp_conf['iterations']
    DEBUG = exp_conf["DEBUG"]
    GAMMA = exp_conf['gamma']
    net = exp_conf['net']
    optimizer = optim.RMSprop(net.parameters(), lr=exp_conf['lr'])

    env = FleetEnv()

    score = []
    times_trained = 0
    times_reach_goal = 0

    reward_chart = []
    for k in range(NUM_EPISODES):
        done = False
        observation = env.reset()
        # observation, reward, done, info = env.step(env.action_space.sample()) # take a random action

        episode_series = []
        reward_acum = []
        time_of_day = 0
        while not done:
            # Get action from pi
            # action = env.action_space.sample()
            #np_observation = np.array(get_state_repr(observation))
            np_observation = get_state_repr(observation)
            # np_observation = np.expand_dims(np_observation, axis=0)
            np_observation = np.expand_dims(np_observation, axis=0)
            observation_tensor = torch.FloatTensor(np_observation)

            action_probs = net(observation_tensor)
            action_probs_orig = action_probs

            # FOR EXPLORATION:
            action_probs = F.dropout(action_probs, p=0.3, training=True)

            action_probs = F.softmax(action_probs, dim=1)

            m = Categorical(action_probs)
            action = m.sample()

            log_prob = m.log_prob(action)

            # break
            # Execute action in environment.

            if k % 1000 == 0 and DEBUG:
                #print("action_probs_orig ")
                #print(action_probs_orig)
                print(
                    "Time of day=" + str(time_of_day) + ", on state=" +
                    str(get_state_as_pair(observation)) +
                    ", selected action=" +
                    str(get_state_as_pair(get_state_from_int(action.item()))) +
                    " ,")

            time_of_day += 1

            observation, reward, done, info = env.step(action.item())

            if k % 1000 == 0 and DEBUG:
                print("new state=" + str(get_state_as_pair(observation)) +
                      ", rewards=" + str(reward) + ", done=" + str(done))

            # if done and reward != 1.0:
            # 	if observation == 5 or observation == 7 or observation == 11 or observation == 12:
            # 		reward = -1.0

            step_data = [
                get_state_repr(observation), action, log_prob, reward, done,
                info
            ]
            episode_series.append(step_data)
            last_reward = reward
            reward_acum.append(reward)

        # END WHILE SIMULATION

        reward_chart.append(np.sum(reward_acum))

        if len(score) < 100:
            score.append(np.sum(reward_acum))
        else:
            score[k % 100] = np.sum(reward_acum)

        if k % 1 == 0 and DEBUG:
            print(
                "Episode {} finished after {} timesteps with r={}. Running score: {}. Times trained: {}. Times reached goal: {}."
                .format(k, len(episode_series), np.sum(reward_acum),
                        np.mean(score), times_trained, times_reach_goal))
            times_trained = 0
            times_reach_goal = 0
            #print_table()
        # print("Game finished. " + "-" * 5)
        # print(len(episode_series))
        #         for param in net.parameters():
        #             print(param.data)

        # break
        # Training:
        # episode_series.reverse()
        policy_loss = []
        rewards_list = []
        for i in range(len(episode_series)):
            j = i
            G = 0
            #alpha = 1 / len(episode_series)

            # get the log_prob of the last state:
            gamma_cum = 1

            while j < len(episode_series):
                [observation, action, log_prob, reward, done,
                 info] = episode_series[j]
                G = G + reward * gamma_cum

                gamma_cum = gamma_cum * GAMMA
                j = j + 1

            [observation, action, log_prob, reward, done,
             info] = episode_series[i]

            policy_loss.append(G * -log_prob)

            rewards_list.append(G)

        # Zero stale gradients before back-propagating this episode's policy loss.
        optimizer.zero_grad()
        policy_loss = torch.cat(policy_loss).sum()
        policy_loss.backward()
        optimizer.step()

        times_trained = times_trained + 1

        if reward > 0.0:
            times_reach_goal = times_reach_goal + 1

    return reward_chart, generate_name(exp_conf)
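The nested loop above recomputes the discounted return G for every step, which is quadratic in episode length. An equivalent single reverse pass, shown as a stand-alone helper that is not part of the original snippet:

def discounted_returns(episode_series, gamma):
    # Hedged helper: one backward sweep yielding the same per-step returns as the
    # nested loop above, using G_t = r_t + gamma * G_{t+1}.
    returns = []
    G = 0.0
    for _, _, _, reward, _, _ in reversed(episode_series):
        G = reward + gamma * G
        returns.insert(0, G)
    return returns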
            for gamma in gammas:
                cf = {
                    'net': "pi_net",
                    'iterations': it,
                    'gamma': gamma,
                    'lr': lr,
                    'DEBUG': False,
                    'num_of_agents': 20
                }
                experiments.append(cf)

results = []
names = []
for exp in experiments:
    print(
        str(datetime.datetime.now()) + " test starts \t" + generate_name(exp))
    result, name = start_experiment(exp)
    print(str(datetime.datetime.now()) + " test ends \t" + generate_name(exp))
    results.append(result)
    names.append(name)

#print("results")
#print(results)

#Plotting
pref = str(datetime.datetime.now())
res = split(results, 2)  # group results for plotting
nam = split(names, 2)
i = 0
for results, names in zip(res, nam):
    i += 1
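The driver calls a split helper that is not shown in these snippets, and the call alone does not reveal whether its second argument is a chunk size or a number of chunks. A hypothetical chunk-size version, purely for illustration:

def split(seq, n):
    # Hypothetical sketch: break seq into consecutive groups of n items.
    return [seq[i:i + n] for i in range(0, len(seq), n)]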
Example #7
import os
import utils  # local module providing generate_name()


def create_file(content=''):
    filename = utils.generate_name() + '.txt'
    with open(filename, 'w') as file:
        file.write(content)
    print(f'File {filename} was created in the {os.getcwd()} directory.')
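Usage sketch, assuming a utils module exposing generate_name() is importable as in the snippet above:

create_file()                 # empty file with a generated name
create_file('hello, world')   # file containing the given text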