Example #1
def initialize_buffer_with_single_test_tuple(size):
    """Scan all states of the test grid and return a replay buffer holding a
    single terminal (state, action, reward, new_state, terminal) tuple."""
    grid = Grid(filename="levels/" + str(size) + "x" + str(size) +
                "/grid_100.txt")
    result = set()
    states = grid.generate_all_states()
    found = False
    print("Start state: ")
    print(grid.start_state.spaces)
    for state in states:
        q_state = QState(state)
        for action in range((size - 1) * 4):

            # Decode the flat action index into a (color, direction) pair:
            # actions are grouped four per color, one per direction.
            color = action // 4 + 1
            direction = action % 4
            action_tu = (color, direction)

            new_state, reward, terminal = q_state.step(action_tu)

            if terminal:
                sars = (q_state, action, reward, new_state, terminal)
                print("Old state: ")
                print(q_state.state.spaces)
                print("New state")
                print(new_state.state.spaces)

                print("Action: ", action_tu)
                print("Action index: ", action)
                print("Old state winning: ", q_state.is_winning())
                print("New state winning: ", new_state.is_winning())
                result.add(sars)
                found = True
                break
        if found:
            break
    return list(result)
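The flat action index used in these examples packs a color and a direction into one integer: four directions per color, colors numbered from 1. Below is a minimal round-trip sketch for a 5x5 grid; the encode helper is hypothetical and not part of the original code.

size = 5

def decode(action):
    # Same decoding as in the examples above.
    return action // 4 + 1, action % 4

def encode(color, direction):  # hypothetical inverse helper
    return (color - 1) * 4 + direction

for action in range((size - 1) * 4):
    assert encode(*decode(action)) == action

print(decode(0), decode(7))  # (1, 0) and (2, 3)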
Example #2
def initialize_buffer_with_all_tuples(size, mlp):
    """Return a replay buffer containing every (state, action, reward, new_state,
    terminal) tuple reachable from the non-winning states of the training grid.
    The mlp argument is accepted but not used inside this function."""
    grid = Grid(filename="levels/" + str(size) + "x" + str(size) +
                "/grid_1.txt")
    result = set()
    states = grid.generate_all_states()
    for state in states:
        q_state = QState(state)

        # Don't store the winning state in the replay buffer.
        if q_state.is_winning():
            print("Don't include me!")
            continue

        for action in range((size - 1) * 4):

            # Decode the flat action index into a (color, direction) pair.
            color = action // 4 + 1
            direction = action % 4
            action_tu = (color, direction)

            new_state, reward, terminal = q_state.step(action_tu)
            sars = (q_state, action, reward, new_state, terminal)
            result.add(sars)
    print("Initial replay buffer size: ", len(result))
    return list(result)
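Because every non-winning state is paired with every action, the buffer holds (number of non-winning states) * (size - 1) * 4 transitions. Below is a minimal usage sketch, assuming the project's level files and the Grid/QState classes are importable; the batch size of 32 and passing mlp=None are arbitrary choices for illustration.

import random

buffer = initialize_buffer_with_all_tuples(5, mlp=None)  # mlp is unused inside
batch = random.sample(buffer, min(32, len(buffer)))
for q_state, action, reward, new_state, terminal in batch:
    pass  # hand each transition to whatever learner consumes the buffer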
Example #3
import numpy as np


def train(file, size, Q=None, gamma=0.9, num_epochs=3):
    """Tabular Q-learning sweep over every (state, action) pair of the grid."""
    # Avoid a mutable default argument; callers may still pass in an existing table.
    if Q is None:
        Q = dict()
    print("Train ", file)
    print("Epochs: ", num_epochs)
    grid = Grid(filename=file)
    epsilon = 1.0  # defined but unused: the sweep below is exhaustive, not epsilon-greedy

    print("Generate all states!")
    all_states = grid.generate_all_states()
    state_size = len(all_states)
    print("All states: ", state_size)

    lr = 0.5
    winning_states = 0
    action_size = 4 * (size - 1)  # number of colors is 4 in a 5x5 grid

    iteration = 0
    for epoch in range(num_epochs):
        print("Epoch: ", epoch)
        for state in all_states:
            for action in range(action_size):

                if iteration % 1000 == 0:
                    print("iteration ", iteration)

                if state not in Q:
                    Q[state] = np.zeros(action_size)

                # Decode the flat action index into a (color, direction) pair.
                color = action // 4 + 1
                direction = action % 4
                action_tu = (color, direction)

                def get_next_tuple():
                    """Apply the action and return (new_state, reward)."""
                    if state.is_viable_action(action_tu):
                        new_state = state.next_state(action_tu)
                        if new_state.is_winning():
                            reward = 1000000000
                            return new_state, reward
                        # Reward each completed flow, penalise remaining empty cells.
                        flows = new_state.completed_flow_count()
                        zeroes = new_state.num_zeroes_remaining()
                        reward = -5 * zeroes + 1000 * flows
                        return new_state, reward
                    # A non-viable action is heavily penalised and leaves the state unchanged.
                    reward = -1000000
                    new_state = state
                    return new_state, reward

                new_state, reward = get_next_tuple()

                if new_state not in Q:
                    Q[new_state] = np.zeros(action_size)

                # Standard Q-learning update toward the bootstrapped target.
                Q[state][action] += lr * (reward + gamma * np.max(Q[new_state]) - Q[state][action])

                if new_state.is_winning():
                    print("Winning State!")
                    winning_states += 1
                iteration += 1

    print("Number of winning states: ", winning_states)
    return Q
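The returned dictionary maps each state to a vector of action values, so a greedy policy can be read off with an argmax. Below is a minimal sketch; the greedy_action helper is not part of the original code.

import numpy as np

def greedy_action(Q, state):
    # Pick the highest-valued action for a known state and decode it.
    action = int(np.argmax(Q[state]))
    return action // 4 + 1, action % 4  # (color, direction)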