import numpy as np


def initialize_buffer_with_single_test_tuple(size):
    """Build a replay buffer holding a single terminal (state, action, reward,
    next state, terminal) tuple, found by scanning all states of grid_100 for
    the given grid size."""
    grid = Grid(filename="levels/" + str(size) + "x" + str(size) + "/grid_100.txt")
    result = set()
    states = grid.generate_all_states()
    find = False
    print("Start state: ")
    print(grid.start_state.spaces)
    for state in states:
        q_state = QState(state)
        for action in range((size - 1) * 4):
            # Decode the flat action index into a (color, direction) tuple.
            color = action // 4 + 1
            direction = action % 4
            action_tu = (color, direction)
            new_state, reward, terminal = q_state.step(action_tu)
            if terminal:
                sars = (q_state, action, reward, new_state, terminal)
                print("Old state: ")
                print(q_state.state.spaces)
                print("New state")
                print(new_state.state.spaces)
                print("Action: ", action_tu)
                print("Action index: ", action)
                print("Old state winning: ", q_state.is_winning())
                print("New state winning: ", new_state.is_winning())
                result.add(sars)
                find = True
                break
        if find:
            break
    return list(result)
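# The flat action index used throughout this file packs a color and a direction:
# actions 0-3 are the four directions for color 1, actions 4-7 are for color 2,
# and so on, giving 4 * (size - 1) actions in total. A small illustrative helper
# (not used by the functions here, which inline the same arithmetic):
def decode_action(action):
    """Map a flat action index to the (color, direction) tuple expected by QState.step."""
    return action // 4 + 1, action % 4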
def initialize_buffer_with_all_tuples(size, mlp):
    """Build a replay buffer holding every (state, action, reward, next state,
    terminal) tuple for grid_1 of the given grid size. The mlp argument is not
    used in this function."""
    grid = Grid(filename="levels/" + str(size) + "x" + str(size) + "/grid_1.txt")
    result = set()
    states = grid.generate_all_states()
    for state in states:
        q_state = QState(state)
        # Don't store the winning state in the replay buffer.
        if q_state.is_winning():
            print("Don't include me!")
            continue
        for action in range((size - 1) * 4):
            # Decode the flat action index into a (color, direction) tuple.
            color = action // 4 + 1
            direction = action % 4
            action_tu = (color, direction)
            new_state, reward, terminal = q_state.step(action_tu)
            sars = (q_state, action, reward, new_state, terminal)
            result.add(sars)
    print("Initial replay buffer size: ", len(result))
    return list(result)
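# A minimal sketch of drawing a training minibatch from the replay buffer built
# above. sample_minibatch is a hypothetical helper, not part of the existing
# code; it relies only on the list of (q_state, action, reward, new_state,
# terminal) tuples returned by the initializers.
import random


def sample_minibatch(replay_buffer, batch_size=32):
    """Uniformly sample up to batch_size transitions from the replay buffer."""
    return random.sample(replay_buffer, min(batch_size, len(replay_buffer)))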
def train(file, size, Q=None, gamma=0.9, num_epochs=3):
    """Tabular Q-learning over a single level: each epoch sweeps every
    (state, action) pair of the grid and applies the Q-learning update."""
    # Use None as the default to avoid a shared mutable default argument;
    # a dict() default would persist (and accumulate entries) across calls.
    if Q is None:
        Q = dict()
    print("Train ", file)
    print("Epochs: ", num_epochs)
    grid = Grid(filename=file)
    epsilon = 1.0  # Currently unused: the loops below sweep every (state, action) pair.
    print("Generate all states!")
    all_states = grid.generate_all_states()
    state_size = len(all_states)
    print("All states: ", state_size)
    lr = 0.5
    winning_states = 0
    action_size = 4 * (size - 1)  # size - 1 colors (4 in a 5x5 grid), 4 directions each
    iteration = 0
    for epoch in range(num_epochs):
        print("Epoch: ", epoch)
        for state in all_states:
            for action in range(action_size):
                if iteration % 1000 == 0:
                    print("iteration ", iteration)
                if state not in Q:
                    Q[state] = np.zeros(action_size)
                # Decode the flat action index into a (color, direction) tuple.
                color = action // 4 + 1
                direction = action % 4
                action_tu = (color, direction)

                def get_next_tuple():
                    if state.is_viable_action(action_tu):
                        new_state = state.next_state(action_tu)
                        if new_state.is_winning():
                            reward = 1000000000
                            return new_state, reward
                        # Reward completed flows, penalize remaining empty cells.
                        flows = new_state.completed_flow_count()
                        zeroes = new_state.num_zeroes_remaining()
                        reward = 1000 * flows - 5 * zeroes
                        return new_state, reward
                    # Non-viable actions are heavily penalized and leave the state unchanged.
                    return state, -1000000

                new_state, reward = get_next_tuple()
                if new_state not in Q:
                    Q[new_state] = np.zeros(action_size)
                # Standard Q-learning update.
                Q[state][action] += lr * (reward + gamma * np.max(Q[new_state]) - Q[state][action])
                if new_state.is_winning():
                    print("Winning State!")
                    winning_states += 1
                iteration += 1
    print("Number of winning states: ", winning_states)
    return Q
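# A sketch of how the learned table might be used once train() has run: pick
# the action with the highest Q-value for a state. greedy_action is a
# hypothetical helper; it assumes Q maps each state to a NumPy array of length
# 4 * (size - 1), as built by train() above.
def greedy_action(Q, state):
    """Return the (color, direction) tuple with the highest Q-value for state."""
    best = int(np.argmax(Q[state]))
    return best // 4 + 1, best % 4


# Example usage (assumes a levels/5x5/grid_1.txt file as in the functions above):
# grid = Grid(filename="levels/5x5/grid_1.txt")
# Q = train("levels/5x5/grid_1.txt", 5)
# some_state = next(iter(grid.generate_all_states()))
# print(greedy_action(Q, some_state))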