Example #1
def __init__(self):
    self.App = tetris.TetrisApp()
    self.App.run()
    self.r = {}  # one weight per board feature, initialised to 1 below
    self.rows = self.App.get_rows()
    self.cols = self.App.get_cols()
    for i in range(2 * self.cols + 3):
        self.r["r" + str(i)] = 1
    self.alpha = 0.9
    self.N = 100
    self.M = 10
    self.samples = []
    self.number_stones = self.App.get_number_stones()
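The alpha, N and M fields read like cross-entropy-method hyperparameters (N candidate weight vectors per iteration, the top M kept as the elite, alpha smoothing the update), though the update rule itself is not shown in this snippet. A minimal sketch under that assumption, with sample_score as a hypothetical stand-in objective:

import numpy as np

def sample_score(w):
    # Stand-in objective for illustration; the real score would come from
    # playing a game of Tetris with weight vector w.
    return -np.sum((w - 0.5) ** 2)

def cross_entropy_step(mu, sigma, N=100, M=10, alpha=0.9):
    # Draw N candidate weight vectors from the current Gaussian.
    candidates = np.random.randn(N, mu.size) * sigma + mu
    scores = np.array([sample_score(w) for w in candidates])
    elite = candidates[np.argsort(scores)[-M:]]  # keep the top-M samples
    # Smooth the distribution toward the elite statistics.
    mu = alpha * elite.mean(axis=0) + (1 - alpha) * mu
    sigma = alpha * elite.std(axis=0) + (1 - alpha) * sigma
    return mu, sigma

mu, sigma = np.zeros(5), np.ones(5)  # hypothetical 5-dimensional weights
for _ in range(20):
    mu, sigma = cross_entropy_step(mu, sigma)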
Example #2
File: playoffs.py  Project: tangbj/tetris
def fight(wt_a, wt_b):

    wins_a = 0
    wins_b = 0

    # number_of_rounds is a module-level constant in playoffs.py.
    for _ in range(number_of_rounds):
        # Default both scores to 0 so a crashed game counts as a loss
        # instead of leaving score_a/score_b unbound in the comparison below.
        score_a = 0
        score_b = 0

        try:
            app = tetris.TetrisApp(wt_a, True)
            app.run()
            score_a = app.score
        except ValueError:
            print(app.board)
            print(app.block)

        try:
            app = tetris.TetrisApp(wt_b, True)
            app.run()
            score_b = app.score
        except ValueError:
            print(app.board)
            print(app.block)

        if score_a > score_b:
            wins_a += 1
            print("Player A wins with score of {}".format(score_a))
        else:  # ties count for player B
            wins_b += 1
            print("Player B wins with score of {}".format(score_b))

    if wins_a > wins_b:
        winner = "A"
    else:
        winner = "B"

    print("Player {} advances".format(winner))
    print("************************")
    return wt_a if winner == "A" else wt_b
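A hedged usage sketch: reducing a pool of candidate weight vectors to one champion with repeated fight calls. The pairing scheme, pool size and number_of_rounds value are illustrative assumptions, not taken from playoffs.py:

import numpy as np

number_of_rounds = 5                                 # assumed constant
contenders = [np.random.rand(8) for _ in range(4)]   # hypothetical pool

# Single elimination: pair off survivors until one weight vector remains.
while len(contenders) > 1:
    contenders = [fight(contenders[i], contenders[i + 1])
                  for i in range(0, len(contenders), 2)]

champion = contenders[0]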
Example #3
def train_neural_network(x):
    # `y`, `states`, `actions`, `batch_size` and `neural_network_model`
    # are module-level globals in the original script.
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    hm_epochs = 40

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            batch_index = 0
            # Mini-batch pass over the recorded (state, action) pairs.
            for _ in range(len(states) // batch_size):
                epoch_x = states[batch_index:batch_index + batch_size]
                epoch_y = actions[batch_index:batch_index + batch_size]
                batch_index += batch_size
                _, c = sess.run([optimizer, cost],
                                feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:',
                  epoch_loss)

        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        # Note: this measures accuracy on the training data itself.
        print('Accuracy:', accuracy.eval({x: states, y: actions}))

        game = tetris.TetrisApp()
        game.init_game()
        # Play on with the trained network until the game ends.
        while True:
            state = game.readboard(game.prep_current_board())
            action = prediction.eval(session=sess, feed_dict={x: [state]})
            print(action)
            # Turn the network output into a one-hot action vector.
            max_index = np.argmax(action)
            action = [0, 0, 0, 0, 0]
            action[max_index] = 1
            _, __, ___ = game.step_act(action)
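This example is written against the TensorFlow 1.x graph API (tf.Session, tf.train.AdamOptimizer). Under TensorFlow 2 the same script can still run unchanged through the v1 compatibility layer:

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()  # restores graph mode, tf.Session and tf.train.*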
Example #4
File: main.py  Project: tangbj/tetris
def simulate_for_results(wt_arr):
    try:
        # Run the game and get back the result.
        app = tetris.TetrisApp(wt_arr, True)
        app.run()
        print(app.score)
    except ValueError:
        # Dump the state that triggered the error before continuing;
        # note that app.score is still written to the file below.
        print("Error")
        print(app.board)
        print(app.block.value)
        print(app.block.x)
        print(app.block.y)

    # Append the score and the weight vector to the results file.
    with open("output.txt", "a") as f:
        f.write("{},{}\n".format(app.score, ",".join(wt_arr.astype(str))))

    return app.score
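A hedged usage sketch; the 8-dimensional weight vector is an illustrative assumption, but wt_arr does need to be a NumPy array for the astype(str) call above to work:

import numpy as np

score = simulate_for_results(np.random.rand(8))  # hypothetical weights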
Example #5
import tetris
import neuralnetwork as NN
import losses
import numpy as np

em = tetris.TetrisApp(10, 20, 750, True, 40, 30 * 10)
net = NN.DQN(em.get_state_size(), 1, losses.MSE_loss)
em.pcrun()
em.reset()
done = False

gene = np.loadtxt("evolution\\generation16.csv", delimiter=',')

# Unpack the flat genome into the network's weights and biases, layer by layer.
index = 0
net.L1.W = gene[index:index + net.L1.W.size].reshape(net.L1.W.shape)
index += net.L1.W.size
net.L1.B = gene[index:index + net.L1.B.size].reshape(net.L1.B.shape)
index += net.L1.B.size
net.L2.W = gene[index:index + net.L2.W.size].reshape(net.L2.W.shape)
index += net.L2.W.size
net.L2.B = gene[index:index + net.L2.B.size].reshape(net.L2.B.shape)
index += net.L2.B.size
net.L3.W = gene[index:index + net.L3.W.size].reshape(net.L3.W.shape)
index += net.L3.W.size
net.L3.B = gene[index:index + net.L3.B.size].reshape(net.L3.B.shape)

while not done:
    next_state = em.get_next_states()
    predicted_qs = {}

    # Score every candidate placement with the network; this loop is truncated
    # in the source and completed here from the identical loop in Example #9.
    for i, (*data, ) in enumerate(next_state):
        predicted_qs[(data[0], data[1])] = net.f_pass(
            np.array([next_state[data[0], data[1]]]).T)[0, 0]

    best_move = max(predicted_qs, key=predicted_qs.get)
    reward, done = em.pcplace(best_move[0], best_move[1])
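The layer-by-layer genome unpacking above recurs verbatim in Examples #6 and #9; a minimal helper sketch that collapses it into a loop, assuming only that each layer exposes W and B arrays:

def load_genome(net, gene):
    # Copy a flat parameter vector into the network's layers, in order.
    index = 0
    for layer in (net.L1, net.L2, net.L3):
        for name in ("W", "B"):
            param = getattr(layer, name)
            setattr(layer, name,
                    gene[index:index + param.size].reshape(param.shape))
            index += param.size
    return net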
Example #6
import tetris
import neuralnetwork as NN
import losses
import numpy as np
import matplotlib.pyplot as plt

N = 655   # number of saved generations to evaluate
N2 = 3    # repeat games per generation
score = np.zeros((N, N2))
for i in range(N):
    em = tetris.TetrisApp(8, 16, 0.01 * 750, False, 40, 30 * 100)
    net = NN.DQN(em.get_state_size(), 1, losses.MSE_loss)

    gene = np.loadtxt("data\\evolutionNNstate168\\generation" + str(i) + ".csv",
                      delimiter=',')

    # Unpack the flat genome into the network, layer by layer.
    index = 0
    net.L1.W = gene[index:index + net.L1.W.size].reshape(net.L1.W.shape)
    index += net.L1.W.size
    net.L1.B = gene[index:index + net.L1.B.size].reshape(net.L1.B.shape)
    index += net.L1.B.size
    net.L2.W = gene[index:index + net.L2.W.size].reshape(net.L2.W.shape)
    index += net.L2.W.size
    net.L2.B = gene[index:index + net.L2.B.size].reshape(net.L2.B.shape)
    index += net.L2.B.size
    net.L3.W = gene[index:index + net.L3.W.size].reshape(net.L3.W.shape)
    index += net.L3.W.size
    net.L3.B = gene[index:index + net.L3.B.size].reshape(net.L3.B.shape)

    for j in range(N2):
        em.pcrun()
        em.reset()
        # Truncated in the source from here; the reconstruction below plays
        # one game greedily (as in Example #5) and records the final score.
        done = False
        while not done:
            next_state = em.get_next_states()
            predicted_qs = {}
            for _, (*data, ) in enumerate(next_state):
                predicted_qs[(data[0], data[1])] = net.f_pass(
                    np.array([next_state[data[0], data[1]]]).T)[0, 0]
            best_move = max(predicted_qs, key=predicted_qs.get)
            reward, done = em.pcplace(best_move[0], best_move[1])
        score[i, j] = em.get_game_score()
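matplotlib is imported but the plotting falls outside the snippet; a minimal sketch of how the collected scores might be visualised:

plt.plot(score.mean(axis=1))   # mean score per generation over the N2 runs
plt.xlabel("generation")
plt.ylabel("mean game score")
plt.show()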
Example #7
# The imports and eps_start fall outside this snippet; they are reconstructed
# here (module names taken from the companion examples, eps_start assumed).
import numpy as np

import losses
import neuralnetwork as nn
import tetris

eps_start = 1.0  # assumption: not shown in the truncated source
eps_end = 0.0
eps_decay = 0.002
memory_size = 20000
lr = 0.001 * 0.001
num_episodes = 3000

filename = "TEST_" + str(lr)


def moving_average(a, n=30):
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n


em = tetris.TetrisApp(10, 20, 750, False, 40, 30 * 100)
em.pcrun()
policy_net = nn.DQNsimple(em.get_state_size(), 1, losses.MSE_loss)
memory = nn.ReplayMemory(memory_size)
strategy = nn.EpsilonGreedyStrategy(eps_start, eps_end, eps_decay)

# fig = plt.figure()
# thismanager = plt.get_current_fig_manager()
# thismanager.window.wm_geometry("+500+0")
# plt.ion()

score = np.full(num_episodes, np.nan)  # nan until an episode completes
lossess = np.zeros(num_episodes)
current_step = -1
for episode in range(num_episodes):
    current_step += 1
    # ... the episode body (action selection, replay-memory updates and
    # network training) is truncated in the source.
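moving_average is defined above but unused within the snippet; a plausible use is smoothing the per-episode scores for plotting, assuming score has been filled in by the truncated loop:

import matplotlib.pyplot as plt

plt.plot(moving_average(score, n=30))  # 30-episode moving average
plt.xlabel("episode")
plt.ylabel("smoothed score")
plt.savefig(filename + ".png")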
Example #8
def cal_pop_fitness(pop, pieceLimit, seed):
    # Play one seeded game per individual and collect the resulting scores.
    fitness = []
    for indv in pop:
        fitness.append(tetris.TetrisApp(False, seed).run(indv, pieceLimit))
    return fitness
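A hedged usage sketch; the population shape, piece limit and seed are illustrative assumptions:

import numpy as np

population = np.random.rand(20, 8)   # 20 hypothetical weight vectors
fitness = cal_pop_fitness(population, pieceLimit=500, seed=42)
best = population[int(np.argmax(fitness))]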
Example #9
def run(N=6, num_generations=10000):
    em = tetris.TetrisApp(10, 10, 750, False, 40, 30 * 100)
    em.pcrun()
    net = NN.DQN(em.get_state_size(), 1, losses.MSE_loss)

    # One flat genome per individual: every layer's weights and biases concatenated.
    dimension = net.L1.W.size + net.L1.B.size + net.L2.W.size + net.L2.B.size + net.L3.W.size + net.L3.B.size
    size_population = 4 * N
    pop_size = (size_population, dimension)
    new_population = np.random.rand(size_population, dimension)
    fitness = np.empty(size_population)

    generations = np.linspace(1, num_generations, num_generations)
    maxscore = np.zeros(num_generations)

    for generation in range(num_generations):
        ## compute the fitness of each individual
        for it, row in enumerate(new_population):
            # Unpack this individual's genome into the network, layer by layer.
            index = 0
            net.L1.W = row[index:index + net.L1.W.size].reshape(net.L1.W.shape)
            index += net.L1.W.size
            net.L1.B = row[index:index + net.L1.B.size].reshape(net.L1.B.shape)
            index += net.L1.B.size
            net.L2.W = row[index:index + net.L2.W.size].reshape(net.L2.W.shape)
            index += net.L2.W.size
            net.L2.B = row[index:index + net.L2.B.size].reshape(net.L2.B.shape)
            index += net.L2.B.size
            net.L3.W = row[index:index + net.L3.W.size].reshape(net.L3.W.shape)
            index += net.L3.W.size
            net.L3.B = row[index:index + net.L3.B.size].reshape(net.L3.B.shape)
            em.reset()
            done = False
            while not done:
                next_state = em.get_next_states()
                predicted_qs = {}

                # Score every candidate placement with the network.
                for i, (*data, ) in enumerate(next_state):
                    predicted_qs[(data[0], data[1])] = net.f_pass(
                        np.array([next_state[data[0], data[1]]]).T)[0, 0]

                best_move = max(predicted_qs, key=predicted_qs.get)

                reward, done = em.pcplace(best_move[0], best_move[1])
                if em.get_game_score() > 20000:
                    break

            fitness[it] = em.get_game_score()

        ## sort population and fitness together so the best is on top;
        ## sorting only the population would leave fitness out of step with
        ## it, making the final return value wrong
        order = fitness.argsort()[::-1]
        new_population = new_population[order]
        fitness = fitness[order]
        maxscore[generation] = fitness[0]
        print(generation, fitness[0])
        if fitness[0] > 20000:
            break
        np.savetxt("evolution\\generation" + str(generation) + ".csv",
                   new_population[0],
                   delimiter=',')
        offspring_crossover = cross_and_mutate(new_population, pop_size)
        new_population = offspring_crossover
    np.savetxt("evolution\\scores.csv",
               np.array([generations, maxscore]).T,
               delimiter=',')
    return (fitness[0], new_population[0])
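A minimal invocation sketch; note that np.savetxt does not create directories, so the evolution folder must exist beforehand:

import os

os.makedirs("evolution", exist_ok=True)
best_fitness, best_genome = run(N=6, num_generations=1000)
print("best score:", best_fitness)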