Example #1
    def __init__(self,
                 lr=0.01,
                 gamma=0.9,
                 batch_size=100,
                 eps_start=1,
                 eps_end=0,
                 eps_test=0,
                 target_model_update=1000,
                 seq_memory_limit=50000,
                 epsilon_decay=1,
                 comment=""):

        # hyperparameters:
        self.LEARNING_RATE = lr  # default = 0.001 -> a higher LR learns faster but can become unstable or get stuck in a local minimum
        self.GAMMA = gamma  # discount factor that weights future rewards
        self.BATCH_SIZE = batch_size  # default = 32 -> too small for tetris?
        self.EPSILON_START = eps_start
        self.EPSILON_END = eps_end
        self.EPSILON_DECAY = epsilon_decay  # multiplier on nb_steps: epsilon reaches eps_end after epsilon_decay * nb_steps agent steps
        self.TARGET_MODEL_UPDATE = target_model_update  # default is 10000
        self.EPSILON_TEST = eps_test
        self.SEQUENTIAL_MEMORY_LIMIT = seq_memory_limit
        self.TEST_MAX_EPISODE_STEPS = 10000
        self.TRAIN_MAX_EPISODE_STEPS = 10000
        self.MAX_STEP_SCORE = 500  # score if max episode steps are reached
        self.DYING_PEN = 50

        # comment for plots
        self.COMMENT = comment

        # Initializes a Tetris playing field of width 10 and height 20.
        self.env = TetrisEngine(dying_pen=self.DYING_PEN,
                                max_steps=self.TRAIN_MAX_EPISODE_STEPS,
                                max_step_score=self.MAX_STEP_SCORE)
        self.agent = None
Example #2
    def __init__(self):
        self.game_state = GameState(TetrisBoard())

        #from blocks import BlockLine, BlockRightL, BlockCube
        #l = BlockLine()
        #l.rotate()
        #r = BlockRightL()
        #r.rotate(-1)
        #c = BlockCube()
        #self.game_state.board.place_block(l, (-1,0))
        #self.game_state.board.place_block(r, (1,0))
        #self.game_state.board.place_block(c, (8,0))

        self.engine = TetrisEngine(self.game_state)
        self.ai = TetrisAI(self.engine)
Example #3
import random
from time import sleep

from engine import TetrisEngine  # assumed import path; adjust to the project's layout


class BasicAgent():
    def __init__(self):
        # Initializes a Tetris playing field of width 10 and height 20.
        self.env = TetrisEngine()

    def run(self):
        # Loop to keep playing games
        while True:
            # Variable to indicate whether the game has ended or not
            done = False
            # Resets the environment
            state = self.env.reset()

            # Loop that keeps making moves as long as the game hasn't ended yet
            while not done:
                # Picks a random action
                action = random.randint(0, 5)
                # Performs the action in the game engine
                next_state, reward, done, info = self.env.step(action)
                # Render the game state
                self.env.render()
                # Sleep to make sure a human can follow the gameplay
                sleep(0.05)
Example #4
    def setup(self):
        # Initialization
        for i in range(self.player_num):
            self.engines[i] = TetrisEngine(self.width, self.height)
            self.engines[i].clear()
        if self.use_gui:
            gui = GUI(self, self.block_size)
            self.gui = gui
        else:
            self.stdscr = curses.initscr()
            curses.noecho()

        # Store play information
        self.dbs = {}

        self.done = False

        for i in range(self.player_num):
            # Initial rendering
            self.engine_states[i] = {
                "KO": 0,
                "reward": 0,
                "lines_sent": 0,
                "lines_cleared": 0,
                "hold_shape": None,
                "hold_shape_name": None,
                "hold_locked": False,
                "garbage_lines": 0,
                "highest_line": 0,
                "combo": -1
            }
            # Initialize dbs
            self.dbs[i] = []

        self.game_count += 1
        self.start_time = time.time()
Example #5
    curses.noecho()
    # React to keys without pressing enter (700ms delay)
    curses.halfdelay(7)
    # Enumerate keys
    stdscr.keypad(True)

    # return stdscr


if __name__ == '__main__':
    # Curses standard screen
    stdscr = curses.initscr()

    # Init environment
    width, height = 10, 20  # standard tetris friends rules
    env = TetrisEngine(width, height)

    # Play games on repeat
    while True:
        init()
        stdscr.clear()
        env.clear()
        db = play_game()

        # Return to terminal
        terminate()
        # Should the game info be saved?
        if save_game():
            try:
                fr = open('training_data.npy', 'rb')
                x = np.load(fr)
Example #6
def main(episode, load, learn, debug, random_rate, session):
    load_model = load
    print("load model", load_model, "learn", learn, "debug", debug, "episode",
          episode)

    width, height = 7, 14  # reduced board (standard Tetris Friends rules would be 10 x 20)
    env = TetrisEngine(width, height)
    action_count = 7
    agent = Agent(lr=1e-4,
                  input_dims=width * height,
                  gamma=0.5,
                  n_actions=action_count,
                  l1_size=512,
                  l2_size=128)
    if session:
        model_filename = "%s-trained_model.torch" % session
    else:
        model_filename = "trained_model.torch"
    parameter_size = sum(p.numel() for p in agent.policy.parameters())  # count all elements, not just len() of each tensor
    print("network parameter size:", parameter_size)

    action_idx = 0

    if load_model:
        agent.policy.load_state_dict(T.load(model_filename))
    for i in range(episode):
        done = False
        score = 0
        state = env.clear()
        counter = 0
        while not done:
            counter += 1
            action, probs = agent.choose_action(state)
            prob = probs[action].item()
            state, reward, done = env.step(action)
            agent.store_rewards(reward)
            score += reward
            if debug:
                stdscr = curses.initscr()
                stdscr.clear()
                stdscr.addstr(str(env))
                stdscr.addstr('\ncumulative reward: ' + str(score))
                stdscr.addstr('\nreward: ' + str(reward))
                time.sleep(.2)
                continue

            if not debug and i % 100 == 0 and counter % 100 == 1:
                idx2direction = {
                    0: "left",
                    1: "right",
                    2: "hard_drop",
                    3: "soft_drop",
                    4: "rotate_left",
                    5: "rotate_right",
                    6: "idle"
                }
                probs_str = ""
                for z, item in enumerate(probs):
                    probs_str += "%s:%0.2f, " % (idx2direction[z], item.item())
                print(probs_str)
                print('episode: ', i, 'counter: ', counter,
                      'reward %0.3f' % reward,
                      'action: %s (%0.2f)' % (action, prob))
            writer.add_scalar("action prob", prob, action_idx)
            action_idx += 1

        if not debug and i % 100 == 0:
            print('episode: ', i, 'score %0.3f' % score)
        writer.add_scalar("final score", score, i)
        if learn:
            agent.learn()
            if i % 1000 == 0:
                T.save(agent.policy.state_dict(), model_filename)
    writer.close()
Example #7
import sys
import os
import shutil
from collections import namedtuple
from itertools import count

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable

from engine import TetrisEngine

width, height = 10, 20  # standard tetris friends rules
engine = TetrisEngine(width, height)

# if gpu is to be used
use_cuda = torch.cuda.is_available()
if use_cuda:
    print("....Using Gpu...")
FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if use_cuda else torch.ByteTensor
# Tensor = FloatTensor

######################################################################
# Replay Memory
# -------------
# -  ``Transition`` - a named tuple representing a single transition in
#    our environment
Example #8
    ]

    for pre_actions in possible_pre_actions:
        shape, anchor = engine.shape, engine.anchor

        # Applies the pre-actions.
        for a in pre_actions:
            shape, anchor = engine.actions[a](shape, anchor, board)
            shape, anchor = engine.actions.soft_drop(shape, anchor, board)

        # Tests the best sequence of post-actions.
        for action in [engine.actions.LEFT, engine.actions.RIGHT]:
            new_actions, new_score = compute_helper(engine, shape, anchor, action)
            if new_score < min_score:
                actions = pre_actions + new_actions
                min_score = new_score

    return actions


if __name__ == '__main__':
    engine = TetrisEngine(width=10, height=20)
    steps = compute_optimal_steps(engine)

    while True:
        steps = compute_optimal_steps(engine)
        for step in steps:
            engine.step(step)
            print(engine)
            time.sleep(0.05)
Example #9
    def __init__(self):
        # Initializes a Tetris playing field of width 10 and height 20.
        self.env = TetrisEngine()
Example #10
# -*- coding: utf-8 -*-
from itertools import count
import numpy as np

from engine import TetrisEngine, board_to_bool
from heuristic import heuristic_fn, complete_line

width, height = 10, 20  # standard tetris friends rules
engine = TetrisEngine(width, height, enable_KO=False)


class FixedPolicyAgent:
    def __init__(self):
        self.current_actions = []

    def get_action(self, engine, shape, anchor, board):
        if len(self.current_actions) == 0:
            _, _, self.current_actions = self.select_action(
                engine, shape, anchor, board)
        action = self.current_actions.pop(0)
        return action

    def select_action(self, engine, shape, anchor, board):
        actions_name_final_location_map = engine.get_valid_final_states(
            shape, anchor, board)
        act_pairs = [(k, v[2], v[3])
                     for k, v in actions_name_final_location_map.items()]
        placements = [board_to_bool(p) for k, p, actions in act_pairs]
        h_score = [heuristic_fn(s, complete_line(s)) for s in placements]
        act_idx = np.argmax(h_score)
        actions_name, final_placement, actions = act_pairs[act_idx]
Example #11
                        '--epoch-len',
                        help='Number of training epochs',
                        metavar='E',
                        type=int,
                        default=1000)
    parser.add_argument('-n',
                        '--num-engines',
                        help='Number of simultaneous training engines',
                        metavar='N',
                        type=int,
                        default=100)
    args = parser.parse_args()

    # Initialize the engines.
    engines = [
        TetrisEngine(args.width, args.length) for _ in range(args.num_engines)
    ]

    # Build the models (training and sampling).
    train_model, sample_model = build_models(
        args.width,
        args.length,
        len(engines[0].shapes),
        len(engines[0].actions),
    )

    # If present, load the existing weights.
    if os.path.exists(args.model_save_loc):
        try:
            train_model.load_weights(args.model_save_loc)
        except:
Example #12
    for pre_actions in possible_pre_actions:
        shape, anchor = engine.shape, engine.anchor

        # Applies the pre-actions.
        for a in pre_actions:
            shape, anchor = engine.actions[a](shape, anchor, board)
            shape, anchor = engine.actions.soft_drop(shape, anchor, board)

        # Tests the best sequence of post-actions.
        for action in [engine.actions.LEFT, engine.actions.RIGHT]:
            new_actions, new_score = compute_helper(engine, shape, anchor,
                                                    action)
            if new_score < min_score:
                actions = pre_actions + new_actions
                min_score = new_score

    return actions


if __name__ == '__main__':
    engine = TetrisEngine(width=10, height=20)
    steps = compute_optimal_steps(engine)

    while True:
        steps = compute_optimal_steps(engine)
        for step in steps:
            engine.step(step)
            print(engine)
            time.sleep(0.05)
Example #13

def play_game_with_gen(dict_genes, engine):
    engine.clear()
    sl = 0
    for t in count():
        actions_name, placement, actions = genetic_agent.select_action(
            engine, engine.shape, engine.anchor, engine.board, dict_genes)
        # Observations
        state, reward, done, cleared_lines, sent_lines = engine.step_to_final(
            actions)
        # Accumulate the total number of lines sent
        sl += sent_lines
        logger.info(engine)
        logger.info(f"Sent lines: {sl}")
        time.sleep(.1)
        if done:
            break
    logger.info("")
    logger.info("")
    logger.info("")


if __name__ == '__main__':
    engine = TetrisEngine(width, height, enable_KO=False)
    darwin = GeneticAlgorithm(population_size=50,
                              mutation_rate=0.05,
                              num_generations=30,
                              engine=engine)
    darwin.evolve_the_beasts()
Example #14
class Agent:
    def __init__(self,
                 lr=0.01,
                 gamma=0.9,
                 batch_size=100,
                 eps_start=1,
                 eps_end=0,
                 eps_test=0,
                 target_model_update=1000,
                 seq_memory_limit=50000,
                 epsilon_decay=1,
                 comment=""):

        # hyperparameters:
        self.LEARNING_RATE = lr  # default = 0.001 -> a higher LR learns faster but can become unstable or get stuck in a local minimum
        self.GAMMA = gamma  # discount factor that weights future rewards
        self.BATCH_SIZE = batch_size  # default = 32 -> too small for tetris?
        self.EPSILON_START = eps_start
        self.EPSILON_END = eps_end
        self.EPSILON_DECAY = epsilon_decay  # multiplier on nb_steps: epsilon reaches eps_end after epsilon_decay * nb_steps agent steps
        self.TARGET_MODEL_UPDATE = target_model_update  # default is 10000
        self.EPSILON_TEST = eps_test
        self.SEQUENTIAL_MEMORY_LIMIT = seq_memory_limit
        self.TEST_MAX_EPISODE_STEPS = 10000
        self.TRAIN_MAX_EPISODE_STEPS = 10000
        self.MAX_STEP_SCORE = 500  # score if max episode steps are reached
        self.DYING_PEN = 50

        # comment for plots
        self.COMMENT = comment

        # Initializes a Tetris playing field of width 10 and height 20.
        self.env = TetrisEngine(dying_pen=self.DYING_PEN,
                                max_steps=self.TRAIN_MAX_EPISODE_STEPS,
                                max_step_score=self.MAX_STEP_SCORE)
        self.agent = None

        # target model update in source code:
        # if self.target_model_update >= 1 and self.step % self.target_model_update == 0:
        # -> I think the total number of steps has to be a multiple of target_model_update for this to work
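        # Illustration (added note, assuming the keras-rl condition quoted above): with
        # target_model_update=1000 the target network is synced with the online network
        # at steps 1000, 2000, 3000, ...; a final interval shorter than
        # target_model_update never triggers a sync, hence the remark above.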

    @timer
    def train(self, nb_steps=1000, visualise=True):
        """
        the training process of the deep Q agent
        """
        # Resets the environment
        self.env.reset_environment()

        # init Neural network
        actions = 6  # there are 6 discrete actions
        model = self.build_model_conv(actions)
        model.summary()

        # define callbacks
        callbacks = build_callbacks()

        # init and fit the agent
        dqn = self.build_agent(model, actions, nb_steps)
        dqn.compile(Adam(lr=self.LEARNING_RATE), metrics=['mae', 'mse'])
        history_training = dqn.fit(
            self.env,
            nb_steps=nb_steps,
            callbacks=callbacks,
            visualize=visualise,
            log_interval=self.TARGET_MODEL_UPDATE,
            verbose=1,
            nb_max_episode_steps=self.TRAIN_MAX_EPISODE_STEPS)

        # plot the results
        self._plot_custom_results(self.env.df_info,
                                  history_training,
                                  mode='training')

        # save trained agent
        self.agent = dqn

        return dqn

    @timer
    def test(self, nb_episodes=10, visualize=True):
        """
        The testing process of the deep q agent
        """
        self.env.reset_environment()
        history_test = self.agent.test(
            self.env,
            nb_episodes=nb_episodes,
            visualize=visualize,
            nb_max_episode_steps=self.TEST_MAX_EPISODE_STEPS)

        print(np.mean(history_test.history['episode_reward']))

        # plot the results
        self._plot_custom_results(self.env.df_info, history_test, mode='test')

    def save(self, name):
        """
        saving the model weights for future use
        """
        self.agent.save_weights(f'models/{name}.model', overwrite=False)

    def build_model_conv(self, actions):
        """
        define the neural network model architecture for the deep q agent
        """

        model = tf.keras.models.Sequential()

        model.add(
            Conv2D(32, (2, 2),
                   padding='same',
                   kernel_initializer='he_uniform',
                   kernel_constraint=max_norm(3),
                   input_shape=(1, self.env.height, self.env.width)))
        model.add(BatchNormalization())
        model.add(Activation('tanh'))

        model.add(
            Conv2D(64, (2, 2),
                   padding='same',
                   kernel_initializer='he_uniform',
                   kernel_constraint=max_norm(3)))
        model.add(BatchNormalization())
        model.add(Activation('tanh'))

        model.add(
            Conv2D(64, (2, 2),
                   padding='same',
                   kernel_initializer='he_uniform',
                   kernel_constraint=max_norm(3)))
        model.add(BatchNormalization())
        model.add(Activation('tanh'))

        # model.add(MaxPooling2D(pool_size=(2,2)))

        # end of convolutional layers, start of 'hidden' dense layers
        model.add(Flatten())
        model.add(
            Dense(128,
                  kernel_initializer='he_uniform',
                  kernel_constraint=max_norm(3)))
        model.add(BatchNormalization())
        model.add(Activation('tanh'))
        model.add(Dropout(0.5))

        # Final dense layer
        model.add(Dense(actions))
        model.add(BatchNormalization())
        model.add(Activation('linear'))

        return model

    def build_agent(self, model, actions, nb_steps):
        """
        building the deep q agent

        GAMMA:
        REWARD = r1 + gamma*r2 + gamma^2*r3 + gamma^3*r4 ...
        -> gamma is the discount factor that weights future rewards
        In general, most algorithms learn faster when they don't have to look too far into the future.
        So it sometimes helps performance to set gamma relatively low.
        For many problems a gamma of 0.9 or 0.95 is fine.

        LAMBDA:
        The lambda parameter determines how much you bootstrap on earlier learned value versus using
        the current Monte Carlo roll-out. This implies a trade-off between more bias (low lambda)
        and more variance (high lambda).
        A general rule of thumb is to use a lambda equal to 0.9.
        However, it might be good just to try a few settings (e.g., 0, 0.5, 0.8, 0.9, 0.95 and 1.0)
        """
        policy = LinearAnnealedPolicy(
            EpsGreedyQPolicy(),  # takes the current best action with prob (1 - epsilon)
            attr='eps',  # decay epsilon (=exploration) per agent step
            value_max=self.EPSILON_START,  # start value of epsilon (default = 1)
            value_min=self.EPSILON_END,  # last value of epsilon (default = 0)
            value_test=self.EPSILON_TEST,
            nb_steps=self.EPSILON_DECAY * nb_steps)
        memory = SequentialMemory(limit=self.SEQUENTIAL_MEMORY_LIMIT,
                                  window_length=1)
        build_agent = DQNAgent(model=model,
                               memory=memory,
                               policy=policy,
                               gamma=self.GAMMA,
                               batch_size=self.BATCH_SIZE,
                               nb_actions=actions,
                               nb_steps_warmup=1000,
                               target_model_update=self.TARGET_MODEL_UPDATE,
                               enable_double_dqn=False,
                               train_interval=4)
        return build_agent

    def _plot_custom_results(self, df, history, mode='training'):
        """
        plot custom results
        """
        # input data
        if 'new_episode' not in df:
            raise KeyError(
                'the dataframe has to have the new_episode column to plot the results'
            )
        df["nr_episode"] = df["new_episode"].cumsum()

        df_results = df.groupby('nr_episode', as_index=False) \
            .agg(height_diff_sum=('height_difference', 'sum'),
                 new_block_sum=('new_block', 'sum'),
                 nr_lines_sum=('number_of_lines', 'max'),
                 score_sum=('score', 'sum'),
                 score_avg=('score', 'mean'),
                 count_steps=('nr_episode', 'count'))

        df_results['moving_average_score'] = df_results.score_sum.expanding(
        ).mean()
        df_results['moving_average_lines'] = df_results.nr_lines_sum.expanding(
        ).mean()

        # init plot
        figure = pyplot.figure(figsize=(20, 10), dpi=80)
        figure.canvas.set_window_title(mode)

        # PLOT 1: EPISODE REWARD
        pyplot.subplot(221)

        # data (the dict keys are different for training and test)
        if mode == 'training':
            episode_key = 'nb_episode_steps'
        else:
            episode_key = 'nb_steps'

        y_1 = history.history[episode_key]
        y_2 = history.history['episode_reward']
        ind = np.arange(len(y_1))

        # bars
        width = 0.35  # the width of the bars
        pyplot.bar(ind, y_1, width, color='g', label='nb_episode_steps')
        pyplot.ylabel('nr steps per episode')
        pyplot.xlabel('episode')
        pyplot.legend(loc="upper left")

        # line
        axes2 = pyplot.twinx()
        axes2.plot(ind, y_2, color='k', label='episode_reward')
        axes2.set_ylabel('episode reward')
        pyplot.legend(loc="upper right")

        # title
        pyplot.title(mode + ': episode reward and steps per episode')

        # PLOT 2: NR OF LINES CLEARED PER EPISODE
        pyplot.subplot(222)
        x = df_results['nr_episode']
        y = df_results['nr_lines_sum']

        # plotting the points
        pyplot.plot(x, y)

        # naming the x axis
        pyplot.xlabel('episodes')
        # naming the y axis
        pyplot.ylabel('nr_of_lines')

        # title
        pyplot.title(mode + ': number of lines per episode')

        # save the plots
        timestr = time.strftime("%m%d_%H%M%S")
        pyplot.savefig("logs/img_info_" + timestr)

        # PLOT 3: MOVING AVERAGE TOTAL SCORE
        pyplot.subplot(223)
        x = df_results['nr_episode']
        y = df_results['moving_average_score']

        # plotting the points
        pyplot.plot(x, y)

        # naming the x axis
        pyplot.xlabel('episodes')
        # naming the y axis
        pyplot.ylabel('moving average total score')

        # title
        pyplot.title(mode + ': moving average total score')

        # PLOT 4: MOVING AVERAGE LINES CLEARED
        pyplot.subplot(224)
        x = df_results['nr_episode']
        y = df_results['moving_average_lines']

        # plotting the points
        pyplot.plot(x, y)

        # naming the x axis
        pyplot.xlabel('episodes')
        # naming the y axis
        pyplot.ylabel('moving average nr of lines')

        # add subtitle with hyperparams
        subtitle = f"Epsilon start: {self.EPSILON_START}, Epsilon end: {self.EPSILON_END}, Gamma: {self.GAMMA}, LR: {self.LEARNING_RATE}, " \
                   f"target model update: {self.TARGET_MODEL_UPDATE}, Batch size: {self.BATCH_SIZE}, comment: {self.COMMENT}"
        pyplot.figtext(0.01, 0.01, subtitle, fontsize=15)

        # title
        pyplot.title(mode + ': moving average nr of lines')

        # save the plots
        timestr = time.strftime("%m%d_%H%M%S")
        pyplot.savefig("logs/img_info_" + timestr)

        # show the plots
        pyplot.show()
        pyplot.close()

    def plot_metrics(self, save_fig=False):
        """
        plot the callback metrics
        """
        # plot the logs
        with open('dqn_log.json') as json_file:
            data = json.load(json_file)
        df_log = pd.DataFrame.from_dict(data)
        figure = pyplot.figure(figsize=(20, 10), dpi=80)
        for idx, col in enumerate(df_log.columns):
            self._combine_metrics(df_log, col, idx)

        # add subtitle
        subtitle = f"Epsilon start: {self.EPSILON_START}, Epsilon end: {self.EPSILON_END}, Gamma: {self.GAMMA}, LR: {self.LEARNING_RATE}, " \
                   f"target model update: {self.TARGET_MODEL_UPDATE}, Batch size: {self.BATCH_SIZE}, comment: {self.COMMENT}"
        pyplot.figtext(0.01, 0.01, subtitle, fontsize=15)

        # save fig
        timestr = time.strftime("%m%d_%H%M%S")
        if save_fig:
            pyplot.savefig("logs/img_logs_" + timestr)
        pyplot.show()

    @staticmethod
    def _combine_metrics(df, key, index):
        """
        helper method for the plot_metrics function
        """
        pyplot.subplot(4, 3, index + 1)
        pyplot.subplots_adjust(hspace=0.5)

        y = df[key]
        x = df['episode']

        # plotting the points
        pyplot.plot(x, y)

        # naming the x axis
        pyplot.xlabel('episode nr')
        # naming the y axis
        pyplot.ylabel(key.replace('_', ' '))

        # title
        pyplot.title(key.replace('_', ' '))
Example #15
# -*- coding: utf-8 -*-
import sys
import os
import numpy as np
from collections import namedtuple
from itertools import count
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import deque
from engine import TetrisEngine

width, height = 10, 20  # standard tetris friends rules
engine = TetrisEngine(width, height, enable_KO=False)
eps = 10.**-8

use_cuda = torch.cuda.is_available()
if use_cuda:
    print("....Using Gpu...")
FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if use_cuda else torch.ByteTensor

Transition = namedtuple(
    'Transition', ('state', 'action', 'shape', 'anchor', 'board', 'reward'))


class CNN_lay(nn.Module):
    def __init__(self):
        super(CNN_lay, self).__init__()
Example #16
class TetrisGame:
    def __init__(self):
        self.game_state = GameState(TetrisBoard())

        #from blocks import BlockLine, BlockRightL, BlockCube
        #l = BlockLine()
        #l.rotate()
        #r = BlockRightL()
        #r.rotate(-1)
        #c = BlockCube()
        #self.game_state.board.place_block(l, (-1,0))
        #self.game_state.board.place_block(r, (1,0))
        #self.game_state.board.place_block(c, (8,0))

        self.engine = TetrisEngine(self.game_state)
        self.ai = TetrisAI(self.engine)

    def run_main(self):
        self.engine.start()

        while not self.engine.running():
            pass

        self.ai.play()

        while self.engine.running():
            try:
                c = getch()
                if c == LEFT_KEY:
                    self.engine.move_left()
                if c == RIGHT_KEY:
                    self.engine.move_right()
                if c == DOWN_KEY:
                    self.engine.move_down()
                if c == DROP_KEY:
                    self.engine.drop_block()
                if c == UP_KEY:
                    self.engine.rotate()
            except KeyboardInterrupt:
                self.engine.stop()
Example #17
import sys
import os
import torch
import time
from engine import TetrisEngine
from dqn_agent import DQN, ReplayMemory, Transition
from torch.autograd import Variable

use_cuda = torch.cuda.is_available()

FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor

width, height = 10, 20 # standard tetris friends rules
engine = TetrisEngine(width, height)

def load_model(filename):
    model = DQN()
    if use_cuda:
        model.cuda()
    checkpoint = torch.load(filename)
    model.load_state_dict(checkpoint['state_dict'])

    return model

def run(model):
    state = FloatTensor(engine.clear()[None,None,:,:])
    score = 0
    while True:
        action = model(Variable(state,
            volatile=True).type(FloatTensor)).data.max(1)[1].view(1,1).type(LongTensor)