def init_tetris():
    global start_time, t
    start_time = time.time()
    t = tetris.Tetris()
    t.spawn_next_piece(isFirstPiece=True)
    return t
def play():
    csv_file, csv_writer = initialize_writer()
    for epoch in range(epochs):
        board = tetris.Tetris(constants.HORZBLOCKS, constants.VERTBLOCKS)
        final_score = 0
        while True:
            # board.run() returns False if the state is invalid (game over).
            states = board.run()
            if not states:
                break
            state = random.choice(states)
            board.setState(state)
            board.draw_game()
            final_score = board.score
        csv_writer.writerow([final_score])
        print("Training epoch #: {}\t\t Final score = {}".format(
            epoch, final_score))
    csv_file.close()
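# `initialize_writer` is not shown in this snippet. A minimal sketch of what
# play() (and train() below) appear to expect -- an open file handle plus a
# csv.writer -- assuming a hypothetical scores.csv output path:
import csv

def initialize_writer():
    csv_file = open('scores.csv', 'w', newline='')
    csv_writer = csv.writer(csv_file)
    return csv_file, csv_writer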
def run(): """Run the game. The game advances with a step depending on the current game level. The game stops when no new shapes can be placed. Then, the game over message is displayed. The game quits when the user closes the window. """ pygame.init() game = tetris.Tetris() game.update() game.draw() pygame.display.flip() clock = pygame.time.Clock() while 1: clock.tick(game.level) key = None for event in pygame.event.get(): if event.type == QUIT: return elif event.type == KEYDOWN: key = event.key if game.is_game_over(): pygame.display.flip() else: game.tick(key) game.update() game.draw() pygame.display.flip()
def predict():
    total_score = 0
    results = []
    count = 1
    with open('games.txt') as games_file:
        for line in games_file:
            print('playing {}'.format(count))
            count += 1
            line = line.rstrip()
            game = tetris.Tetris(line)
            action_history = []
            for i in range(len(line)):
                if not (game.won or game.lost):
                    action, _ = propose_move(game)
                    move = ((action % 12) - 1, action // 12)
                    game.make_move(move[0], move[1])
                    action_history.append(move)
            results.append(';'.join(
                ['{}:{}'.format(a, b) for a, b in action_history]))
            if game.lost:
                game.score += 1000
            print(game.won, game.score)
            total_score += game.score
    print(total_score)
    with open('submissions/player.txt', 'w') as submission:
        for result in results:
            submission.write('{}\n'.format(result))
def play_game(g):
    x = []
    y = []
    input = ''.join([random.choice('IJLOSTZ') for _ in range(GAME_LENGTH)])
    game = tetris.Tetris(input)
    score = 0
    for i in input:
        if game.won or game.lost:
            break
        # Pack the current piece shape into a 4x10 header above the grid.
        state = np.zeros(tetris.STATE_SIZE)
        header = np.zeros((4, 10))
        shape = tetris.SHAPES[i]
        header[:len(shape), :len(shape[0])] = shape
        state[:4] = header
        state[4:] = game.grid
        state = (state > 0).astype(np.int8)
        action, r = player.propose_move(game)
        move = ((action % (tetris.GRID_W + 2)) - 1,
                action // (tetris.GRID_W + 2))
        game.make_move(move[0], move[1])
        reward = np.zeros(tetris.ACTION_SIZE)
        reward[action] = r
        x.append(state)
        y.append(reward)
    print('playing game {} of {} scored {}'.format(g, NO_GAMES, game.score))
    return x, y
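# The move decoding above (also used in predict()) implies that a flat action
# index packs rotation and column as action = rotation * (GRID_W + 2) + (column + 1).
# A minimal round-trip sketch of that encoding, assuming a board width of 10
# (encode_action/decode_action are illustrative helpers, not part of the original):
def encode_action(column, rotation, width=10):
    return rotation * (width + 2) + (column + 1)

def decode_action(action, width=10):
    return (action % (width + 2)) - 1, action // (width + 2)

assert decode_action(encode_action(3, 2)) == (3, 2)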
def __init__(self, seed: int):
    self.args = (0, False)
    self.env = tetris.Tetris(seed)
    self.right_gain = 0.
    self.fix_prob = 0.
    self.penalty_multiplier = 0.
    self.step_reward = 0.
    self.reset()
def Main(model_path):
    c = Configs()
    model = Model(c.channels, c.blocks).to(device)
    if model_path is None:
        model_path = os.path.join(os.path.dirname(sys.argv[0]), 'models/model.pth')
    if model_path[-3:] == 'pkl':
        model.load_state_dict(torch.load(model_path)[0].state_dict())
    else:
        model.load_state_dict(torch.load(model_path))
    model.eval()

    batch_size = 100
    n = 2000
    games = [tetris.Tetris(GetSeed(i)) for i in range(batch_size)]
    for g in games:
        ResetGame(g)
    started = batch_size
    results = []
    rewards = [0. for _ in range(batch_size)]
    is_running = [True for _ in range(batch_size)]
    while len(results) < n:
        states = [g.GetState() for g, running in zip(games, is_running) if running]
        states = obs_to_torch(np.stack(states), device)
        pi = model(states, False)[0]
        pi = torch.argmax(pi, 1)
        j = 0
        for i in range(batch_size):
            if not is_running[i]:
                continue
            action = pi[j].item()
            j += 1
            r, x, y = action // 200, action // 10 % 20, action % 10
            rewards[i] += games[i].InputPlacement(r, x, y)[1]
            if games[i].IsOver():
                results.append((games[i].GetScore(), games[i].GetLines()))
                rewards[i] = 0.
                if started < n:
                    # Keep the batch full until n games have been started.
                    games[i] = tetris.Tetris(GetSeed(i))
                    ResetGame(games[i])
                    started += 1
                else:
                    is_running[i] = False
                if len(results) % 200 == 0:
                    print(len(results), '/', n, 'games started')

    # Print score and line-count thresholds together with the fraction of
    # games reaching each threshold.
    s = sorted([i[0] for i in results], reverse=True)
    for i in range(len(s) - 1):
        for t in range(2000000, 700000, -50000):
            if s[i] >= t and s[i + 1] < t:
                print(t, (i + 1) / n)
    s = sorted([i[1] for i in results], reverse=True)
    for i in range(len(s) - 1):
        for t in range(350, 150, -10):
            if s[i] >= t and s[i + 1] < t:
                print(t, (i + 1) / n)
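# Note on the decoding above: with a 20x10 board, the flat action index packs
# (rotation, row, column) as action = rotation * 200 + row * 10 + column, so
# r, x, y = action // 200, action // 10 % 20, action % 10 recovers the triple.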
def reset(self):
    self.state = np.zeros(tetris.STATE_SIZE)
    self.inputs = ''.join(
        [random.choice('IJLOSTZ') for _ in range(self.game_length)])
    self.action_history = []
    self.frame = 0
    self.score = 0
    self.tetris = tetris.Tetris(self.inputs)
    return self.state
def handle(self):
    print('connected')
    game = tetris.Tetris(159263)
    while True:
        try:
            data = self.read_until(1)
            if data[0] == 0xff:
                cur, nxt, level = self.read_until(3)
                # adjustable: reward_multiplier, hz_avg, hz_dev, microadj_delay, target, start_level
                st = {'reward_multiplier': 1e-5, 'hz_avg': 12, 'hz_dev': 0,
                      'microadj_delay': 25, 'start_level': level, 'target': 1200000}
                if random.randint(0, 1) == 0:
                    st['hz_avg'] = 13.5
                    st['target'] = 1100000
                if random.randint(0, 1) == 0:
                    st['microadj_delay'] = 16
                if random.randint(0, 1) == 0:
                    st['hz_dev'] = 1
                game.ResetGame(**st)
                print()
                print()
                print('Current game:')
                print('Start level:', level)
                print('Tapping speed:',
                      'NormalDistribution({}, {})'.format(st['hz_avg'], st['hz_dev'])
                      if st['hz_dev'] > 0 else 'constant {}'.format(st['hz_avg']),
                      'Hz')
                print('Microadjustment delay:', st['microadj_delay'], 'frames', flush=True)
                game.SetNowPiece(cur)
                game.SetNextPiece(nxt)
                game.InputPlacement(*GetStrat(model, game), False)
                seq = game.GetMicroadjSequence()
                # self.print_seq(seq)
                self.request.send(self.gen_seq(seq))
                game.InputPlacement(*GetStrat(model, game), False)
                seq = game.GetPlannedSequence()
                # self.print_seq(seq)
                self.request.send(self.gen_seq(seq))
            elif data[0] == 0xfd:
                r, x, y, nxt = self.read_until(4)
                game.SetPreviousPlacement(r, x, y)
                game.SetNextPiece(nxt)
                # game.PrintState()
                game.InputPlacement(*GetStrat(model, game), False)
                seq = game.GetMicroadjSequence()
                # self.print_seq(seq)
                self.request.send(self.gen_seq(seq))
                game.InputPlacement(*GetStrat(model, game), False)
                seq = game.GetPlannedSequence()
                # self.print_seq(seq)
                self.request.send(self.gen_seq(seq))
        except ConnectionResetError:
            self.request.close()
            break
        except ValueError:
            pass
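# Protocol summary, as inferred from the handler above: a 0xff byte opens a
# new game (followed by the current piece, next piece and start level), while
# 0xfd reports the previous placement (rotation, row, column) plus the next
# piece. Both message types are answered with a microadjustment input
# sequence followed by a planned input sequence for the next piece.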
def run_loop(p, seed):
    """
    A complete learning and evaluation run for ONE agent.

    This function is passed to multiprocessing.Pool.apply_async() and thus
    run multiple times (in parallel).

    :param p: `Bunch` of algorithm parameters.
    :param seed: integer, agent-specific seed. p also contains a
        run-specific random seed.
    :return: list of test results and tested weights.
    """
    random.seed(seed + p.seed)
    np.random.seed(seed + p.seed)
    agent = m_learning.MLearning(
        regularization=p.regularization,
        dom_filter=p.dominance_filter,
        cumu_dom_filter=p.cumu_dom_filter,
        rollout_dom_filter=p.rollout_dom_filter,
        rollout_cumu_dom_filter=p.rollout_cumu_dom_filter,
        lambda_min=p.lambda_min,
        lambda_max=p.lambda_max,
        num_lambdas=p.num_lambdas,
        gamma=p.gamma,
        rollout_length=p.rollout_length,
        number_of_rollouts_per_child=p.number_of_rollouts_per_child,
        learn_every_step_until=p.learn_every_step_until,
        learn_from_step_in_current_phase=p.learn_from_step,
        max_batch_size=p.max_batch_size,
        learn_periodicity=p.learn_periodicity,
        num_columns=p.num_columns)
    env = tetris.Tetris(num_columns=p.num_columns, num_rows=p.num_rows)
    test_env = tetris.Tetris(num_columns=p.num_columns, num_rows=p.num_rows,
                             max_cleared_test_lines=p.max_cleared_test_lines)
    test_results_ix, tested_weights_ix = learn_and_evaluate.learn_and_evaluate(
        env, test_env, agent, p.num_tests, p.num_games_per_test, p.test_points)
    return [test_results_ix, tested_weights_ix]
def __init__(self, **args):
    TetrisQAgent.__init__(self, **args)
    self.counter = 0
    self.moves_tried = 0
    self.weights = dict()
    self.done = False
    self.reward_weights = [0.8, 0.2, 0.2]
    temp_game = tetris.Tetris(20, 10)
    temp_game.new_figure()
    self.game = GameState(temp_game.field, temp_game.figure.type)
    if self.num_training == 0:
        # Weights found during testing.
        self.weights = {
            'bias': -190.83667758428328,
            'skyline_diff': -1514.7129500869028,
            'max_skyline_diff': -2211.239718486838,
            'num_holes': -8435.39859867022,
            'max_height': -606.511815161419,
            'num_rows_cleared': 147.0848355640954}
    else:
        self.run_training_rounds()
def game_start_stop(self, menu_item, *args):
    # print "activate Start Game Menu"
    self.game_play = not self.game_play
    menu_item.set_sensitive(False)
    if self.game_play:
        self.menu_game_stop.set_sensitive(True)
        self.menu_game_quick.set_sensitive(False)
        self.menu_game_start.set_sensitive(False)
        self.Start_stop_button.set_sensitive(True)
        self.Start_stop_button_label.set_label(self.start_stop_str[1])
        self.Pause_button.set_sensitive(True)
        self.Pause_button.grab_default()
        # New Tetris ... game init ...
        self.gtkTetris = tetris.Tetris(self)
        self.gtkTetris.game_init()
        # gtkTetris.make_noise()
        self.gtkTetris.from_virtual()
        self.gtkTetris.move_block(0, 0, 0)
        common.current_level = common.options["level"]
        self.update_game_values()
        self.timer = gobject.timeout_add(
            self.level_speeds[common.current_level], self.game_loop)
    else:
        # print self.game_play
        self.game_over_init()
def start(self, level):
    if level == 6:
        self.tetris = tetris.Tetris(self.screen)
    else:
        # Look up the level-specific class (e.g. tetris.Tetris1) by name.
        self.tetris = getattr(tetris, "Tetris" + str(level))(self.screen)
    def process_state_batch(self, batch):
        # Rescale uint8 pixels to [0, 1] floats.
        processed_batch = batch.astype('float32') / 255.
        return processed_batch

    def process_reward(self, reward):
        return np.clip(reward, -1., 1.)


parser = argparse.ArgumentParser()
parser.add_argument('--mode', choices=['train', 'test'], default='train')
parser.add_argument('--env-name', type=str, default='Tetris99')
parser.add_argument('--weights', type=str, default=None)
args = parser.parse_args()

# Get the environment and extract the number of actions.
env = tetris.Tetris()
np.random.seed(231)
env.seed(231)
nb_actions = env.action_space.n
print("NUMBER OF ACTIONS: " + str(nb_actions))

# Standard DQN model architecture, but swapping the Dense classifier layers
# for the rl.layers.NoisyNetDense version.
input_shape = (WINDOW_LENGTH, INPUT_SHAPE[0], INPUT_SHAPE[1])
frame = Input(shape=input_shape)
cv1 = Convolution2D(32, kernel_size=(8, 8), strides=4, activation='relu',
                    data_format='channels_first')(frame)
# Second conv layer assumed to follow the standard DQN layout (the original
# snippet is cut off mid-call here).
cv2 = Convolution2D(64, kernel_size=(4, 4), strides=2, activation='relu',
                    data_format='channels_first')(cv1)
model.add(keras.layers.Dense(256, activation="relu"))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(128, activation="relu"))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(7, activation="relu"))
model.compile(loss="mean_squared_error", optimizer="adam",
              metrics=["accuracy"])

# Naive data training.
num_trials = 200000
min_score = 20
max_steps = 2000
game = tetris.Tetris(0, False)
training_x = []
training_y = []
scores = []
for trial in range(num_trials):
    x = []
    y = []
    game.reset(np.random.randint(2000000000))
    for _ in range(max_steps):
        # Sample a random binary value for each of the 7 inputs.
        inputs = []
        for _ in range(7):
            inputs += [np.random.randint(2)]
        x.append(game.get_state())
        y.append(inputs)
        fail = game.update_state(tuple(inputs))
import tetris
import numpy as np
import tensorflow as tf

games = 1000
explore_rate = 1        # how likely the agent is to take a random action
explore_decay = 0.999   # likelihood decays every game
actor_learning_rate = 0.003    # tunable parameters
critic_learning_rate = 0.005
discount_factor = 0.99

env = tetris.Tetris(False)
state_size = env.observation_space.shape[0]
action_size = env.action_space.n

actor = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, input_shape=(state_size,), activation="relu"),
    tf.keras.layers.Dense(64, activation="relu"),
    # softmax outputs a probability distribution over actions
    tf.keras.layers.Dense(action_size, activation="softmax")
])
actor_optimizer = tf.keras.optimizers.Adam(learning_rate=actor_learning_rate)

critic = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, input_shape=(state_size,), activation="relu"),
    tf.keras.layers.Dense(1)
])
critic_optimizer = tf.keras.optimizers.Adam(learning_rate=critic_learning_rate)
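# The snippet above only builds the networks; the update step itself is not
# shown. A minimal one-step advantage actor-critic sketch using the models
# and optimizers defined above (train_step and its arguments are assumptions,
# not part of the original):
def train_step(state, action, reward, next_state, done):
    state = tf.convert_to_tensor([state], dtype=tf.float32)
    next_state = tf.convert_to_tensor([next_state], dtype=tf.float32)
    # One-step TD target; computed outside the tapes so no gradient flows
    # through it.
    target = reward + discount_factor * critic(next_state)[0, 0] * (1.0 - float(done))
    with tf.GradientTape() as actor_tape, tf.GradientTape() as critic_tape:
        value = critic(state)[0, 0]
        advantage = tf.stop_gradient(target - value)
        # Policy gradient: raise the log-probability of the taken action,
        # scaled by the advantage.
        log_prob = tf.math.log(actor(state)[0, action] + 1e-8)
        actor_loss = -log_prob * advantage
        # Value regression toward the TD target.
        critic_loss = tf.square(target - value)
    actor_optimizer.apply_gradients(zip(
        actor_tape.gradient(actor_loss, actor.trainable_variables),
        actor.trainable_variables))
    critic_optimizer.apply_gradients(zip(
        critic_tape.gradient(critic_loss, critic.trainable_variables),
        critic.trainable_variables))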
                # self.print_seq(seq)
                self.request.send(self.gen_seq(seq))
                game.InputPlacement(*GetStrat(model, game), False)
                seq = game.GetPlannedSequence()
                # self.print_seq(seq)
                self.request.send(self.gen_seq(seq))
        except ConnectionResetError:
            self.request.close()
            break
        except ValueError:
            pass


if __name__ == "__main__":
    with torch.no_grad():
        c = Configs()
        model = Model(c.channels, c.blocks).to(device)
        model_path = (os.path.join(os.path.dirname(sys.argv[0]), 'models/model.pth')
                      if len(sys.argv) <= 1 else sys.argv[1])
        if model_path[-3:] == 'pkl':
            model.load_state_dict(torch.load(model_path)[0].state_dict())
        else:
            model.load_state_dict(torch.load(model_path))
        model.eval()
        # load GPU first to reduce lag
        GetStrat(model, tetris.Tetris())
        print('Ready')
        HOST, PORT = 'localhost', 3456
        with socketserver.TCPServer((HOST, PORT), GameConn) as server:
            try:
                server.serve_forever()
            except KeyboardInterrupt:
                server.shutdown()
import tetris
import terminalgui
import os
from pynput import keyboard
import time

cetris = tetris.Tetris()
gui = terminalgui.TerminalGui()
speed = 0.5


def on_press(key):
    global speed
    if key == keyboard.Key.right:
        cetris.move('right')
    if key == keyboard.Key.left:
        cetris.move('left')
    if key == keyboard.Key.up:
        cetris.rotate('cw')
    if key == keyboard.Key.down:
        speed = 0.05
    if key == keyboard.Key.space:
        cetris.drop()
    if key == keyboard.KeyCode(char='c'):
        cetris.hold()
        holdedBlock = tetris.Block(cetris.holdedBlock)
        gui.render(cetris.map, holdedBlock.block)


def on_release(key):
    global speed
    # Assumed completion (the original is cut off here): releasing the down
    # key restores the normal fall speed set in on_press.
    if key == keyboard.Key.down:
        speed = 0.5
def start(self, kind):
    if kind == 6:
        self.tetris = tetris.Tetris(self.screen)
    else:
        # Look up the kind-specific class (e.g. tetris.Tetris1) by name.
        self.tetris = getattr(tetris, "Tetris" + str(kind))(self.screen)
APP_NAME = "tetris_cz19_multiplayer" import sys # We would like to import modules that are added to his folder. sys.path.append("/apps/" + APP_NAME) import menu import tetris role = "" hostsettings = "survivor" tetrismenu = menu.TetrisMenu() mode = tetrismenu.main() if mode == "multiplayer": role = tetrismenu.multiplayer() if role == "host": hostsettings = tetrismenu.hostsettings() print(mode, role) tetrisgame = tetris.Tetris(mode, role, hostsettings) tetrisgame.start()
            pass
            # print(print_tetromino(env.generative_model.current_tetromino))
            # print(print_board_to_string(after_state))
            env.make_step(after_state)
            # if visualize and not env.current_state.terminal_state:
            #     env.print_board_to_string(env.current_state, clear_the_output, sleep)
        print(env.cleared_lines)
        rewards[i] = env.cleared_lines
    return rewards


num_runs = 50
start = time.time()
env = tetris.Tetris(num_columns=10, num_rows=10)

print("Equal weights policy")
agent = ConstantAgent(policy_weights=np.ones(8, dtype=np.float64))
ew_rewards = evaluate(env, agent, num_runs)

print("RANDOM policy")
agent = ConstantAgent(policy_weights=np.random.normal(0, 1, 8))
random_rewards = evaluate(env, agent, num_runs, visualize=False,
                          clear_the_output=False, sleep=0)

print(
import tensorflow as tf
import numpy as np
import random
import cv2
from collections import deque
import pygame
from pygame.locals import *
import tetris

game = tetris.Tetris()

ACTIONS = 6              # number of valid actions
GAMMA = 0.99             # decay rate of past observations
OBSERVE = 50.            # timesteps to observe before training
EXPLORE = 1.             # frames over which to anneal epsilon
FINAL_EPSILON = 0.1      # final value of epsilon
INITIAL_EPSILON = 1.0    # starting value of epsilon
REPLAY_MEMORY = 590000   # number of previous transitions to remember
BATCH = 32               # size of minibatch
FRAME_PER_ACTION = 1
K = 1


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.01)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0.01, shape=shape)
    return tf.Variable(initial)
def __init__(self, seed: int, tpow=1):
    self.args = (0, False)
    self.tpow = tpow
    self.obs = np.zeros(kTensorDim, dtype=np.uint8)
    self.env = tetris.Tetris(seed, *self.args)
    self.reset(False)
def train():
    csv_file, csv_writer = initialize_writer()
    model = compile_model()
    if constants.ENABLE_CAPTURE:
        image_counter = 0
        print("CAPTURE ENABLED")
    if constants.REPRESENTATION_COMPLEX:
        print("REPRESENTATION_COMPLEX")
    else:
        print("REPRESENTATION_SIMPLE")
    for epoch in range(constants.TRAINING_EPOCHS):
        # Init the previous prediction and a new board.
        prev_prediction = None
        board = tetris.Tetris(constants.HORZBLOCKS, constants.VERTBLOCKS)
        final_score = 0
        # Generate up to 10000 next states; the model will probably die
        # before that.
        for block_number in range(10000):
            # board.run() returns False if the state is invalid (game over).
            states = board.run()
            if not states:
                final_score = prev_score
                print("Training epoch #:{}\tFinal score :\t\t{}".format(
                    epoch, final_score))
                break
            # For each candidate state, predict the expected score.
            predictions = []
            for state in states:
                p = model.predict(state["formatted_representation"])
                predictions.append(p)
            if random.random() < find_exploration(epoch):
                # Explore: choose a random next state.
                choice = random.choice(states)
                board.setState(choice)
                max_value = model.predict(
                    choice['formatted_representation'])[0]
                explored = True
            else:
                # Not exploring: choose the state with the best expected reward.
                explored = False
                max_value, max_index = find_max_index(predictions)
                board.setState(states[max_index[0]])
            board.draw_game()
            if (constants.ENABLE_CAPTURE and epoch > 2980
                    and image_counter <= 2000):
                image_counter += 1
                pygame.image.save(
                    board.screen,
                    "outputs/screenshots/" + str(image_counter) + ".png")
            '''
            Back-propagate the new prediction:
                V(St) = V(St) + alpha * [Rt+1 + gamma * V(St+1) - V(St)]
            which can be rewritten, if alpha = 1, as:
                V(St) = Rt+1 + gamma * V(St+1)
            where:
                V(St):   prediction of the previous state    : prev_prediction
                alpha:   step-size parameter / learning rate : constants.Q_LEARNING_RATE
                Rt+1:    reward of the current state (score) : board.score - prev_score
                gamma:   discount rate                       : constants.DISCOUNT_RATE
                V(St+1): prediction of the current state     : max_value
            '''
            # Only able to back-propagate when t > 0 (after one state).
            if prev_prediction:
                value = ((1 - constants.Q_LEARNING_RATE) * prev_prediction
                         + constants.Q_LEARNING_RATE * (
                             board.score - prev_score
                             + constants.DISCOUNT_RATE * max_value))
                model.fit(x=prev_input, y=value, verbose=0)
            # Update the previous prediction, score and input.
            prev_prediction = max_value
            prev_score = board.score
            if explored:
                prev_input = choice['formatted_representation']
            else:
                prev_input = states[max_index[0]]["formatted_representation"]
        # The model has chosen poorly by dying (DIE MODEL!):
        # back-propagate a negative reward.
        model.fit(x=prev_input, y=[-10], verbose=0)
        csv_writer.writerow([final_score])
    csv_file.close()
    print("Closing")
    raise SystemExit
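# A worked instance of the update above: with Q_LEARNING_RATE = 0.1,
# DISCOUNT_RATE = 0.9, prev_prediction V(St) = 5.0, score delta Rt+1 = 2.0
# and max_value V(St+1) = 6.0, the fitted target is
#   value = 0.9 * 5.0 + 0.1 * (2.0 + 0.9 * 6.0) = 4.5 + 0.74 = 5.24.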