def replay_button_interaction(self):
    mouse_pos = pygame.mouse.get_pos()
    if (self.replay_button.coords[0] < mouse_pos[0] < self.replay_button.coords[0] + self.replay_button.dimensions[0]
            and self.replay_button.coords[1] < mouse_pos[1] < self.replay_button.coords[1] + self.replay_button.dimensions[1]):
        self.replay_button.button_light(self.screen, (125, -3))
        mouse_click = pygame.mouse.get_pressed()
        if mouse_click[0] == 1:
            questions = ['Sensor size', 'Replay_data path']
            input_box = InputBoxMenu(self.screen, len(questions),
                                     (self.replay_button.coords[0] + 25, self.replay_button.coords[1] + 75),
                                     questions, [int, 'path + csv'])
            input_box.help()
            inputs = input_box.ask_boxes()
            check = input_box.check_inputbox_input()
            error_message_pos = [20, 20]
            while check in input_box.errors:
                self.display_error_message('Error ' + check, position=tuple(error_message_pos), sleep_time=0)
                error_message_pos[1] += 40
                inputs = input_box.ask_boxes()
                check = input_box.check_inputbox_input()
            replay = Replay(self.screen, self.screen_width, self.screen_height,
                            activations=self.activation_cbox.isChecked(),
                            traffic=self.traffic_cbox.isChecked(),
                            sensors=self.sensors_cbox.isChecked(),
                            distance_sensor=self.distance_sensor_cbox.isChecked(),
                            sensor_size=int(inputs[0]),
                            enabled_menu=True)
            replay.replay(inputs[1], enable_trajectory=True)
            quit()
    else:
        self.replay_button.draw_button(self.screen, (125, -3))
def __init__(self, task):
    # Hyper parameters
    self.learning_rate_actor = 1e-4
    self.learning_rate_critic = 1e-3
    self.gamma = 0.99
    self.tau = 0.001

    # Define net
    self.sess = tf.Session()
    self.task = task
    self.actor = ActorNet(self.sess, self.task.state_size, self.task.action_size, self.learning_rate_actor,
                          self.task.action_low, self.task.action_high, self.tau)
    self.critic = CriticNet(self.sess, self.task.state_size, self.task.action_size, self.learning_rate_critic, self.tau)

    # Define noise
    self.mu = 0
    self.theta = 0.15
    self.sigma = 0.20
    self.noise = OUNoise(self.task.action_size, self.mu, self.theta, self.sigma)

    # Define memory replay
    self.buffer_size = 1000000
    self.batch_size = 64
    self.memory = Replay(self.buffer_size, self.batch_size)

    # Score
    self.best_score = -np.inf
    self.best_reward = -np.inf
def post(self):
    upload_files = self.get_uploads('file')  # 'file' is file upload field in the form
    if not upload_files:
        self.redirect('/failed/nofile/')
        return
    blob_info = upload_files[0]
    key = blob_info.key()
    if blob_info.size > 1048576:
        blob_info.delete()
        self.redirect('/failed/sizeerror/%s' % blob_info.filename)
        return
    blob_reader = blobstore.BlobReader(key)
    magic = blob_reader.read(50)
    if magic[0:3] != "MPQ" or "StarCraft II replay" not in magic:
        blob_info.delete()
        self.redirect('/failed/typeerror/%s' % blob_info.filename)
        return
    replayid = counter_as_string('principal')
    increment('principal')
    m = md5()
    m.update(blob_reader.read(blob_info.size))
    replaymd5 = m.hexdigest()
    replay = Replay(replayid=replayid, replaymd5=replaymd5, blobinfo=str(key), ip=self.request.remote_addr)
    replay.put()
    self.redirect('/success/%s' % replayid)
def __init__(self, game, model, action_range, field, memory=None, memory_size=1000, nb_frames=None,
             nb_epoch=1000, batch_size=50, gamma=0.9, epsilon_range=[1., .01], epsilon_rate=0.99,
             reset_memory=False, observe=0, checkpoint=None):
    self.model = model
    self.game = game
    self.field = field
    self.memory_size = memory_size
    self.nb_frames = nb_frames
    self.nb_epoch = nb_epoch
    self.batch_size = batch_size
    self.gamma = gamma
    self.epsilon_range = epsilon_range
    self.epsilon_rate = epsilon_rate
    self.reset_memory = reset_memory
    self.observe = observe
    self.checkpoint = checkpoint
    self.action_range = action_range
    self.loss = 0
    self.score_last_games = []
    self.ma_score_list = []
    self.replay = Replay(self.field, self.memory_size, gamma=self.gamma)
def startReplay(self):
    self.unloadMap()
    self.replay = Replay()
    self.world = self.replay.loadWorld()
    self.world.application = self
    self.battle_controller = BattleController(self, self.world)
    self.ai = AI(self, self.world)
    self.createVisual()
    self.replay.loadCommands()
def __init__(self, replay_data, player_name):
    """
    Initializes an OnlineReplay instance.

    Unless you know what you're doing, don't call this method manually -
    this is intended to be called internally by OnlineReplay.from_map.
    """
    Replay.__init__(self, replay_data, player_name)
class BattleController(object):
    """Handles battle commands issued by the player, AI, or a script.
    Can also record them to a replay file for later playback."""

    def __init__(self, application, world):
        self.application = application
        self.world = world
        self.recording = False

    def startRecording(self):
        self.recording = True
        self.replay = Replay()
        self.replay.saveWorld(self.world)

    def stopRecording(self):
        if self.recording:
            self.recording = False
            self.replay.saveCommands()

    def executeCommand(self, command):
        if command.command == "run":
            self.run(self.world.findObjectByID(command.target))
        elif command.command == "executeAction":
            self.executeAction(command.action, self.world.findObjectByID(command.target))
        elif command.command == "endTurn":
            self.endTurn()

    def run(self, dest_tile):
        """Move the character to the target tile, checking for interrupts and triggers."""
        if self.recording:
            self.replay.addCommand(BattleCommand("run", dest_tile.ID))
        threat = self.world.getThreateningCharacter(self.world.current_character_turn.tile)
        if threat:
            threat.opportunityAttack(self.world.current_character_turn)
        else:
            route = self.application.pather.planMove(self.world.current_character_turn, dest_tile)
            #print "---"
            #for node in route.path:
            #    print node.coords
            self.world.current_character_turn.run(route)
        if not self.world.visual and self.world.current_character_turn:
            self.world.createMovementGrid(self.world.current_character_turn)

    def executeAction(self, action, target):
        if action.AP_cost > self.world.current_character_turn.cur_AP:
            return
        if self.recording:
            self.replay.addCommand(BattleCommand("executeAction", target.ID, action))
        #print "Action score:", self.application.ai.scoreAction(action, target)
        self.application.gui.combat_log.printMessage(
            self.world.current_character_turn.name + " used " + action.name + " on " + target.name + ".")
        action(self.application).execute(
            self.world.current_character_turn, target,
            self.world.getTargetsInArea(self.world.current_character_turn, target, action.targeting_rules))
        if not self.world.visual and self.world.current_character_turn:
            self.world.createMovementGrid(self.world.current_character_turn)

    def endTurn(self):
        if self.recording:
            self.replay.addCommand(BattleCommand("endTurn"))
        self.world.current_character_turn.endTurn()
def __init__(self, config, scene):
    self.vrep_path = config.vrep_path
    self.viz = config.visualization
    self.autolaunch = config.autolaunch
    self.port = config.api_port
    self.clientID = None
    self.scene = scene
    self.dt = config.dt
    self.replay = Replay(config.max_buffer, config.batch_size)
    self.batch_size = config.batch_size
class TestReplay(unittest.TestCase):

    def setUp(self):
        '''Create class instance and Gym environment instance'''
        self.memory = Replay(4)
        self.env = gym.make('CartPole-v0')

    def test_burn_memory(self):
        '''Test to check burn_memory functionality'''
        self.memory.burn_memory(self.env, 2)
        self.assertEqual(len(self.memory.store), 2)

    def test_replace(self):
        '''Test to check replacement of old transition tuples after crossing capacity'''
        self.memory.burn_memory(self.env, 2)
        state = self.env.reset()
        for _ in range(4):
            random_action = self.env.action_space.sample()
            next_state, reward, done, _ = self.env.step(random_action)
            self.memory.add_to_memory((next_state, reward, state, done))
            if done:
                state = self.env.reset()
            else:
                state = next_state
        self.assertEqual(len(self.memory.store), self.memory.capacity)

    def test_sample(self):
        '''Test to check sampling function of replay memory'''
        self.memory.burn_memory(self.env, 3)
        batch = self.memory.sample_from_memory(2)
        self.assertEqual(len(batch), 2)
def read_mulligans(deck_name):
    for f in file_manager.find_deck_games(deck_name):
        if file_manager.unzip_file(f):
            replay = Replay(file_manager.replay_file)
            l = []
            lz = []
            kept, mulliganed, drawn = replay.get_cards_kept_by_player()
            for i in kept:
                l.append(database.get_card_name(i))
            print('Kept: ' + ', '.join(l))
            for i in mulliganed:
                lz.append(database.get_card_name(i))
            print('Mulliganed: ' + ', '.join(lz))
def __init__(self, replay_data, player_name, enabled_mods):
    """
    Initializes a LocalReplay instance.

    Unless you know what you're doing, don't call this method manually -
    this is intended to be called internally by LocalReplay.from_path.

    Args:
        List replay_data: A list of osrparse.ReplayEvent objects, containing
            x, y, time_since_previous_action, and keys_pressed.
        String player_name: An identifier marking the player that did the replay.
            Name or user id are common.
        Integer enabled_mods: A base10 representation of the enabled mods on the replay.
    """
    Replay.__init__(self, replay_data, player_name, enabled_mods)
def get(self, resource):
    query = Replay.all()
    query.filter('replayid =', resource)
    results = query.fetch(1)
    if results:
        num_results = len(results)
        result = results[0]
        blob_info = blobstore.BlobInfo.get(result.blobinfo)
        original_filename = blob_info.filename
        filesize = blob_info.size
        dl_count = get_count(resource)
    else:
        num_results = 0
        original_filename = ""
        filesize = ""
        dl_count = 0
    upload_url = blobstore.create_upload_url('/upload')
    path = os.path.join(os.path.dirname(__file__), 'info.html')
    self.response.headers['Cache-Control'] = 'no-cache'
    self.response.headers['Pragma'] = 'no-cache'
    template_values = {
        'upload_url': upload_url,
        'counter': counter_as_string('principal'),
        'resource': resource,
        'num_results': num_results,
        'original_filename': original_filename,
        'filesize': filesize,
        'dl_count': dl_count,
    }
    self.response.out.write(template.render(path, template_values))
def get(self, resource, extension=".SC2Replay"):
    if resource[-10:] in (".SC2Replay", ".sc2replay"):
        resource = resource[0:-10]
    query = Replay.all()
    query.filter('replayid =', resource)
    results = query.fetch(1)
    if results:
        result = results[0]
        blob_info = blobstore.BlobInfo.get(result.blobinfo)
        if blob_info:
            upload_url = blobstore.create_upload_url('/upload')
            path = os.path.join(os.path.dirname(__file__), 'download.html')
            self.response.headers['Cache-Control'] = 'no-cache'
            self.response.headers['Pragma'] = 'no-cache'
            baseurl = urlparse(self.request.url).netloc
            if "sc2share.com" in baseurl:
                baseurl = "sc2share.com"
            template_values = {
                'download_filename': blob_info.filename.encode("utf-8"),
                'download_url': 'd/%s/%s' % (resource, quote(blob_info.filename.encode("utf-8"))),
                'baseurl': baseurl,
            }
            self.response.out.write(template.render(path, template_values))
            return
        else:
            reason = 'nosuchfile'
    else:
        reason = 'nosuchfile'
    upload_url = blobstore.create_upload_url('/upload')
    path = os.path.join(os.path.dirname(__file__), 'nofetch.html')
    self.response.headers['Cache-Control'] = 'no-cache'
    self.response.headers['Pragma'] = 'no-cache'
    failure_reasons = {}
    failure_reasons['pt'] = {
        'nosuchfile': 'O arquivo pedido não existe. Pode ser que ele nunca tenha existido, pode ser que ele tenha sido apagado, e pode ser que algo catastrófico tenha acontecido. Difícil dizer o que foi.'
    }
    failure_reasons['en'] = {
        'nosuchfile': 'The requested file does not exist. Maybe it never existed, maybe it has been deleted, maybe something catastrophic happened. In any case, we apologize.'
    }
    template_values = {
        'upload_url': upload_url,
        'errormsg': failure_reasons['en'][reason]
    }
    self.response.out.write(template.render(path, template_values))
class Core(object):

    def __init__(self, config, scene):
        self.vrep_path = config.vrep_path
        self.viz = config.visualization
        self.autolaunch = config.autolaunch
        self.port = config.api_port
        self.clientID = None
        self.scene = scene
        self.dt = config.dt
        self.replay = Replay(config.max_buffer, config.batch_size)
        self.batch_size = config.batch_size

    def vrep_launch(self):
        if self.autolaunch:
            if self.viz:
                vrep_exec = self.vrep_path + '/vrep.sh '
                t_val = 5.0
            else:
                vrep_exec = self.vrep_path + '/vrep.sh -h '
                t_val = 1.0
            synch_mode_cmd = '-gREMOTEAPISERVERSERVICE_' + str(self.port) + '_FALSE_TRUE '
            subprocess.call(vrep_exec + synch_mode_cmd + self.scene + ' &', shell=True)
            time.sleep(t_val)
        self.clientID = vrep.simxStart('127.0.0.1', self.port, True, True, 5000, 5)

    def vrep_start(self):
        vrep.simxStartSimulation(self.clientID, vrep.simx_opmode_blocking)
        vrep.simxSynchronous(self.clientID, True)

    def vrep_reset(self):
        vrep.simxStopSimulation(self.clientID, vrep.simx_opmode_oneshot)
        time.sleep(0.1)

    def pause(self):
        vrep.simxPauseSimulation(self.clientID, vrep.simx_opmode_oneshot)

    def close(self):
        self.vrep_reset()
        while vrep.simxGetConnectionId(self.clientID) != -1:
            vrep.simxSynchronousTrigger(self.clientID)
        vrep.simxFinish(self.clientID)
        self.replay.clear()
def get_mulligan_stats(deck_name):
    cards_kept = []
    cards_drawn = []
    kept_and_result = {}
    percentages = {}
    outof = {}
    for f in file_manager.find_deck_games(deck_name):
        if file_manager.unzip_file(f):
            replay = Replay(file_manager.replay_file)
            k, m, d = replay.get_cards_kept_by_player()
            if k is not None and m is not None and d is not None:
                r = replay.game_won()
                cards_kept.extend(k)
                cards_drawn.extend(d)
                # print(str(len(k) + len(m)))
                if len(k) <= 2:
                    # if True:
                    for card in k:
                        if card not in kept_and_result:
                            kept_and_result[card] = None
                        if kept_and_result[card] is None:
                            kept_and_result[card] = [0, 0]
                        if r == True:
                            kept_and_result[card][0] += 1
                        kept_and_result[card][1] += 1
    cards_drawn_set = list(set(cards_drawn))
    for c in cards_drawn_set:
        times_drawn = float(cards_drawn.count(c))
        times_kept = float(cards_kept.count(c))
        outof.update({c: [int(times_kept), int(times_drawn)]})
        percent = round(times_kept / times_drawn, 3)
        percentages.update({c: percent})
    for card_id in percentages:
        # winrate = "Not defined"
        # if card_id in kept_and_result and kept_and_result[card_id][1] != 0:
        #     winrate = float(kept_and_result[card_id][0]/)
        extra_str = 'Not defined'
        if card_id in kept_and_result:
            kar_won = kept_and_result[card_id][0]
            kar_kept = kept_and_result[card_id][1]
            extra_str = str(kar_won) + "/" + str(kar_kept) + " " + \
                str(round((float(kar_won) / kar_kept) * 100)) + "%"
        print(str(database.get_card_name(card_id)) + ": " + str(percentages[card_id] * 100) + "% "
              + str(outof[card_id][0]) + "/" + str(outof[card_id][1]) + ", winrate: " + extra_str)
def wrapper(*args, **kwargs):
    map_id = args[0]
    user_id = args[1]
    lzma = Cacher.check_cache(map_id, user_id)
    if lzma:
        replay_data = osrparse.parse_replay(lzma, pure_lzma=True).play_data
        return Replay(replay_data, user_id)
    else:
        return function(*args, **kwargs)
def wrapper(*args, **kwargs):
    cacher = args[0]
    map_id = args[1]
    user_id = args[2]
    enabled_mods = args[4]
    lzma = cacher.check_cache(map_id, user_id)
    if lzma:
        replay_data = osrparse.parse_replay(lzma, pure_lzma=True).play_data
        return Replay(replay_data, user_id, enabled_mods)
    else:
        return function(*args, **kwargs)
def main(_):
    pp.pprint(flags.FLAGS.__flags)
    with tf.Session() as sess:
        data_loader = Data_loader(FLAGS.embedding_file, FLAGS.embedding_size)
        q_network = Q_network(sess, FLAGS.embedding_size, FLAGS.step_size, FLAGS.target_frequency,
                              FLAGS.hidden_units, FLAGS.final_units, FLAGS.greedy_ratio, data_loader)
        replay = Replay(q_network, FLAGS.minibatch_size, FLAGS.replay_size)
        model = DQL(FLAGS.budget, data_loader, q_network, replay)
        model.run()
def __init__(self, parameters):
    # Gym environment parameters
    self.env_name = parameters.environment_name
    self.env = gym.make(self.env_name)
    self.state_dim = self.env.observation_space.shape[0]
    self.action_dim = self.env.action_space.n

    # Training parameters
    self.discount = Training_parameters.discount
    self.train_episodes = parameters.train_episodes
    self.test_episodes = Training_parameters.test_episodes
    self.test_frequency = Training_parameters.test_frequency
    self.render_decision = parameters.render_decision
    self.render_frequency = Training_parameters.render_frequency

    # Replay memory parameters
    self.memory = Replay()
    self.memory.burn_memory(self.env)

    # Q-networks parameters
    self.Q_net = Network(self.state_dim, self.action_dim, Network_parameters.Q_net_var_scope, parameters.duel)
    self.target_Q_net = Network(self.state_dim, self.action_dim, Network_parameters.target_Q_net_var_scope, parameters.duel)
    self.update_target_frequency = Training_parameters.update_target_frequency
    self.double = parameters.double
def get(self, resource, extension):
    query = Replay.all()
    query.filter('replayid =', resource)
    results = query.fetch(1)
    if results:
        result = results[0]
        blob_info = blobstore.BlobInfo.get(result.blobinfo)
        increment(resource)
        self.send_blob(blob_info)
        return
def replay(self, args):
    Log.log_switch = False
    Replay.switch = True
    if args.lite:
        Replay.mode = 'LITE'
        print('* MODE : LITE *')
        if args.transactionid and args.date:
            print('Please specify only one type of data for replay')
            return
        elif args.transactionid:
            Replay().replay_execute(self.parser, transaction_id=args.transactionid)
        elif args.date:
            Replay().replay_execute(self.parser, start_time=args.date[0], end_time=args.date[1])
        else:
            Replay().replay_execute(self.parser)
    else:
        print('* MODE : REPLAY *')
        if args.transactionid and args.date:
            print('Please specify only one type of data for replay')
            return
        elif args.transactionid:
            Replay().replay_execute(self.parser, transaction_id=args.transactionid)
        elif args.date:
            Replay().replay_execute(self.parser, start_time=args.date[0], end_time=args.date[1])
        else:
            Replay().replay_execute(self.parser)
def __init__(
    self,
    batch_size=64,
    device='cpu',
    gamma=0.95,
    gradient_clip=0.0,
    loss_fn='L2',
):
    self.env = gym.make('CartPole-v0')
    self.input_size = self.env.observation_space.shape[0]
    self.num_actions = self.env.action_space.n
    self.device = device
    self.qnet = CartPolePolicy(self.input_size, self.num_actions, device)
    self.target_qnet = CartPolePolicy(self.input_size, self.num_actions, device)
    self.target_qnet.copy_params_(self.qnet)
    self.eps_sch = LinearEpsilonScheduler()
    self.optimizer = optim.Adam(self.qnet.parameters(), lr=1e-4)
    if gradient_clip > 0.0:
        for p in self.qnet.parameters():
            p.register_hook(lambda grad: torch.clamp(grad, min=-gradient_clip, max=gradient_clip))
    self.schema = DataSchema(
        names=["prev_state", "action", "reward", "state", "done"],
        shapes=[(self.input_size,), (1,), (1,), (self.input_size,), (1,)],
        dtypes=[np.int64, np.int64, np.float32, np.float32, np.float32],
    )
    self.replay = Replay(100000, self.schema)
    self.batch_size = batch_size
    self.gamma = gamma
    self.loss_fn = loss_fn
def __init__(self, bin_file_path):
    """
    Args:
        bin_file_path (string): File path containing preprocessed .bin replay files
    """
    self.game_states = []
    self.root_dir = bin_file_path
    for root, dirs, files in os.walk(bin_file_path):
        for name in files:
            if name.split('.')[-1] != "bin":
                continue
            with open(os.path.join(self.root_dir, name), 'rb') as f:
                file_content = f.read()
            _, states = Replay(file_content)
            for state in states:
                if state.players is None:
                    continue
                if len(state.players) != 6:
                    continue
                if state.state != State.Game:
                    continue
                # add default state, team red
                self.add_states(state, Team.Red)
                # add state flipped about x axis, team red
                self.add_states(du.flip_state(state, x_axis_flip=True, y_axis_flip=False), Team.Red)
                # add state flipped about y axis, team blue
                self.add_states(du.flip_state(state, x_axis_flip=False, y_axis_flip=True), Team.Blue)
                # add state flipped about x and y axis, team blue
                self.add_states(du.flip_state(state, x_axis_flip=True, y_axis_flip=True), Team.Blue)
    self.game_states = du.filter_states_3v3(game_states=self.game_states)
def play_one_game(self):
    replay = Replay()
    s = self.env.reset()
    count = 0
    while True:
        conv_s = np.reshape(s, [1, 84, 84, 4])
        p_g = self.nns["good"].predict(conv_s)
        p_n = self.nns["normal"].predict(conv_s)
        p_b = self.nns["bad"].predict(conv_s)
        p = 2 * p_g["pi"][0] + p_n["pi"][0] - p_b["pi"][0]
        p += np.ones_like(self.a)
        p /= np.sum(p)
        a = np.random.choice(self.a, p=p)
        s_, r, t, _ = self.env.step(a)
        replay.add(s, a)
        replay.score += r
        s = s_
        count += 1
        if count % 10 == 0:
            print(".", end="", flush=True)
        if t:
            print()
            break
    return replay
def __depth_first_search(self, start_path, inter_path):
    current_path = join(start_path, inter_path)
    dirs_and_files = listdir(current_path)
    dirs = []
    files = []
    for df in dirs_and_files:
        # print(current_path)
        if is_replay(join(current_path, df)):
            files.append(df)
        if isdir(join(current_path, df)):
            if 'Replays' == df:
                raise Exception('Replays folder is already formed')
            else:
                dirs.append(df)
    for i in range(len(dirs)):
        inter = join(inter_path, dirs[i])
        # print('recurse', inter)
        self.__depth_first_search(start_path, inter)
    key = ''
    # finished recursive steps, now we read the discovered replays
    for i in range(len(files)):
        src_file = join(start_path, inter_path, files[i])
        original = Replay(src_file)
        keys = self.__inspector.inspect(original)
        # go through each key
        for j in range(len(keys)):
            replay = copy_replay(original)
            key = keys[j]
            # place replays in proper folders
            if key in self.__folders.keys():
                self.__folders[key].append(replay)
            else:
                self.__folders[key] = []
                # series flag -1 means there is no replay with the same player names yet ...
                replay.series_flag = -1
                self.__folders[key].append(replay)
def __init__(self, bin_file_path):
    """
    Args:
        bin_file_path (string): File path containing preprocessed .bin replay files
    """
    self.game_states = []
    self.root_dir = bin_file_path
    for root, dirs, files in os.walk(bin_file_path):
        for name in files:
            if name.split('.')[-1] == "bin":
                with open(os.path.join(self.root_dir, name), 'rb') as f:
                    file_content = f.read()
                _, states = Replay(file_content)
                for state in states:
                    if state.players is not None and len(state.players) == 2:
                        # flip states so that opposing demonstrations are learned
                        # and there are more states to learn from
                        # add default state, team 0
                        self.game_states.append((state, 0))
                        # add state flipped about x axis, team 0
                        self.game_states.append((du.flip_state(state, flip_x=True, flip_y=False), 0))
                        # add state flipped about y axis, team 1
                        self.game_states.append((du.flip_state(state, flip_x=False, flip_y=True), 1))
                        # add state flipped about x and y axis, team 1
                        self.game_states.append((du.flip_state(state, flip_x=True, flip_y=True), 1))
    self.game_states = du.filter_states(game_states=self.game_states)
def main():
    xminmax = [0, 0]
    yminmax = [0, 0]
    bin_file_path = 'preprocessed'
    for root, dirs, files in os.walk(bin_file_path):
        for name in files:
            if name.split('.')[-1] == "bin":
                print(name)
                with open(os.path.join(bin_file_path, name), 'rb') as f:
                    file_content = f.read()
                _, states = Replay(file_content)
                for state in states:
                    if state.players is not None and len(state.players) == 2:
                        xmin = min(state.players[0].disc.x, state.players[1].disc.x)
                        xmax = max(state.players[0].disc.x, state.players[1].disc.x)
                        ymin = min(state.players[0].disc.y, state.players[1].disc.y)
                        ymax = max(state.players[0].disc.y, state.players[1].disc.y)
                        xminmax[0] = min(xmin, xminmax[0])
                        xminmax[1] = max(xmax, xminmax[1])
                        yminmax[0] = min(ymin, yminmax[0])
                        yminmax[1] = max(ymax, yminmax[1])
                print('x min max:', xminmax)
                print('y min max:', yminmax)
                print('---------------------')
def __init__(self, bin_file_path):
    """
    Args:
        bin_file_path (string): File path containing preprocessed .bin replay files
    """
    self.game_states = []
    self.root_dir = bin_file_path
    for root, dirs, files in os.walk(bin_file_path):
        for name in files:
            if name.split('.')[-1] == "bin":
                with open(os.path.join(self.root_dir, name), 'rb') as f:
                    file_content = f.read()
                _, states = Replay(file_content)
                for state in states:
                    if state.players is not None and len(state.players) == 2:
                        self.game_states.append((state, 0))
                        self.game_states.append((state, 1))
def _compare_two_replays(replay1, replay2):
    """
    Compares two Replays and returns their average distance
    and standard deviation of distances.
    """
    # get all coordinates in numpy arrays so that they're arranged like:
    # [ x_1 x_2 ... x_n
    #   y_1 y_2 ... y_n ]
    # indexed by columns first.
    data1 = replay1.as_list_with_timestamps()
    data2 = replay2.as_list_with_timestamps()

    # interpolate
    (data1, data2) = Replay.interpolate(data1, data2)

    # remove time from each tuple
    data1 = [d[1:] for d in data1]
    data2 = [d[1:] for d in data2]

    (mu, sigma) = Comparer._compute_data_similarity(data1, data2)
    return (mu, sigma)
def __init__(self, basepath):
    self.recinfo = Recinfo(basepath)
    self.position = ExtractPosition(self.recinfo)
    self.epochs = behavior_epochs(self.recinfo)
    self.artifact = findartifact(self.recinfo)
    self.makePrmPrb = makePrmPrb(self.recinfo)
    self.utils = SessionUtil(self.recinfo)
    self.spikes = spikes(self.recinfo)
    self.brainstates = SleepScore(self.recinfo)
    self.swa = Hswa(self.recinfo)
    self.theta = Theta(self.recinfo)
    self.spindle = Spindle(self.recinfo)
    self.gamma = Gamma(self.recinfo)
    self.ripple = Ripple(self.recinfo)
    self.placefield = pf(self.recinfo)
    self.replay = Replay(self.recinfo)
    self.decode = DecodeBehav(self.recinfo)
    self.localsleep = LocalSleep(self.recinfo)
    self.viewdata = SessView(self.recinfo)
    self.pbe = PBE(self.recinfo)
    self.eventpsth = event_event()
def handle_replay(node, seed, command, transfers, **kwargs):
    # Check if a valid command
    arguments = command.split(' ', 1)
    t_id = None
    try:
        t_id = arguments[1]
    except IndexError:
        return pretty_print('Invalid command - See example usage.')

    bundle = None
    t_id = t_id.strip()

    if not transfers:
        return pretty_print('Looks like you do not have any account history.')

    for transfer in transfers:
        id_as_string = str(transfer['short_transaction_id'])
        if id_as_string == t_id:
            bundle = transfer['bundle']
            break

    if bundle is None:
        return pretty_print(
            'Looks like there is no bundle associated with your specified short transaction id. Please try again'
        )

    pretty_print('Starting to replay your specified bundle. This might take a few seconds...', color='green')

    return Replay(
        node,
        seed,
        bundle,
        replay_callback=lambda message: pretty_print(message, color='blue'),
        **kwargs
    )
def save_data(replay_paths):
    # description = "%s" % (".".join(replay_paths))
    # hash = hashlib.md5(description.encode('utf-8')).hexdigest()
    # pickle_path = os.path.join('data', "%s.pickle")
    # if os.path.exists(pickle_path):
    #     print("%s already saved" % pickle_path)
    #     return
    expand_dataset = Dataset(sections=[], labels=[])
    conquer_dataset = Dataset(sections=[], labels=[])
    buckets = Buckets(expand=expand_dataset, conquer=conquer_dataset)

    for replay_path in replay_paths:
        print("Processing replay %s with kernels %d and %d" % (replay_path, expand_kernel_size, conquer_kernel_size))
        # pickle_path = os.path.join('data/', '%s.%d.%s' % (filename, kernel_size, 'pickle'))
        print("Loading replay %s" % replay_path)
        replay = Replay(replay_path)
        replay.load()
        print(replay)
        print("Combining data ... ")
        replay.combine_data()
        first_stage_limit = replay.find_sections_count_before_first_collision()

        # Expand Data prep
        print("Padding with %d ... " % expand_kernel_size)
        replay.prepare_padded_arrays(expand_kernel_size)
        print("Generating sections for cells with surrounding")
        sections, labels = replay.get_sections_and_labels(own=False)
        print("Collect expand phase. First %d moves." % first_stage_limit)
        expand_sections, expand_labels = sections[:first_stage_limit], labels[:first_stage_limit]
        print("Rotate each section")
        expand_sections, expand_labels = rotate_all_sections(expand_sections, expand_labels)
        buckets.expand.sections.append(expand_sections)
        buckets.expand.labels.append(expand_labels)

        # Conquer Data prep
        # print("Padding with %d ... " % conquer_kernel_size)
        # replay.prepare_padded_arrays(conquer_kernel_size)
        # print("Generating sections for OWN cells with surrounding")
        # sections, labels = replay.get_sections_and_labels(own=True)
        # print("Collect conquer phase. Last %d moves." % (len(sections) - first_stage_limit))
        # conquer_sections, conquer_labels = sections[first_stage_limit:], labels[first_stage_limit:]
        # print("Rotate each section")
        # conquer_sections, conquer_labels = rotate_all_sections(conquer_sections, conquer_labels)
        # buckets.conquer.sections.append(conquer_sections)
        # buckets.conquer.labels.append(conquer_labels)

    # Expand
    expand_dataset = Dataset(
        sections=np.concatenate(buckets.expand.sections, axis=0),
        labels=np.concatenate(buckets.expand.labels, axis=0)
    )
    train_data, test_data, train_labels, test_labels = train_test_split(
        expand_dataset.sections, expand_dataset.labels, train_size=.8)
    print("Equalizing test data and labels")
    # We want testing data to have equal amount of different classes
    # Otherwise accuracy can be spoiled
    test_data, test_labels = equalized_sections(test_data, test_labels)
    print("%d of expand training data, %d of expand testing data" % (len(train_data), len(test_data)))
    expand_data = {
        'train_data': train_data,
        'train_labels': train_labels,
        'test_data': test_data,
        'test_labels': test_labels,
        'kernel_size': expand_kernel_size
    }

    # Conquer
    # conquer_dataset = Dataset(
    #     sections=np.concatenate(buckets.conquer.sections, axis=0),
    #     labels=np.concatenate(buckets.conquer.labels, axis=0)
    # )
    # train_data, test_data, train_labels, test_labels = train_test_split(
    #     conquer_dataset.sections, conquer_dataset.labels, train_size=.8)
    # print("Equalizing train data and labels")
    # train_data, train_labels = equalized_sections(train_data, train_labels)
    # print("Equalizing test data and labels")
    # test_data, test_labels = equalized_sections(test_data, test_labels)
    # print("%d of conquer training data, %d of conquer testing data" % (len(train_data), len(test_data)))
    # conquer_data = {
    #     'train_data': train_data,
    #     'train_labels': train_labels,
    #     'test_data': test_data,
    #     'test_labels': test_labels,
    #     'kernel_size': conquer_kernel_size
    # }
    conquer_data = None

    data = {
        'expand_data': expand_data,
        'conquer_data': conquer_data
    }
    pickle_path = 'data/data.pickle'
    with open(pickle_path, 'wb') as f:
        print("Saving to %s" % pickle_path)
        pickle.dump(data, f)
    return data
class DQN_Agent:

    def __init__(self, parameters):
        # Gym environment parameters
        self.env_name = parameters.environment_name
        self.env = gym.make(self.env_name)
        self.state_dim = self.env.observation_space.shape[0]
        self.action_dim = self.env.action_space.n

        # Training parameters
        self.discount = Training_parameters.discount
        self.train_episodes = parameters.train_episodes
        self.test_episodes = Training_parameters.test_episodes
        self.test_frequency = Training_parameters.test_frequency
        self.render_decision = parameters.render_decision
        self.render_frequency = Training_parameters.render_frequency

        # Replay memory parameters
        self.memory = Replay()
        self.memory.burn_memory(self.env)

        # Q-networks parameters
        self.Q_net = Network(self.state_dim, self.action_dim, Network_parameters.Q_net_var_scope, parameters.duel)
        self.target_Q_net = Network(self.state_dim, self.action_dim, Network_parameters.target_Q_net_var_scope, parameters.duel)
        self.update_target_frequency = Training_parameters.update_target_frequency
        self.double = parameters.double

    def epsilon_greedy_policy(self, q_values, epsilon=0.05):
        """
        Returns action as per epsilon-greedy policy

        :param q_values: Q-values for the possible actions
        :param epsilon: Parameter to define exploratory action probability
        :return: action: Action selected by agent as per epsilon-greedy policy
        """
        if random.random() < epsilon:
            return self.env.action_space.sample()
        else:
            return self.greedy_policy(q_values)

    def greedy_policy(self, q_values):
        '''
        Returns action as per greedy policy

        Parameters:
            q_values: Q-values for the possible actions

        Output:
            Action selected by agent as per greedy policy corresponding to maximum Q-value
        '''
        return np.argmax(q_values)

    def train(self):
        performance = []
        # Setup video rendering for Gym environment
        if self.render_decision:
            f = lambda X: X % self.render_frequency == 0
            self.env.render()
            video_save_path = f'{Directories.output}Video_DQN_{self.env_name}/'
            self.env = gym.wrappers.Monitor(self.env, video_save_path, video_callable=f, force=True)
            self.env.reset()
        for episode in range(self.train_episodes):
            state = self.env.reset()
            done = False
            while not done:
                # Perform an action in environment and add to replay memory
                Q_values = self.Q_net.predict(state.reshape(-1, self.state_dim))
                # Anneal exploration probability epsilon
                epsilon = Training_parameters.inital_eps - (
                    Training_parameters.scale_eps
                    * (Training_parameters.inital_eps - Training_parameters.final_eps)
                    * (episode / self.train_episodes))
                action = self.epsilon_greedy_policy(Q_values, epsilon)
                next_state, reward, done, _ = self.env.step(action)
                self.memory.add_to_memory((next_state, reward, state, action, done))

                # Sample batch from memory and train model
                batch = self.memory.sample_from_memory()
                batch_next_state, batch_reward, batch_state, batch_action, check_if_terminal = map(np.array, zip(*batch))
                check_if_not_terminal = np.invert(check_if_terminal)
                if self.double:
                    Q_next = self.Q_net.predict(batch_next_state.reshape(-1, self.state_dim))
                    next_actions = np.argmax(Q_next, axis=1)
                    next_actions_indices = np.vstack([np.arange(Network_parameters.batch_size), next_actions]).T
                    target_Q_next_all_actions = self.target_Q_net.predict(batch_next_state.reshape(-1, self.state_dim))
                    targets = batch_reward + check_if_not_terminal * self.discount * tf.gather_nd(target_Q_next_all_actions, next_actions_indices)
                else:
                    target_Q_next = self.target_Q_net.predict(batch_next_state.reshape(-1, self.state_dim))
                    targets = batch_reward + check_if_not_terminal * self.discount * np.max(target_Q_next, axis=1)
                actions_selected = np.vstack([np.arange(Network_parameters.batch_size), batch_action]).T
                self.Q_net.fit(batch_state, targets, actions_selected)

            # Update target model as per update frequency
            if episode % self.update_target_frequency == 0:
                self.Q_net.update_target_model(self.target_Q_net)

            # Test policy as per test frequency
            if episode % self.test_frequency == 0:
                test_rewards, test_std = self.test()
                print(f'After {episode} episodes, mean test reward is {test_rewards} with std of {test_std}')
                performance.append((test_rewards, test_std))
        return performance

    def test(self):
        rewards = []
        for test_episode in range(self.test_episodes):
            curr_episode_reward = 0
            state = self.env.reset()
            done = False
            while not done:
                action = self.greedy_policy(self.Q_net.predict(state.reshape(1, -1)))
                next_state, reward, done, _ = self.env.step(action)
                curr_episode_reward += reward
                if done:
                    state = self.env.reset()
                else:
                    state = next_state
            rewards.append(curr_episode_reward)
        rewards = np.array(rewards)
        return np.mean(rewards), np.std(rewards)
def print_results(deck_name):
    for f in file_manager.find_deck_games(deck_name):
        if file_manager.unzip_file(f):
            replay = Replay(file_manager.replay_file)
            print(replay.game_won())