def launch(args, defaults, description):
    """
    Execute a complete training run.
    """
    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = Snake(display=True)
    num_actions = ale.nactions

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                         defaults.RESIZED_HEIGHT,
                                         num_actions,
                                         parameters.phi_length,
                                         parameters.discount,
                                         parameters.learning_rate,
                                         parameters.rms_decay,
                                         parameters.rms_epsilon,
                                         parameters.momentum,
                                         parameters.clip_delta,
                                         parameters.freeze_interval,
                                         parameters.batch_size,
                                         parameters.network_type,
                                         parameters.update_rule,
                                         parameters.batch_accumulator,
                                         rng)
    else:
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  rng)

    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng)
    experiment.run()

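# For reference, the launcher above treats the game duck-typed: it only reads
# `nactions` here and lets ALEExperiment drive the rest. A minimal sketch of an
# environment with that surface (illustrative names only; the exact methods
# required are whatever ale_experiment.ALEExperiment actually calls):
class MinimalGame(object):
    nactions = 4  # size of the discrete action space

    def __init__(self, display=False):
        self.display = display

    def reset_game(self):
        pass  # begin a new episode

    def act(self, action):
        return 0.0  # reward earned by `action` this step
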
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """
    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    # if parameters.rom.endswith('.bin'):
    #     rom = parameters.rom
    # else:
    #     rom = "%s.bin" % parameters.rom
    rom = parameters.rom

    core = parameters.core
    if core == 'snes':
        core = 'snes9x2010_libretro.so'
    elif core == 'atari':
        core = 'stella_libretro.so'
    else:
        raise ValueError("--core must be atari|snes")

    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)
    full_core_path = os.path.join(defaults.BASE_CORE_PATH, core)

    two_players = parameters.nn_file2 is not None

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = rle_python_interface.RLEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)
    if two_players:
        ale.setBool('two_players', True)
    ale.loadROM(full_rom_path, full_core_path)

    num_actions = len(ale.getMinimalActionSet())

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                         defaults.RESIZED_HEIGHT,
                                         num_actions,
                                         parameters.phi_length,
                                         parameters.discount,
                                         parameters.learning_rate,
                                         parameters.rms_decay,
                                         parameters.rms_epsilon,
                                         parameters.momentum,
                                         parameters.clip_delta,
                                         parameters.freeze_interval,
                                         parameters.batch_size,
                                         parameters.network_type,
                                         parameters.update_rule,
                                         parameters.batch_accumulator,
                                         rng)
    else:
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    agent2 = None
    if two_players:
        if parameters.nn_file2 == 'default':
            network2 = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              num_actions,
                                              parameters.phi_length,
                                              parameters.discount,
                                              parameters.learning_rate,
                                              parameters.rms_decay,
                                              parameters.rms_epsilon,
                                              parameters.momentum,
                                              parameters.clip_delta,
                                              parameters.freeze_interval,
                                              parameters.batch_size,
                                              parameters.network_type,
                                              parameters.update_rule,
                                              parameters.batch_accumulator,
                                              rng)
        else:
            handle2 = open(parameters.nn_file2, 'r')
            network2 = cPickle.load(handle2)
        agent2 = ale_agent.NeuralAgent(network2,
                                       parameters.epsilon_start,
                                       parameters.epsilon_min,
                                       parameters.epsilon_decay,
                                       parameters.replay_memory_size,
                                       parameters.experiment_prefix,
                                       parameters.replay_start_size,
                                       parameters.update_frequency,
                                       rng,
                                       'b')

    agent = ale_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  rng)

    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng,
                                              agent2)
    experiment.run()

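# Hypothetical two-player invocation of the launcher above (flag names assumed
# from the parameter names): passing 'default' for the second network builds a
# fresh DeepQLearner for player B, while a .pkl path loads one; the 'b' suffix
# handed to player B's NeuralAgent keeps its logs and replay data separate
# from player A's:
#   train.py --core snes --rom mortal_kombat.sfc --nn-file2 default
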
def start_training(params):
    """
    Initialize ROM, game, agent, and network, and start a training run.
    """
    # Create a folder to hold results
    exp_pref = "../results/" + params.EXPERIMENT_PREFIX
    time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
    exp_dir = exp_pref + time_str + \
        "{}".format(params.LEARNING_RATE).replace(".", "p") + "_" \
        + "{}".format(params.DISCOUNT).replace(".", "p")

    try:
        os.stat(exp_dir)
    except OSError:
        os.makedirs(exp_dir)

    logger = logging.getLogger("DeepLogger")
    logger.setLevel(logging.INFO)

    # Logging file handler; rotate the file when it reaches 5 MB
    fh = RotatingFileHandler(exp_dir + "/log.log", maxBytes=5000000,
                             backupCount=100)
    fh.setLevel(logging.INFO)

    # Console handler
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)

    formatter = logging.Formatter('%(message)s')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)

    logger.addHandler(fh)
    # Console logging stays disabled so nohup doesn't produce a large log
    # file; logging to file is handled by the rotating handler above.
    # logger.addHandler(ch)

    log_params(logger, params)

    if params.DETERMINISTIC:
        rng = np.random.RandomState(12345)
    else:
        rng = np.random.RandomState()

    if params.CUDNN_DETERMINISTIC:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    # Init ALE
    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', 123)
    ale.setBool('display_screen', params.DISPLAY_SCREEN)
    ale.setFloat('repeat_action_probability',
                 params.REPEAT_ACTION_PROBABILITY)

    full_rom_path = os.path.join(params.ROM_PATH, params.ROM_NAME)
    ale.loadROM(full_rom_path)
    num_actions = len(ale.getMinimalActionSet())
    print "Legal actions: ", num_actions
    print ale.getMinimalActionSet()

    # Instantiate network
    logger.info("Setting up network...")
    # A network can be loaded either to continue training it or to watch it play
    if params.NETWORK_PICKLE_FILE is None:
        logger.info("Initializing a new random network...")
        network = q_network.DeepQLearner(params.RESIZED_WIDTH,
                                         params.RESIZED_HEIGHT,
                                         num_actions,
                                         params.PHI_LENGTH,
                                         params.DISCOUNT,
                                         params.LEARNING_RATE,
                                         params.RMS_DECAY,
                                         params.RMS_EPSILON,
                                         params.MOMENTUM,
                                         params.CLIP_DELTA,
                                         params.FREEZE_INTERVAL,
                                         params.BATCH_SIZE,
                                         params.NETWORK_TYPE,
                                         params.UPDATE_RULE,
                                         params.BATCH_ACCUMULATOR,
                                         rng)
    else:
        logger.info("Loading network instance from file...")
        handle = open(params.NETWORK_PICKLE_FILE, 'r')
        network = cPickle.load(handle)

    # Only used when extracting a random (untrained) network
    if params.RANDOM_NETWORK_PICKLE:
        import sys
        sys.setrecursionlimit(10000)
        result_net_file = open(params.EXPERIMENT_PREFIX + '.pkl', 'w')
        print "File opened"
        cPickle.dump(network, result_net_file, -1)
        print "Pickle dumped"
        result_net_file.close()
        sys.exit(0)

    # Instantiate agent
    logger.info("Setting up agent...")
    agent = ale_agent.NeuralAgent(network,
                                  params.EPSILON_START,
                                  params.EPSILON_MIN,
                                  params.EPSILON_DECAY,
                                  params.REPLAY_MEMORY_SIZE,
                                  exp_dir,
                                  params.REPLAY_START_SIZE,
                                  params.UPDATE_FREQUENCY,
                                  rng)

    # Instantiate experiment
    logger.info("Setting up experiment...")
    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              params.RESIZED_WIDTH,
                                              params.RESIZED_HEIGHT,
                                              params.RESIZE_METHOD,
                                              params.EPOCHS,
                                              params.STEPS_PER_EPOCH,
                                              params.STEPS_PER_TEST,
                                              params.FRAME_SKIP,
                                              params.DEATH_ENDS_EPISODE,
                                              params.MAX_START_NULLOPS,
                                              rng)

    # Run experiment
    logger.info("Running experiment...")
    experiment.run()

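# Worked example of the exp_dir scheme above (hypothetical values): with
# EXPERIMENT_PREFIX='breakout', LEARNING_RATE=0.0002 and DISCOUNT=0.95, a run
# started at 03-14 09:26 UTC writes into:
#   ../results/breakout_03-14-09-26_0p0002_0p95
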
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """
    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    # Dump parameters for replication
    time_str = time.strftime("%Y-%m-%d_%H-%M_", time.localtime())
    exp_dir = time_str + parameters.experiment_prefix
    exp_dir = os.path.join("results", exp_dir)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)
    parameter_file = open(os.path.join(exp_dir, 'parameter.txt'), 'w', 0)
    parameter_file.write(str(parameters))
    parameter_file.flush()
    parameter_file.close()

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)
    ale.loadROM(full_rom_path)

    avail_actions = ale.getMinimalActionSet()
    if parameters.train_all:
        num_actions = len(ale.getLegalActionSet())
    else:
        num_actions = len(avail_actions)
    print "avail_actions: " + str(avail_actions)
    print "num_actions: " + str(num_actions)

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                         defaults.RESIZED_HEIGHT,
                                         avail_actions,
                                         num_actions,
                                         parameters.phi_length,
                                         parameters.discount,
                                         parameters.learning_rate,
                                         parameters.rms_decay,
                                         parameters.rms_epsilon,
                                         parameters.momentum,
                                         parameters.clip_delta,
                                         parameters.freeze_interval,
                                         parameters.batch_size,
                                         parameters.network_type,
                                         parameters.update_rule,
                                         parameters.batch_accumulator,
                                         rng,
                                         parameters.train_all)
    else:
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  rng,
                                  exp_dir,
                                  parameters.train_all)

    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng)
    experiment.run()

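# Context for the train_all switch above: getLegalActionSet() always returns
# all 18 ALE actions, while getMinimalActionSet() is game-specific (e.g. 4
# actions for Breakout), so training on the full set buys game-independent
# action indexing at the cost of a larger output layer.
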
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """
    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)
    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    if parameters.nn_file is None:
        print 'building network...'
        network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                         defaults.RESIZED_HEIGHT,
                                         num_actions,
                                         parameters.phi_length,
                                         parameters.discount,
                                         parameters.learning_rate,
                                         parameters.rms_decay,
                                         parameters.rms_epsilon,
                                         parameters.momentum,
                                         parameters.clip_delta,
                                         parameters.freeze_interval,
                                         parameters.batch_size,
                                         parameters.network_type,
                                         parameters.update_rule,
                                         parameters.batch_accumulator,
                                         rng)
    else:
        print 'loading network...'
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    print 'building agent...'
    if parameters.aws_secret_key and parameters.aws_access_key \
            and parameters.s3_bucket:
        s3_utility = S3Utility(parameters.aws_access_key,
                               parameters.aws_secret_key,
                               parameters.s3_bucket)
    else:
        s3_utility = None

    agent = ale_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  rng,
                                  s3_utility)

    print 'building experiment...'
    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng)
    print 'running experiment...'
    experiment.run()

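# S3Utility is constructed above but defined elsewhere in the repo; a minimal
# sketch of the shape this launcher appears to assume (hypothetical stub, not
# the real class):
class S3UtilitySketch(object):
    def __init__(self, access_key, secret_key, bucket):
        self.access_key = access_key
        self.secret_key = secret_key
        self.bucket = bucket

    def upload(self, local_path, key):
        # e.g. push a pickled network snapshot to s3://<bucket>/<key>
        raise NotImplementedError("illustrative stub only")
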
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """
    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    # Optionally record raw game screens to disk
    if isinstance(parameters.record_screen_dir, str):
        if len(parameters.record_screen_dir):
            ale.setString('record_screen_dir', parameters.record_screen_dir)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                         defaults.RESIZED_HEIGHT,
                                         num_actions,
                                         parameters.phi_length,
                                         parameters.discount,
                                         parameters.learning_rate,
                                         parameters.rms_decay,
                                         parameters.rms_epsilon,
                                         parameters.momentum,
                                         parameters.clip_delta,
                                         parameters.freeze_interval,
                                         parameters.batch_size,
                                         parameters.network_type,
                                         parameters.update_rule,
                                         parameters.batch_accumulator,
                                         rng)
    else:
        with open(parameters.nn_file, 'r') as handle:
            network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  rng)

    # Get the coach: it has read/write access to the agent's databanks
    coach = None
    if parameters.nn_coach_file is not None:
        with open(parameters.nn_coach_file, 'r') as handle:
            coach_network = cPickle.load(handle)
        coach = ale_coach.NeuralCoach(coach_network,
                                      agent.get_training_dataset(),
                                      parameters.coach_epsilon,
                                      rng)

    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng,
                                              coach=coach)
    experiment.run()

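# Assumed contract for NeuralCoach above (ale_coach is not shown here): the
# coach shares the replay dataset returned by agent.get_training_dataset(),
# so transitions it writes are sampled by the agent's ordinary training
# updates alongside the agent's own experience.
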
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """
    time_str = time.strftime("_%m-%d-%H-%M_", time.localtime())
    logging.basicConfig(filename='log' + time_str + '.txt',
                        level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)
    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    ######################################################
    # Daniel: This is where I insert human-guided stuff. #
    ######################################################
    # Logic to deal with loading a separate network trained on human data.
    # Must also address mapping from human net (0,1,2,...) to ALE.
    # I know that, for Breakout, my {0,1,2} correspond to {NOOP,LEFT,RIGHT}.
    # But how should these get mapped to ALE actions? I know 0=noop, 1=fire.
    # Keep in mind that there's a SECOND mapping that happens after this!
    map_action_index = None
    human_net = None
    human_experience_replay = None

    if parameters.use_human_net:
        if rom == 'breakout' or rom == 'breakout.bin':
            # This maps the action indices from the net (0,1,2,...) into a
            # **second** mapping [0 1 3 4], which is game-independent, so the
            # main work is to set map_action_index.
            # Thus, 0 ==> 0 ==> 0 (NOOP)
            # Thus, 1 ==> 3 ==> 4 (LEFT)
            # Thus, 2 ==> 2 ==> 3 (RIGHT)
            # (The net doesn't use FIRE.)
            map_action_index = {0: 0, 1: 3, 2: 2}
        elif rom == 'space_invaders' or rom == 'space_invaders.bin':
            # Second mapping is [0 1 3 4 11 12]. E.g., 4 is FLEFT in my data,
            # and needs to be mapped to index 5 so it results in '12'.
            map_action_index = {0: 0, 1: 1, 2: 3, 3: 2, 4: 5, 5: 4}
        else:
            raise ValueError("rom={} doesn't have action mapping".format(rom))

        # Build the human net; its action count is len(map_action_index).
        human_net = human_q_net.HumanQNetwork(defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              len(map_action_index),
                                              parameters.phi_length,
                                              parameters.batch_size,
                                              parameters.network_type,
                                              parameters.human_net_path,
                                              map_action_index)

    if parameters.use_human_experience_replay:
        if rom == 'breakout' or rom == 'breakout.bin':
            human_experience_replay = np.load(
                parameters.human_experience_replay_path)
        else:
            raise ValueError("rom={} doesn't have xp replay".format(rom))

    ###########################
    # Daniel: Back to normal. #
    ###########################
    if parameters.nn_file is None:
        network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                         defaults.RESIZED_HEIGHT,
                                         num_actions,
                                         parameters.phi_length,
                                         parameters.discount,
                                         parameters.learning_rate,
                                         parameters.rms_decay,
                                         parameters.rms_epsilon,
                                         parameters.momentum,
                                         parameters.clip_delta,
                                         parameters.freeze_interval,
                                         parameters.batch_size,
                                         parameters.network_type,
                                         parameters.update_rule,
                                         parameters.batch_accumulator,
                                         rng)
    else:
        handle = open(parameters.nn_file, 'rb')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  rng,
                                  parameters.epochs,
                                  parameters.use_human_net,
                                  parameters.use_human_experience_replay,
                                  human_net,
                                  human_experience_replay)

    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng)
    experiment.run()

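# Worked example of the Breakout double mapping described above: the human net
# emits index 1; map_action_index sends 1 -> 3; indexing the game-independent
# set [0, 1, 3, 4] at position 3 yields ALE action 4 (LEFT).
breakout_map = {0: 0, 1: 3, 2: 2}
game_independent_set = [0, 1, 3, 4]
assert game_independent_set[breakout_map[1]] == 4  # net LEFT -> ALE LEFT
assert game_independent_set[breakout_map[2]] == 3  # net RIGHT -> ALE RIGHT
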
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """
    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)
    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    # Load the VAE auxiliary file
    with open(parameters.vae_aux_file, 'r') as f:
        aux_data = pickle.load(f)
    params = aux_data['params']

    with tf.device(None):  # "/gpu:0"
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        VAE = vae.vae(params)
        VAE._create_network_()

    try:
        sess.run(tf.global_variables_initializer())
    except AttributeError:
        # Older TensorFlow releases
        sess.run(tf.initialize_all_variables())

    saver = tf.train.Saver()
    chkpt = tf.train.get_checkpoint_state(parameters.vae_file)
    if chkpt and chkpt.model_checkpoint_path:
        saver.restore(sess, chkpt.model_checkpoint_path)
    else:
        print 'No checkpoint found'

    # Import the Theano-side modules only after TensorFlow is set up
    import theano
    import ale_experiment
    import ale_agent
    import q_network

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(VAE.z_size,
                                         1,
                                         num_actions,
                                         parameters.phi_length,
                                         parameters.discount,
                                         parameters.learning_rate,
                                         parameters.rms_decay,
                                         parameters.rms_epsilon,
                                         parameters.momentum,
                                         parameters.clip_delta,
                                         parameters.freeze_interval,
                                         parameters.batch_size,
                                         parameters.network_type,
                                         parameters.update_rule,
                                         parameters.batch_accumulator,
                                         rng)
    else:
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  rng)

    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              VAE.X_size[1],
                                              VAE.X_size[0],
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng,
                                              VAE,
                                              sess)
    experiment.run()

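# Note: tf.train.get_checkpoint_state() expects a checkpoint *directory* (one
# containing a `checkpoint` index file), so parameters.vae_file above should
# name the directory the VAE trainer saved into, e.g. (hypothetical path):
#   chkpt = tf.train.get_checkpoint_state('vae_runs/breakout_beta4_z32')
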
def launch(defaults):
    timeseries = import_timeseries.timeseries(defaults.trainfile,
                                              defaults.testfile)
    ale = ale_action.ale(timeseries, defaults.steps_per_epoch)

    # Manual smoke test (disabled): step the environment by hand and inspect
    # the state and resulting balance, e.g.:
    # ale.reset_game()
    # print(ale.getCurrentState())
    # print(ale.act(2))
    # print(ale.priceOrder)
    # print(ale.act(0))
    # print("balance: " + str(ale.stateToCat()[-1]))
    # print(ale.timeseries.train[ale.currentPosTime])
    # print(ale.act(3))

    nn_file = None
    if nn_file is None:
        network = q_network.DeepQLearner(defaults.WIDTH,
                                         defaults.HEIGHT,
                                         ale.getActionCount(),
                                         defaults.phi_length,  # num_frames: roughly the history length
                                         defaults.discount,
                                         defaults.learning_rate,
                                         defaults.rms_decay,
                                         defaults.rms_epsilon,
                                         defaults.momentum,
                                         defaults.CLIP_DELTA,
                                         defaults.FREEZE_INTERVAL,
                                         defaults.batch_size,
                                         defaults.update_rule,
                                         defaults.batch_accumulator,
                                         ale.getInputCount())
    else:
        handle = open(nn_file, 'r')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(network,
                                  defaults.epsilon_start,
                                  defaults.epsilon_min,
                                  defaults.epsilon_decay,
                                  defaults.replay_memory_size,
                                  defaults.experiment_prefix,
                                  defaults.replay_start_size,
                                  defaults.update_frequency)

    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              defaults.WIDTH,
                                              defaults.HEIGHT,
                                              defaults.epochs,
                                              defaults.steps_per_epoch,
                                              defaults.steps_per_test,
                                              defaults.death_ends_episode)
    experiment.run()

def launch(args, defaults, description):
    """
    Execute a complete training run.
    """
    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)
    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    # Load the VAE auxiliary file
    with open(parameters.vae_aux_file, 'r') as f:
        aux_data = pickle.load(f)
    params = aux_data['params']

    with tf.device(None):  # "/gpu:0"
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        VAE = vae.vae(params)
        VAE._create_network_()

    try:
        sess.run(tf.global_variables_initializer())
    except AttributeError:
        # Older TensorFlow releases
        sess.run(tf.initialize_all_variables())

    saver = tf.train.Saver()

    # Import the Theano-side modules only after TensorFlow is set up
    import theano
    import ale_experiment
    import ale_agent
    import q_network

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                         defaults.RESIZED_HEIGHT,
                                         num_actions,
                                         parameters.phi_length,
                                         parameters.discount,
                                         parameters.learning_rate,
                                         parameters.rms_decay,
                                         parameters.rms_epsilon,
                                         parameters.momentum,
                                         parameters.clip_delta,
                                         parameters.freeze_interval,
                                         parameters.batch_size,
                                         parameters.network_type,
                                         parameters.update_rule,
                                         parameters.batch_accumulator,
                                         rng)
    else:
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  rng)

    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng,
                                              VAE,
                                              sess,
                                              defaults.VAE_REQ_STEPS,
                                              defaults.VAE_STORAGE_SIZE)

    time_str = time.strftime("%m-%d-%H-%M", time.gmtime())
    vae_save_path = '%s/%s_beta%f_z%d' % (defaults.VAE_OUT_PREFIX,
                                          rom.split('.')[0],
                                          params['beta'],
                                          params['z_size'])
    os.system('mkdir -p %s' % (vae_save_path))

    experiment.run()

    ckpt_path = '%s/%s_%s' % (vae_save_path, rom.split('.')[0], time_str)
    print ckpt_path
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    saver.save(sess, '%s/checkpoint.ckpt' % (ckpt_path))

def launch(args, defaults, description, ALE=None):
    """
    Execute a complete training run.
    """
    parameters = process_args(args, defaults, description)

    if ALE is None:
        if parameters.rom.endswith('.bin'):
            rom = parameters.rom
        else:
            rom = "%s.bin" % parameters.rom
        full_rom_path = os.path.abspath(
            os.path.join(defaults.BASE_ROM_PATH, rom))
        ale = custom_ale_interface.CustomALEInterface(
            rom=parameters.rom,
            display_screen=parameters.display_screen)
    else:
        ale = ALE  # assume ALE already has a ROM loaded

    num_actions = len(ale.getLegalActionSet())

    # 1. first comes run control from the user
    if parameters.nn_file is not None:
        nn_file = os.path.abspath(parameters.nn_file)
        logging.info('loading network from parameters: ' + nn_file)
        with open(nn_file, 'r') as handle:
            network = cPickle.load(handle)
        logging.info('network loaded')
        # nasty bug with the discount parameter: sometimes it is not saved
        if not network.__dict__.get('discount', None):
            network.discount = parameters.discount
    # 2. then come the defaults
    elif defaults.__dict__.get('NN_FILE', None) is not None:
        # do we have NN_FILE among the defaults class params?
        nn_file = os.path.abspath(defaults.NN_FILE)
        logging.info('loading network from defaults: ' + nn_file)
        with open(nn_file, 'r') as handle:
            network = cPickle.load(handle)
        logging.info('network loaded')
        # nasty bug with the discount parameter: sometimes it is not saved
        if not network.__dict__.get('discount', None):
            network.discount = parameters.discount
    # 3. otherwise, train from scratch
    else:
        logging.info('generating network from scratch')
        network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                         defaults.RESIZED_HEIGHT,
                                         num_actions,
                                         parameters.phi_length,
                                         parameters.discount,
                                         parameters.learning_rate,
                                         parameters.rms_decay,
                                         parameters.rms_epsilon,
                                         parameters.momentum,
                                         parameters.freeze_interval,
                                         parameters.batch_size,
                                         parameters.network_type,
                                         parameters.update_rule,
                                         parameters.batch_accumulator)

    agent = ale_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  'experiments')  # experiment folder to store results

    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.death_ends_episode)
    experiment.run()

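# Usage sketch for the ALE keyword above (names and flags are placeholders):
# a caller that has already configured an interface can inject it directly,
# skipping the ROM-path plumbing:
#   ale = custom_ale_interface.CustomALEInterface(rom='breakout.bin',
#                                                 display_screen=False)
#   launch(sys.argv[1:], Defaults, __doc__, ALE=ale)
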
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """
    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    try:
        mode = int(parameters.mode)
    except ValueError:
        mode = 1

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    if parameters.experiment_directory:
        experiment_directory = parameters.experiment_directory
    else:
        time_str = time.strftime("_%Y-%m-%d-%H-%M")
        experiment_directory = parameters.experiment_prefix + time_str \
            + '_mode_' + str(mode)

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    if parameters.record_video:
        video_directory = os.path.join(experiment_directory, 'video')
        if not os.path.isdir(video_directory):
            os.makedirs(video_directory)
        ale.setString('record_screen_dir', video_directory)
        if sys.platform != 'darwin':
            ale.setBool('sound', True)
            ale.setString("record_sound_filename",
                          os.path.join(video_directory, "sound.wav"))
        # "We set fragsize to 64 to ensure proper sound sync"
        # (quoted from videoRecordingExample.cpp in ALE; the precise meaning
        # of the setting isn't documented there).
        ale.setInt("fragsize", 64)

    ale.loadROM(full_rom_path)
    num_actions = len(ale.getMinimalActionSet())
    ale.setMode(mode)

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                         defaults.RESIZED_HEIGHT,
                                         num_actions,
                                         parameters.phi_length,
                                         parameters.discount,
                                         parameters.learning_rate,
                                         parameters.rms_decay,
                                         parameters.rms_epsilon,
                                         parameters.momentum,
                                         parameters.clip_delta,
                                         parameters.freeze_interval,
                                         parameters.use_double,
                                         parameters.batch_size,
                                         parameters.network_type,
                                         parameters.update_rule,
                                         parameters.batch_accumulator,
                                         rng)
    else:
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  experiment_directory,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  rng,
                                  recording=parameters.recording)

    experiment = ale_experiment.ALEExperiment(
        ale, agent,
        defaults.RESIZED_WIDTH,
        defaults.RESIZED_HEIGHT,
        parameters.resize_method,
        parameters.epochs,
        parameters.steps_per_epoch,
        parameters.steps_per_test,
        parameters.frame_skip,
        parameters.death_ends_episode,
        parameters.max_start_nullops,
        rng,
        length_in_episodes=parameters.episodes)
    experiment.run()

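# With video recording enabled above, ALE writes numbered PNG frames plus
# sound.wav into <experiment_directory>/video/; they can be muxed afterwards,
# e.g. (frame-name pattern assumed from ALE's defaults):
#   ffmpeg -r 60 -i video/%06d.png -i video/sound.wav -c:v libx264 out.mp4
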