Example no. 1
batch_size = 32
update_freq = 10000
learn_freq = 4
save_freq = 500000
action_space_size = env.action_space.n
NUM_STEPS = 4000000
replay_memory_size = 40000
replay_alpha = 0.6
replay_beta = 0.4
replay_epsilon = 1e-6
is_load_model = True
watch_flag = True
fps = 30  # frames shown per second when watch_flag == True

log_json_writer = logger.make_output_format("json", "logs")
log = logger.Logger("logs", [log_json_writer])

from skimage import color
from skimage.transform import resize

def preprocess_frame(frame):
    """Crops the frame, converts it to grayscale, and resizes it to (input_height, input_width)."""
    im = resize(color.rgb2gray(frame)[:176, :], (input_height, input_width), mode='constant')
    return im
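# hypothetical usage (assumes env is a Gym Atari environment created elsewhere):
#   frame = env.reset()              # raw RGB frame, e.g. 210x160x3
#   state = preprocess_frame(frame)  # cropped, grayscale, resized to (input_height, input_width)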

"""
model arch
----------

Conv1 (32 8x8 filters) -> ReLU -> Conv2 (64 4x4 filters) -> ReLU -> Conv3 (64 3x3 filters) -> ReLU ->
FC4 (512 neurons) -> ReLU -> FC5 (9 neurons) -> ReLU -> Q-value for each action
"""
def q_function_nn(obs, action_space_size, scope, reuse=False):
    """
Example no. 2
    def eval(self):
        # create base_dir to save results
        env_id = self.args['env_id'] if self.args['env_kind'] == 'mario' else self.args['eval_type']
        # base_dir =  os.path.join(self.args['log_dir'], self.args['exp_name'], env_id)
        # os.makedirs(base_dir, exist_ok=True)

        # note to self: do not forget to restore the checkpoint here (this was missed once)
        # load_path = self.args['load_path']

        # args['IS_HIGH_RES'] is used to signal whether to save videos
        nlevels = self.args['NUM_LEVELS']

        save_video = False

        # train progress results logger
        format_strs = ['csv']
        format_strs = filter(None, format_strs)
        dirc = os.path.join(self.args['log_dir'], 'inter')
        output_formats = [
            logger.make_output_format(f, dirc) for f in format_strs
        ]
        self.result_logger = logger.Logger(dir=dirc,
                                           output_formats=output_formats)

        if self.args['env_kind'] == 'mario':
            # NOTE: number of evaluation levels is hard-coded for mario; do NOT FORGET to change this if needed
            nlevels = 20

        # curr_iter = 0
        # results_list = []
        restore_iter = [25 * i for i in range(117)] + [2929]
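        # e.g. restore_iter = [0, 25, 50, ..., 2900, 2929]; 2929 is presumably the final saved iteration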

        for r in restore_iter:
            load_path = os.path.join(self.args['load_dir'],
                                     'model-{}'.format(r))
            print(load_path)
            self.agent.load(load_path)

            save_video = False
            nlevels = 20 if self.args['env_kind'] == 'mario' else self.args['NUM_LEVELS']
            results, _ = self.agent.evaluate(nlevels, save_video)
            results['iter'] = r
            for (k, v) in results.items():
                self.result_logger.logkv(k, v)
            self.result_logger.dumpkvs()
        '''    
        results['iter'] = curr_iter = int(l.split('/')[-1].split('-')[-1])
        print(results)
        results_list.append(results)

        csv_columns = results_list[0].keys()
        print(csv_columns)

        curr_dir = os.path.join(base_dir, str(curr_iter))
        os.makedirs(curr_dir, exist_ok=True)
        
        csv_save_path = os.path.join(curr_dir, 'results.csv'.format())
        with open(csv_save_path, 'w') as file:
            writer = csv.DictWriter(file, fieldnames=csv_columns)
            writer.writeheader()
            for data in results_list:
                writer.writerow(data)
        print('results are dumped to {}'.format(csv_save_path))
        '''
Example no. 3
def create_json_logger(log_dir):
    return logger.Logger(log_dir,
            [logger.make_output_format(f, log_dir) for f in ['json']]
            )
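A hedged usage sketch, assuming logger is OpenAI baselines' logger module (whose Logger exposes logkv/dumpkvs, as in the other examples on this page); the directory and values are illustrative:

eval_logger = create_json_logger('logs/eval')
eval_logger.logkv('iter', 0)
eval_logger.logkv('mean_reward', 12.5)
eval_logger.dumpkvs()  # appends the accumulated key/value pairs as one JSON record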
Example no. 4
    def train(self):
        curr_iter = 0

        # train progress results logger
        format_strs = ['csv']
        format_strs = filter(None, format_strs)
        dirc = os.path.join(self.args['log_dir'], 'inter')
        if self.restore_iter > -1:
            dirc = os.path.join(self.args['log_dir'],
                                'inter-{}'.format(self.restore_iter))
        output_formats = [
            logger.make_output_format(f, dirc) for f in format_strs
        ]
        self.result_logger = logger.Logger(dir=dirc,
                                           output_formats=output_formats)

        # in case we are restoring the training
        if self.restore_iter > -1:
            self.agent.load(self.load_path)
            if not self.args['transfer_load']:
                curr_iter = self.restore_iter

        print('max_iter: {}'.format(self.max_iter))

        # interim checkpoints (at 1/2, 1/4 and 1/8 of the total timesteps) to compare later,
        # e.g. when training for 128M frames

        inter_save = []
        for i in range(3):
            divisor = (2**(i + 1))
            inter_save.append(
                int(self.args['num_timesteps'] // divisor) //
                (self.args['nsteps'] * self.args['NUM_ENVS'] *
                 self.args['nframeskip']))
        print('inter_save: {}'.format(inter_save))
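        # worked example with assumed settings num_timesteps=128e6, nsteps=128, NUM_ENVS=32,
        # nframeskip=4: one iteration consumes 128 * 32 * 4 = 16384 frames, so
        # inter_save = [64_000_000 // 16384, 32_000_000 // 16384, 16_000_000 // 16384] = [3906, 1953, 976]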

        total_time = 0.0
        # results_list = []

        while curr_iter < self.early_max_iter:
            frac = 1.0 - (float(curr_iter) / self.max_iter)

            # self.agent.update calls rollout
            start_time = time.time()

            ## linearly annealed learning rate and clip range
            curr_lr = self.lr(frac)
            curr_cr = self.cliprange(frac)
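            # e.g. with max_iter = 10000 and a base lr of 1e-4 (assumed values), at curr_iter = 2500
            # frac = 0.75, so a linear schedule lr(frac) = base_lr * frac yields curr_lr = 7.5e-5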

            ## within-training evaluation was removed
            ## (could not make flag_sum work properly)
            ## evaluate every 100 runs (save_interval) on 20 training levels
            # only for mario (first evaluate, then update)
            # changed so that zero-shot generalization is measured without extra effort
            if curr_iter % (self.args['save_interval']) == 0:
                save_video = False
                nlevels = 20 if self.args['env_kind'] == 'mario' else self.args['NUM_LEVELS']
                results, _ = self.agent.evaluate(nlevels, save_video)
                results['iter'] = curr_iter
                for (k, v) in results.items():
                    self.result_logger.logkv(k, v)
                self.result_logger.dumpkvs()

            # representation learning every 25 steps
            info = self.agent.update(lr=curr_lr, cliprange=curr_cr)
            end_time = time.time()

            # additional info
            info['frac'] = frac
            info['curr_lr'] = curr_lr
            info['curr_cr'] = curr_cr
            info['curr_iter'] = curr_iter
            # info['max_iter'] = self.max_iter
            info['elapsed_time'] = end_time - start_time
            # info['total_time'] = total_time = (total_time + info['elapsed_time']) / 3600.0
            info['expected_time'] = self.max_iter * info['elapsed_time'] / 3600.0

            ## logging results using baselines' logger
            logger.logkvs(info)
            logger.dumpkvs()

            if curr_iter % self.args['save_interval'] == 0:
                self.agent.save(curr_iter, cliprange=curr_cr)

            if curr_iter in inter_save:
                self.agent.save(curr_iter, cliprange=curr_cr)

            curr_iter += 1

        self.agent.save(curr_iter, cliprange=curr_cr)

        # final evaluation for mario
        save_video = False
        nlevels = 20 if self.args['env_kind'] == 'mario' else self.args['NUM_LEVELS']
        results, _ = self.agent.evaluate(nlevels, save_video)
        results['iter'] = curr_iter
        for (k, v) in results.items():
            self.result_logger.logkv(k, v)
        self.result_logger.dumpkvs()
Example no. 5
def create_logger(log_dir):
    return logger.Logger(log_dir,
            [logger.make_output_format(f, log_dir) for f in logger.LOG_OUTPUT_FORMATS]
            )
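Unlike Example no. 3, this variant writes every format in logger.LOG_OUTPUT_FORMATS (typically stdout, log, csv and json in baselines). A hedged usage sketch with illustrative names and values:

run_logger = create_logger('logs/run0')
run_logger.logkv('curr_iter', 10)
run_logger.logkv('elapsed_time', 3.2)
run_logger.dumpkvs()  # flushes the pairs to every configured output format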