Example #1
def test_logger(test_multiline_str):
    logger.critical(test_multiline_str)
    logger.debug(test_multiline_str)
    logger.error(test_multiline_str)
    logger.exception(test_multiline_str)
    logger.info(test_multiline_str)
    logger.warn(test_multiline_str)
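A minimal sketch of the pytest fixture this test assumes; the fixture body below is hypothetical and the project's actual conftest.py may define it differently.

import pytest

@pytest.fixture
def test_multiline_str():
    return 'lorem ipsum\ndolor sit amet'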
Example #2
 def body_done_log(self, body):
     '''Log the summary for a body when it is done'''
     env = body.env
     clock = env.clock
     memory = body.memory
     msg = f'Trial {self.info_space.get("trial")} session {self.info_space.get("session")} env {env.e}, body {body.aeb}, epi {clock.get("epi")}, t {clock.get("t")}, loss: {body.loss:.4f}, total_reward: {memory.total_reward:.2f}, last-{memory.avg_window}-epi avg: {memory.avg_total_reward:.2f}'
     logger.info(msg)
Example #3
 def post_body_init(self):
     '''Run init for components that need bodies to exist first, e.g. memory or architecture.'''
     self.nanflat_body_e = util.nanflatten(self.body_e)
     for idx, body in enumerate(self.nanflat_body_e):
         body.nanflat_e_idx = idx
     self.body_num = len(self.nanflat_body_e)
     logger.info(util.self_desc(self))
Example #4
 def close(self):
     '''
     Close session and clean up.
     Save agent, close env.
     Prepare self.df.
     '''
     self.agent_space.close()
     self.env_space.close()
     logger.info('Session done, closing.')
Example #5
 def update_lr(self):
     assert 'lr' in self.optim_spec
     old_lr = self.optim_spec['lr']
     new_lr = self.lr_decay(self)
     if new_lr == old_lr:
         return
     self.optim_spec['lr'] = new_lr
     logger.info(f'Learning rate decayed from {old_lr:.6f} to {self.optim_spec["lr"]:.6f}')
     self.optim = net_util.get_optim(self, self.optim_spec)
Example #6
 def __init__(self, experiment):
     from slm_lab.experiment.control import Experiment
     ray.register_custom_serializer(Experiment, use_pickle=True)
     ray.register_custom_serializer(InfoSpace, use_pickle=True)
     ray.register_custom_serializer(pd.DataFrame, use_pickle=True)
     ray.register_custom_serializer(pd.Series, use_pickle=True)
     self.experiment = experiment
     self.config_space = build_config_space(experiment)
     logger.info(f'Running {util.get_class_name(self)}, with meta spec:\n{self.experiment.spec["meta"]}')
Example #7
File: sil.py Project: ronald-xie/SLM-Lab
 def post_body_init(self):
     '''Initializes the part of the algorithm that needs a body to exist first.'''
     self.body = self.agent.nanflat_body_a[0]  # single-body algo
     # create the extra replay memory for SIL
     memory_name = self.memory_spec['sil_replay_name']
     MemoryClass = getattr(memory, memory_name)
     self.body.replay_memory = MemoryClass(self.memory_spec, self, self.body)
     self.init_algorithm_params()
     self.init_nets()
     logger.info(util.self_desc(self))
Example #8
 def __init__(self, spec, info_space=None):
     info_space = info_space or InfoSpace()
     init_thread_vars(spec, info_space, unit='trial')
     self.spec = spec
     self.info_space = info_space
     self.coor, self.index = self.info_space.get_coor_idx(self)
     self.session_data_dict = {}
     self.data = None
     analysis.save_spec(spec, info_space, unit='trial')
     logger.info(f'Initialized trial {self.index}')
Example #9
def retro_analyze_experiment(predir):
    '''Retro-analyze all experiment-level data.'''
    logger.info('Retro-analyzing experiment from file')
    from slm_lab.experiment.control import Experiment
    # mock experiment
    spec, info_space = mock_info_space_spec(predir)
    experiment = Experiment(spec, info_space)
    trial_data_dict = trial_data_dict_from_file(predir)
    experiment.trial_data_dict = trial_data_dict
    return analyze_experiment(experiment)
Example #10
def analyze_trial(trial):
    '''
    Gather trial data, plot, and return trial df for high level agg.
    @returns {DataFrame} trial_fitness_df Single-row df of trial fitness vector (avg over aeb, sessions), indexed with trial index.
    '''
    logger.info('Analyzing trial')
    trial_fitness_df = calc_trial_fitness_df(trial)
    trial_fig = plot_trial(trial.spec, trial.info_space)
    save_trial_data(trial.spec, trial.info_space, trial_fitness_df, trial_fig)
    return trial_fitness_df
Example #11
def load_algorithm(algorithm):
    '''Load all the nets for an algorithm'''
    agent = algorithm.agent
    net_names = algorithm.net_names
    prepath = util.get_prepath(agent.spec, agent.info_space, unit='session')
    logger.info(f'Loading algorithm {util.get_class_name(algorithm)} nets {net_names}')
    for net_name in net_names:
        net = getattr(algorithm, net_name)
        model_path = f'{prepath}_model_{net_name}.pth'
        load(net, model_path)
Example #12
def get_grad_norms(net):
    '''Returns a list of the norm of the gradients for all parameters'''
    norms = []
    for i, param in enumerate(net.parameters()):
        if param.grad is None:
            logger.info(f'Param with None grad: {param.shape}, layer: {i}')
            norms.append(None)
        else:
            grad_norm = torch.norm(param.grad)
            norms.append(grad_norm)
    return norms
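A hedged usage sketch for get_grad_norms above: the toy model and dummy loss are illustrative stand-ins, not SLM-Lab components.

import torch
import torch.nn as nn

net = nn.Linear(4, 2)                # toy model for illustration
loss = net(torch.randn(8, 4)).sum()  # dummy forward pass
loss.backward()                      # populates param.grad for every parameter
norms = get_grad_norms(net)          # one norm (or None) per parameter tensor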
Example #13
 def post_body_init(self):
     '''
     Initializes the part of the algorithm that needs a body to exist first. A body is a part of an Agent. Agents may have 1 to k bodies. Bodies do the acting in environments, and contain:
         - Memory (holding experiences obtained by acting in the environment)
         - State and action dimensions for an environment
         - Boolean var for if the action space is discrete
     '''
     self.body = self.agent.nanflat_body_a[0]  # single-body algo
     self.init_algorithm_params()
     self.init_nets()
     logger.info(util.self_desc(self))
Example #14
 def post_init_nets(self):
     '''
     Method to conditionally load models.
     Call at the end of init_net() after setting self.net_names
     '''
     assert hasattr(self, 'net_names')
     if util.get_lab_mode() == 'enjoy':
         logger.info('Loaded algorithm models for lab_mode: enjoy')
         self.load()
     else:
         logger.info(f'Initialized algorithm models for lab_mode: {util.get_lab_mode()}')
Example #15
def save_algorithm(algorithm, epi=None):
    '''Save all the nets for an algorithm'''
    agent = algorithm.agent
    net_names = algorithm.net_names
    prepath = util.get_prepath(agent.spec, agent.info_space, unit='session')
    if epi is not None:
        prepath = f'{prepath}_epi_{epi}'
    logger.info(f'Saving algorithm {util.get_class_name(algorithm)} nets {net_names}')
    for net_name in net_names:
        net = getattr(algorithm, net_name)
        model_path = f'{prepath}_model_{net_name}.pth'
        save(net, model_path)
Example #16
 def __init__(self, spec, info_space=None):
     info_space = info_space or InfoSpace()
     init_thread_vars(spec, info_space, unit='experiment')
     self.spec = spec
     self.info_space = info_space
     self.coor, self.index = self.info_space.get_coor_idx(self)
     self.trial_data_dict = {}
     self.data = None
     SearchClass = getattr(search, spec['meta'].get('search'))
     self.search = SearchClass(self)
     analysis.save_spec(spec, info_space, unit='experiment')
     logger.info(f'Initialized experiment {self.index}')
Example #17
def retro_analyze_sessions(predir):
    '''Retro-analyze all session-level data.'''
    logger.info('Retro-analyzing sessions from file')
    from slm_lab.experiment.control import Session
    for filename in os.listdir(predir):
        if filename.endswith('_session_df.csv'):
            tn, sn = filename.replace('_session_df.csv', '').split('_')[-2:]
            trial_index, session_index = int(tn[1:]), int(sn[1:])
            # mock session
            spec, info_space = mock_info_space_spec(predir, trial_index, session_index)
            session = Session(spec, info_space)
            session_data = session_data_from_file(predir, trial_index, session_index)
            analyze_session(session, session_data)
Example #18
def analyze_session(session, session_data=None):
    '''
    Gather session data, plot, and return fitness df for high level agg.
    @returns {DataFrame} session_fitness_df Single-row df of session fitness vector (avg over aeb), indexed with session index.
    '''
    logger.info('Analyzing session')
    if session_data is None:
        session_mdp_data, session_data = get_session_data(session)
    else:  # from retro analysis
        session_mdp_data = None
    session_fitness_df = calc_session_fitness_df(session, session_data)
    session_fig = plot_session(session.spec, session.info_space, session_data)
    save_session_data(session.spec, session.info_space, session_mdp_data, session_data, session_fitness_df, session_fig)
    return session_fitness_df
Example #19
def check_all():
    '''Check all spec files, all specs.'''
    spec_files = ps.filter_(os.listdir(SPEC_DIR), lambda f: f.endswith('.json') and not f.startswith('_'))
    for spec_file in spec_files:
        spec_dict = util.read(f'{SPEC_DIR}/{spec_file}')
        for spec_name, spec in spec_dict.items():
            try:
                spec['name'] = spec_name
                spec['git_SHA'] = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()
                check(spec)
            except Exception as e:
                logger.exception(f'spec_file {spec_file} fails spec check')
                raise e
    logger.info(f'Checked all specs from: {ps.join(spec_files, ",")}')
    return True
Example #20
def retro_analyze(predir):
    '''
    Method to analyze experiment from file after it ran.
    Reads from files, constructs lab units, and runs retro analyses on all lab units.
    This method has no side-effects, i.e. doesn't overwrite data it should not.
    @example

    from slm_lab.experiment import analysis
    predir = 'data/reinforce_cartpole_2018_01_22_211751'
    analysis.retro_analyze(predir)
    '''
    os.environ['PREPATH'] = f'{predir}/retro_analyze'  # to prevent overwriting log file
    logger.info(f'Retro-analyzing {predir}')
    retro_analyze_sessions(predir)
    retro_analyze_trials(predir)
    retro_analyze_experiment(predir)
Example #21
File: viz.py Project: ronald-xie/SLM-Lab
def save_image(figure, filepath=None):
    if os.environ['PY_ENV'] == 'test':
        return
    if filepath is None:
        filepath = f'{PLOT_FILEDIR}/{ps.get(figure, "layout.title")}.png'
    filepath = util.smart_path(filepath)
    dirname, filename = os.path.split(filepath)
    try:
        cmd = f'orca graph -o {filename} \'{json.dumps(figure)}\''
        if 'linux' in sys.platform:
            cmd = 'xvfb-run -a -s "-screen 0 1400x900x24" -- ' + cmd
        Popen(cmd, cwd=dirname, shell=True, stderr=DEVNULL, stdout=DEVNULL)
        logger.info(f'Graph saved to {dirname}/{filename}')
    except Exception as e:
        logger.exception(
            'Please install orca for plotly and run retro-analysis to generate graphs.')
Example #22
def calc_session_fitness_df(session, session_data):
    '''Calculate the session fitness df'''
    session_fitness_data = {}
    for aeb in session_data:
        aeb_df = session_data[aeb]
        util.downcast_float32(aeb_df)
        body = session.aeb_space.body_space.data[aeb]
        aeb_fitness_sr = calc_aeb_fitness_sr(aeb_df, body.env.name)
        aeb_fitness_df = pd.DataFrame([aeb_fitness_sr], index=[session.index])
        aeb_fitness_df = aeb_fitness_df.reindex(FITNESS_COLS[:3], axis=1)
        session_fitness_data[aeb] = aeb_fitness_df
    # form multiindex df, then take mean across all bodies
    session_fitness_df = pd.concat(session_fitness_data, axis=1)
    mean_fitness_df = session_fitness_df.mean(axis=1, level=3)
    session_fitness = calc_fitness(mean_fitness_df)
    logger.info(f'Session mean fitness: {session_fitness}\n{mean_fitness_df}')
    return session_fitness_df
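Note that DataFrame.mean(axis=1, level=...) used above has been deprecated in later pandas releases; a sketch of the equivalent aggregation, assuming a recent pandas version:

# same result as session_fitness_df.mean(axis=1, level=3):
# group the columns by the 4th multi-index level, then average
mean_fitness_df = session_fitness_df.T.groupby(level=3).mean().T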
Example #23
 def __init__(self, spec, info_space=None):
     info_space = info_space or InfoSpace()
     init_thread_vars(spec, info_space, unit='session')
     self.spec = deepcopy(spec)
     self.info_space = info_space
     self.coor, self.index = self.info_space.get_coor_idx(self)
     self.random_seed = 100 * (info_space.get('trial') or 0) + self.index
     torch.cuda.manual_seed_all(self.random_seed)
     torch.manual_seed(self.random_seed)
     np.random.seed(self.random_seed)
     self.data = None
     self.aeb_space = AEBSpace(self.spec, self.info_space)
     self.env_space = EnvSpace(self.spec, self.aeb_space)
     self.agent_space = AgentSpace(self.spec, self.aeb_space)
     logger.info(util.self_desc(self))
     self.aeb_space.init_body_space()
     self.aeb_space.post_body_init()
     logger.info(f'Initialized session {self.index}')
Example #24
def analyze_experiment(experiment):
    '''
    Gather experiment trial_data_dict as experiment_df, plot.
    Search module must return best_spec and experiment_data with format {trial_index: exp_trial_data},
    where trial_data = {**var_spec, **fitness_vec, fitness}.
    This is then made into experiment_df.
    @returns {DataFrame} experiment_df Of var_specs, fitness_vec, fitness for all trials.
    '''
    logger.info('Analyzing experiment')
    experiment_df = pd.DataFrame(experiment.trial_data_dict).transpose()
    cols = FITNESS_COLS + ['fitness']
    config_cols = sorted(ps.difference(experiment_df.columns.tolist(), cols))
    sorted_cols = config_cols + cols
    experiment_df = experiment_df.reindex(sorted_cols, axis=1)
    experiment_df.sort_values(by=['fitness'], ascending=False, inplace=True)
    logger.info(f'Experiment data:\n{experiment_df}')
    experiment_fig = plot_experiment(experiment.spec, experiment_df)
    save_experiment_data(experiment.spec, experiment.info_space, experiment_df, experiment_fig)
    return experiment_df
Example #25
def save_session_data(spec, info_space, session_mdp_data, session_data, session_fitness_df, session_fig):
    '''
    Save the session data: session_mdp_df, session_df, session_fitness_df, session_graph.
    session_data is saved as session_df, multi-indexed with (a,e,b) as 3 extra levels.
    To read, use:
    session_df = util.read(filepath, header=[0, 1, 2, 3])
    session_data = util.session_df_to_data(session_df)
    Likewise for session_mdp_df
    '''
    prepath = util.get_prepath(spec, info_space, unit='session')
    logger.info(f'Saving session data to {prepath}')
    if session_mdp_data is not None:  # not from retro analysis
        session_mdp_df = pd.concat(session_mdp_data, axis=1)
        session_df = pd.concat(session_data, axis=1)
        # TODO reactivate saving when get to the transition matrix research
        # util.write(session_mdp_df, f'{prepath}_session_mdp_df.csv')
        util.write(session_df, f'{prepath}_session_df.csv')
    util.write(session_fitness_df, f'{prepath}_session_fitness_df.csv')
    viz.save_image(session_fig, f'{prepath}_session_graph.png')
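A hedged sketch of reading the saved session_df back with pandas directly, mirroring the util.read call in the docstring above; the index_col assumption may differ from util.read's actual behavior.

import pandas as pd

# columns carry the (a, e, b) levels plus the original column names
session_df = pd.read_csv(f'{prepath}_session_df.csv', header=[0, 1, 2, 3], index_col=0)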
Example #26
def calc_trial_fitness_df(trial):
    '''
    Calculate the trial fitness df by aggregating from the collected session_data_dict (session_fitness_df's).
    Adds a consistency dimension to fitness vector.
    '''
    trial_fitness_data = {}
    all_session_fitness_df = pd.concat(list(trial.session_data_dict.values()))
    for aeb in util.get_df_aeb_list(all_session_fitness_df):
        aeb_fitness_df = all_session_fitness_df.loc[:, aeb]
        aeb_fitness_sr = aeb_fitness_df.mean()
        consistency = calc_consistency(aeb_fitness_df)
        aeb_fitness_sr = aeb_fitness_sr.append(pd.Series({'consistency': consistency}))
        aeb_fitness_df = pd.DataFrame([aeb_fitness_sr], index=[trial.index])
        aeb_fitness_df = aeb_fitness_df.reindex(FITNESS_COLS, axis=1)
        trial_fitness_data[aeb] = aeb_fitness_df
    # form multiindex df, then take mean across all bodies
    trial_fitness_df = pd.concat(trial_fitness_data, axis=1)
    mean_fitness_df = trial_fitness_df.mean(axis=1, level=3)
    trial_fitness_df = mean_fitness_df
    trial_fitness = calc_fitness(mean_fitness_df)
    logger.info(f'Trial mean fitness: {trial_fitness}\n{mean_fitness_df}')
    return trial_fitness_df
Example #27
def retro_analyze_trials(predir):
    '''Retro-analyze all trial-level data.'''
    logger.info('Retro-analyzing trials from file')
    from slm_lab.experiment.control import Trial
    for filename in os.listdir(predir):
        if filename.endswith('_trial_data.json'):
            filepath = f'{predir}/{filename}'
            tn = filename.replace('_trial_data.json', '').split('_')[-1]
            trial_index = int(tn[1:])
            # mock trial
            spec, info_space = mock_info_space_spec(predir, trial_index)
            trial = Trial(spec, info_space)
            session_data_dict = session_data_dict_from_file(predir, trial_index)
            trial.session_data_dict = session_data_dict
            trial_fitness_df = analyze_trial(trial)
            # write trial_data that was written from ray search
            fitness_vec = trial_fitness_df.iloc[0].to_dict()
            fitness = calc_fitness(trial_fitness_df)
            trial_data = util.read(filepath)
            trial_data.update({
                **fitness_vec, 'fitness': fitness, 'trial_index': trial_index,
            })
            util.write(trial_data, filepath)
Example #28
    def run(self):
        meta_spec = self.experiment.spec['meta']
        ray.init(**meta_spec.get('resources', {}))
        max_generation = meta_spec['max_generation']
        pop_size = meta_spec['max_trial'] or calc_population_size(self.experiment)
        logger.info(f'EvolutionarySearch max_generation: {max_generation}, population size: {pop_size}')
        trial_data_dict = {}
        config_hash = {}  # config hash_str to trial_index

        toolbox = self.init_deap()
        population = toolbox.population(n=pop_size)
        for gen in range(1, max_generation + 1):
            logger.info(f'Running generation: {gen}/{max_generation}')
            ray_id_to_config = {}
            pending_ids = []
            for individual in population:
                config = dict(individual.items())
                hash_str = util.to_json(config, indent=0)
                if hash_str not in config_hash:
                    trial_index = self.experiment.info_space.tick('trial')['trial']
                    config_hash[hash_str] = config['trial_index'] = trial_index
                    ray_id = run_trial.remote(self.experiment, config)
                    ray_id_to_config[ray_id] = config
                    pending_ids.append(ray_id)
                individual['trial_index'] = config_hash[hash_str]

            trial_data_dict.update(get_ray_results(pending_ids, ray_id_to_config))

            for individual in population:
                trial_index = individual.pop('trial_index')
                trial_data = trial_data_dict.get(trial_index, {'fitness': 0})  # if trial errored
                individual.fitness.values = trial_data['fitness'],

            preview = 'Fittest of population preview:'
            for individual in tools.selBest(population, k=min(10, pop_size)):
                preview += f'\nfitness: {individual.fitness.values[0]}, {individual}'
            logger.info(preview)

            # prepare offspring for next generation
            if gen < max_generation:
                population = toolbox.select(population, len(population))
                # Vary the pool of individuals
                population = algorithms.varAnd(population, toolbox, cxpb=0.5, mutpb=0.5)

        ray.worker.cleanup()
        return trial_data_dict
Example #29
def generate_specs(spec, const='agent'):
    '''
    Generate benchmark specs with compatible discrete/continuous/both types:
    - take a spec
    - for each in benchmark envs
        - use the template env spec to update spec
        - append to benchmark specs
    Interchange agent and env for the reversed benchmark.
    '''
    if const == 'agent':
        const_name = ps.get(spec, 'agent.0.algorithm.name')
        variant = 'env'
    else:
        const_name = ps.get(spec, 'env.0.name')
        variant = 'agent'

    filepath = f'{spec_util.SPEC_DIR}/benchmark_{const_name}.json'
    if os.path.exists(filepath):
        logger.info(f'Benchmark for {const_name} exists at {filepath} already, not overwriting.')
        benchmark_specs = util.read(filepath)
        return benchmark_specs

    logger.info(f'Generating benchmark for {const_name}')
    benchmark_variants = []
    benchmark_specs = {}
    for dist_cont, const_names in BENCHMARK[const].items():
        if const_name in const_names:
            benchmark_variants.extend(BENCHMARK[variant][dist_cont])
    for vary_name in benchmark_variants:
        vary_spec = ENV_TEMPLATES[vary_name]
        spec_name = f'{const_name}_{vary_name}'
        benchmark_spec = spec.copy()
        benchmark_spec['name'] = spec_name
        benchmark_spec[variant] = [vary_spec]
        benchmark_specs[spec_name] = benchmark_spec

    util.write(benchmark_specs, filepath)
    logger.info(f'Benchmark for {const_name} written to {filepath}.')
    return benchmark_specs
Example #30
 def close(self):
     logger.info(f'Trial {self.index} done')
Example #31
    def init_nets(self):
        '''
        Initialize the neural networks used to learn the actor and critic from the spec
        Below we automatically select an appropriate net based on three different conditions
        1. If the action space is discrete or continuous action
            - Networks for continuous action spaces have two heads and return two values, the first is a tensor containing the mean of the action policy, the second is a tensor containing the std deviation of the action policy. The distribution is assumed to be a Gaussian (Normal) distribution.
            - Networks for discrete action spaces have a single head and return the logits for a categorical probability distribution over the discrete actions
        2. If the actor and critic are separate or share weights
            - If the networks share weights then the single network returns a list.
            - Continuous action spaces: The return list contains 3 elements: The first element contains the mean output for the actor (policy), the second element the std dev of the policy, and the third element is the state-value estimated by the network.
            - Discrete action spaces: The return list contains 2 elements. The first element is a tensor containing the logits for a categorical probability distribution over the actions. The second element contains the state-value estimated by the network.
        3. If the network type is feedforward, convolutional, or recurrent
            - Feedforward and convolutional networks take a single state as input and require an OnPolicyReplay or OnPolicyBatchReplay memory
            - Recurrent networks take n states as input and require an OnPolicySeqReplay or OnPolicySeqBatchReplay memory
        '''
        net_type = self.net_spec['type']
        # options of net_type are {MLPNet, ConvNet, RecurrentNet} x {Shared, Separate}
        in_dim = self.body.state_dim
        if self.body.is_discrete:
            if 'Shared' in net_type:
                self.share_architecture = True
                out_dim = [self.body.action_dim, 1]
            else:
                assert 'Separate' in net_type
                self.share_architecture = False
                out_dim = self.body.action_dim
                critic_out_dim = 1
        else:
            if 'Shared' in net_type:
                self.share_architecture = True
                out_dim = [self.body.action_dim, self.body.action_dim, 1]
            else:
                assert 'Separate' in net_type
                self.share_architecture = False
                out_dim = [self.body.action_dim, self.body.action_dim]
                critic_out_dim = 1

        self.net_spec['type'] = net_type = net_type.replace('Shared', '').replace('Separate', '')
        if 'MLP' in net_type and ps.is_list(out_dim) and len(out_dim) > 1:
            self.net_spec['type'] = 'MLPHeterogenousTails'

        actor_net_spec = self.net_spec.copy()
        critic_net_spec = self.net_spec.copy()
        for k in self.net_spec:
            if 'actor_' in k:
                actor_net_spec[k.replace('actor_', '')] = actor_net_spec.pop(k)
                critic_net_spec.pop(k)
            if 'critic_' in k:
                critic_net_spec[k.replace('critic_', '')] = critic_net_spec.pop(k)
                actor_net_spec.pop(k)

        NetClass = getattr(net, self.net_spec['type'])
        # properly set net_spec and action_dim for actor, critic nets
        if self.share_architecture:
            # net = actor_critic as one
            self.net = NetClass(actor_net_spec, self, in_dim, out_dim)
        else:
            # main net = actor
            self.net = NetClass(actor_net_spec, self, in_dim, out_dim)
            if critic_net_spec['use_same_optim']:
                critic_net_spec = actor_net_spec
            self.critic = NetClass(critic_net_spec, self, in_dim, critic_out_dim)
        logger.info(f'Training on gpu: {self.net.gpu}')
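To make the branching above concrete, a small illustration of how the Shared/Separate suffix on net_spec['type'] is interpreted (illustration only, using one of the type strings named in the comment above):

net_type = 'MLPNetShared'
share_architecture = 'Shared' in net_type                            # one shared actor-critic net
base_type = net_type.replace('Shared', '').replace('Separate', '')   # 'MLPNet', used for the NetClass lookup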
Example #32
 def close(self):
     logger.info('Experiment done, closing.')
Example #33
 def post_body_init(self):
     '''Run init for components that need bodies to exist first, e.g. memory or architecture.'''
     self.flat_nonan_body_a = util.flatten_nonan(self.body_a)
     self.algorithm.post_body_init()
     logger.info(util.self_desc(self))
Example #34
 def close(self):
     logger.info('EnvSpace.close')
     for env in self.envs:
         env.close()
Example #35
 def close(self):
     reload(search)  # fixes ray consecutive run crashing due to bad cleanup
     logger.info('Experiment done and closed.')
Example #36
File: base.py Project: vmuthuk2/SLM-Lab
 def print_memory_info(self):
     '''Prints size of all of the memory arrays'''
     for k in self.data_keys:
         d = getattr(self, k)
         logger.info(f'Memory for body {self.body.aeb}: {k} :shape: {d.shape}, dtype: {d.dtype}, size: {util.sizeof(d)}MB')
Example #37
def save_trial_data(trial_spec, trial_df):
    spec_name = trial_spec['name']
    prepath = f'data/{spec_name}/{spec_name}_{util.get_timestamp()}'
    logger.info(f'Saving trial data to {prepath}_*')
    util.write(trial_spec, f'{prepath}_spec.json')
Example #38
def retro_analyze_sessions(predir):
    '''Retro analyze all sessions'''
    logger.info('Running retro_analyze_sessions')
    session_spec_paths = glob(f'{predir}/*_s*_spec.json')
    util.parallelize(_retro_analyze_session, [(p,) for p in session_spec_paths], num_cpus=util.NUM_CPUS)
Example #39
def save_trial_data(spec, info_space, trial_fitness_df):
    '''Save the trial data: spec, trial_fitness_df.'''
    prepath = get_prepath(spec, info_space, unit='trial')
    logger.info(f'Saving trial data to {prepath}')
    util.write(trial_fitness_df, f'{prepath}_trial_fitness_df.csv')
Example #40
 def save(self, epi=None):
     '''Save net models for algorithm given the required property self.net_names'''
     if not hasattr(self, 'net_names'):
         logger.info('No net declared in self.net_names in init_nets(); no models to save.')
     else:
         net_util.save_algorithm(self, epi=epi)
Example #41
 def post_body_init(self):
     '''Run init for agent, env components that need bodies to exist first, e.g. memory or architecture.'''
     self.clock = self.env_space.get_base_clock()
     logger.info(util.self_desc(self))
     self.agent_space.post_body_init()
     self.env_space.post_body_init()
Example #42
 def update_lr(self):
     assert 'lr' in self.optim_param
     old_lr = self.optim_param['lr']
     self.optim_param['lr'] = old_lr * 0.9
     logger.info(f'Learning rate decayed from {old_lr} to {self.optim_param["lr"]}')
     self.optim = net_util.get_optim_multinet(self.params, self.optim_param)
Example #43
 def close(self):
     logger.info('Trial done and closed.')
Example #44
 def close(self):
     logger.info('Experiment done')
Example #45
 def close(self):
     logger.info('AgentSpace.close')
     for agent in self.agents:
         agent.close()
Example #46
 def post_body_init(self):
     '''Initializes the part of the algorithm that needs a body to exist first.'''
     self.body = self.agent.nanflat_body_a[0]  # single-body algo
     self.init_algorithm_params()
     self.init_nets()
     logger.info(util.self_desc(self))
Example #47
 def post_body_init(self):
     '''Run init for components that need bodies to exist first, e.g. memory or architecture.'''
     for env in self.envs:
         env.post_body_init()
     logger.info(util.self_desc(self))
Example #48
 def init_tensorboard(self):
     if not hasattr(self, 'tb_writer'):
         log_prepath = self.spec['meta']['log_prepath']
         self.tb_writer = SummaryWriter(os.path.dirname(log_prepath), filename_suffix=os.path.basename(log_prepath))
         self.tb_actions = []  # store actions for tensorboard
         logger.info(f'Using TensorBoard logging for dev mode. Run `tensorboard --logdir={log_prepath}` to start TensorBoard.')
Example #49
 def __init__(self, DistSessionClass, spec, info_space, global_nets):
     super(DistSession, self).__init__()
     self.name = f'w{info_space.get("session")}'
     self.session = DistSessionClass(spec, info_space, global_nets)
     logger.info(f'Initialized DistSession {self.session.index}')
Example #50
 def log_summary(self):
     '''Log the summary for this body when its environment is done'''
     prefix = self.get_log_prefix()
     memory = self.memory
     msg = f'{prefix}, loss: {self.last_loss:.8f}, total_reward: {memory.total_reward:.4f}, last-{memory.avg_window}-epi avg: {memory.avg_total_reward:.4f}'
     logger.info(msg)
Example #51
 def init_nets(self):
     '''Initialize the neural networks used to learn the actor and critic from the spec'''
     body = self.agent.nanflat_body_a[0]  # singleton algo
     state_dim = body.state_dim
     action_dim = body.action_dim
     self.is_discrete = body.is_discrete
     net_spec = self.agent.spec['net']
     mem_spec = self.agent.spec['memory']
     net_type = self.agent.spec['net']['type']
     actor_kwargs = util.compact_dict(
         dict(
             hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
             optim_param=_.get(net_spec, 'optim_actor'),
             loss_param=_.get(net_spec,
                              'loss'),  # Note: Not used for training actor
             clamp_grad=_.get(net_spec, 'clamp_grad'),
             clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
             gpu=_.get(net_spec, 'gpu'),
         ))
     if self.agent.spec['net']['use_same_optim']:
         logger.info('Using same optimizer for actor and critic')
         critic_kwargs = actor_kwargs
     else:
         logger.info('Using different optimizer for actor and critic')
         critic_kwargs = util.compact_dict(
             dict(
                 hid_layers_activation=_.get(net_spec,
                                             'hid_layers_activation'),
                 optim_param=_.get(net_spec, 'optim_critic'),
                 loss_param=_.get(net_spec, 'loss'),
                 clamp_grad=_.get(net_spec, 'clamp_grad'),
                 clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
                 gpu=_.get(net_spec, 'gpu'),
             ))
     '''
       Below we automatically select an appropriate net based on three different conditions
        1. If the action space is discrete or continuous action
                - Networks for continuous action spaces have two heads and return two values, the first is a tensor containing the mean of the action policy, the second is a tensor containing the std deviation of the action policy. The distribution is assumed to be a Gaussian (Normal) distribution.
                - Networks for discrete action spaces have a single head and return the logits for a categorical probability distribution over the discrete actions
        2. If the actor and critic are separate or share weights
                - If the networks share weights then the single network returns a list.
                     - Continuous action spaces: The return list contains 3 elements: The first element contains the mean output for the actor (policy), the second element the std dev of the policy, and the third element is the state-value estimated by the network.
                     - Discrete action spaces: The return list contains 2 elements. The first element is a tensor containing the logits for a categorical probability distribution over the actions. The second element contains the state-value estimated by the network.
        3. If the network type is feedforward, convolutional, or recurrent
                 - Feedforward and convolutional networks take a single state as input and require an OnPolicyReplay or OnPolicyBatchReplay memory
                 - Recurrent networks take n states as input and require an OnPolicyNStepReplay or OnPolicyNStepBatchReplay memory
     '''
     if net_type == 'MLPseparate':
         self.is_shared_architecture = False
         self.is_recurrent = False
         if self.is_discrete:
             self.actor = getattr(net, 'MLPNet')(state_dim,
                                                 net_spec['hid_layers'],
                                                 action_dim, **actor_kwargs)
             logger.info(
                 "Feedforward net, discrete action space, actor and critic are separate networks"
             )
         else:
             self.actor = getattr(net, 'MLPHeterogenousHeads')(
                 state_dim, net_spec['hid_layers'],
                 [action_dim, action_dim], **actor_kwargs)
             logger.info(
                 "Feedforward net, continuous action space, actor and critic are separate networks"
             )
         self.critic = getattr(net,
                               'MLPNet')(state_dim, net_spec['hid_layers'],
                                         1, **critic_kwargs)
     elif net_type == 'MLPshared':
         self.is_shared_architecture = True
         self.is_recurrent = False
         if self.is_discrete:
             self.actorcritic = getattr(net, 'MLPHeterogenousHeads')(
                 state_dim, net_spec['hid_layers'], [action_dim, 1],
                 **actor_kwargs)
             logger.info(
                 "Feedforward net, discrete action space, actor and critic combined into single network, sharing params"
             )
         else:
             self.actorcritic = getattr(net, 'MLPHeterogenousHeads')(
                 state_dim, net_spec['hid_layers'],
                 [action_dim, action_dim, 1], **actor_kwargs)
             logger.info(
                 "Feedforward net, continuous action space, actor and critic combined into single network, sharing params"
             )
     elif net_type == 'Convseparate':
         self.is_shared_architecture = False
         self.is_recurrent = False
         if self.is_discrete:
             self.actor = getattr(net,
                                  'ConvNet')(state_dim,
                                             net_spec['hid_layers'],
                                             action_dim, **actor_kwargs)
             logger.info(
                 "Convolutional net, discrete action space, actor and critic are separate networks"
             )
         else:
             self.actor = getattr(net, 'ConvNet')(state_dim,
                                                  net_spec['hid_layers'],
                                                  [action_dim, action_dim],
                                                  **actor_kwargs)
             logger.info(
                 "Convolutional net, continuous action space, actor and critic are separate networks"
             )
         self.critic = getattr(net,
                               'ConvNet')(state_dim, net_spec['hid_layers'],
                                          1, **critic_kwargs)
     elif net_type == 'Convshared':
         self.is_shared_architecture = True
         self.is_recurrent = False
         if self.is_discrete:
             self.actorcritic = getattr(net,
                                        'ConvNet')(state_dim,
                                                   net_spec['hid_layers'],
                                                   [action_dim, 1],
                                                   **actor_kwargs)
             logger.info(
                 "Convolutional net, discrete action space, actor and critic combined into single network, sharing params"
             )
         else:
             self.actorcritic = getattr(net, 'ConvNet')(
                 state_dim, net_spec['hid_layers'],
                 [action_dim, action_dim, 1], **actor_kwargs)
             logger.info(
                 "Convolutional net, continuous action space, actor and critic combined into single network, sharing params"
             )
     elif net_type == 'Recurrentseparate':
         self.is_shared_architecture = False
         self.is_recurrent = True
         if self.is_discrete:
             self.actor = getattr(net, 'RecurrentNet')(
                 state_dim, net_spec['hid_layers'], action_dim,
                 mem_spec['length_history'], **actor_kwargs)
             logger.info(
                 "Recurrent net, discrete action space, actor and critic are separate networks"
             )
         else:
             self.actor = getattr(net, 'RecurrentNet')(
                 state_dim, net_spec['hid_layers'],
                 [action_dim, action_dim], mem_spec['length_history'],
                 **actor_kwargs)
             logger.info(
                 "Recurrent net, continuous action space, actor and critic are separate networks"
             )
         self.critic = getattr(net,
                               'RecurrentNet')(state_dim,
                                               net_spec['hid_layers'], 1,
                                               mem_spec['length_history'],
                                               **critic_kwargs)
     elif net_type == 'Recurrentshared':
         self.is_shared_architecture = True
         self.is_recurrent = True
         if self.is_discrete:
             self.actorcritic = getattr(net, 'RecurrentNet')(
                 state_dim, net_spec['hid_layers'], [action_dim, 1],
                 mem_spec['length_history'], **actor_kwargs)
             logger.info(
                 "Recurrent net, discrete action space, actor and critic combined into single network, sharing params"
             )
         else:
             self.actorcritic = getattr(net, 'RecurrentNet')(
                 state_dim, net_spec['hid_layers'],
                 [action_dim, action_dim, 1], mem_spec['length_history'],
                 **actor_kwargs)
             logger.info(
                 "Recurrent net, continuous action space, actor and critic combined into single network, sharing params"
             )
     else:
         logger.warn(
             "Incorrect network type. Please use 'MLPshared', 'MLPseparate', 'Convshared', 'Convseparate', 'Recurrentshared', or 'Recurrentseparate'."
         )
         raise NotImplementedError
Example #52
 def log_metrics(self, metrics, df_mode):
     '''Log session metrics'''
     prefix = self.get_log_prefix()
     row_str = '  '.join([f'{k}: {v:g}' for k, v in metrics.items()])
     msg = f'{prefix} [{df_mode}_df metrics] {row_str}'
     logger.info(msg)
Example #53
 def close(self):
     logger.info('Trial done, closing.')
Example #54
def save(net, model_path):
    '''Save model weights to path'''
    torch.save(net.state_dict(), util.smart_path(model_path))
    logger.info(f'Saved model to {model_path}')
Example #55
 def load(self):
     '''Load net models for algorithm given the required property self.net_names'''
     if not hasattr(self, 'net_names'):
         logger.info('No net declared in self.net_names in init_nets(); no models to load.')
     else:
         net_util.load_algorithm(self)
Example #56
def load(net, model_path):
    '''Load model weights from a path into a net module'''
    net.load_state_dict(torch.load(util.smart_path(model_path)))
    logger.info(f'Loaded model from {model_path}')
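A minimal round-trip sketch using the save() and load() helpers above with a bare torch module; the filename is arbitrary and util.smart_path is assumed to resolve it relative to the working directory.

import torch.nn as nn

net = nn.Linear(4, 2)        # stand-in module, not an SLM-Lab net
save(net, 'tmp_model.pth')   # writes net.state_dict() via torch.save
load(net, 'tmp_model.pth')   # restores the weights in place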
Example #57
def save_trial_data(spec, info_space, trial_fitness_df, trial_fig):
    '''Save the trial data: spec, trial_fitness_df.'''
    prepath = util.get_prepath(spec, info_space, unit='trial')
    logger.info(f'Saving trial data to {prepath}')
    util.write(trial_fitness_df, f'{prepath}_trial_fitness_df.csv')
    viz.save_image(trial_fig, f'{prepath}_trial_graph.png')
Example #58
 def run_distributed_sessions(self):
     logger.info('Running distributed sessions')
     global_nets = self.init_global_nets()
     session_datas = self.parallelize_sessions(global_nets)
     return session_datas
Example #59
 def load(self):
     '''Load net models for algorithm given the required property self.net_names'''
     if not hasattr(self, 'net_names'):
         logger.info('No net declared in self.net_names in init_nets(); no models to load.')
     else:
         net_util.load_algorithm(self)
Example #60
 def post_body_init(self):
     '''Initializes the part of the algorithm that needs a body to exist first.'''
     self.init_nets()
     self.init_algo_params()
     logger.info(util.self_desc(self))