Example #1
def make_experiment(exp_name, zip_project=False, track_git=True):
    # create the experiment record; add any other relevant information here.
    assert not exp_exists(exp_name), """experiment with this name already exists, either remove the existing version or rename the new launch."""
    arg_dict = {}
    expID = make_id(32)
    arg_dict['expID'] = expID
    arg_dict['script'] = sys.argv[0] # TODO: check that this is the correct approach.
    _logger.configure(LOG_ROOT, prefix=exp_name)
    if zip_project:
        dir_zip(PROJECT_ROOT, output_file='source.zip', excludes=["*.ckpt*", "*tmp_dir*", "*.mp4", "*.png", "*data*", "*.pkl", "*.git*"])
        shutil.move('source.zip', os.path.join(LOG_ROOT, exp_name, 'source.zip'))
    if track_git:
        arg_dict['gitcommit'] = get_gitcommit()
        arg_dict['gitbranch'] = get_gitbranch()
        
    _logger.log_params(Args={'expID': expID})
    timestamp = datetime.datetime.now().strftime("%x :: %X")
   
    exp_doc = {
        'expID': expID, 'expName': exp_name, 
        'logdir': os.path.join(LOG_ROOT, exp_name),
        'time': timestamp    
    }

    coll = get_exp_coll()
    coll.insert(exp_doc)
    return expID
Example #2
def adapt_and_test():
    import os
    import dill
    from playground.maml.maml_torch.maml_multi_step import FunctionalMLP

    logger.configure(log_directory=Args.log_dir, prefix=Args.log_prefix)
    logger.log_params(Args=vars(Args))

    # load weights
    with open(os.path.join(Args.log_dir, Args.log_prefix, Args.weight_path),
              'rb') as f:
        weights = dill.load(f)
    model = FunctionalMLP(1, 1)

    losses = DefaultBear(list)
    for amp, task in amp_tasks:
        model.params.update({
            k: t.tensor(v, requires_grad=True, dtype=t.double).to(device)
            for k, v in weights[0].items()
        })
        sgd = t.optim.SGD(model.parameters(), lr=Args.learning_rate)
        proper = t.tensor(task.proper()).to(device)
        samples = t.tensor(task.samples(Args.k_shot)).to(device)

        for grad_ind in range(Args.grad_steps):
            with t.no_grad():
                xs, labels = proper
                ys = model(xs.unsqueeze(-1))
                loss = model.criteria(ys, labels.unsqueeze(-1))
                logger.log(grad_ind,
                           loss=loss.item(),
                           silent=grad_ind != Args.grad_steps - 1)
                losses[f"amp-{amp:.2f}-loss"].append(loss.item())

            xs, labels = samples
            ys = model(xs.unsqueeze(-1))
            loss = model.criteria(ys, labels.unsqueeze(-1))
            sgd.zero_grad()
            loss.backward()
            sgd.step()
        # losses = np.array([v for k, v in losses.items()])

    import matplotlib.pyplot as plt
    fig = plt.figure()
    plt.title(f'Learning Curves')
    for amp, task in amp_tasks:
        plt.plot(losses[f"amp-{amp:.2f}-loss"], label=f"amp {amp:.2f}")
    plt.legend()
    logger.log_pyplot(None, key=f"losses/learning_curves_amp.png", fig=fig)
    plt.close()

    average_losses = np.array(
        [losses[f"amp-{amp:.2f}-loss"] for amp, task in amp_tasks])
    fig = plt.figure()
    plt.title(f'Learning Curves Averaged amp ~ [5 - 10]')
    plt.plot(average_losses.mean(0))
    plt.ylim(0, 28)
    logger.log_pyplot(None, key=f"losses/learning_curves_amp_all.png", fig=fig)
    plt.close()
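
DefaultBear is not defined in this snippet; as far as this example uses it, it only needs to behave like a defaultdict keyed by metric name. A stand-in sketch under that assumption (not the actual class):

from collections import defaultdict

# stand-in assumption: DefaultBear(list) creates an empty list per new metric key
losses = defaultdict(list)
losses["amp-5.00-loss"].append(0.37)  # same access pattern as the loop above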
Example #3
def test(setup):
    d = Color(3.1415926, 'red')
    s = "{:.1}".format(d)
    print(s)

    logger.log_params(G=dict(some_config="hey"))
    logger.log(step=0, some=Color(0.1, 'yellow'))
    logger.log(step=1, some=Color(0.28571, 'yellow', lambda v: "{:.5f}%".format(v * 100)))
    logger.log(step=2, some=Color(0.85, 'yellow', percent))
    logger.log({"some_var/smooth": 10}, some=Color(0.85, 'yellow', percent), step=3)
    logger.log(step=4, some=Color(10, 'yellow'))
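
The `percent` formatter passed to Color in the last two calls is not defined in this snippet; judging from the explicit lambda a line earlier, a compatible stand-in might look like the sketch below (an assumption, the exact precision may differ).

def percent(v):
    # hypothetical stand-in: format a fraction such as 0.85 as a percentage,
    # mirroring the lambda "{:.5f}%".format(v * 100) used above
    return "{:.2f}%".format(v * 100)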
Example #4
def launch_maml_mlp(log_prefix=None, **_G):
    G.log_prefix = log_prefix or f'{now:%Y-%m-%d}/debug-maml-baselines/sinusoid-maml-mlp'
    G.update(_G)

    logger.configure(log_directory=G.log_dir, prefix=G.log_prefix)
    logger.log_params(G=vars(G))

    np.random.seed(G.seed)
    t.manual_seed(G.seed)
    t.cuda.manual_seed(G.seed)

    maml(test_fn=standard_sine_test)
Example #5
def launch_maml_lstm(log_prefix=None, **_G):
    G.log_prefix = log_prefix or f'{now:%Y-%m-%d}/debug-maml-baselines/sinusoid-maml-lstm'
    G.update(_G)

    logger.configure(log_directory=G.log_dir, prefix=G.log_prefix)
    logger.log_params(G=vars(G))

    np.random.seed(G.seed)
    t.manual_seed(G.seed)
    t.cuda.manual_seed(G.seed)

    auto_rnn = FunctionalLSTM(1, 1, 10)
    maml(model=auto_rnn, test_fn=standard_sine_test)
Example #6
def launch_reptile_auto_gru(log_prefix=None, **_G):
    G.log_prefix = log_prefix or f'{now:%Y-%m-%d}/debug-maml-baselines/sinusoid-reptile-auto-gru'
    G.update(_G)

    logger.configure(log_directory=G.log_dir, prefix=G.log_prefix)
    logger.log_params(G=vars(G))

    np.random.seed(G.seed)
    t.manual_seed(G.seed)
    t.cuda.manual_seed(G.seed)

    auto_rnn = FunctionalAutoGRU(1, 1, 10)
    reptile(model=auto_rnn, test_fn=standard_sine_test)
Example #7
def run_e_maml():
    # print(config.RUN.log_directory)
    # if config.G.run_mode == "e_maml":
    #     print('{G.inner_alg} E-MAML'.format(G=config.G))
    # elif config.G.run_mode == "maml":
    #     print('{G.inner_alg} Vanilla MAML'.format(G=config.G))

    # todo: let's take control of the log directory away from the train script. It should all be set from outside.
    logger.configure(log_directory=config.RUN.log_directory,
                     prefix=f"run_maml-{config.G.seed}")
    logger.log_params(RUN=vars(config.RUN),
                      G=vars(config.G),
                      Reporting=vars(config.Reporting),
                      DEBUG=vars(config.DEBUG))

    import sys
    print(" ".join(sys.argv))

    tasks = MetaRLTasks(env_name=config.G.env_name,
                        batch_size=config.G.n_parallel_envs,
                        start_seed=config.G.start_seed,
                        task_seed=config.G.task_seed,
                        log_directory=(config.RUN.log_directory +
                                       "/{seed}") if config.G.render else None,
                        max_steps=config.G.env_max_timesteps)

    test_tasks = MetaRLTasks(env_name=config.G.env_name, batch_size=config.G.n_parallel_envs,
                             start_seed=config.G.test_start_seed,
                             task_seed=config.G.test_task_seed,
                             log_directory=(config.RUN.log_directory + "/{seed}") if config.G.render else None,
                             max_steps=config.G.env_max_timesteps) if config.G.eval_test_interval \
        else ExitStack()

    # with Dashboard(config.RUN.prefix, server=config.Reporting.plot_server,
    #                port=config.Reporting.plot_server_port) as dash, U.single_threaded_session(), tasks, test_tasks:
    with U.make_session(num_cpu=config.G.n_cpu), tasks, test_tasks:
        # logger.on_dumpkvs(make_plot_fn(dash))
        maml = E_MAML(ob_space=tasks.envs.observation_space,
                      act_space=tasks.envs.action_space)
        summary = tf.summary.FileWriter(config.RUN.log_directory,
                                        tf.get_default_graph())
        summary.flush()
        trainer = Trainer()
        U.initialize()
        trainer.train(tasks=tasks, maml=maml, test_tasks=test_tasks)
        # logger.clear_callback()

    tf.reset_default_graph()
Example #8
def torch_upload():
    from ml_logger import logger
    import numpy as np

    logger.configure(root_dir="http://54.71.92.65:9080", prefix="geyang/ml_logger-debug/test-1",
                     register_experiment=True)
    logger.log_params(args={})

    with logger.Sync():
        import os
        import torch
        from pycurl import Curl
        from tempfile import NamedTemporaryFile

        logger.remove('upload/example.pt')

        with NamedTemporaryFile(delete=True) as f:
            torch.save(np.ones([10_000_000]), f)
            # torch.save(np.ones([1000_000]), f)
            logger.print(f.name)

            c = Curl()
            c.setopt(c.URL, logger.root_dir)
            # proxy = os.environ.get('HTTP_PROXY')
            # c.setopt(c.PROXY, proxy)
            # logger.print('proxy:', proxy)
            c.setopt(c.TIMEOUT, 100000)
            c.setopt(c.HTTPPOST, [
                ('file', (
                    c.FORM_FILE, f.name,
                    c.FORM_FILENAME, logger.prefix + '/upload/example.pt',
                    c.FORM_CONTENTTYPE, 'plain/text',
                )),
            ])
            c.perform()
            c.close()

        logger.print('done')


        # logger.remove(".")
        # a = np.ones([1, 1, 100_000_000 // 4])
        # logger.print(f"the size of the tensor is {a.size}")
        # data = dict(key="ok", large=a)
        # logger.torch_save(data, f"save/data-{logger.now('%H.%M.%S')}.pkl")
    logger.print('done')
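
For readers less familiar with pycurl, the same multipart form POST could be sketched with requests; the URL, field name, and target path below simply mirror the values used in the snippet and are otherwise placeholders.

import requests

SERVER = "http://54.71.92.65:9080"  # logger.root_dir in the snippet above
with open("/tmp/example.pt", "rb") as fh:  # stands in for the NamedTemporaryFile
    requests.post(
        SERVER,
        # 'plain/text' mirrors the original call; the standard MIME type would be 'text/plain'
        files={"file": ("geyang/ml_logger-debug/test-1/upload/example.pt", fh, "plain/text")},
        timeout=100_000,
    )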
Example #9
def run_e_maml(_G=None):
    import baselines.common.tf_util as U
    if _G is not None:
        config.G.update(_G)

    for k, v in [
            *vars(config.RUN).items(), *vars(config.G).items(),
            *vars(config.Reporting).items(), *vars(config.DEBUG).items()
    ]:
        comet_logger.log_parameter(k, v)

    # todo: let's take control of the log directory away from the train script. It should all be set from outside.
    logger.configure(log_directory=config.RUN.log_dir,
                     prefix=config.RUN.log_prefix)
    logger.log_params(RUN=vars(config.RUN),
                      G=vars(config.G),
                      Reporting=vars(config.Reporting),
                      DEBUG=vars(config.DEBUG))
    logger.log_file(__file__)

    tasks = MetaRLTasks(env_name=config.G.env_name,
                        batch_size=config.G.n_parallel_envs,
                        start_seed=config.G.start_seed,
                        log_directory=(config.RUN.log_directory +
                                       "/{seed}") if config.G.render else None,
                        max_steps=config.G.env_max_timesteps)

    # sess_config = tf.ConfigProto(log_device_placement=config.Reporting.log_device_placement)
    # with tf.Session(config=sess_config), tf.device('/gpu:0'), tasks:
    graph = tf.Graph()
    with graph.as_default(), U.make_session(num_cpu=config.G.n_cpu), tasks:
        maml = E_MAML(ob_space=tasks.envs.observation_space,
                      act_space=tasks.envs.action_space)
        comet_logger.set_model_graph(tf.get_default_graph())

        # writer = tf.summary.FileWriter(logdir='/opt/project/debug-graph', graph=graph)
        # writer.flush()
        # exit()

        trainer = Trainer()
        U.initialize()
        trainer.train(tasks=tasks, maml=maml)
        logger.flush()

    tf.reset_default_graph()
Example #10
        def _fit_by_cv_and_eval(estimator_key, conf_dict):
            estimator, param_grid, param_dict_init = _initialize_model_cv(estimator_key, conf_dict, verbose=True)

            # 1) perform cross-validation hyperparam search to select params
            selected_params = estimator.fit_by_cv(X_train, Y_train, param_grid=param_grid, n_folds=n_folds,
                                                  n_jobs=n_jobs_inner, random_state=rds)

            logger.log("%s selected params:"%estimator_key)
            logger.log_params(**selected_params)
            # 2) evaluate selected params with different initializations
            param_dict_init.update(selected_params)

            logger.log("evaluating %s parameters with %i seeds"%(estimator_key, len(eval_seeds)))
            scores = _evaluate_params(estimator.__class__, param_dict_init, X_train, Y_train, X_valid, Y_valid,
                                      seeds=eval_seeds)

            cv_result_dict[estimator_key] = {'selected_params': selected_params, 'scores': scores, 'eval_seeds': eval_seeds}
            logger.log("evaluation scores for %s: %s" % (estimator_key, str(scores)))
Example #11
def launch(**_G):
    import matplotlib

    matplotlib.use('Agg')

    G.update(_G)

    import numpy as np
    np.random.seed(G.seed)
    t.manual_seed(G.seed)
    t.cuda.manual_seed(G.seed)

    logger.configure(log_directory=G.log_dir, prefix=G.log_prefix)

    logger.log_params(G=vars(G))

    model = Model(**vars(G))
    from playground.maml.maml_torch.tasks import Sine
    maml_supervised(model, Sine, **vars(G))
Example #12
def run_maml(_G=None):
    if _G is not None:
        G.update(_G)

    for k, v in vars(G).items():
        comet_logger.log_parameter(k, v)

    # todo: let's take control of the log directory away from the train script. It should all be set from outside.
    logger.configure(log_directory=G.log_dir, prefix=G.log_prefix)
    logger.log_params(G=vars(G), )
    logger.log_file(__file__)

    tasks = MetaRLTasks(env_name=G.env_name,
                        batch_size=G.n_parallel_envs,
                        start_seed=G.start_seed,
                        max_steps=G.env_max_timesteps)

    env = tasks.sample()

    print(env)
Example #13
def launch_training():
    from playground.maml.maml_torch.maml_multi_step import maml, G

    np.random.seed(G.seed)
    t.manual_seed(G.seed)
    t.cuda.manual_seed(G.seed)

    from datetime import datetime

    now = datetime.now()
    G.log_prefix = f"{now:%Y-%m-%d}/new-maml-torch/out-of-distribution"
    G.n_epochs = 70000  # from cbfinn universality paper
    G.n_gradient_steps = 5
    G.test_grad_steps = [1, 5]
    G.test_interval = 5
    G.save_interval = 100  # save the weights every 100 epochs.

    logger.configure(log_directory=G.log_dir, prefix=G.log_prefix)
    logger.log_params(G=vars(G))

    maml(test_fn=all_tests)
Example #14
def start_run(exp_name, params, runID=None):
    coll = get_exp_coll()
    if runID is None:
        runID = make_id(8)
    
    run_tstamp = datetime.datetime.now().strftime("%x :: %X")

    script_txt = ' '.join(sys.argv)
    run_doc = {
        'runID': runID,
        'params': params,
        'runpath': os.path.join(LOG_ROOT, exp_name + '/' + runID),
        'script': sys.argv[0],
        'command': script_txt,
        'timestamp': run_tstamp
    }
    _logger.configure(LOG_ROOT, prefix=exp_name + '/' + runID)  
    coll.update_one({'expName': exp_name}, {'$push': {'runs': run_doc}})
    _logger.log_params(Args=params)
    
    return _logger 
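
A hypothetical calling sequence that ties this together with make_experiment from Example #1 (experiment name and hyper-parameters are placeholders):

# hypothetical usage sketch, not part of the original source
exp_id = make_experiment('sinusoid-maml', zip_project=True, track_git=True)
run_logger = start_run('sinusoid-maml', params={'learning_rate': 1e-3, 'k_shot': 5})
run_logger.log(step=0, loss=0.0)  # the logger returned by start_run is already configured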
Example #15
    def thunk(*args, **kwargs):
        import traceback
        from ml_logger import logger

        assert not (args and ARGS), \
            f"can not use position argument at both thunk creation as well as run.\n" \
            f"_args: {args}\n" \
            f"ARGS: {ARGS}\n"

        logger.configure(root_dir=RUN.server,
                         prefix=PREFIX,
                         register_experiment=False,
                         max_workers=10)
        logger.log_params(host=dict(hostname=logger.hostname),
                          run=dict(status="running",
                                   startTime=logger.now(),
                                   job_id=logger.job_id))

        import time
        try:
            _KWARGS = {**KWARGS}
            _KWARGS.update(**kwargs)

            results = fn(*(args or ARGS), **_KWARGS)

            logger.log_line("========== execution is complete ==========")
            logger.log_params(
                run=dict(status="completed", completeTime=logger.now()))
            logger.flush()
            time.sleep(3)
        except Exception as e:
            tb = traceback.format_exc()
            with logger.SyncContext():  # Make sure uploads finish before termination.
                logger.print(tb, color="red")
                logger.log_text(tb, filename="traceback.err")
                logger.log_params(
                    run=dict(status="error", exitTime=logger.now()))
                logger.flush()
            time.sleep(3)
            raise e

        return results
Example #16
    def _(*args, **kwargs):
        import traceback
        from ml_logger import logger

        assert not (args and ARGS), f"can not use position argument at both thunk creation as well as " \
            f"run.\n_args: {args}\nARGS: {ARGS}"

        logger.configure(log_directory=RUN.server,
                         prefix=PREFIX,
                         register_experiment=False,
                         max_workers=10)
        logger.log_params(host=dict(hostname=logger.hostname),
                          run=dict(status="running", startTime=logger.now()))

        try:
            _KWARGS = KWARGS.copy()
            _KWARGS.update(kwargs)

            fn(*(args or ARGS), **_KWARGS)

            logger.log_line("========= execution is complete ==========")
            logger.log_params(
                run=dict(status="completed", completeTime=logger.now()))
        except Exception as e:
            import time
            time.sleep(1)
            tb = traceback.format_exc()
            with logger.SyncContext():  # Make sure uploads finish before termination.
                logger.log_text(tb, filename="traceback.err")
                logger.log_params(
                    run=dict(status="error", exitTime=logger.now()))
                logger.log_line(tb)
                logger.flush()
            time.sleep(30)
            raise e

        import time
        time.sleep(30)
Example #17
    args = [
        dict(x=x, y=y, filename=f"images/{x:0.3f},{y:0.3f}.png")
        for x in np.linspace(-0.25, 0.25, 128)
    ]
    logger.log_data(args, 'index.pkl')

    for p in tqdm(args):
        x, y, filename = p['x'], p['y'], p['filename']
        env.set_state(np.array([x, y, 0, 0]), np.array([0, 0, 0, 0]))
        # env.do_simulation([0, 0], 1) # PointMass does not need this.
        image = env.render('grey', width=20, height=20)
        frames.append(image)
        logger.log_image(image, filename)

    print('saving video')
    logger.log_video(frames, f"{env_name}.mp4")
    print('done')


if __name__ == "__main__":
    import os
    from ml_logger import logger
    logger.log_params(some_namespace=dict(layer=10, learning_rate=0.0001))
    exit()

    # logger.configure(log_directory="/tmp/learning-to-imitate", prefix="envs")
    logger.configure(log_directory=os.path.abspath("../datasets"), prefix="")
    # logger.configure(log_directory="http://54.71.92.65:8081", prefix="debug/many_world/")

    [render(env) for env in envs]
Example #18
def train():
    from moleskin import moleskin as M

    M.tic('Full Run')
    if G.model == "lenet":
        model = Conv2d()
    elif G.model == 'mlp':
        model = Mlp()
    else:
        raise NotImplementedError('only lenet and mlp are allowed')
    model.train()
    print(model)

    G.log_prefix = f"mnist_{type(model).__name__}"
    logger.configure(log_directory=G.log_dir, prefix=G.log_prefix)
    logger.log_params(G=vars(G), Model=dict(architecture=str(model)))

    from torchvision import datasets, transforms

    trans = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, ), (1.0, ))])
    train_set = datasets.MNIST(root=G.data_dir,
                               train=True,
                               transform=trans,
                               download=True)
    test_set = datasets.MNIST(root=G.data_dir,
                              train=False,
                              transform=trans,
                              download=True)
    train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                               batch_size=G.batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=G.batch_size,
                                              shuffle=False)

    celoss = nn.CrossEntropyLoss()
    adam = optim.SGD(model.parameters(), lr=G.learning_rate, momentum=0.9)  # note: despite the name, this optimizer is SGD with momentum
    for epoch in range(G.n_epochs):
        for it, (x, target) in enumerate(train_loader):
            adam.zero_grad()
            ys = model(x)
            loss = celoss(ys, target)
            loss.backward()
            adam.step()

            if it % G.test_interval == 0:
                with h.Eval(model), torch.no_grad():
                    accuracy = h.Average()
                    for x, label in test_loader:
                        acc = h.cast(
                            h.one_hot_to_int(model(x).detach()) == label,
                            float).sum() / len(x)
                        accuracy.add(acc.detach().numpy())
                logger.log(float(epoch) + it / len(train_loader),
                           accuracy=accuracy.value)

        M.split("epoch")
        # logger.log(epoch, it=it, loss=loss.detach().numpy())
    M.toc('Full Run')
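
The h.* helpers (h.Eval, h.Average, h.one_hot_to_int, h.cast) are not shown here. For the accuracy loop above, a running-average accumulator compatible with the add/value calls could be as small as this sketch (an assumption, not the actual helper module):

class Average:
    """Minimal running mean: add() accumulates, value returns the mean so far."""

    def __init__(self):
        self.total, self.count = 0.0, 0

    def add(self, x):
        self.total += float(x)
        self.count += 1

    @property
    def value(self):
        return self.total / self.count if self.count else 0.0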
Example #19
def train(deps=None, **kwargs):
    from ml_logger import logger
    from dmc_gen.config import Args

    Args._update(deps, **kwargs)
    logger.log_params(Args=vars(Args))

    utils.set_seed_everywhere(Args.seed)
    wrappers.VideoWrapper.prefix = wrappers.ColorWrapper.prefix = DMCGEN_DATA

    # Initialize environments
    image_size = 84 if Args.algo == 'sac' else 100
    env = wrappers.make_env(
        domain_name=Args.domain,
        task_name=Args.task,
        seed=Args.seed,
        episode_length=Args.episode_length,
        action_repeat=Args.action_repeat,
        image_size=image_size,
    )
    test_env = wrappers.make_env(domain_name=Args.domain,
                                 task_name=Args.task,
                                 seed=Args.seed + 42,
                                 episode_length=Args.episode_length,
                                 action_repeat=Args.action_repeat,
                                 image_size=image_size,
                                 mode=Args.eval_mode)

    # Prepare agent
    cropped_obs_shape = (3 * Args.frame_stack, 84, 84)
    agent = make_agent(algo=Args.algo,
                       obs_shape=cropped_obs_shape,
                       act_shape=env.action_space.shape,
                       args=Args).to(Args.device)

    if Args.load_checkpoint:
        print('Loading from checkpoint:', Args.load_checkpoint)
        logger.load_module(agent,
                           path="models/*.pkl",
                           wd=Args.load_checkpoint,
                           map_location=Args.device)

    replay_buffer = utils.ReplayBuffer(obs_shape=env.observation_space.shape,
                                       action_shape=env.action_space.shape,
                                       capacity=Args.train_steps,
                                       batch_size=Args.batch_size)

    episode, episode_reward, episode_step, done = 0, 0, 0, True
    logger.start('train')
    for step in range(Args.start_step, Args.train_steps + 1):
        if done:
            if step > Args.start_step:
                logger.store_metrics({'dt_epoch': logger.split('train')})
                logger.log_metrics_summary(dict(step=step),
                                           default_stats='mean')

            # Evaluate agent periodically
            if step % Args.eval_freq == 0:
                logger.store_metrics(episode=episode)
                with logger.Prefix(metrics="eval/"):
                    evaluate(env,
                             agent,
                             Args.eval_episodes,
                             save_video=f"videos/{step:08d}_train.mp4")
                with logger.Prefix(metrics="test/"):
                    evaluate(test_env,
                             agent,
                             Args.eval_episodes,
                             save_video=f"videos/{step:08d}_test.mp4")
                logger.log_metrics_summary(dict(step=step),
                                           default_stats='mean')

            # Save agent periodically
            if step > Args.start_step and step % Args.save_freq == 0:
                with logger.Sync():
                    logger.save_module(agent, f"models/{step:06d}.pkl")
                if Args.save_last:
                    logger.remove(f"models/{step - Args.save_freq:06d}.pkl")
                # torch.save(agent, os.path.join(model_dir, f'{step}.pt'))

            logger.store_metrics(episode_reward=episode_reward,
                                 episode=episode + 1,
                                 prefix="train/")

            obs = env.reset()
            episode_reward, episode_step, done = 0, 0, False
            episode += 1

        # Sample action for data collection
        if step < Args.init_steps:
            action = env.action_space.sample()
        else:
            with utils.Eval(agent):
                action = agent.sample_action(obs)

        # Run training update
        if step >= Args.init_steps:
            num_updates = Args.init_steps if step == Args.init_steps else 1
            for _ in range(num_updates):
                agent.update(replay_buffer, step)

        # Take step
        next_obs, reward, done, _ = env.step(action)
        done_bool = 0 if episode_step + 1 == env._max_episode_steps else float(
            done)
        replay_buffer.add(obs, action, reward, next_obs, done_bool)
        episode_reward += reward
        obs = next_obs

        episode_step += 1

    logger.print(
        f'Completed training for {Args.domain}_{Args.task}/{Args.algo}/{Args.seed}'
    )
Example #20
    fn = lambda x: np.random.rand() + (1 + 0.001 * x) * np.sin(x * 0.1 / np.pi)
    fn_1 = lambda x: np.random.rand() + (1 + 0.001 * x) * np.sin(x * 0.04 / np.pi)

    for username in ["episodeyang", "amyzhang"]:
        for project in ['cpc-belief', 'playground']:
            for i in range(10):
                prefix = f"{DEBUG_DIR}/{username}/{project}/{'mdp/' if i < 5 else '/'}experiment_{i:02d}"
                logger.remove(prefix)

                logger.configure(prefix=prefix)

                logger.log_params(Args=dict(lr=10**(-2 - i),
                                            weight_decay=0.001,
                                            gradient_clip=0.9,
                                            env_id="GoalMassDiscreteIdLess-v0",
                                            seed=int(i * 100)))
                for ep in range(500 + 1):
                    logger.log_metrics(epoch=ep,
                                       sine=fn(ep),
                                       slow_sine=fn_1(ep))
                    logger.flush()
                    if ep % 10 == 0:
                        logger.log_image(face('gray'),
                                         f"figures/gray_{ep:04d}.png")
                        logger.log_image(face('rgb'),
                                         f"figures/rgb_{ep:04d}.png")

                logger.log_image(face('gray'), "figures/face_gray.png")
                logger.log_image(face('rgb'), "figures/face_rgb.png")
Example #21
def instr(fn, *ARGS, __file=False, __silent=False, **KWARGS):
    """
    thunk for configuring the logger. The reason why this is not a decorator is

    :param fn: function to be called
    :param *ARGS: position arguments for the call
    :param __file: console mode, bypass file-related logging
    :param __silent: do not print
    :param **KWARGS: keyword arguments for the call
    :return: a thunk that can be called without parameters
    """
    from ml_logger import logger

    if __file:
        caller_script = pJoin(os.getcwd(), __file)
    else:
        launch_module = inspect.getmodule(inspect.stack()[1][0])
        __file = launch_module.__file__
        caller_script = abspath(__file)

    # note: for scripts in the `plan2vec` module this also works, because we truncate at a fixed depth.
    script_path = logger.truncate(caller_script,
                                  depth=len(__file__.split('/')) - 1)
    file_stem = logger.stem(script_path)
    file_name = basename(file_stem)

    RUN(file_name=file_name, file_stem=file_stem, now=logger.now())

    PREFIX = RUN.PREFIX

    # todo: there should be a better way to log these.
    # todo: we shouldn't need to log to the same directory, and the directory for the run shouldn't be fixed.
    logger.configure(
        root_dir=RUN.server,
        prefix=PREFIX,
        asynchronous=False,  # use sync logger
        max_workers=4,
        register_experiment=False)
    if RUN.restart:
        with logger.Sync():
            logger.remove(".")
    logger.upload_file(caller_script)
    # the tension is between creation and run: code snapshots are shared, but runs need to be unique.
    _ = dict()
    if ARGS:
        _['args'] = ARGS
    if KWARGS:
        _['kwargs'] = KWARGS

    logger.log_params(run=logger.run_info(status="created",
                                          script_path=script_path),
                      revision=logger.rev_info(),
                      fn=logger.fn_info(fn),
                      **_,
                      silent=__silent)

    logger.print(
        'taking diff; if this step takes too long, check whether your '
        'uncommitted changes are too large.',
        color="green")
    logger.diff()
    if RUN.readme:
        logger.log_text(RUN.readme, "README.md", dedent=True)

    import jaynes  # now set the job name to prefix
    if jaynes.RUN.config and jaynes.RUN.mode != "local":
        runner_class, runner_args = jaynes.RUN.config['runner']
        if 'name' in runner_args:  # ssh mode does not have 'name'.
            runner_args['name'] = pJoin(file_name, RUN.JOB_NAME)
        del logger, jaynes, runner_args, runner_class
        if not __file:
            cprint(f'Set up job name', "green")

    def thunk(*args, **kwargs):
        import traceback
        from ml_logger import logger

        assert not (args and ARGS), \
            f"can not use position argument at both thunk creation as well as run.\n" \
            f"_args: {args}\n" \
            f"ARGS: {ARGS}\n"

        logger.configure(root_dir=RUN.server,
                         prefix=PREFIX,
                         register_experiment=False,
                         max_workers=10)
        logger.log_params(host=dict(hostname=logger.hostname),
                          run=dict(status="running",
                                   startTime=logger.now(),
                                   job_id=logger.job_id))

        import time
        try:
            _KWARGS = {**KWARGS}
            _KWARGS.update(**kwargs)

            results = fn(*(args or ARGS), **_KWARGS)

            logger.log_line("========== execution is complete ==========")
            logger.log_params(
                run=dict(status="completed", completeTime=logger.now()))
            logger.flush()
            time.sleep(3)
        except Exception as e:
            tb = traceback.format_exc()
            with logger.SyncContext():  # Make sure uploads finish before termination.
                logger.print(tb, color="red")
                logger.log_text(tb, filename="traceback.err")
                logger.log_params(
                    run=dict(status="error", exitTime=logger.now()))
                logger.flush()
            time.sleep(3)
            raise e

        return results

    return thunk
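
A hypothetical usage of instr: wrap the entry point once, then call the returned thunk directly or hand it to a launcher such as jaynes (here train stands for any entry-point function like those in Examples #18 and #19).

thunk = instr(train, seed=100, __silent=True)
thunk()  # runs train(seed=100) with the logger already configured

# or ship the same thunk to a remote worker:
# import jaynes
# jaynes.run(thunk)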
Example #22
def thunk(fn, *ARGS, __prefix="", __timestamp='%H.%M/%S.%f', **KWARGS):
    """
    thunk for configuring the logger. The reason why this is not a decorator is

    :param fn: function to be called
    :param *ARGS: position arguments for the call
    :param __prefix: logging prefix for this run; defaults to "", in which case it adds nothing to the path.
    :param __timestamp: timestamp format string, defaults to '%H.%M/%S.%f'; set to a falsy value to skip the timestamp postfix.
    :param **KWARGS: keyword arguments for the call
    :return: a thunk that can be called without parameters
    """
    from ml_logger import logger

    caller_script = abspath(inspect.getmodule(inspect.stack()[1][0]).__file__)
    # note: for scripts in the `plan2vec` module this also works, because we truncate at a fixed depth.
    script_path = logger.truncate(caller_script,
                                  depth=len(__file__.split('/')) - 1)
    _ = [logger.now(__timestamp)] if __timestamp else []
    PREFIX = join(RUN.prefix, logger.stem(script_path), __prefix, *_)

    # todo: there should be a better way to log these.
    # todo: we shouldn't need to log to the same directory, and the directory for the run shouldn't be fixed.
    logger.configure(
        log_directory=RUN.server,
        prefix=PREFIX,
        asynchronous=False,  # use sync logger
        max_workers=4,
        register_experiment=False)
    # the tension is between creation and run: code snapshots are shared, but runs need to be unique.
    logger.log_params(
        run=logger.run_info(status="created", script_path=script_path),
        revision=logger.rev_info(),
        fn=logger.fn_info(fn),
    )
    logger.log_params(args=ARGS, kwargs=KWARGS)
    logger.diff(silent=True)

    import jaynes  # now set the job name to prefix
    if jaynes.RUN.mode != "local":
        runner_class, runner_args = jaynes.RUN.config['runner']
        if 'name' in runner_args:  # ssh mode does not have 'name'.
            runner_args['name'] = PREFIX.replace("geyang/",
                                                 "")  # destroy my traces.
        del logger, jaynes, runner_args, runner_class
        cprint(f'{__file__}: Set up job name', "green")

    def _(*args, **kwargs):
        import traceback
        from ml_logger import logger

        assert not (args and ARGS), f"can not use position argument at both thunk creation as well as " \
            f"run.\n_args: {args}\nARGS: {ARGS}"

        logger.configure(log_directory=RUN.server,
                         prefix=PREFIX,
                         register_experiment=False,
                         max_workers=10)
        logger.log_params(host=dict(hostname=logger.hostname),
                          run=dict(status="running", startTime=logger.now()))

        try:
            _KWARGS = KWARGS.copy()
            _KWARGS.update(kwargs)

            fn(*(args or ARGS), **_KWARGS)

            logger.log_line("========= execution is complete ==========")
            logger.log_params(
                run=dict(status="completed", completeTime=logger.now()))
        except Exception as e:
            import time
            time.sleep(1)
            tb = traceback.format_exc()
            with logger.SyncContext():  # Make sure uploads finish before termination.
                logger.log_text(tb, filename="traceback.err")
                logger.log_params(
                    run=dict(status="error", exitTime=logger.now()))
                logger.log_line(tb)
                logger.flush()
            time.sleep(30)
            raise e

        import time
        time.sleep(30)

    return _