Example #1
def main(config):
    if config.gpu_growth:
        for gpu in tf.config.experimental.list_physical_devices("GPU"):
            tf.config.experimental.set_memory_growth(gpu, True)
    assert config.precision in (16, 32), config.precision
    if config.precision == 16:
        prec.set_policy(prec.Policy("mixed_float16"))
    config.steps = int(config.steps)
    config.logdir.mkdir(parents=True, exist_ok=True)
    print("Logdir", config.logdir)

    # Create environments.
    datadir = config.logdir / "episodes"
    writer = tf.summary.create_file_writer(
        str(config.logdir), max_queue=1000, flush_millis=20000
    )
    writer.set_as_default()
    train_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, "train", datadir, store=True),
            config.parallel,
        )
        for _ in range(config.envs)
    ]
    test_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, "test", datadir, store=False),
            config.parallel,
        )
        for _ in range(config.envs)
    ]
    actspace = train_envs[0].action_space

    # Prefill dataset with random episodes.
    step = count_steps(datadir, config)
    prefill = max(0, config.prefill - step)
    print(f"Prefill dataset with {prefill} steps.")
    random_agent = lambda o, d, _: ([actspace.sample() for _ in d], None)
    tools.simulate(random_agent, train_envs, prefill / config.action_repeat)
    writer.flush()

    # Train and regularly evaluate the agent.
    step = count_steps(datadir, config)
    print(f"Simulating agent for {config.steps-step} steps.")
    agent = Dreamer(config, datadir, actspace, writer)
    if (config.logdir / "variables.pkl").exists():
        print("Load checkpoint.")
        agent.load(config.logdir / "variables.pkl")
    state = None
    while step < config.steps:
        print("Start evaluation.")
        tools.simulate(functools.partial(agent, training=False), test_envs, episodes=1)
        writer.flush()
        print("Start collection.")
        steps = config.eval_every // config.action_repeat
        state = tools.simulate(agent, train_envs, steps, state=state)
        step = count_steps(datadir, config)
        agent.save(config.logdir / "variables.pkl")
    for env in train_envs + test_envs:
        env.close()
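
These Dreamer-style main(config) functions take an argparse.Namespace whose logdir is a pathlib.Path. A minimal sketch of a matching entry point, assuming a hypothetical define_config() dict of defaults (real configs carry many more flags and a custom bool parser):

import argparse
import pathlib


def define_config():
    # Hypothetical defaults, for illustration only.
    return dict(logdir=pathlib.Path('./logdir'), steps=5e6, envs=1,
                prefill=5000, action_repeat=2, eval_every=1e4, precision=16)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    for key, value in define_config().items():
        # Caveat: bool defaults would need a custom parser here, since
        # type(True)('False') is still truthy.
        parser.add_argument(f'--{key}', type=type(value), default=value)
    main(parser.parse_args())  # the main() defined above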
Example #2
def main(config):
    print(config)
    os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu_id)
    if config.gpu_growth:
        # for gpu in tf.config.experimental.list_physical_devices('GPU'):
        #   tf.config.experimental.set_memory_growth(gpu, True)
        print(tf.config.experimental.list_physical_devices('GPU'))
        tf.config.experimental.set_memory_growth(
            tf.config.experimental.list_physical_devices('GPU')[0], True)
    assert config.precision in (16, 32), config.precision
    if config.precision == 16:
        prec.set_policy(prec.Policy('mixed_float16'))
    config.steps = int(config.steps)
    config.logdir.mkdir(parents=True, exist_ok=True)
    print('Logdir', config.logdir)

    # Create environments.
    datadir = config.logdir / 'episodes'
    writer = tf.summary.create_file_writer(str(config.logdir),
                                           max_queue=1000,
                                           flush_millis=20000)
    writer.set_as_default()
    train_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, 'train', datadir, store=True),
            config.parallel) for _ in range(config.envs)
    ]
    test_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, 'test', datadir, store=False),
            config.parallel) for _ in range(config.envs)
    ]
    actspace = train_envs[0].action_space

    # Prefill dataset with random episodes.
    step = count_steps(datadir, config)
    prefill = max(0, config.prefill - step)
    print(f'Prefill dataset with {prefill} steps.')
    random_agent = lambda o, d, _: ([actspace.sample() for _ in d], None)
    tools.simulate(random_agent, train_envs, prefill / config.action_repeat)
    writer.flush()

    # Train and regularly evaluate the agent.
    step = count_steps(datadir, config)
    print(f'Simulating agent for {config.steps-step} steps.')
    agent = Dreamer(config, datadir, actspace, writer)
    if (config.logdir / 'variables.pkl').exists():
        print('Load checkpoint.')
        agent.load(config.logdir / 'variables.pkl')
    state = None
    last_time = time.time()
    while step < config.steps:
        print("current_time is:", time.time() - last_time)
        last_time = time.time()
        print('Start evaluation.')
        tools.simulate(functools.partial(agent, training=False),
                       test_envs,
                       episodes=1)
        writer.flush()
        print('Start collection.')
        steps = config.eval_every // config.action_repeat
        state = tools.simulate(agent, train_envs, steps, state=state)
        step = count_steps(datadir, config)
        agent.save(config.logdir / 'variables.pkl')
    for env in train_envs + test_envs:
        env.close()
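
This variant pins a single card by setting CUDA_VISIBLE_DEVICES before TensorFlow touches the GPU; the environment variable remaps device numbering, so the chosen card shows up as the only visible GPU and indexing [0] selects it. A small sketch of the same setup as a reusable helper (setup_gpu is a hypothetical name):

import os

import tensorflow as tf


def setup_gpu(gpu_id, growth=True):
    # Must run before TensorFlow initializes the CUDA runtime; afterwards
    # the pinned card is the only GPU the process can see.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
    if growth:
        for gpu in tf.config.experimental.list_physical_devices('GPU'):
            tf.config.experimental.set_memory_growth(gpu, True)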
Example #3
def main(config):
    if config.gpu_growth:
        for gpu in tf.config.experimental.list_physical_devices('GPU'):
            tf.config.experimental.set_memory_growth(gpu, True)
    assert config.precision in (16, 32), config.precision
    if config.precision == 16:
        prec.set_policy(prec.Policy('mixed_float16'))
    config.steps = int(config.steps)
    config.logdir.mkdir(parents=True, exist_ok=True)
    print('Logdir', config.logdir)

    # Create environments.
    datadir = config.logdir / 'episodes'
    writer = tf.summary.create_file_writer(str(config.logdir),
                                           max_queue=1000,
                                           flush_millis=20000)
    writer.set_as_default()
    train_sim_envs = [
        wrappers.Async(
            lambda: make_env(config,
                             writer,
                             'sim_train',
                             datadir,
                             store=True,
                             real_world=False), config.parallel)
        for _ in range(config.envs)
    ]
    if config.real_world_prob > 0:
        train_real_envs = [
            wrappers.Async(
                lambda: make_env(config,
                                 writer,
                                 'real_train',
                                 datadir,
                                 store=True,
                                 real_world=True), config.parallel)
            for _ in range(config.envs)
        ]
    else:
        train_real_envs = None
    test_envs = [
        wrappers.Async(
            lambda: make_env(
                config, writer, 'test', datadir, store=False, real_world=True),
            config.parallel) for _ in range(config.envs)
    ]
    actspace = train_sim_envs[0].action_space

    # Prefill dataset with random episodes.
    step = count_steps(datadir, config)
    prefill = max(0, config.prefill - step)
    print(f'Prefill dataset with {prefill} steps.')
    random_agent = lambda o, d, _: ([actspace.sample() for _ in d], None)
    tools.simulate(random_agent, train_sim_envs,
                   prefill / config.action_repeat)
    writer.flush()
    train_real_step_target = config.sample_real_every * config.time_limit

    # Train and regularly evaluate the agent.
    step = count_steps(datadir, config)
    print(f'Simulating agent for {config.steps-step} steps.')
    agent = Dreamer(config, datadir, actspace, writer)
    if (config.logdir / 'variables.pkl').exists():
        print('Load checkpoint.')
        agent.load(config.logdir / 'variables.pkl')
    else:
        print('No checkpoint found at', config.logdir / 'variables.pkl')
    state = None
    while step < config.steps:
        print('Start evaluation.')
        tools.simulate(functools.partial(agent, training=False),
                       test_envs,
                       episodes=1)
        writer.flush()
        steps = config.eval_every // config.action_repeat
        print('Start collection from simulator.')
        state = tools.simulate(agent, train_sim_envs, steps, state=state)
        if step >= train_real_step_target and train_real_envs is not None:
            print("Start collection from the real world")
            state = tools.simulate(agent,
                                   train_real_envs,
                                   episodes=1,
                                   state=state)
            train_real_step_target += config.sample_real_every * config.time_limit
        step = count_steps(datadir, config)
        agent.save(config.logdir / 'variables.pkl')
    for env in train_sim_envs + test_envs:
        env.close()
    if train_real_envs is not None:
        for env in train_real_envs:
            env.close()
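
Example #3 interleaves simulated and real-world collection: once the step count crosses train_real_step_target, one real episode is gathered and the target advances by sample_real_every * time_limit. A toy sketch of that schedule, with made-up config values:

# Hypothetical config: one real episode after every 10 * 1000 simulated steps.
sample_real_every, time_limit = 10, 1000
target = sample_real_every * time_limit
for step in range(0, 50001, 5000):  # pretend collection milestones
    if step >= target:
        print(f'step {step}: collect one real-world episode')
        target += sample_real_every * time_limit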
Example #4
def main(config):
    print(config)

    # Set random seeds.
    os.environ['PYTHONHASHSEED'] = str(config.seed)
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
    random.seed(config.seed)
    np.random.seed(config.seed)
    tf.random.set_seed(config.seed)

    if config.gpu_growth:
        for gpu in tf.config.experimental.list_physical_devices('GPU'):
            tf.config.experimental.set_memory_growth(gpu, True)

    config.logdir = config.logdir / config.task / 'seed_{}'.format(config.seed)
    config.logdir.mkdir(parents=True, exist_ok=True)
    datadir = config.datadir
    tf_dir = config.logdir / 'tensorboard'
    writer = tf.summary.create_file_writer(str(tf_dir),
                                           max_queue=1000,
                                           flush_millis=20000)
    writer.set_as_default()

    # Create environments.
    train_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, 'train', '.', store=False),
            config.parallel) for _ in range(config.envs)
    ]
    test_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, 'test', '.', store=False),
            config.parallel) for _ in range(config.envs)
    ]
    actspace = train_envs[0].action_space

    # Train and regularly evaluate the agent.
    agent = Lompo(config, datadir, actspace, writer)

    if agent._c.load_model:
        agent.load_model(config.logdir / 'final_model')
        print('Load pretrained model.')
        if agent._c.load_buffer:
            agent.latent_buffer.load(agent._c.logdir / 'buffer.h5py')
        else:
            agent._process_data_to_latent()
            agent.latent_buffer.save(agent._c.logdir / 'buffer.h5py')
    else:
        agent.fit_model(agent._c.model_train_steps)
        #agent.save_model(config.logdir)
        #agent._generate_real_data(steps = 5000)
        agent._process_data_to_latent()
        agent.latent_buffer.save(agent._c.logdir / 'buffer.h5py')

    if agent._c.load_agent:
        agent.load_agent(config.logdir)
        print('Load pretrained actor.')

    while agent.latent_buffer._latent_stored_steps < agent._c.start_training:
        agent._generate_latent_data(next(agent._dataset))

    while agent._agent_step < int(config.agent_train_steps):
        print('Start evaluation.')
        tools.simulate(functools.partial(agent, training=False),
                       test_envs,
                       episodes=1)
        #agent._latent_evaluate(train_envs[0])
        writer.flush()
        print('Start collection.')
        agent.train_agent(agent._c.agent_itters_per_step)
        #agent._generate_real_data(steps = 5)

        if config.sample:
            agent._add_data(num_episodes=1)
        else:
            agent._process_data_to_latent(num_episodes=1)

    for env in train_envs + test_envs:
        env.close()
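
The seeding block at the top of Example #4 is the usual best-effort reproducibility recipe for a TF 2 stack. Factored into a reusable helper (set_global_seeds is a hypothetical name), a sketch:

import os
import random

import numpy as np
import tensorflow as tf


def set_global_seeds(seed):
    # PYTHONHASHSEED set this late only affects child processes (e.g. the
    # Async env workers); the current interpreter's hashing is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)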
Example #5
def main(config):
    if config.gpu_growth:
        for gpu in tf.config.experimental.list_physical_devices('GPU'):
            tf.config.experimental.set_memory_growth(gpu, True)
    assert config.precision in (16, 32), config.precision
    if config.precision == 16:
        prec.set_policy(prec.Policy('mixed_float16'))
    config.steps = int(config.steps)
    config.logdir.mkdir(parents=True, exist_ok=True)
    print('Logdir', config.logdir)

    # Drop logdir before dumping: pathlib.Path is not JSON-serializable.
    arg_dict = vars(config).copy()
    del arg_dict['logdir']

    import json
    with open(os.path.join(config.logdir, 'args.json'), 'w') as fout:
        json.dump(arg_dict, fout)

    # Create environments.
    datadir = config.logdir / 'episodes'
    writer = tf.summary.create_file_writer(str(config.logdir),
                                           max_queue=1000,
                                           flush_millis=20000)
    writer.set_as_default()
    train_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, 'train', datadir, train=True),
            config.parallel) for _ in range(config.envs)
    ]
    test_envs = [
        wrappers.Async(
            lambda: make_env(config, writer, 'test', datadir, train=False),
            config.parallel) for _ in range(config.envs)
    ]
    actspace = train_envs[0].action_space

    # Prefill dataset with random episodes.
    step = count_steps(datadir, config)
    prefill = max(0, config.prefill - step)
    print(f'Prefill dataset with {prefill} steps.')

    def random_agent(o, d, _):
        return ([actspace.sample() for _ in d], None)

    tools.simulate(random_agent, train_envs, prefill / config.action_repeat)
    writer.flush()

    # Train and regularly evaluate the agent.
    step = count_steps(datadir, config)
    print(f'Simulating agent for {config.steps-step} steps.')
    agent = CVRL(config, datadir, actspace, writer)
    if (config.logdir / 'variables.pkl').exists():
        print('Load checkpoint.')
        agent.load(config.logdir / 'variables.pkl')
    state = None
    while step < config.steps:
        print('Start evaluation.')
        tools.simulate(functools.partial(agent, training=False),
                       test_envs,
                       episodes=1)
        writer.flush()
        print('Start collection.')
        steps = config.eval_every // config.action_repeat
        state = tools.simulate(agent, train_envs, steps, state=state)
        step = count_steps(datadir, config)
        agent.save(config.logdir / 'variables.pkl')
    for env in train_envs + test_envs:
        env.close()
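
Example #5 drops logdir before json.dump because pathlib.Path is not JSON-serializable. An alternative sketch that keeps every field by stringifying whatever json cannot encode natively (dump_config is a hypothetical helper):

import json


def dump_config(config, path):
    # default=str is called for non-serializable values such as pathlib.Path.
    with open(path, 'w') as fout:
        json.dump(vars(config), fout, default=str, indent=2)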