Example #1
    def train(self):
        training_config = self.config['training']

        trainer_seed = training_config['global_seed'] + MAGIC_NUMBER * self.p_id
        set_seeds(trainer_seed)

        trainer_logdir = '{}/train_thread_{}'.format(
            training_config["log_dir"], self.p_id)
        make_dir_if_required(trainer_logdir)
        self.logger = Logger(trainer_logdir)

        self.start_time = time.time()

        update_step = 0
        received_examples = 1  # start at 1 so the per-example ratios below never divide by zero
        buffer_size = 0

        self.criterion = torch.nn.MSELoss()

        self.actor_decay = TrainingDecay(training_config['actor_train_decay'])
        self.critic_decay = TrainingDecay(
            training_config['critic_train_decay'])

        while True:
            #critic_lr = self.critic_lr_decay_fn(self.global_update_step.value)
            #actor_lr = self.actor_lr_decay_fn(self.global_update_step.value)

            if update_step > 0:
                train_data, received_examples, buffer_size = \
                    self.sample_queue.get()

                step_metrics, _ = self.update(train_data)  # update() also returns step info, unused here

                for key, value in step_metrics.items():
                    self.logger.scalar_summary(key, value, update_step)

                #self.logger.scalar_summary("actor lr", actor_lr, update_step)
                #self.logger.scalar_summary("critic lr", critic_lr, update_step)
            else:
                time.sleep(training_config['train_delay'])

            update_step += 1

            self.logger.scalar_summary("buffer size", buffer_size,
                                       self.global_episode.value)
            self.logger.scalar_summary(
                "updates per example", update_step *
                training_config['batch_size'] / received_examples,
                self.global_episode.value)
            self.logger.scalar_summary(
                "updates per example global", self.global_update_step.value *
                training_config['batch_size'] / received_examples,
                self.global_episode.value)
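The loop above consumes (train_data, received_examples, buffer_size) tuples from self.sample_queue; the worker that produces them appears in Example #3. A minimal wiring sketch under that assumption (config, the queue bound, and the process layout are illustrative, not from the original code):

from multiprocessing import Process, Queue, Value

sample_queue = Queue(maxsize=64)    # bounded so sampling cannot outrun training
episode_queue = Queue()             # note: the project's queue appears to support clear(); mp.Queue does not
global_update_step = Value('i', 0)  # shared counter read by both sides

# client_sampling_worker is the function shown in Example #3; a trainer
# process would then call train() with sample_queue attached as self.sample_queue.
sampler = Process(target=client_sampling_worker,
                  args=(config, 0, global_update_step,
                        [sample_queue], episode_queue))
sampler.start()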
Example #2
def evaluate_single_thread(p_id, model, config, seeds_per_thread, output: Queue):
    rewards = []
    modified_rewards = []
    steps_counts = []
    infos = []
    for seed_plus in range(p_id * seeds_per_thread, (p_id + 1) * seeds_per_thread):
        explorer_seed = 721 + seed_plus * 29
        set_seeds(explorer_seed)

        internal_env_args = {'env_type': 'virtual',
                             'env_init_args': {
                                 'host_tcp': config['training']['client']['host_tcp'],
                                 'port_tcp': config['training']['client']['port_tcp_start'] + p_id
                             },
                             'env_config': config['environment']['core']
                             }
        internal_env_args['env_config']['seed'] = explorer_seed

        env = create_env(config, internal_env_args, transfer=config['training']['transfer'])
        observation = env.reset()

        done = False
        steps = 0
        reward_sum = 0.0
        reward_modified_sum = 0.0

        while not done:
            observation_transformed, _ = observation

            observation, (reward, reward_modified), done, _ = env.step(model.act(observation_transformed))

            reward_sum += reward
            reward_modified_sum += reward_modified

            steps += config["environment"]["wrapper"]["repeat_actions"]

        # Unique target velocities encountered during the episode.
        target_velocities = [[float(v) for v in tv]
                             for tv in np.unique([obs["target_vel"]
                                                  for obs in env.observations], axis=0)]
        # Per-step distance between the target velocity and the pelvis
        # velocity in the horizontal (x, z) plane.
        velocity_similarity_measure = [np.linalg.norm(np.array(obs["target_vel"])[[0, 2]]
                                                      - np.array(obs["body_vel"]["pelvis"])[[0, 2]])
                                       for obs in env.observations]
        velocity_confidence_intervals = [mean_confidence_interval(velocity_similarity_measure, 0.95),
                                         mean_confidence_interval(velocity_similarity_measure, 0.99)]
        rewards.append(reward_sum)
        modified_rewards.append(reward_modified_sum)
        steps_counts.append(steps)
        print(explorer_seed, ':', reward_sum, ':', steps)
        infos.append({"target": target_velocities,
                      "target_similarity_confidence_intervals": velocity_confidence_intervals,
                      "seed": explorer_seed})
    output.put((rewards, modified_rewards, steps_counts, infos))
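The mean_confidence_interval helper is called above but is not part of this listing. A common implementation, shown here as an assumption about what the helper computes, builds a Student-t interval around the sample mean:

import numpy as np
import scipy.stats

def mean_confidence_interval(data, confidence=0.95):
    # Student-t confidence interval for the mean of `data`.
    a = np.asarray(data, dtype=float)
    n = len(a)
    mean = np.mean(a)
    sem = scipy.stats.sem(a)  # standard error of the mean
    half_width = sem * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
    return mean, mean - half_width, mean + half_width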
Example #3
def client_sampling_worker(config, p_id, global_update_step, sample_queues, episode_queue, filter_idx=None):
    set_seeds(p_id * MAGIC_SEED + (p_id if filter_idx is None else filter_idx))
    training_config = config['training']
    buffer = create_buffer(training_config)
    received_examples = 1

    counter = 0

    while True:
        # Drain pending episodes from the explorer processes into the replay
        # buffer, taking at most 128 per pass.
        taken_replays = 0
        while True:
            if taken_replays > 128:
                print("Episode queue is too big!")
                episode_queue.clear()
                break
            try:
                replays = episode_queue.get_nowait()
                for (observation, action, reward, next_observation, done) in replays:
                    buffer.add(observation, action, reward, next_observation, done)
                received_examples += len(replays)
                taken_replays += 1
            except py_queue.Empty:
                break

        # Wait until the buffer holds at least one full batch.
        if len(buffer) < training_config['batch_size']:
            time.sleep(1)
            continue

        train_data_list = []

        for _ in range(len(sample_queues)):
            train_data = buffer.sample(batch_size=training_config['batch_size'])
            train_data_list.append(train_data)

        if counter % 10000 == 0:
            for sample_queue in sample_queues:
                print('sampling queue size: ', sample_queue.qsize())
            print()

        counter += 1

        buffer_size = len(buffer)

        for sample_queue, train_data in zip(sample_queues, train_data_list):
            sample_queue.put((train_data, received_examples, buffer_size))
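The buffer returned by create_buffer only needs add, sample, and __len__ for this worker. A minimal ring-buffer sketch matching that interface (an assumption; the project's buffer may be prioritized or otherwise more elaborate):

import random

class ReplayBuffer:
    def __init__(self, capacity):
        self.capacity = capacity
        self.storage = []
        self.position = 0

    def add(self, observation, action, reward, next_observation, done):
        # Overwrite the oldest transition once capacity is reached.
        item = (observation, action, reward, next_observation, done)
        if len(self.storage) < self.capacity:
            self.storage.append(item)
        else:
            self.storage[self.position] = item
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.storage, batch_size)

    def __len__(self):
        return len(self.storage)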
Example #4
    def __init__(self, exploration_type, config, p_id, model,
                 internal_env_args, episodes_queues, best_reward,
                 global_episode, global_update_step):

        self.exploration_type = exploration_type
        self.config = config
        self.p_id = p_id

        if exploration_type in ('exploiting', 'exploiting_virtual'):
            self.model = create_model(config['model'])
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            self.model.train()
            self.model.to(device)
            self.training_model = model
        else:
            self.model = model

        self.explorer_seed = config['training'][
            'global_seed'] + MAGIC_NUMBER * self.p_id
        self.explore_after = config['training'].get('explore_after', 0)
        self.steps_per_action = config['environment']['wrapper'][
            'repeat_actions']
        set_seeds(self.explorer_seed)

        internal_env_args['env_config']['seed'] = self.explorer_seed

        self.environment = create_env(self.config,
                                      internal_env_args,
                                      transfer=config['training']['transfer'])

        self.episodes_queues = episodes_queues
        self.best_reward = best_reward
        self.saving_best_reward = -np.inf
        self.saving_reward_tolerance = None
        self.global_episode = global_episode
        self.global_update_step = global_update_step
        self.start_time = None
        self.logger = None
Example #5
def submit(config, directories, repeats=1):
    explorer_seed = config['training']['global_seed']
    set_seeds(explorer_seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    avg_models = []

    for dirs in directories:
        models = []
        for model_directory in dirs:
            model = create_model(config['model'])
            model.load(model_directory)
            model.train()
            model.to(device)
            models.append(model)
        avg_models.append(AverageModel(models, config, repeats))

    model = ChooseRandomModel(avg_models)

    internal_env_args = {
        'env_type': 'submit',
        'env_init_args': {
            'test_speed': False
        },
        'env_config': {
            "model": "3D",
            "prosthetic": True,
            "difficulty": 1,
            'seed': explorer_seed
        }
    }

    env = create_env(config, internal_env_args, config['training']['transfer'])

    observation = env.get_observation()

    episodes = 0
    counter = 0
    reward_sum = 0.0

    while True:
        action = model.act(observation)

        (observation, _), (reward, _), done, _ = env.step(action)
        counter += 1

        reward_sum += reward
        print(counter, reward, reward_sum)

        if done:
            print()
            counter = 0
            reward_sum = 0
            save_info("submit_logs/second/log_{}.json".format(episodes),
                      env.get_episode_info())
            episodes += 1
            (observation, _) = env.reset(False)
            if observation is None:
                break
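AverageModel and ChooseRandomModel are not included in this listing. One plausible reading, labeled as an assumption (the project may instead average network weights, or use the repeats argument for repeated stochastic passes):

import random
import numpy as np

class SimpleAverageModel:
    """Illustrative stand-in: average the actions of an ensemble."""

    def __init__(self, models):
        self.models = models

    def act(self, observation):
        return np.mean([m.act(observation) for m in self.models], axis=0)

class SimpleChooseRandomModel:
    """Illustrative stand-in: delegate each action to a random ensemble member."""

    def __init__(self, models):
        self.models = models

    def act(self, observation):
        return random.choice(self.models).act(observation)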
Example #6
    def explore(self):
        training_config = self.config['training']
        self.saving_reward_tolerance = training_config[
            'saving_reward_tolerance']

        dir_pattern = '{}' + '/{}_thread_{}'.format(self.exploration_type,
                                                    self.p_id)

        logdir = dir_pattern.format(training_config['log_dir'])
        replays_dir = dir_pattern.format(training_config['replays_dir'])

        make_dir_if_required(logdir)
        if training_config['saving_replays']:
            make_dir_if_required(replays_dir)

        self.logger = Logger(logdir)

        episode_counter = 0
        step_counter = 0
        self.start_time = time.time()

        action_random_process = create_action_random_process(self.config)

        epsilon_cycle_len = random.randint(
            training_config['epsilon_cycle_len'] // 2,
            training_config['epsilon_cycle_len'] * 2)

        epsilon_decay_fn = create_decay_fn(
            "cycle",
            initial_value=training_config['initial_epsilon'],
            final_value=training_config['final_epsilon'],
            cycle_len=epsilon_cycle_len,
            num_cycles=training_config['max_episodes'] // epsilon_cycle_len)

        while True:
            try:
                if self.exploration_type in ('exploiting', 'exploiting_virtual'):
                    hard_update_ddpg(self.model, self.training_model)

                if episode_counter > 0 and episode_counter % 128 == 0:
                    self.environment.collect_garbage()

                epsilon = min(
                    training_config['initial_epsilon'],
                    max(training_config['final_epsilon'],
                        epsilon_decay_fn(episode_counter)))

                if self.exploration_type in ('exploiting', 'exploiting_virtual'):
                    epsilon = 0.0
                    self.explorer_seed = training_config[
                        'global_seed'] + MAGIC_NUMBER * self.p_id + episode_counter % 5
                    set_seeds(self.explorer_seed)

                episode_metrics = {
                    "reward": 0.0,
                    "reward_modified": 0.0,
                    "step": 0,
                    "epsilon": epsilon
                }

                action_random_process.reset_states()

                replay, timings = self._explore_episode(
                    action_random_process, epsilon, episode_metrics,
                    training_config)

                for episode_queue in self.episodes_queues:
                    episode_queue.put(replay)

                self.global_episode.value += 1

                episode_counter += 1
                episode_metrics["step"] *= self.config['environment'][
                    'wrapper']['repeat_actions']
                step_counter += episode_metrics["step"]

                reward_scale = self.config['environment']['wrapper'][
                    'reward_scale']
                episode_metrics['reward'] /= reward_scale
                episode_metrics['reward_modified'] /= reward_scale

                saving_best_cond = episode_metrics['reward'] > (
                    self.saving_best_reward + self.saving_reward_tolerance)

                if saving_best_cond:
                    self.saving_best_reward = episode_metrics['reward']

                if (episode_counter % self.config['training']['save_every_episode'] == 0 or saving_best_cond) and \
                        self.exploration_type in ('exploiting', 'exploiting_virtual'):

                    save_dir = dir_pattern.format(
                        self.config['training']['save_dir'])

                    self.model.save(self.config, save_dir, episode_counter,
                                    episode_metrics["reward"])

                self.log(episode_metrics, timings, episode_counter,
                         step_counter, saving_best_cond)

            except ValueError as e:
                print('timed-out process {} {} with {}'.format(
                    self.exploration_type, self.p_id, e))
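The hard_update_ddpg call at the top of the loop refreshes this process's local model from the shared training model, so exploiting threads always act with near-current weights. The helper itself is not shown; a typical hard update, under the assumption that both arguments are torch modules (or actor/critic pairs handled per module), is:

def hard_update(target_model, source_model):
    # Copy every parameter of the live training model into the local copy.
    for target_param, source_param in zip(target_model.parameters(),
                                          source_model.parameters()):
        target_param.data.copy_(source_param.data)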
Example #7
def train(args):
    epochs = 350
    batch_size = 288
    util.set_seeds(args.rank)
    model = nn.EfficientNet().cuda()
    lr = batch_size * torch.cuda.device_count() * 0.256 / 4096  # linear LR scaling rule
    optimizer = nn.RMSprop(util.add_weight_decay(model), lr, 0.9, 1e-3, momentum=0.9)
    ema = nn.EMA(model)

    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank])
    else:
        model = torch.nn.DataParallel(model)
    criterion = nn.CrossEntropyLoss().cuda()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    dataset = datasets.ImageFolder(os.path.join(data_dir, 'train'),
                                   transforms.Compose([util.RandomResize(),
                                                       transforms.ColorJitter(0.4, 0.4, 0.4),
                                                       transforms.RandomHorizontalFlip(),
                                                       util.RandomAugment(),
                                                       transforms.ToTensor(), normalize]))
    if args.distributed:
        sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    else:
        sampler = None

    loader = data.DataLoader(dataset, batch_size, sampler=sampler, num_workers=8, pin_memory=True)

    scheduler = nn.StepLR(optimizer)
    amp_scale = torch.cuda.amp.GradScaler()
    with open(f'weights/{scheduler}.csv', 'w') as f:
        if args.local_rank == 0:
            writer = csv.DictWriter(f, fieldnames=['epoch', 'acc@1', 'acc@5'])
            writer.writeheader()
        best_acc1 = 0
        for epoch in range(epochs):
            if args.distributed:
                sampler.set_epoch(epoch)
            if args.local_rank == 0:
                print(('\n' + '%10s' * 2) % ('epoch', 'loss'))
                bar = tqdm.tqdm(loader, total=len(loader))
            else:
                bar = loader
            model.train()
            for images, target in bar:
                loss = batch(images, target, model, criterion)
                optimizer.zero_grad()
                amp_scale.scale(loss).backward()
                amp_scale.step(optimizer)
                amp_scale.update()

                ema.update(model)
                torch.cuda.synchronize()
                if args.local_rank == 0:
                    bar.set_description(('%10s' + '%10.4g') % ('%g/%g' % (epoch + 1, epochs), loss))

            scheduler.step(epoch + 1)
            if args.local_rank == 0:
                acc1, acc5 = test(ema.model.eval())
                writer.writerow({'acc@1': f'{acc1:.3f}',
                                 'acc@5': f'{acc5:.3f}',
                                 'epoch': str(epoch + 1).zfill(3)})
                util.save_checkpoint({'state_dict': ema.model.state_dict()}, acc1 > best_acc1)
                best_acc1 = max(acc1, best_acc1)
    if args.distributed:
        torch.distributed.destroy_process_group()
    torch.cuda.empty_cache()
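The batch helper used in the inner loop is not part of this listing. Given the GradScaler above, a minimal version consistent with its call site (an assumption; the real helper may add mixup or label smoothing) runs the forward pass under autocast and returns the loss:

import torch

def batch(images, target, model, criterion):
    images = images.cuda(non_blocking=True)
    target = target.cuda(non_blocking=True)
    with torch.cuda.amp.autocast():
        output = model(images)
        loss = criterion(output, target)
    return loss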
Example #8
def evaluate(config, directory, directories, seed_plus):
    explorer_seed = config['training']['global_seed'] + seed_plus * 29
    set_seeds(explorer_seed)

    directories.append(directory)
    models = []

    for model_directory in directories:
        models.append(load_model(model_directory))

    model = ChooseRandomModel(models)

    # env = create_env(config['environment'], visualize=True, adapting=True)
    # config['environment']['wrapper']['features']['body_rot_relative'] = ["pelvis", "torso", "head"]
    # config['environment']['wrapper']['repeat_actions'] = 3

    internal_env_args = {'env_type': 'normal',
                         'env_init_args': {
                             'env_type': 'normal',
                             'env_init_args': {
                                 'visualize': False,
                                 'integrator_accuracy': 5e-4
                             },
                             'visualizers_configs': [
                                 {'save_file': './videos/side_{}'.format(seed_plus), 'camera_rotation': [-0.3, 0., 0.]},
                                 {'save_file': './videos/front_{}'.format(seed_plus),
                                  'camera_rotation': [-0.3, -math.pi / 2, 0.]},
                                 {'save_file': './videos/half_{}'.format(seed_plus),
                                  'camera_rotation': [-0.3, -math.pi / 4, 0.]}
                             ]
                         },
                         'env_config': {
                             "model": "3D",
                             "prosthetic": True,
                             "difficulty": 1,
                             "max_steps": 1000,
                             'seed': explorer_seed}
                         }

    env = create_env(config, internal_env_args, transfer=config['training']['transfer'])

    # config['environment']['core']['prosthetic'] = True
    # config['environment']['wrapper']['repeat_frames'] = 1
    # env = create_env(config['environment'], visualize=True, transfer=True)

    reward_sum = 0.0
    reward_modified_sum = 0.0

    observation = env.reset()

    replays_list = []

    repeats = 1
    done = False
    j = 0

    while not done:
        observation_transformed, _ = observation

        action = model.act(observation_transformed)

        observation, (reward, reward_modified), done, _ = env.step(action)

        reward_sum += reward
        reward_modified_sum += reward_modified

        # print(j, reward, reward_modified, reward_sum, reward_modified_sum)
        print('{} {:.2f} {:.2f} {:.2f}'.format(j, reward, reward_modified, reward_sum))
        j += config["environment"]["wrapper"]["repeat_actions"]
        # if j == 2:
        #     break
        if done:
            print(np.unique(np.array(list(map(lambda obs: obs["target_vel"], env.observations))), axis=0))
            return reward_sum