Example #1
0
def test(epoch):

    agent.eval()

    matches, rewards, policies = [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(testloader),
                                                  total=len(testloader)):

        inputs, targets = Variable(
            inputs, volatile=True), Variable(targets).cuda(non_blocking=True)
        if not args.parallel:
            inputs = inputs.cuda()

        # Get the low resolution agent images
        inputs_agent = inputs.clone()
        inputs_agent = torch.nn.functional.interpolate(
            inputs_agent, (args.lr_size, args.lr_size))
        probs = F.sigmoid(
            agent.forward(inputs_agent,
                          args.model.split('_')[1], 'lr'))

        # Sample the test-time policy
        policy = probs.data.clone()
        policy[policy < 0.5] = 0.0
        policy[policy >= 0.5] = 1.0

        # Get the masked high-res image and perform inference
        inputs = utils.agent_chosen_input(inputs, policy, mappings, patch_size)
        preds = rnet.forward(inputs, args.model.split('_')[1], 'hr')

        reward, match = utils.compute_reward(preds, targets, policy.data,
                                             args.penalty)

        matches.append(match)
        rewards.append(reward)
        policies.append(policy.data)

    accuracy, reward, sparsity, variance, policy_set = utils.performance_stats(
        policies, rewards, matches)

    print('Test - Acc: %.3f | Rw: %.2E | S: %.3f | V: %.3f | #: %d' %
          (accuracy, reward, sparsity, variance, len(policy_set)))
    log_value('test_accuracy', accuracy, epoch)
    log_value('test_reward', reward, epoch)
    log_value('test_sparsity', sparsity, epoch)
    log_value('test_variance', variance, epoch)
    log_value('test_unique_policies', len(policy_set), epoch)

    # Save the Policy Network - Classifier is fixed in this phase
    agent_state_dict = agent.module.state_dict(
    ) if args.parallel else agent.state_dict()
    state = {
        'agent': agent_state_dict,
        'epoch': epoch,
        'reward': reward,
        'acc': accuracy
    }
    torch.save(
        state,
        args.cv_dir + '/ckpt_E_%d_A_%.3f_R_%.2E' % (epoch, accuracy, reward))
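Example #1 leans on `utils.compute_reward`, which the snippet does not show. Below is a minimal hypothetical sketch of what that helper could look like in this classification setting, assuming the reward favours using fewer high-resolution patches and a misclassified image receives a fixed penalty; the exact form is an assumption, not the repository's code.

import torch

def compute_reward_sketch(preds, targets, policy, penalty):
    # Hypothetical reward: fewer sampled patches -> larger reward,
    # but a wrong prediction is replaced by a fixed penalty.
    patch_use = policy.float().sum(1) / policy.size(1)   # fraction of patches kept
    sparse_reward = 1.0 - patch_use ** 2                 # bonus for dropping patches
    match = preds.argmax(1) == targets                   # per-image correctness
    reward = torch.where(match, sparse_reward,
                         torch.full_like(sparse_reward, float(penalty)))
    return reward.unsqueeze(1), match.float()

The trailing unsqueeze keeps the reward broadcastable against the per-patch log-probabilities used in the training examples further below.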
Example #2
0
def test(epoch):
    agent.eval()
    rewards, metrics, policies, set_labels = [], [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(testloader),
                                                  total=len(testloader)):

        inputs = Variable(inputs, volatile=True)
        if not args.parallel:
            inputs = inputs.cuda()

        # Actions by the Policy Network
        probs = F.sigmoid(agent(inputs))

        # Sample the policy from the agent's output
        policy = probs.data.clone()
        policy[policy < 0.5] = 0.0
        policy[policy >= 0.5] = 1.0
        policy = Variable(policy)

        offset_fd, offset_cd = utils.read_offsets(targets, num_actions)

        reward = utils.compute_reward(offset_fd, offset_cd, policy.data,
                                      args.beta, args.sigma)
        metrics, set_labels = utils.get_detected_boxes(policy, targets,
                                                       metrics, set_labels)

        rewards.append(reward)
        policies.append(policy.data)

    true_positives, pred_scores, pred_labels = [
        np.concatenate(x, 0) for x in list(zip(*metrics))
    ]
    precision, recall, AP, f1, ap_class = utils_detector.ap_per_class(
        true_positives, pred_scores, pred_labels, set_labels)
    reward, sparsity, variance, policy_set = utils.performance_stats(
        policies, rewards)

    print('Test - AP: %.3f | AR : %.3f' % (AP[0], recall.mean()))
    print('Test - Rw: %.2E | S: %.3f | V: %.3f | #: %d' %
          (reward, sparsity, variance, len(policy_set)))

    log_value('test_reward', reward, epoch)
    log_value('test_AP', AP[0], epoch)
    log_value('test_AR', recall.mean(), epoch)
    log_value('test_sparsity', sparsity, epoch)
    log_value('test_variance', variance, epoch)
    log_value('test_unique_policies', len(policy_set), epoch)

    # Save the model (agent)
    agent_state_dict = agent.module.state_dict(
    ) if args.parallel else agent.state_dict()
    state = {
        'agent': agent_state_dict,
        'epoch': epoch,
        'reward': reward,
    }
    torch.save(state, args.cv_dir + '/ckpt_E_%d_R_%.2E' % (epoch, reward))
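Both test functions aggregate their per-batch lists through `utils.performance_stats`, once with `matches` (Example #1) and once without (Example #2). A plausible sketch of that helper is given below, assuming sparsity is the average number of selected patches, variance is the spread of that count, and `policy_set` collects the distinct binary policies; this is an assumption about the helper, not its actual source.

import torch

def performance_stats_sketch(policies, rewards, matches=None):
    # Hypothetical aggregation of the per-batch tensors collected above.
    policies = torch.cat(policies, 0)
    rewards = torch.cat(rewards, 0)
    reward = rewards.mean().item()
    sparsity = policies.sum(1).mean().item()    # avg. patches selected per image
    variance = policies.sum(1).std().item()     # spread of that count
    policy_set = {''.join(str(int(v)) for v in p) for p in policies.tolist()}
    if matches is None:
        return reward, sparsity, variance, policy_set
    accuracy = torch.cat(matches, 0).float().mean().item()
    return accuracy, reward, sparsity, variance, policy_set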
Example #3
0
def train(epoch):
    agent.train()
    matches, rewards, rewards_baseline, policies = [], [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(trainloader),
                                                  total=len(trainloader)):
        inputs = Variable(inputs)
        if not args.parallel:
            inputs = inputs.cuda()

        # Actions by the Agent
        probs = F.sigmoid(agent.forward(inputs))
        alpha_hp = np.clip(args.alpha + epoch * 0.001, 0.6, 0.95)
        probs = probs * alpha_hp + (1 - alpha_hp) * (1 - probs)

        # Sample the policies from the Bernoulli distribution characterized by the agent's output
        distr = Bernoulli(probs)
        policy_sample = distr.sample()

        # Test time policy - used as baseline policy in the training step
        policy_map = probs.data.clone()
        policy_map[policy_map < 0.5] = 0.0
        policy_map[policy_map >= 0.5] = 1.0
        policy_map = Variable(policy_map)

        # Get the batch-wise metrics
        offset_fd, offset_cd = utils.read_offsets(targets, num_actions)

        # Find the reward for baseline and sampled policy
        reward_map = utils.compute_reward(offset_fd, offset_cd, policy_map.data,
                                          args.beta, args.sigma)
        reward_sample = utils.compute_reward(offset_fd, offset_cd,
                                             policy_sample.data, args.beta,
                                             args.sigma)
        advantage = reward_sample.cuda().float() - reward_map.cuda().float()

        # Find the loss for only the policy network
        loss = -distr.log_prob(policy_sample)
        loss = loss * Variable(advantage).expand_as(policy_sample)
        loss = loss.mean()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        rewards.append(reward_sample.cpu())
        rewards_baseline.append(reward_map.cpu())
        policies.append(policy_sample.data.cpu())

    reward, sparsity, variance, policy_set = utils.performance_stats(
        policies, rewards)

    # Compute the Precision and Recall Performance of the Agent and Detectors
    print('Train: %d | Rw: %.2E | S: %.3f | V: %.3f | #: %d' %
          (epoch, reward, sparsity, variance, len(policy_set)))

    log_value('train_reward', reward, epoch)
    log_value('train_sparsity', sparsity, epoch)
    log_value('train_variance', variance, epoch)
    log_value('train_baseline_reward',
              torch.cat(rewards_baseline, 0).mean(), epoch)
    log_value('train_unique_policies', len(policy_set), epoch)
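Examples #3 through #5 all implement the same self-critical REINFORCE update: the thresholded (greedy) policy acts as the baseline, the advantage is the sampled reward minus the greedy reward, and no learned value function is needed. The self-contained toy below isolates just that loss, with made-up shapes and random stand-in rewards.

import torch
from torch.distributions import Bernoulli

# Toy agent output: 4 images, 16 patch decisions each.
probs = torch.full((4, 16), 0.7, requires_grad=True)
distr = Bernoulli(probs)
policy_sample = distr.sample()                   # stochastic policy (explores)
policy_map = (probs.detach() >= 0.5).float()     # greedy baseline policy (would feed the reward helper)

reward_sample = torch.rand(4, 1)                 # stand-in for utils.compute_reward(...)
reward_map = torch.rand(4, 1)
advantage = reward_sample - reward_map           # self-critical advantage

# REINFORCE: raise the log-probability of actions that beat the greedy baseline.
loss = (-distr.log_prob(policy_sample) * advantage).mean()
loss.backward()

Because the baseline comes from the same network's greedy decisions, it adds no extra parameters, and since it does not depend on the sampled actions it reduces gradient variance without introducing bias.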
Example #4
0
def train(epoch):
    agent.train()
    rnet_hr.train()

    matches, rewards, rewards_baseline, policies = [], [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(trainloader),
                                                  total=len(trainloader)):

        inputs, targets = Variable(inputs), Variable(targets).cuda(non_blocking=True)
        if not args.parallel:
            inputs = inputs.cuda()

        # Get the low resolution agent images
        inputs_agent = inputs.clone()
        inputs_agent = torch.nn.functional.interpolate(
            inputs_agent, (args.lr_size, args.lr_size))
        probs = F.sigmoid(
            agent.forward(inputs_agent,
                          args.model.split('_')[1], 'lr'))
        probs = probs * args.alpha + (1 - probs) * (1 - args.alpha)

        # Sample the policies from the Bernoulli distribution characterized by the agent's output
        distr = Bernoulli(probs)
        policy_sample = distr.sample()

        # Test time policy - used as baseline policy in the training step
        policy_map = probs.data.clone()
        policy_map[policy_map < 0.5] = 0.0
        policy_map[policy_map >= 0.5] = 1.0

        # Sample the high resolution patches using the actions
        inputs_map = inputs.clone()
        inputs_sample = inputs.clone()
        inputs_map = utils.agent_chosen_input(inputs_map, policy_map, mappings,
                                              patch_size)
        inputs_sample = utils.agent_chosen_input(inputs_sample,
                                                 policy_sample.int(), mappings,
                                                 patch_size)

        # Perform inference and combine low and high resolution classifier
        preds_lr = rnet_lr.forward(inputs_agent,
                                   args.model.split('_')[1], 'lr')
        preds_map = rnet_hr.forward(inputs_map, args.model.split('_')[1], 'hr')
        preds_sample = rnet_hr.forward(inputs_sample,
                                       args.model.split('_')[1], 'hr')
        preds_map = preds_map + preds_lr
        preds_sample = preds_sample + preds_lr

        # Get the rewards for baseline and sampled policy
        reward_map, match = utils.compute_reward(preds_map, targets,
                                                 policy_map.data, args.penalty)
        reward_sample, _ = utils.compute_reward(preds_sample, targets,
                                                policy_sample.data,
                                                args.penalty)
        advantage = reward_sample - reward_map

        # Find the joint loss from combined classifier and agent
        loss = -distr.log_prob(policy_sample).sum(
            1, keepdim=True) * Variable(advantage)
        loss = loss.mean()
        loss += F.cross_entropy(preds_sample, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        matches.append(match.cpu())
        rewards.append(reward_sample.cpu())
        rewards_baseline.append(reward_map.cpu())
        policies.append(policy_sample.data.cpu())

    accuracy, reward, sparsity, variance, policy_set = utils.performance_stats(
        policies, rewards, matches)

    print('Train: %d | Acc: %.3f | Rw: %.2E | S: %.3f | V: %.3f | #: %d' %
          (epoch, accuracy, reward, sparsity, variance, len(policy_set)))
    log_value('train_accuracy', accuracy, epoch)
    log_value('train_reward', reward, epoch)
    log_value('train_sparsity', sparsity, epoch)
    log_value('train_variance', variance, epoch)
    log_value('train_baseline_reward',
              torch.cat(rewards_baseline, 0).mean(), epoch)
    log_value('train_unique_policies', len(policy_set), epoch)
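One detail worth calling out in Examples #4 and #5 is `probs = probs * args.alpha + (1 - probs) * (1 - args.alpha)`: it squashes the Bernoulli parameters into the interval [1 - alpha, alpha] (for alpha >= 0.5), so the sampled policies never become fully deterministic and the agent keeps exploring. Example #3 achieves the same effect with an annealed alpha via np.clip. A quick numeric check, with an arbitrarily chosen alpha:

import torch

alpha = 0.8
probs = torch.tensor([0.0, 0.25, 0.5, 0.75, 1.0])
bounded = probs * alpha + (1 - probs) * (1 - alpha)
print(bounded)   # tensor([0.2000, 0.3500, 0.5000, 0.6500, 0.8000])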
Example #5
0
def train(epoch):
    # This step trains the policy network only
    agent.train()

    matches, rewards, rewards_baseline, policies = [], [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(trainloader),
                                                  total=len(trainloader)):

        inputs, targets = Variable(inputs), Variable(targets).cuda(non_blocking=True)
        if not args.parallel:
            inputs = inputs.cuda()
        inputs_agent = inputs.clone()
        inputs_map = inputs.clone()
        inputs_sample = inputs.clone()

        # Run the low-res image through Policy Network
        inputs_agent = torch.nn.functional.interpolate(
            inputs_agent, (args.lr_size, args.lr_size))
        probs = F.sigmoid(
            agent.forward(inputs_agent,
                          args.model.split('_')[1], 'lr'))
        probs = probs * args.alpha + (1 - args.alpha) * (1 - probs)

        # Sample the policies from the Bernoulli distribution characterized by the agent's output
        distr = Bernoulli(probs)
        policy_sample = distr.sample()

        # Test time policy - used as baseline policy in the training step
        policy_map = probs.data.clone()
        policy_map[policy_map < 0.5] = 0.0
        policy_map[policy_map >= 0.5] = 1.0

        # Agent sampled high resolution images
        inputs_map = utils.agent_chosen_input(inputs_map, policy_map, mappings,
                                              patch_size)
        inputs_sample = utils.agent_chosen_input(inputs_sample,
                                                 policy_sample.int(), mappings,
                                                 patch_size)

        # Forward propagate images through the classifiers
        preds_map = rnet.forward(inputs_map, args.model.split('_')[1], 'hr')
        preds_sample = rnet.forward(inputs_sample,
                                    args.model.split('_')[1], 'hr')

        # Find the reward for baseline and sampled policy
        reward_map, match = utils.compute_reward(preds_map, targets,
                                                 policy_map.data, args.penalty)
        reward_sample, _ = utils.compute_reward(preds_sample, targets,
                                                policy_sample.data,
                                                args.penalty)
        advantage = reward_sample.cuda().float() - reward_map.cuda().float()

        # Find the loss for only the policy network
        loss = -distr.log_prob(policy_sample)
        loss = loss * Variable(advantage).expand_as(policy_sample)
        loss = loss.mean()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        matches.append(match.cpu())
        rewards.append(reward_sample.cpu())
        rewards_baseline.append(reward_map.cpu())
        policies.append(policy_sample.data.cpu())

    accuracy, reward, sparsity, variance, policy_set = utils.performance_stats(
        policies, rewards, matches)

    print('Train: %d | Acc: %.3f | Rw: %.2E | S: %.3f | V: %.3f | #: %d' %
          (epoch, accuracy, reward, sparsity, variance, len(policy_set)))
    log_value('train_accuracy', accuracy, epoch)
    log_value('train_reward', reward, epoch)
    log_value('train_sparsity', sparsity, epoch)
    log_value('train_variance', variance, epoch)
    log_value('train_baseline_reward',
              torch.cat(rewards_baseline, 0).mean(), epoch)
    log_value('train_unique_policies', len(policy_set), epoch)
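The other helper these snippets rely on, `utils.agent_chosen_input`, has to turn a binary patch policy into a masked high-resolution input. A plausible sketch is shown below, assuming `mappings` stores the top-left corner of each patch and that unselected patches are simply zeroed out; the real implementation may differ.

import torch

def agent_chosen_input_sketch(inputs, policy, mappings, patch_size):
    # Hypothetical masking: keep only the patches the policy selected.
    masked = torch.zeros_like(inputs)
    for p, (x, y) in enumerate(mappings):        # one (x, y) corner per action
        keep = policy[:, p].bool()               # images that keep patch p
        masked[keep, :, x:x + patch_size, y:y + patch_size] = \
            inputs[keep, :, x:x + patch_size, y:y + patch_size]
    return masked

In the snippets the policy passed in is already binarized (thresholded at 0.5 or sampled and cast with .int()), so the boolean cast above is safe.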