def test(epoch):
    agent.eval()

    matches, rewards, policies = [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(testloader), total=len(testloader)):

        inputs = Variable(inputs, volatile=True)
        targets = Variable(targets).cuda(non_blocking=True)
        if not args.parallel:
            inputs = inputs.cuda()

        # Get the low resolution agent images
        inputs_agent = inputs.clone()
        inputs_agent = torch.nn.functional.interpolate(inputs_agent, (args.lr_size, args.lr_size))
        probs = F.sigmoid(agent.forward(inputs_agent, args.model.split('_')[1], 'lr'))

        # Sample the test-time policy
        policy = probs.data.clone()
        policy[policy < 0.5] = 0.0
        policy[policy >= 0.5] = 1.0

        # Get the masked high-res image and perform inference
        inputs = utils.agent_chosen_input(inputs, policy, mappings, patch_size)
        preds = rnet.forward(inputs, args.model.split('_')[1], 'hr')

        reward, match = utils.compute_reward(preds, targets, policy.data, args.penalty)

        matches.append(match)
        rewards.append(reward)
        policies.append(policy.data)

    accuracy, reward, sparsity, variance, policy_set = utils.performance_stats(policies, rewards, matches)

    print('Test - Acc: %.3f | Rw: %.2E | S: %.3f | V: %.3f | #: %d' %
          (accuracy, reward, sparsity, variance, len(policy_set)))

    log_value('test_accuracy', accuracy, epoch)
    log_value('test_reward', reward, epoch)
    log_value('test_sparsity', sparsity, epoch)
    log_value('test_variance', variance, epoch)
    log_value('test_unique_policies', len(policy_set), epoch)

    # Save the Policy Network - Classifier is fixed in this phase
    agent_state_dict = agent.module.state_dict() if args.parallel else agent.state_dict()
    state = {
        'agent': agent_state_dict,
        'epoch': epoch,
        'reward': reward,
        'acc': accuracy
    }
    torch.save(state, args.cv_dir + '/ckpt_E_%d_A_%.3f_R_%.2E' % (epoch, accuracy, reward))
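# ----------------------------------------------------------------------------
# NOTE: `utils.agent_chosen_input` is called above but not shown here. The
# sketch below is only an illustrative assumption of what such a helper might
# do: zero out the high-resolution patches the policy did not select, so the
# classifier only sees the acquired patches. It assumes `mappings` is a list
# of (row, col) top-left offsets, one per action, and `patch_size` is the side
# length of a square patch; the actual implementation in utils may differ.
# ----------------------------------------------------------------------------
import torch


def agent_chosen_input_sketch(inputs, policy, mappings, patch_size):
    # inputs: (B, C, H, W) high-resolution images
    # policy: (B, P) binary tensor, one action per patch
    masked = torch.zeros_like(inputs)
    for action, (row, col) in enumerate(mappings):
        # Keep patch `action` only for the images whose policy selected it
        keep = policy[:, action].view(-1, 1, 1, 1).to(inputs.dtype)
        masked[:, :, row:row + patch_size, col:col + patch_size] = \
            keep * inputs[:, :, row:row + patch_size, col:col + patch_size]
    return masked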
def test(epoch):
    agent.eval()

    rewards, metrics, policies, set_labels = [], [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(testloader), total=len(testloader)):

        inputs = Variable(inputs, volatile=True)
        if not args.parallel:
            inputs = inputs.cuda()

        # Actions by the Policy Network
        probs = F.sigmoid(agent(inputs))

        # Sample the policy from the agent's output
        policy = probs.data.clone()
        policy[policy < 0.5] = 0.0
        policy[policy >= 0.5] = 1.0
        policy = Variable(policy)

        offset_fd, offset_cd = utils.read_offsets(targets, num_actions)

        reward = utils.compute_reward(offset_fd, offset_cd, policy.data, args.beta, args.sigma)
        metrics, set_labels = utils.get_detected_boxes(policy, targets, metrics, set_labels)

        rewards.append(reward)
        policies.append(policy.data)

    true_positives, pred_scores, pred_labels = [np.concatenate(x, 0) for x in list(zip(*metrics))]
    precision, recall, AP, f1, ap_class = utils_detector.ap_per_class(
        true_positives, pred_scores, pred_labels, set_labels)
    reward, sparsity, variance, policy_set = utils.performance_stats(policies, rewards)

    print('Test - AP: %.3f | AR: %.3f' % (AP[0], recall.mean()))
    print('Test - Rw: %.2E | S: %.3f | V: %.3f | #: %d' %
          (reward, sparsity, variance, len(policy_set)))

    log_value('test_reward', reward, epoch)
    log_value('test_AP', AP[0], epoch)
    log_value('test_AR', recall.mean(), epoch)
    log_value('test_sparsity', sparsity, epoch)
    log_value('test_variance', variance, epoch)
    log_value('test_unique_policies', len(policy_set), epoch)

    # Save the model --- agent
    agent_state_dict = agent.module.state_dict() if args.parallel else agent.state_dict()
    state = {
        'agent': agent_state_dict,
        'epoch': epoch,
        'reward': reward,
    }
    torch.save(state, args.cv_dir + '/ckpt_E_%d_R_%.2E' % (epoch, reward))
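# ----------------------------------------------------------------------------
# NOTE: `utils.performance_stats(policies, rewards)` above is unpacked into
# (reward, sparsity, variance, policy_set). The sketch below is an assumption
# about what those logged statistics could be: mean reward, mean number of
# selected patches per image, its variance, and the set of distinct binary
# policies. The real helper in utils may be implemented differently.
# ----------------------------------------------------------------------------
import torch


def performance_stats_sketch(policies, rewards):
    policies = torch.cat(policies, 0)            # (N, P) binary action vectors
    rewards = torch.cat(rewards, 0)              # per-image rewards
    reward = rewards.float().mean()
    sparsity = policies.float().sum(1).mean()    # avg. patches acquired per image
    variance = policies.float().sum(1).var()     # spread of patch usage
    policy_set = {''.join(str(int(a)) for a in p) for p in policies}
    return reward, sparsity, variance, policy_set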
def train(epoch):
    agent.train()

    matches, rewards, rewards_baseline, policies = [], [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(trainloader), total=len(trainloader)):

        inputs = Variable(inputs)
        if not args.parallel:
            inputs = inputs.cuda()

        # Actions by the Agent
        probs = F.sigmoid(agent.forward(inputs))
        alpha_hp = np.clip(args.alpha + epoch * 0.001, 0.6, 0.95)
        probs = probs * alpha_hp + (1 - alpha_hp) * (1 - probs)

        # Sample the policies from the Bernoulli distribution characterized by the agent's output
        distr = Bernoulli(probs)
        policy_sample = distr.sample()

        # Test time policy - used as baseline policy in the training step
        policy_map = probs.data.clone()
        policy_map[policy_map < 0.5] = 0.0
        policy_map[policy_map >= 0.5] = 1.0
        policy_map = Variable(policy_map)

        # Get the batch-wise metrics
        offset_fd, offset_cd = utils.read_offsets(targets, num_actions)

        # Find the reward for baseline and sampled policy
        reward_map = utils.compute_reward(offset_fd, offset_cd, policy_map.data, args.beta, args.sigma)
        reward_sample = utils.compute_reward(offset_fd, offset_cd, policy_sample.data, args.beta, args.sigma)
        advantage = reward_sample.cuda().float() - reward_map.cuda().float()

        # Find the loss for only the policy network
        loss = -distr.log_prob(policy_sample)
        loss = loss * Variable(advantage).expand_as(policy_sample)
        loss = loss.mean()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        rewards.append(reward_sample.cpu())
        rewards_baseline.append(reward_map.cpu())
        policies.append(policy_sample.data.cpu())

    reward, sparsity, variance, policy_set = utils.performance_stats(policies, rewards)

    # Compute the Precision and Recall Performance of the Agent and Detectors
    print('Train: %d | Rw: %.2E | S: %.3f | V: %.3f | #: %d' %
          (epoch, reward, sparsity, variance, len(policy_set)))

    log_value('train_reward', reward, epoch)
    log_value('train_sparsity', sparsity, epoch)
    log_value('train_variance', variance, epoch)
    log_value('train_baseline_reward', torch.cat(rewards_baseline, 0).mean(), epoch)
    log_value('train_unique_policies', len(policy_set), epoch)
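# ----------------------------------------------------------------------------
# NOTE: The loss above is a REINFORCE-style policy gradient with the greedy
# (thresholded) policy acting as a self-critical baseline: sampled actions are
# reinforced in proportion to how much the sampled policy's reward exceeds the
# greedy policy's reward. The stand-alone sketch below only re-creates that
# update on dummy tensors to make the shapes explicit; the batch size, number
# of actions, and reward values are made up for illustration.
# ----------------------------------------------------------------------------
import torch
from torch.distributions import Bernoulli


def reinforce_with_baseline_demo():
    probs = torch.full((4, 6), 0.7, requires_grad=True)  # 4 images, 6 actions
    distr = Bernoulli(probs)
    policy_sample = distr.sample()            # exploratory binary policy
    reward_sample = torch.rand(4, 1)          # dummy reward of the sampled policy
    reward_map = torch.rand(4, 1)             # dummy reward of the greedy baseline
    advantage = reward_sample - reward_map    # positive => reinforce the sample

    loss = -distr.log_prob(policy_sample) * advantage.expand_as(policy_sample)
    loss = loss.mean()
    loss.backward()                           # gradients flow into `probs`
    return loss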
def train(epoch):
    agent.train()
    rnet_hr.train()

    matches, rewards, rewards_baseline, policies = [], [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(trainloader), total=len(trainloader)):

        inputs = Variable(inputs)
        targets = Variable(targets).cuda(non_blocking=True)
        if not args.parallel:
            inputs = inputs.cuda()

        # Get the low resolution agent images
        inputs_agent = inputs.clone()
        inputs_agent = torch.nn.functional.interpolate(inputs_agent, (args.lr_size, args.lr_size))
        probs = F.sigmoid(agent.forward(inputs_agent, args.model.split('_')[1], 'lr'))
        probs = probs * args.alpha + (1 - probs) * (1 - args.alpha)

        # Sample the policies from the Bernoulli distribution characterized by the agent's output
        distr = Bernoulli(probs)
        policy_sample = distr.sample()

        # Test time policy - used as baseline policy in the training step
        policy_map = probs.data.clone()
        policy_map[policy_map < 0.5] = 0.0
        policy_map[policy_map >= 0.5] = 1.0

        # Sample the high resolution patches using the actions
        inputs_map = inputs.clone()
        inputs_sample = inputs.clone()
        inputs_map = utils.agent_chosen_input(inputs_map, policy_map, mappings, patch_size)
        inputs_sample = utils.agent_chosen_input(inputs_sample, policy_sample.int(), mappings, patch_size)

        # Perform inference and combine the low and high resolution classifier predictions
        preds_lr = rnet_lr.forward(inputs_agent, args.model.split('_')[1], 'lr')
        preds_map = rnet_hr.forward(inputs_map, args.model.split('_')[1], 'hr')
        preds_sample = rnet_hr.forward(inputs_sample, args.model.split('_')[1], 'hr')
        preds_map = preds_map + preds_lr
        preds_sample = preds_sample + preds_lr

        # Get the rewards for baseline and sampled policy
        reward_map, match = utils.compute_reward(preds_map, targets, policy_map.data, args.penalty)
        reward_sample, _ = utils.compute_reward(preds_sample, targets, policy_sample.data, args.penalty)
        advantage = reward_sample - reward_map

        # Find the joint loss from the combined classifier and the agent
        loss = -distr.log_prob(policy_sample).sum(1, keepdim=True) * Variable(advantage)
        loss = loss.mean()
        loss += F.cross_entropy(preds_sample, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        matches.append(match.cpu())
        rewards.append(reward_sample.cpu())
        rewards_baseline.append(reward_map.cpu())
        policies.append(policy_sample.data.cpu())

    accuracy, reward, sparsity, variance, policy_set = utils.performance_stats(policies, rewards, matches)

    print('Train: %d | Acc: %.3f | Rw: %.2E | S: %.3f | V: %.3f | #: %d' %
          (epoch, accuracy, reward, sparsity, variance, len(policy_set)))

    log_value('train_accuracy', accuracy, epoch)
    log_value('train_reward', reward, epoch)
    log_value('train_sparsity', sparsity, epoch)
    log_value('train_variance', variance, epoch)
    log_value('train_baseline_reward', torch.cat(rewards_baseline, 0).mean(), epoch)
    log_value('train_unique_policies', len(policy_set), epoch)
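# ----------------------------------------------------------------------------
# NOTE: `utils.compute_reward(preds, targets, policy, penalty)` is used above
# but not defined here. The sketch below is only one plausible form, assuming
# the reward favors correct predictions that acquire few high-resolution
# patches and assigns `penalty` to misclassified images; the actual utils
# implementation may weight these terms differently.
# ----------------------------------------------------------------------------
import torch


def compute_reward_sketch(preds, targets, policy, penalty):
    # Fraction of high-resolution patches the policy chose to acquire
    patch_use = policy.float().sum(1) / policy.size(1)
    sparse_reward = 1.0 - patch_use ** 2          # cheaper policies score higher
    match = (preds.argmax(1) == targets)          # per-image correctness
    reward = torch.where(match, sparse_reward,
                         torch.full_like(sparse_reward, float(penalty)))
    return reward.unsqueeze(1), match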
def train(epoch):
    # This step trains the policy network only
    agent.train()

    matches, rewards, rewards_baseline, policies = [], [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(trainloader), total=len(trainloader)):

        inputs = Variable(inputs)
        targets = Variable(targets).cuda(non_blocking=True)
        if not args.parallel:
            inputs = inputs.cuda()
        inputs_agent = inputs.clone()
        inputs_map = inputs.clone()
        inputs_sample = inputs.clone()

        # Run the low-res image through the Policy Network
        inputs_agent = torch.nn.functional.interpolate(inputs_agent, (args.lr_size, args.lr_size))
        probs = F.sigmoid(agent.forward(inputs_agent, args.model.split('_')[1], 'lr'))
        probs = probs * args.alpha + (1 - args.alpha) * (1 - probs)

        # Sample the policies from the Bernoulli distribution characterized by the agent's output
        distr = Bernoulli(probs)
        policy_sample = distr.sample()

        # Test time policy - used as baseline policy in the training step
        policy_map = probs.data.clone()
        policy_map[policy_map < 0.5] = 0.0
        policy_map[policy_map >= 0.5] = 1.0

        # Agent sampled high resolution images
        inputs_map = utils.agent_chosen_input(inputs_map, policy_map, mappings, patch_size)
        inputs_sample = utils.agent_chosen_input(inputs_sample, policy_sample.int(), mappings, patch_size)

        # Forward propagate images through the classifiers
        preds_map = rnet.forward(inputs_map, args.model.split('_')[1], 'hr')
        preds_sample = rnet.forward(inputs_sample, args.model.split('_')[1], 'hr')

        # Find the reward for baseline and sampled policy
        reward_map, match = utils.compute_reward(preds_map, targets, policy_map.data, args.penalty)
        reward_sample, _ = utils.compute_reward(preds_sample, targets, policy_sample.data, args.penalty)
        advantage = reward_sample.cuda().float() - reward_map.cuda().float()

        # Find the loss for only the policy network
        loss = -distr.log_prob(policy_sample)
        loss = loss * Variable(advantage).expand_as(policy_sample)
        loss = loss.mean()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        matches.append(match.cpu())
        rewards.append(reward_sample.cpu())
        rewards_baseline.append(reward_map.cpu())
        policies.append(policy_sample.data.cpu())

    accuracy, reward, sparsity, variance, policy_set = utils.performance_stats(policies, rewards, matches)

    print('Train: %d | Acc: %.3f | Rw: %.2E | S: %.3f | V: %.3f | #: %d' %
          (epoch, accuracy, reward, sparsity, variance, len(policy_set)))

    log_value('train_accuracy', accuracy, epoch)
    log_value('train_reward', reward, epoch)
    log_value('train_sparsity', sparsity, epoch)
    log_value('train_variance', variance, epoch)
    log_value('train_baseline_reward', torch.cat(rewards_baseline, 0).mean(), epoch)
    log_value('train_unique_policies', len(policy_set), epoch)
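# ----------------------------------------------------------------------------
# NOTE: The line `probs = probs * args.alpha + (1 - args.alpha) * (1 - probs)`
# above bounds every action probability to the interval [1 - alpha, alpha], so
# the Bernoulli sampler keeps exploring instead of collapsing to a near-
# deterministic policy early in training. The toy function below only
# illustrates that clamping; alpha = 0.8 is an arbitrary value chosen for the
# example, not necessarily the value used in training.
# ----------------------------------------------------------------------------
import torch


def bounded_probs_demo(alpha=0.8):
    probs = torch.tensor([0.01, 0.5, 0.99])
    bounded = probs * alpha + (1 - alpha) * (1 - probs)
    return bounded   # tensor([0.2060, 0.5000, 0.7940]) -> always within [0.2, 0.8]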