Example #1
def train(epoch):

    agent.train()
    rnet.train()

    matches, rewards, policies = [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(trainloader), total=len(trainloader)):

        inputs, targets = Variable(inputs), Variable(targets).cuda(non_blocking=True)
        if not args.parallel:
            inputs = inputs.cuda()

        probs, value = agent(inputs)

        #---------------------------------------------------------------------#

        policy_map = probs.data.clone()
        policy_map[policy_map<0.5] = 0.0
        policy_map[policy_map>=0.5] = 1.0
        policy_map = Variable(policy_map)

        probs = probs*args.alpha + (1-probs)*(1-args.alpha)
        distr = Bernoulli(probs)
        policy = distr.sample()

        v_inputs = Variable(inputs.data, volatile=True)
        preds_map = rnet.forward(v_inputs, policy_map)
        preds_sample = rnet.forward(inputs, policy)

        reward_map, _ = get_reward(preds_map, targets, policy_map.data)
        reward_sample, match = get_reward(preds_sample, targets, policy.data)

        advantage = reward_sample - reward_map
        # advantage = advantage.expand_as(policy)
        loss = -distr.log_prob(policy).sum(1, keepdim=True) * Variable(advantage)
        loss = loss.sum()

        #---------------------------------------------------------------------#
        loss += F.cross_entropy(preds_sample, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        matches.append(match.cpu())
        rewards.append(reward_sample.cpu())
        policies.append(policy.data.cpu())

    accuracy, reward, sparsity, variance, policy_set = utils.performance_stats(policies, rewards, matches)

    log_str = 'E: %d | A: %.3f | R: %.2E | S: %.3f | V: %.3f | #: %d'%(epoch, accuracy, reward, sparsity, variance, len(policy_set))
    print(log_str)

    log_value('train_accuracy', accuracy, epoch)
    log_value('train_reward', reward, epoch)
    log_value('train_sparsity', sparsity, epoch)
    log_value('train_variance', variance, epoch)
    log_value('train_unique_policies', len(policy_set), epoch)
Example #2
def test(epoch):

    agent.eval()

    matches, rewards, policies = [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(testloader),
                                                  total=len(testloader)):

        # inputs, targets = Variable(inputs, volatile=True), Variable(targets).cuda(async=True)
        inputs, targets = Variable(inputs, volatile=True), Variable(targets)
        if not args.parallel:
            # inputs = inputs.cuda()
            inputs = inputs

        probs, _ = agent(inputs)

        policy = probs.data.clone()
        policy[policy < 0.5] = 0.0
        policy[policy >= 0.5] = 1.0
        policy = Variable(policy)

        if args.cl_step < num_blocks:
            policy[:, :-args.cl_step] = 1

        preds = rnet.forward(inputs, policy)
        reward, match = get_reward(preds, targets, policy.data)

        matches.append(match)
        rewards.append(reward)
        policies.append(policy.data)

    accuracy, reward, sparsity, variance, policy_set = utils.performance_stats(
        policies, rewards, matches)

    log_str = 'TS - A: %.3f | R: %.2E | S: %.3f | V: %.3f | #: %d' % (
        accuracy, reward, sparsity, variance, len(policy_set))
    print(log_str)

    log_value('test_accuracy', accuracy, epoch)
    log_value('test_reward', reward, epoch)
    log_value('test_sparsity', sparsity, epoch)
    log_value('test_variance', variance, epoch)
    log_value('test_unique_policies', len(policy_set), epoch)

    # save the model
    agent_state_dict = agent.module.state_dict(
    ) if args.parallel else agent.state_dict()

    state = {
        'agent': agent_state_dict,
        'epoch': epoch,
        'reward': reward,
        'acc': accuracy
    }
    torch.save(
        state, args.cv_dir + '/ckpt_E_%d_A_%.3f_R_%.2E_S_%.2f_#_%d.t7' %
        (epoch, accuracy, reward, sparsity, len(policy_set)))
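
Note: the PyTorch examples on this page collect per-batch tensors and then call utils.performance_stats(policies, rewards, matches), unpacking (accuracy, reward, sparsity, variance, policy_set). The helper itself is not shown here; the sketch below is only one plausible reading of those outputs (accuracy from the match mask, mean reward, block usage and its spread, and the set of distinct policies). The name performance_stats_sketch is hypothetical.

import torch

def performance_stats_sketch(policies, rewards, matches):
    # assumes lists of per-batch tensors, as collected in the loops above
    policies = torch.cat(policies, 0).float()   # (N, num_blocks) binary keep/drop decisions
    rewards = torch.cat(rewards, 0).float()
    matches = torch.cat(matches, 0).float()

    accuracy = matches.mean()                   # fraction of correct predictions
    reward = rewards.mean()                     # average reward over the epoch
    usage = policies.mean(1)                    # per-sample fraction of blocks kept
    sparsity = usage.mean()                     # reported as "Block Usage"
    variance = usage.std()                      # the "+/-" term next to block usage
    policy_set = {''.join(str(int(b)) for b in p) for p in policies}
    return accuracy, reward, sparsity, variance, policy_set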
Example #3
def test(P=None, mode='auto'):
    dices, policies = [], []

    # make file
    path = 'nii_image'
    utils.mkdir(path)
    for batch_idx, (input, target) in tqdm.tqdm(enumerate(testloader),
                                                total=len(testloader)):

        input, target = input.cuda(), target.cuda()

        if mode == 'auto':
            probs, _ = agent(input)

            policy = probs.clone()
            policy[policy < 0.5] = 0.0
            policy[policy >= 0.5] = 1.0
        else:
            assert P is not None, f"P can not be None when mode is {mode}."
            policy = P

        seg_map = torch.sigmoid(
            seg_model.forward_single(input, policy.data.squeeze(0)))
        seg_map = F.interpolate(seg_map,
                                size=(target.size(1), target.size(2),
                                      target.size(3)),
                                mode="trilinear",
                                align_corners=True)

        dice = compute_dice(seg_map, target)

        # save image
        seg_map_numpy = seg_map.cpu().detach().numpy()

        seg_map_numpy_s = np.squeeze(seg_map_numpy)
        sitk_img = sitk.GetImageFromArray(seg_map_numpy_s)
        sitk.WriteImage(sitk_img, path + '/' + mode + str(batch_idx) + '.nii',
                        True)

        dices.append(dice)
        policies.append(policy.data)

    dice, _, sparsity, variance, policy_set, policy_list = utils.performance_stats(
        policies, dices, dices)

    log_str = u'''
    Dice: %.6f
    Block Usage: %.3f \u00B1 %.3f
    Unique Policies: %d
    ''' % (dice, sparsity, variance, len(policy_set))

    print(log_str)
    print('policy_set', policy_set)
    print('policy_list', policy_list)
    print('dices', list(map(lambda x: x.item(), dices)))
    return dice
Example #4
def test(budget_constraint):

    total_ops = []
    matches, policies = [], []
    inference_time = []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(testloader),
                                                  total=len(testloader)):

        targets = targets.cuda(non_blocking=True)
        inputs = inputs.cuda()
        with torch.no_grad():
            time_st = time.time()
            budget = torch.ones(targets.shape).cuda() * budget_constraint
            probs, _ = agent(inputs, budget)

            policy = probs.clone()
            policy[policy < 0.5] = 0.0
            policy[policy >= 0.5] = 1.0

            preds = rnet.forward_single(inputs, policy.data.squeeze(0))
            inference_time.append((time.time() - time_st) * 1000.0)
            _, pred_idx = preds.max(1)
            match = (pred_idx == targets).data.float()

            matches.append(match)
            policies.append(policy.data)

            ops = count_flops(agent) + count_flops(rnet)
            total_ops.append(ops)

    accuracy, _, sparsity, variance, policy_set = utils.performance_stats(
        policies, matches, matches)
    ops_mean, ops_std = np.mean(total_ops), np.std(total_ops)
    inference_time_mean, inference_time_std = np.mean(inference_time), np.std(
        inference_time)
    log_str = u'''
    Accuracy: %.3f
    Block Usage: %.3f \u00B1 %.3f
    FLOPs/img: %.2E \u00B1 %.2E
    Unique Policies: %d
    Average Inference time: %.3f \u00B1 %.3f
    ''' % (accuracy, sparsity, variance, ops_mean, ops_std, len(policy_set),
           inference_time_mean, inference_time_std)
    print("======================== budget constraint: " +
          str(budget_constraint) + " =========================")
    print(log_str)
    print("%.3f/%.3f/%.3f/%.2E/%.2E/%.3f/%.3f/%d" %
          (accuracy, sparsity, variance, ops_mean, ops_std,
           inference_time_mean, inference_time_std, len(policy_set)))
    with open(args.output, 'a') as f:
        f.write("%.2f,%.3f,%.3f,%.3f,%.2E,%.2E,%.3f,%.3f,%.3f,%.3f,%d\n" %
                (budget_constraint, accuracy, sparsity, variance, ops_mean,
                 ops_std, ops_mean, ops_std, inference_time_mean,
                 inference_time_std, len(policy_set)))
Example #5
def test(epoch):

    agent.eval()
    rnet.eval()

    matches, rewards, policies = [], [], []
    for batch_idx, (inputs, locations, targets) in enumerate(testloader):

        inputs, targets = Variable(
            inputs, volatile=True), Variable(targets).cuda(non_blocking=True)
        locations = Variable(locations, volatile=True)

        inputs = inputs.cuda()
        locations = locations.cuda()

        probs = agent(inputs, locations)

        policy = probs.data.clone()
        policy[policy < 0.5] = 0.0
        policy[policy >= 0.5] = 1.0
        policy = Variable(policy)

        preds = rnet.forward(inputs, policy)
        reward, match = get_reward(preds, targets, policy.data)

        matches.append(match)
        rewards.append(reward)
        policies.append(policy.data)

    accuracy, reward, sparsity, variance, policy_set = utils.performance_stats(
        policies, rewards, matches)

    log_str = 'TS - A: %.3f | R: %.2E | S: %.3f | V: %.3f | #: %d' % (
        accuracy, reward, sparsity, variance, len(policy_set))
    print(log_str)

    # save the model
    agent_state_dict = agent.module.state_dict(
    ) if args.parallel else agent.state_dict()
    rnet_state_dict = rnet.module.state_dict(
    ) if args.parallel else rnet.state_dict()

    state = {
        'agent': agent_state_dict,
        'resnet': rnet_state_dict,
        'epoch': epoch,
        'reward': reward,
        'acc': accuracy
    }
    torch.save(
        state, args.cv_dir + '/ckpt_E_%d_A_%.3f_R_%.2E_S_%.2f_#_%d.t7' %
        (epoch, accuracy, reward, sparsity, len(policy_set)))
Example #6
def test(epoch):

    vmodel.eval()

    accuracies = []
    log.info("test Epoch is %d" %(epoch))
    for idx, data in enumerate(testloader):
        
        data = [d.to(vdevice) for d in data]
#        _, [attr_pred, obj_pred, _] = vmodel(data,(attr_emb,obj_emb))
        _, [attr_pred, obj_pred, _] = vmodel(data)
        # shape of attr_pred is [bs, 3]: the 3 columns are the open, closed, and oracle attr_id
        match_stats = utils.performance_stats(attr_pred, obj_pred, data)
        accuracies.append(match_stats)

    accuracies = zip(*accuracies)
    accuracies = map(torch.mean, map(torch.cat, accuracies))
    attr_acc, obj_acc, closed_acc, open_acc, objoracle_acc = accuracies
    print( '(test) E: %d | A: %.3f | O: %.3f | Cl: %.3f | Op: %.4f | OrO: %.4f'%(epoch, attr_acc, obj_acc, closed_acc, open_acc, objoracle_acc))
Example #7
def test(limit):

    total_ops = []
    matches, policies = [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(testloader),
                                                  total=len(testloader)):

        inputs, targets = Variable(
            inputs, volatile=True).cuda(), Variable(targets).cuda()
        probs, _ = agent(inputs)

        _, order = torch.sort(probs, 1)

        policy = probs.clone()
        policy[order < limit] = 0.0
        policy[order >= limit] = 1.0

        preds = rnet.forward(inputs, policy)
        _, pred_idx = preds.max(1)
        match = (pred_idx == targets).data.float()

        matches.append(match)
        policies.append(policy.data)

        ops = count_flops(agent) + count_flops(rnet)
        total_ops.append(ops)

    accuracy, _, sparsity, variance, policy_set = utils.performance_stats(
        policies, matches, matches)
    ops_mean, ops_std = np.mean(total_ops), np.std(total_ops)

    log_str = u'''
    Accuracy: %.3f
    Block Usage: %.3f \u00B1 %.3f
    FLOPs/img: %.2E \u00B1 %.2E
    Unique Policies: %d
    ''' % (accuracy, sparsity, variance, ops_mean, ops_std, len(policy_set))
    log_value('test_acc', accuracy, limit)
    print(log_str)
Example #8
def train(epoch):
    agent.train()
    rnet.train()

    matches, rewards, rewards_baseline, policies = [], [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(trainloader),
                                                  total=len(trainloader)):

        inputs, targets = Variable(inputs), Variable(targets).cuda(non_blocking=True)
        if not args.parallel:
            inputs = inputs.cuda()

        # Get the low resolution agent images
        inputs_agent = inputs.clone()
        inputs_agent = torch.nn.functional.interpolate(
            inputs_agent, (args.lr_size, args.lr_size))
        probs = F.sigmoid(
            agent.forward(inputs_agent,
                          args.model.split('_')[1], 'lr'))
        probs = probs * args.alpha + (1 - probs) * (1 - args.alpha)

        # Sample the policies from the Bernoulli distribution characterized by agent's output
        distr = Bernoulli(probs)
        policy_sample = distr.sample()

        # Test time policy - used as baseline policy in the training step
        policy_map = probs.data.clone()
        policy_map[policy_map < 0.5] = 0.0
        policy_map[policy_map >= 0.5] = 1.0

        # Agent sampled high resolution images
        inputs_map = inputs.clone()
        inputs_sample = inputs.clone()
        inputs_map = utils.agent_chosen_input(inputs_map, policy_map, mappings,
                                              patch_size)
        inputs_sample = utils.agent_chosen_input(inputs_sample,
                                                 policy_sample.int(), mappings,
                                                 patch_size)

        # Get the predictions for baseline and sampled policy
        preds_map = rnet.forward(inputs_map, args.model.split('_')[1], 'hr')
        preds_sample = rnet.forward(inputs_sample,
                                    args.model.split('_')[1], 'hr')

        # Get the rewards for both policies
        reward_map, match = utils.compute_reward(preds_map, targets,
                                                 policy_map.data, args.penalty)
        reward_sample, _ = utils.compute_reward(preds_sample, targets,
                                                policy_sample.data,
                                                args.penalty)

        # Find the joint loss from the classifier and agent
        advantage = reward_sample - reward_map
        loss = -distr.log_prob(policy_sample).sum(
            1, keepdim=True) * Variable(advantage)
        loss = loss.mean()
        loss += F.cross_entropy(preds_sample, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        matches.append(match.cpu())
        rewards.append(reward_sample.cpu())
        rewards_baseline.append(reward_map.cpu())
        policies.append(policy_sample.data.cpu())

    accuracy, reward, sparsity, variance, policy_set = utils.performance_stats(
        policies, rewards, matches)

    print('Train: %d | Acc: %.3f | Rw: %.2E | S: %.3f | V: %.3f | #: %d' %
          (epoch, accuracy, reward, sparsity, variance, len(policy_set)))
    log_value('train_accuracy', accuracy, epoch)
    log_value('train_reward', reward, epoch)
    log_value('train_sparsity', sparsity, epoch)
    log_value('train_variance', variance, epoch)
    log_value('train_baseline_reward',
              torch.cat(rewards_baseline, 0).mean(), epoch)
    log_value('train_unique_policies', len(policy_set), epoch)
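
In this training loop the thresholded policy acts as a self-baseline: the advantage is reward_sample - reward_map, so the REINFORCE term only pushes probabilities where the sampled policy beats the deterministic one. utils.compute_reward / get_reward are not shown on this page; the sketch below is an illustrative accuracy-versus-usage reward of the kind these loops expect (a per-sample reward shaped (N, 1) plus a correctness mask), under the assumption that correct predictions earn more when fewer blocks or patches are used and wrong ones pay args.penalty. The name compute_reward_sketch is hypothetical.

import torch

def compute_reward_sketch(preds, targets, policy, penalty):
    _, pred_idx = preds.max(1)
    match = (pred_idx == targets).float()     # 1.0 where the prediction is correct
    usage = policy.float().mean(1)            # fraction of blocks/patches switched on
    # cheap-and-correct scores close to 1, expensive-and-correct scores less, wrong pays -penalty
    reward = (1.0 - usage ** 2) * match - penalty * (1.0 - match)
    return reward.unsqueeze(1), match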
Example #9
def test(epoch):
    agent.eval()
    rnet.eval()

    matches, rewards, policies = [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(testloader),
                                                  total=len(testloader)):

        inputs, targets = Variable(
            inputs, volatile=True), Variable(targets).cuda(non_blocking=True)
        if not args.parallel:
            inputs = inputs.cuda()

        # Get the low resolution agent images
        inputs_agent = inputs.clone()
        inputs_agent = torch.nn.functional.interpolate(
            inputs_agent, (args.lr_size, args.lr_size))
        probs = F.sigmoid(
            agent.forward(inputs_agent,
                          args.model.split('_')[1], 'lr'))

        # Sample Test time Policy Using Bernoulli Distribution
        policy = probs.data.clone()
        policy[policy < 0.5] = 0.0
        policy[policy >= 0.5] = 1.0

        # Get the Agent Determined Images
        inputs = utils.agent_chosen_input(inputs, policy, mappings, patch_size)

        # Get the predictions from the high resolution classifier
        preds = rnet.forward(inputs, args.model.split('_')[1], 'hr')

        # Get the reward for the sampled policy and given predictions
        reward, match = utils.compute_reward(preds, targets, policy.data,
                                             args.penalty)

        matches.append(match)
        rewards.append(reward)
        policies.append(policy.data)

    accuracy, reward, sparsity, variance, policy_set = utils.performance_stats(
        policies, rewards, matches)

    print('Test - Acc: %.3f | Rw: %.2E | S: %.3f | V: %.3f | #: %d' %
          (accuracy, reward, sparsity, variance, len(policy_set)))
    log_value('test_accuracy', accuracy, epoch)
    log_value('test_reward', reward, epoch)
    log_value('test_sparsity', sparsity, epoch)
    log_value('test_variance', variance, epoch)
    log_value('test_unique_policies', len(policy_set), epoch)

    # Save the Policy Network and High-res Classifier
    agent_state_dict = agent.module.state_dict(
    ) if args.parallel else agent.state_dict()
    rnet_state_dict = rnet.module.state_dict(
    ) if args.parallel else rnet.state_dict()
    state = {
        'agent': agent_state_dict,
        'resnet_hr': rnet_state_dict,
        'epoch': epoch,
        'reward': reward,
        'acc': accuracy
    }
    torch.save(
        state,
        args.cv_dir + '/ckpt_E_%d_A_%.3f_R_%.2E' % (epoch, accuracy, reward))
Example #10
def test(epoch):
    dices, rewards, policies = [], [], []
    for batch_idx, (input, target) in tqdm.tqdm(enumerate(testloader),
                                                total=len(testloader)):

        input, target = input, target.cuda()
        if not args.parallel:
            input = input.cuda()

        probs, _ = agent(input)

        policy = probs.data.clone()

        policy[policy < 0.5] = 0.0
        policy[policy >= 0.5] = 1.0
        policy = Variable(policy)

        if args.cl_step < num_blocks:
            policy[:, :-args.cl_step] = 1

        preds = torch.sigmoid(seg_model.forward(input, policy))
        preds = F.interpolate(preds,
                              size=(target.size(1), target.size(2),
                                    target.size(3)),
                              mode="trilinear",
                              align_corners=True)
        reward, dice = get_reward(preds, target, policy.data)

        dices.append(dice)
        rewards.append(reward)
        policies.append(policy.data)

    dice, reward, sparsity, variance, policy_set, policy_list = utils.performance_stats(
        policies, rewards, dices)

    log_str = 'TS - D: %.3f | R: %.2E | S: %.3f | V: %.3f | #: %d' % (
        dice, reward, sparsity, variance, len(policy_set))
    print(log_str)
    print("policy_list:", policy_list)

    writer.add_scalar('test_accuracy', dice, epoch)
    writer.add_scalar('test_reward', reward, epoch)
    writer.add_scalar('test_sparsity', sparsity, epoch)
    writer.add_scalar('test_variance', variance, epoch)
    writer.add_scalar('test_unique_policies', len(policy_set), epoch)

    # save the model
    agent_state_dict = agent.module.state_dict(
    ) if args.parallel else agent.state_dict()

    global best_dice
    if dice >= best_dice:
        state = {
            'agent': agent_state_dict,
            'epoch': epoch,
            'reward': reward,
            'dice': dice
        }
        torch.save(
            state, args.cv_dir + '/ckpt_E_%d_D_%.3f_R_%.2E_S_%.2f_#_%d.t7' %
            (epoch, dice, reward, sparsity, len(policy_set)))
        torch.save(state, args.cv_dir + '/best.t7')
Example #11
def train(epoch):
    dices, rewards, policies = [], [], []
    for batch_idx, (input, target) in tqdm.tqdm(enumerate(trainloader),
                                                total=len(trainloader)):

        input, target = input, target.cuda()
        if not args.parallel:
            input = input.cuda()

        probs, value = agent(input)

        policy_map = probs.data.clone()

        policy_map[policy_map < 0.5] = 0.0
        policy_map[policy_map >= 0.5] = 1.0
        policy_map = Variable(policy_map)

        probs = probs * args.alpha + (1 - probs) * (1 - args.alpha)
        distr = Bernoulli(probs)
        policy = distr.sample()

        if args.cl_step < num_blocks:
            policy[:, :-args.cl_step] = 1
            policy_map[:, :-args.cl_step] = 1

            policy_mask = Variable(torch.ones(input.size(0),
                                              policy.size(1))).cuda()
            policy_mask[:, :-args.cl_step] = 0
        else:
            policy_mask = None

        seg_map = torch.sigmoid(seg_model.forward(input, policy_map))
        seg_sample = torch.sigmoid(seg_model.forward(input, policy))

        seg_map = F.interpolate(seg_map,
                                size=(target.size(1), target.size(2),
                                      target.size(3)),
                                mode="trilinear",
                                align_corners=True)
        seg_sample = F.interpolate(seg_sample,
                                   size=(target.size(1), target.size(2),
                                         target.size(3)),
                                   mode="trilinear",
                                   align_corners=True)

        reward_map, _ = get_reward(seg_map, target, policy_map.data)
        reward_sample, dice = get_reward(seg_sample, target, policy.data)

        advantage = reward_sample - reward_map

        loss = -distr.log_prob(policy)
        loss = loss * advantage.expand_as(policy)

        if policy_mask is not None:
            loss = policy_mask * loss  # mask for curriculum learning

        loss = loss.sum()

        probs = probs.clamp(1e-15, 1 - 1e-15)
        entropy_loss = -probs * torch.log(probs)
        entropy_loss = args.beta * entropy_loss.sum()

        loss = (loss - entropy_loss) / input.size(0)

        # ---------------------------------------------------------------------#

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        dices.append(dice.cpu())
        rewards.append(reward_sample.cpu())
        policies.append(policy.data.cpu())

    dice, reward, sparsity, variance, policy_set, policy_list = utils.performance_stats(
        policies, rewards, dices)

    log_str = 'TRAIN - E: %d | D: %.3f | R: %.2E | S: %.3f | V: %.3f | #: %d' % (
        epoch, dice, reward, sparsity, variance, len(policy_set))
    print(log_str)
    print("policy_list:", policy_list)

    writer.add_scalar('train_dice', dice, epoch)
    writer.add_scalar('train_reward', reward, epoch)
    writer.add_scalar('train_sparsity', sparsity, epoch)
    writer.add_scalar('train_variance', variance, epoch)
    writer.add_scalar('train_unique_policies', len(policy_set), epoch)
Example #12
def train(epoch):

    agent.train()

    matches, rewards, policies = [], [], []
    for batch_idx, (inputs, locations, targets) in enumerate(trainloader):

        inputs, targets = Variable(inputs), Variable(targets).cuda(non_blocking=True)
        locations = Variable(locations)
        if not args.parallel:
            inputs = inputs.cuda()
            locations = locations.cuda()

        #probs, value = agent(inputs)
        probs = agent(inputs, locations)

        #---------------------------------------------------------------------#

        policy_map = probs.data.clone()
        policy_map[policy_map < 0.5] = 0.0
        policy_map[policy_map >= 0.5] = 1.0
        policy_map = Variable(policy_map)

        probs = probs * args.alpha + (1 - probs) * (1 - args.alpha)
        distr = Bernoulli(probs)
        policy = distr.sample()

        if args.cl_step < num_blocks:
            policy[:, :-args.cl_step] = 1
            policy_map[:, :-args.cl_step] = 1

            policy_mask = Variable(torch.ones(inputs.size(0),
                                              policy.size(1))).cuda()
            policy_mask[:, :-args.cl_step] = 0
        else:
            policy_mask = None

        v_inputs = Variable(inputs.data, volatile=True)
        preds_map = rnet.forward(v_inputs, policy_map)
        preds_sample = rnet.forward(v_inputs, policy)

        reward_map, _ = get_reward(preds_map, targets, policy_map.data)
        reward_sample, match = get_reward(preds_sample, targets, policy.data)

        advantage = reward_sample - reward_map

        loss = -distr.log_prob(policy)
        loss = loss * Variable(advantage).expand_as(policy)

        if policy_mask is not None:
            loss = policy_mask * loss  # mask for curriculum learning

        loss = loss.sum()

        probs = probs.clamp(1e-15, 1 - 1e-15)
        entropy_loss = -probs * torch.log(probs)
        entropy_loss = args.beta * entropy_loss.sum()

        loss = (loss - entropy_loss) / inputs.size(0)

        #---------------------------------------------------------------------#

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        matches.append(match.cpu())
        rewards.append(reward_sample.cpu())
        policies.append(policy.data.cpu())

    accuracy, reward, sparsity, variance, policy_set = utils.performance_stats(
        policies, rewards, matches)

    log_str = 'E: %d | A: %.3f | R: %.2E | S: %.3f | V: %.3f | #: %d' % (
        epoch, accuracy, reward, sparsity, variance, len(policy_set))
    print(log_str)
Example #13
def train(epoch):

    agent.train()

    matches, rewards, policies = [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(trainloader),
                                                  total=len(trainloader)):

        # only the last assignment takes effect; comment lines out to switch the noise type
        noiseType = "gaussian"
        noiseType = "poisson"
        noiseType = "salt_pepper"
        showIamge(inputs[0:4].permute(0, 3, 2, 1), 1)
        noisy_inputs = addImageNoise(inputs.permute(0, 3, 2, 1), noiseType)
        showIamge(noisy_inputs[0:4].permute(0, 3, 2, 1), 2)

        targets_img = inputs.clone()
        inputs = noisy_inputs.clone()

        # inputs, targets = Variable(inputs), Variable(targets).cuda(async=True)
        inputs, targets = Variable(inputs), Variable(targets)
        if not args.parallel:
            # inputs = inputs.cuda()
            inputs = inputs

        probs, value = agent(inputs)

        #---------------------------------------------------------------------#

        policy_map = probs.data.clone()
        policy_map[policy_map < 0.5] = 0.0
        policy_map[policy_map >= 0.5] = 1.0
        policy_map = Variable(policy_map)

        probs = probs * args.alpha + (1 - probs) * (1 - args.alpha)
        distr = Bernoulli(probs)
        policy = distr.sample()

        if args.cl_step < num_blocks:
            policy[:, :-args.cl_step] = 1
            policy_map[:, :-args.cl_step] = 1

            # policy_mask = Variable(torch.ones(inputs.size(0), policy.size(1))).cuda()
            policy_mask = Variable(torch.ones(inputs.size(0), policy.size(1)))
            policy_mask[:, :-args.cl_step] = 0
        else:
            policy_mask = None

        v_inputs = Variable(inputs.data, volatile=True)
        preds_map = rnet.forward(v_inputs, policy_map)
        preds_sample = rnet.forward(v_inputs, policy)

        reward_map, _ = get_reward(preds_map, targets, policy_map.data)
        reward_sample, match = get_reward(preds_sample, targets, policy.data)

        advantage = reward_sample - reward_map

        loss = -distr.log_prob(policy)
        loss = loss * Variable(advantage).expand_as(policy)

        if policy_mask is not None:
            loss = policy_mask * loss  # mask for curriculum learning

        loss = loss.sum()

        probs = probs.clamp(1e-15, 1 - 1e-15)
        entropy_loss = -probs * torch.log(probs)
        entropy_loss = args.beta * entropy_loss.sum()

        loss = (loss - entropy_loss) / inputs.size(0)

        #---------------------------------------------------------------------#

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        matches.append(match.cpu())
        rewards.append(reward_sample.cpu())
        policies.append(policy.data.cpu())

    accuracy, reward, sparsity, variance, policy_set = utils.performance_stats(
        policies, rewards, matches)

    log_str = 'E: %d | A: %.3f | R: %.2E | S: %.3f | V: %.3f | #: %d' % (
        epoch, accuracy, reward, sparsity, variance, len(policy_set))
    print(log_str)

    log_value('train_accuracy', accuracy, epoch)
    log_value('train_reward', reward, epoch)
    log_value('train_sparsity', sparsity, epoch)
    log_value('train_variance', variance, epoch)
    log_value('train_unique_policies', len(policy_set), epoch)
Example #14
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, Convolution2D, MaxPooling2D, BatchNormalization
import numpy as np
from tensorflow.keras.callbacks import TensorBoard
import pickle
import time

import utils

from collections import Counter 

from sklearn.metrics import classification_report

#build model
model_1 = utils.create_model(128, 3, 3)

#check performance
utils.performance_stats(model_1)
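
Unlike the PyTorch examples, this last snippet passes a single Keras model to utils.performance_stats, so it relies on a different helper with a different signature. Its implementation is not shown; the sketch below is only an assumption about what such a model-level report could look like, reusing the cifar10, np, and classification_report imports already present above. The name performance_stats_sketch and the choice of the CIFAR-10 test split are hypothetical.

def performance_stats_sketch(model):
    # evaluate on the CIFAR-10 test split and print a per-class report
    (_, _), (x_test, y_test) = cifar10.load_data()
    x_test = x_test.astype('float32') / 255.0
    y_true = y_test.ravel()
    y_pred = np.argmax(model.predict(x_test, verbose=0), axis=1)
    print(classification_report(y_true, y_pred))
    print('test accuracy: %.4f' % (y_pred == y_true).mean())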