def train(epoch):
    net.train()
    net.training = True
    train_loss = 0
    correct = 0
    total = 0
    optimizer = optim.SGD(net.parameters(),
                          lr=cf.learning_rate(args.lr, epoch),
                          momentum=0.9,
                          weight_decay=5e-4)

    for name, param in net.named_parameters():
        print(name, param.shape)
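        # Zero the gradient of every tensor whose name does not contain "param",
        # effectively freezing those weights while the "param" tensors keep training.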
        if "param" not in name:
            param.register_hook(lambda grad: grad * 0)

    # for name, module in net.named_modules():
    #     module.register_hook(lambda grad: grad * 0)

    #net.module.conv1.weight.register_hook(lambda grad: grad * 0)

    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)  # Forward Propagation
        loss = criterion(outputs, targets)  # Loss
        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        #print(net.module.conv1.weight[1])
        # print(net.module.layer1[1].conv1.weight[1])
        # print(net.module.layer1[1].parameter)

        feature_ranks = {}
        for name, param in net.named_parameters():
            print(name, param.shape)
            if "param" in name:
                print(name)

                sorted_features = torch.argsort(param, descending=True)
                feature_ranks[name] = sorted_features
                print(sorted_features)
        np.save("ranks.npy", feature_ranks)

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%' %
            (epoch, num_epochs, batch_idx + 1,
             (len(trainset) // batch_size) + 1, loss.item(),
             100. * correct / total))
        sys.stdout.flush()
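
These examples all rebuild the optimizer each epoch with a learning rate taken from cf.learning_rate(args.lr, epoch). A minimal sketch of what such a step-decay schedule might look like (the decay factor and epoch thresholds below are assumptions, not the actual cf config):

import math

def learning_rate(init, epoch):
    # Hypothetical step decay: scale the initial LR by 0.2 after epochs 60, 120 and 160.
    if epoch > 160:
        optim_factor = 3
    elif epoch > 120:
        optim_factor = 2
    elif epoch > 60:
        optim_factor = 1
    else:
        optim_factor = 0
    return init * math.pow(0.2, optim_factor)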
Example #2
def train(epoch):
    net.train()
    net.training = True
    train_loss = 0
    correct = 0
    total = 0

    optimizer = optim.SGD([
        {'params': param_core, 'weight_decay': 5e-4},
        {'params': params_multi, 'weight_decay': 0.0}
    ], lr=cf.learning_rate(args.lr, epoch), momentum=0.9)

    print('\n=> Training Epoch #%d, LR=%.4f' %(epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs = tile(inputs, 0, ensemble_size)
        targets = tile(targets, 0, ensemble_size)
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda() # GPU settings
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)               # Forward Propagation
        loss = criterion(outputs, targets) + mu_div * loss_latent_from_nn(net)  # Loss
        loss.backward()  # Backward Propagation
        optimizer.step() # Optimizer update

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        sys.stdout.write('\r')
        sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%'
                %(epoch, num_epochs, batch_idx+1,
                    (len(trainset)//batch_size)+1, loss.item(), 100.*correct/total))
        sys.stdout.flush()
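
tile is not defined in this snippet; it apparently repeats the batch along dimension 0 so that each input is fed to every ensemble member. A minimal sketch under that assumption:

import torch

def tile(a, dim, n_tile):
    # Hypothetical helper: repeat each element of `a` n_tile times along `dim`,
    # turning a batch of B samples into B * n_tile samples for the ensemble.
    return torch.repeat_interleave(a, n_tile, dim=dim)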
Example #3
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    optimizer = optim.SGD(net.parameters(),
                          lr=cf.learning_rate(args.lr, epoch),
                          momentum=0.9,
                          weight_decay=5e-4)

    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch, cf.learning_rate(args.lr, epoch)))

    for batch_idx, (inputs, targets, weights) in enumerate(api.train_loader):
        if use_cuda:
            inputs, targets, weights = inputs.cuda(), targets.cuda(
            ), weights.cuda()  # GPU settings

        optimizer.zero_grad()

        #print('data shapes ', 'inputs ', inputs.shape, 'targets ', targets.shape, 'weights ', weights.shape)

        inputs, targets = Variable(inputs), Variable(targets)

        outputs = net(inputs)  # Forward Propagation
        loss = api.loss_func(outputs, targets, weights)  # Loss

        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

    # cluster trajectory + reweight data
    if epoch >= args.burn_in and ((epoch - args.burn_in) % args.interval) == 0:
        api.clusterTrajectory()  # run gmm cluster
        api.reweightData(net, 1000000)  # update train_loader
        weight_his.append(
            np.expand_dims(api.weight_tensor.detach().numpy(),
                           axis=1).tolist())

    api.generateTrainLoader()

    train_loss = train_loss / total
    acc = 100. * correct.item() / total

    print('train loss\t\t', train_loss)
    print('correct\t\t', correct, '\t\ttotal\t\t', total)
    print('acc\t\t', acc)

    train_loss_his.append(train_loss)
    train_acc_his.append(acc)

    # record trajectory
    api.createTrajectory(net)

    print('| Epoch [%3d/%3d] \t\tLoss: %.4f Acc@1: %.3f%%' %
          (epoch, num_epochs, train_loss, 100. * correct / total))
Example #4
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    optimizer = optim.SGD(net.parameters(),
                          lr=cf.learning_rate(args.lr, epoch),
                          momentum=0.9,
                          weight_decay=5e-4)

    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)  # Forward Propagation
        loss = criterion(outputs, targets)  # Loss
        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%' %
            (epoch, num_epochs, batch_idx + 1,
             (len(trainset) // batch_size) + 1, loss.item(),
             100. * correct.numpy() / total))
        sys.stdout.flush()
Example #5
def train(net, dataloader, optimizer, epoch):
    criterion = nn.CrossEntropyLoss()
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    print('\n=> [%s] Training Epoch #%d, lr=%.4f' %
          (model_name, epoch, cf.learning_rate(args.lr, epoch)))
    log_file.write('\n=> [%s] Training Epoch #%d, lr=%.4f\n' %
                   (model_name, epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs, targets) in enumerate(dataloader):
        inputs = inputs.to(device)
        targets = targets.to(device)
        # obtain soft_target by forwarding data in test mode
        if epoch >= args.distill_from and args.distill > 0:
            with torch.no_grad():
                net.eval()
                soft_target = net(inputs)

        net.train()
        optimizer.zero_grad()
        outputs = net(inputs)  # Forward Propagation
        loss = criterion(outputs, targets)  # Loss

        # compute distillation loss
        if epoch >= args.distill_from and args.distill > 0:
            heat_output = outputs / args.temp
            heat_soft_target = soft_target / args.temp

            distill_loss = F.kl_div(
                F.log_softmax(heat_output, 1),
                F.softmax(heat_soft_target, 1),
                reduction='sum') / targets.size(0) * (args.temp * args.temp)
            loss = loss + args.distill * distill_loss

        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        train_loss += loss.item()
        _, predicted = torch.max(outputs.detach(), 1)
        total += targets.size(0)
        correct += predicted.eq(targets.detach()).long().sum().item()

        if math.isnan(loss.item()):
            print('@@@@@@@nan@@@@@@@@@@@@')
            log_file.write('@@@@@@@@@@@nan @@@@@@@@@@@@@\n')
            sys.exit(0)

        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%' %
            (epoch, args.num_epochs, batch_idx + 1,
             (len(trainset) // args.bs) + 1, loss.item(),
             100. * correct / total))
        sys.stdout.flush()
    log_file.write(
        '| Epoch [%3d/%3d] \t\tLoss: %.4f Acc@1: %.3f%%' %
        (epoch, args.num_epochs, loss.item(), 100. * correct / total))
Example #6
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    if optim_type == 'SGD':
        optimizer = optim.SGD(net.parameters(),
                              lr=cf.learning_rate(args.lr, epoch),
                              momentum=0.9,
                              weight_decay=5e-4,
                              nesterov=True)
    elif optim_type == 'ADAM':
        optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=5e-4)
    else:
        raise AssertionError("Unknown optimizer name: {}".format(optim_type))

    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
            net.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)  # Forward Propagation
        loss1 = criterion(outputs, targets)  # Loss
        loss2, e_loss, v_loss, E_loss, V_loss = regularization(net)
        loss = loss1 + loss2

        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum().float()

        acc = 100.0 * correct / total

        iter = (epoch - 1) * iters_in_epoch + batch_idx
        writer.add_scalar('train/accuracy', acc, iter)
        writer.add_scalar('train/loss1', loss1.item(), iter)
        writer.add_scalar('train/loss2', loss2.item(), iter)
        writer.add_scalar('train/loss2_e', e_loss.item(), iter)
        writer.add_scalar('train/loss2_v', v_loss.item(), iter)
        writer.add_scalar('train/loss', loss.item(), iter)
        scalar_to_tensorboard(E_loss, 'train/E_loss', writer, iter)
        scalar_to_tensorboard(V_loss, 'train/V_loss', writer, iter)
        histogram_to_tensorboard(net.e_net, 'train/e_net', writer, iter)
        histogram_to_tensorboard(net.v_net, 'train/v_net', writer, iter)

        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%' %
            (epoch, num_epochs, batch_idx + 1,
             (len(trainset) // batch_size) + 1, loss.item(), acc))
        sys.stdout.flush()
Example #7
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    optimizer = optim.Adam(net.parameters(),
                           lr=cf.learning_rate(cf.lr, epoch),
                           weight_decay=cf.weight_decay)

    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch, cf.learning_rate(cf.lr, epoch)))
    m = math.ceil(len(testset) / cf.batch_size)
    for batch_idx, (inputs_value, targets) in enumerate(trainloader):
        # targets = torch.tensor(targets)
        x = inputs_value.view(-1, inputs, resize, resize)
        y = targets
        if use_cuda:
            x, y = x.cuda(), y.cuda()  # GPU settings

        if cf.beta_type == "Blundell":
            beta = 2**(m - (batch_idx + 1)) / (2**m - 1)
        elif cf.beta_type == "Soenderby":
            beta = min(epoch / (cf.num_epochs // 4), 1)
        elif cf.beta_type == "Standard":
            beta = 1 / m
        else:
            beta = 0
        # Forward Propagation
        x, y = Variable(x), Variable(y)
        outputs, kl = net.probforward(x)
        loss = vi(outputs, y, kl, beta)  # Loss
        optimizer.zero_grad()
        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)

        total += targets.size(0)
        correct += predicted.eq(y.data).cpu().sum()

        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%' %
            (epoch, cf.num_epochs, batch_idx + 1,
             (len(trainset) // cf.batch_size) + 1, loss.item(),
             (100 * (correct.item() / total))))

        sys.stdout.flush()
    trainLoss.append(loss.item())
    trainAcc.append((100 * (correct.item() / total)))
    diagnostics_to_write = {
        'Epoch': epoch,
        'Loss': loss.item(),
        'Accuracy': (100 * (correct.item() / total))
    }
    with open(logfile, 'a') as lf:
        lf.write(str(diagnostics_to_write))
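
The beta factor above weights the KL term per mini-batch. Pulled out as a standalone helper for clarity (a sketch mirroring the expressions in the loop; m is the number of mini-batches and batch_idx is zero-based):

def kl_beta(beta_type, batch_idx, m, epoch, num_epochs):
    if beta_type == "Blundell":
        # Blundell et al., "Weight Uncertainty in Neural Networks": pi_i = 2^(M - i) / (2^M - 1)
        return 2 ** (m - (batch_idx + 1)) / (2 ** m - 1)
    elif beta_type == "Soenderby":
        # Linear warm-up over the first quarter of training.
        return min(epoch / (num_epochs // 4), 1)
    elif beta_type == "Standard":
        return 1 / m
    return 0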
Example #8
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    temp1_accum = 0
    temp2_accum = 0
    optimizer = optim.SGD(net.parameters(),
                          lr=cf.learning_rate(args.lr, epoch),
                          nesterov=True,
                          momentum=0.9,
                          weight_decay=5e-4)

    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)  # Forward Propagation
        loss = criterion(outputs, targets)  # Loss
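        # Keep a running mean of the cross-entropy loss over the batches seen so far this epoch.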
        temp1_accum = loss.detach().cpu() * (
            1. / (batch_idx + 1.)) + temp1_accum * (batch_idx /
                                                    (batch_idx + 1.))
        if args.loss == 'bce':
            loss = torch.zeros(1).cuda()
        if 'bce' in args.loss:
            bce_targets = target_transform_for_elementwise_bce(
                targets, num_classes).cuda()
            if num_classes > 30:
                new_outputs, new_targets = sampling_for_loss(outputs, targets)
                temp2 = criterion2(F.sigmoid(new_outputs), new_targets).cuda()
            else:
                temp2 = criterion2(F.sigmoid(outputs), bce_targets)
            temp2_accum = temp2.detach().cpu() * (
                1. / (batch_idx + 1.)) + temp2_accum * (batch_idx /
                                                        (batch_idx + 1.))

            loss += temp2

        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f CE_loss : %.4f BCE_loss : %4f Acc@1: %.3f%%'
            % (epoch, num_epochs, batch_idx + 1,
               (len(trainset) // batch_size) + 1, loss.item(), temp1_accum,
               temp2_accum, 100. * correct / total))
        sys.stdout.flush()
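
target_transform_for_elementwise_bce is not shown here; presumably it converts integer class labels into one-hot vectors so they can be compared element-wise with the sigmoid outputs. A minimal sketch under that assumption:

import torch

def target_transform_for_elementwise_bce(targets, num_classes):
    # Hypothetical helper: (B,) class indices -> (B, num_classes) one-hot float targets.
    one_hot = torch.zeros(targets.size(0), num_classes, device=targets.device)
    return one_hot.scatter_(1, targets.unsqueeze(1), 1.0)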
Example #9
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    if (args.resume):
        params = net.module.linear.parameters()
    else:
        params = net.parameters()
    optimizer = optim.SGD(params,
                          lr=cf.learning_rate(args.lr, epoch),
                          momentum=0.9,
                          weight_decay=5e-4)
    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch, cf.learning_rate(args.lr, epoch)))
    if (use_noise):
        loader = trainloader_noise
    else:
        loader = trainloader_clean
    for batch_idx, (inputs_c, targets_c) in enumerate(loader):
        if use_cuda:
            inputs_c, targets_c = inputs_c.cuda(), targets_c.cuda()
        optimizer.zero_grad()
        if (sim_learning):
            (outputs, matrices_reg) = net(inputs_c, compute_similarity=True)
            (_, matrices_rob) = robustNet(inputs_c,
                                          img_type="clean",
                                          compute_similarity=True)

            loss_similarity = 0.
            for i, (r, g) in enumerate(zip(matrices_reg, matrices_rob)):

                sim_loss = get_sim_loss(i, r, g, 1e-4)
                loss_similarity = loss_similarity + sim_loss
            loss = criterion(outputs, targets_c) + loss_similarity  # Loss
        else:
            outputs = net(inputs_c, compute_similarity=False)
            loss = criterion(outputs, targets_c)

        loss.backward()
        optimizer.step()  # Optimizer update
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets_c.size(0)
        correct += predicted.eq(targets_c.data).cpu().sum()
        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\t Loss: %.4f Acc@1: %.3f%%' %
            (epoch, num_epochs, batch_idx + 1,
             (len(trainset_noise) // batch_size) + 1, loss.item(),
             100. * correct / total))
        sys.stdout.flush()
Example #10
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    optimizer = optim.SGD(net.parameters(),
                          lr=cf.learning_rate(args.lr, epoch),
                          momentum=0.9,
                          weight_decay=5e-4)

    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, ((inputs1, targets1), (inputs2, targets2)) in enumerate(
            zip(trainloader_noise, trainloader_clean)):

        if use_cuda:
            inputs1, targets1 = inputs1.cuda(), targets1.cuda()  # GPU settings
            inputs2, targets2 = inputs2.cuda(), targets2.cuda()
        optimizer.zero_grad()
        outputs_n = net(inputs1, img_type="noise", compute_similarity=False)
        l1 = criterion(outputs_n, targets1)
        #l1.backward(retain_graph=False)
        #optimizer.step()

        #optimizer.zero_grad()
        outputs_c = net(inputs2, img_type="clean", compute_similarity=False)
        l2 = criterion(outputs_c, targets2)
        #l2.backward(retain_graph=False)
        #optimizer.step()

        #optimizer.zero_grad()
        l3 = w_loss(outputs_n, outputs_c)
        readout_losses.append(l3.item())
        #l3.backward(retain_graph=False)
        #optimizer.step() # Optimizer update

        loss = l1 + l2 + l3
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, predicted = torch.max(outputs_c.data, 1)
        total += targets2.size(0)
        correct += predicted.eq(targets2.data).cpu().sum()

        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\t Total Loss: %.4f Acc@1: %.3f%%'
            % (epoch, num_epochs, batch_idx + 1,
               (len(trainset_noise) // batch_size) + 1, loss.item(),
               100. * correct / total))
        sys.stdout.flush()
Example #11
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=5e-4)

    print('\n=> Training Epoch #%d, LR=%.4f' %(epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs_value, targets) in enumerate(trainloader):
        if use_cuda:
            inputs_value, targets = inputs_value.cuda(), targets.cuda() # GPU settings
        optimizer.zero_grad()
        inputs_value, targets = Variable(inputs_value), Variable(targets)
        outputs = net.forward(inputs_value)               # Forward Propagation
        loss = criterion(outputs, targets)  # Loss
        loss.backward()  # Backward Propagation
        optimizer.step() # Optimizer update

        train_loss += loss.data
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        sys.stdout.write('\r')
        sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Cor@1: %.3f%% \tTotal%.3f%%'
                %(epoch, num_epochs, batch_idx+1,
                    (len(trainset)//batch_size)+1, loss.data, correct,total))
        sys.stdout.flush()
    diagnostics_to_write = {'Epoch': epoch, 'Loss': loss.data, 'Accuracy': 100*correct / total}
    with open(logfile, 'a') as lf:
        lf.write(str(diagnostics_to_write))
Example #12
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    optimizer = optim.SGD(net.parameters(),
                          lr=cf.learning_rate(args.lr, epoch),
                          momentum=0.9,
                          weight_decay=5e-4)

    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
        optimizer.zero_grad()

        #print('data shapes ', 'inputs ', inputs.shape, 'targets ', targets.shape)

        inputs, targets = Variable(inputs), Variable(targets)

        outputs = net(inputs)  # Forward Propagation
        loss = criterion(outputs, targets)  # Loss
        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

    train_loss = train_loss / total
    acc = 100. * correct.item() / total
    print('train loss\t\t', train_loss)
    print('correct\t\t', correct, '\t\ttotal\t\t', total)
    print('acc\t\t', acc)

    train_loss_his.append(train_loss)
    train_acc_his.append(acc)

    #sys.stdout.write('\r')
    #sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d]'
    #        %(epoch, num_epochs, batch_idx+1,
    #            (len(trainset)//batch_size)+1))
    #sys.stdout.flush()
    print('| Epoch [%3d/%3d] \t\tLoss: %.4f Acc@1: %.3f%%' %
          (epoch, num_epochs, train_loss, acc))
Example #13
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch,
           cf.learning_rate(args.lr * batch_size, epoch, args.warmup_epoch, 0,
                            len(trainloader), hvd.size())))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        lr = cf.learning_rate(args.lr * batch_size, epoch, args.warmup_epoch,
                              batch_idx, len(trainloader), hvd.size())
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)  # Forward Propagation
        loss = criterion(outputs, targets)  # Loss
        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        train_loss += loss.data.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        print(
            '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%% LR: %.8f'
            % (epoch, num_epochs, batch_idx + 1,
               (len(trainset) // batch_size) + 1, loss.data.item(),
               100. * correct / total, lr))
    if hvd.rank() == 0:
        save_dict = {
            "epoch": epoch,
            "optimizer": optimizer.state_dict(),
            "state_dict": net.state_dict()
        }
        torch.save(
            save_dict,
            os.path.join(
                '/home/lunit/Pytorch-Horovod-Examples/examples/cifar100/checkpoints/',
                'cifar100_last.pth.tar'))
Example #14
def train(epoch):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    num_classes = args.num_classes
    optimizer = optim.SGD(net.parameters(),
                          lr=cf.learning_rate(args.lr, epoch),
                          nesterov=True,
                          momentum=0.9,
                          weight_decay=5e-4)
    # optimizer = optim.Adam(net.parameters(), lr=cf.learning_rate(args.lr, epoch), betas=(0.5,0.999), weight_decay=5e-4)

    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs, _) in enumerate(out_testloader):
        if use_cuda:
            inputs = inputs.cuda()  # GPU settings
        optimizer.zero_grad()
        inputs = Variable(inputs)
        outputs = net(inputs)  # Forward Propagation

        # targets = torch.ones_like(outputs[:,-1]).cuda()
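        # Label every out-of-distribution sample with only the extra (last) class,
        # so the BCE loss trains that logit to fire on OOD inputs.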
        targets = torch.zeros_like(outputs).cuda()
        targets[:, -1] = 1

        loss = F.binary_cross_entropy_with_logits(outputs, targets)
        loss.backward()

        optimizer.step()

        max_logit, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(10).cpu().sum()

        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%' %
            (epoch, num_epochs, batch_idx + 1,
             (len(out_testset) // batch_size) + 1, loss.item(),
             float(100.00 * float(correct) / float(total))))
        sys.stdout.flush()
Example #15
def train(epoch):
    net.train()
    net.training = True
    train_loss = 0
    correct = 0
    total = 0
    optimizer = optim.SGD(
        net.parameters(),
        lr=cf.learning_rate(args.lr, epoch),
        momentum=0.9,
        weight_decay=5e-4,
    )

    logbook.write_message(
        f"Training Epoch {epoch}, LR {cf.learning_rate(args.lr, epoch)}"
    )
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
        optimizer.zero_grad()
        outputs = net(inputs)  # Forward Propagation
        loss = criterion(outputs, targets)  # Loss
        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        # sys.stdout.write("\r")
        # sys.stdout.write(
        #     "| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%"
        #     % (
        #         epoch,
        #         num_epochs,
        #         batch_idx + 1,
        #         (len(trainset) // batch_size) + 1,
        #         loss.item(),
        #         100.0 * correct / total,
        #     )
        # )
        logbook.write_metric(
            {
                "epoch": epoch,
                "iter": batch_idx + 1,
                "loss": loss.item(),
                "acc@1": 100.0 * correct.item() / total,
                "mode": "train",
            }
        )
Example #16
def train(epoch):
    model.train()
    train_loss = 0
    correct = 0

    optimizer = optim.SGD(model.parameters(),
                          lr=config.learning_rate(0.1, epoch),
                          momentum=0.9,
                          weight_decay=5e-4)
    #optimizer = optim.SGD(model.parameters(), lr=0.1*0.0008, momentum=0.9, weight_decay=5e-4)

    for batch_idx, (data, target) in enumerate(train_loader):
        if use_cuda:
            data, target = data.cuda(), target.cuda()

        data, target = Variable(data), Variable(target)
        if batch_idx == 0:
            torch.save(data, './data.pkl')
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            log_value('loss', loss, 391 * (epoch - 1) + batch_idx)

        # sum up batch loss
        train_loss += criterion(output, target).item()
        # get the index of the max log-probability
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    if epoch % 20 == 0:
        if torch.cuda.device_count() > 1:
            torch.save(model.module.state_dict(), OUTPATH + str(epoch))

    train_loss = train_loss / (len(train_loader.dataset) // BATCH)
    print('\nTrain set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.
          format(train_loss, correct, len(train_loader.dataset),
                 100. * correct / len(train_loader.dataset)))
    log_value('train_acc', 100. * correct / len(train_loader.dataset), epoch)
Example #17
def train(epoch, global_step):
    print('Epoch {:3d} {:3.2f}'.format(epoch, 0), end='')
    for batch in range(len(labels) // config.batch_size()):
        L = batch * config.batch_size()
        R = L + config.batch_size()
        mini_batch_images = images[:, L:R, :]
        mini_batch_labels = labels[L:R]
        feed_dict = {
            input_images: mini_batch_images,
            input_labels: mini_batch_labels,
            learning_rate: config.learning_rate(epoch=epoch, steps=global_step)
        }
        session.run(optimizer, feed_dict=feed_dict)
        # grads = tape.gradient(loss, model.trainable_variables)
        # optimizer.apply_gradients(zip(grads, model.trainable_variables), global_step=tf.train.get_or_create_global_step())
        global_step += 1
        print('\rEpoch {:3d} {:3.2f}'.format(epoch, L * 100.0 / len(labels)),
              end='')
    print('\rEpoch {:3d} {:3.2f}'.format(epoch, 100.0), end='')
Example #18
def mlp_run(experiment_name, operand_bits, operator, hidden_units,
            str_device_num, nn_model_type, tlu_on):
    def train(sess, batch_input, batch_target, float_epoch, all_correct_val):
        _, _, _ = sess.run(
            [loss, op_accuracy, train_op],
            feed_dict={
                inputs: batch_input,
                targets: batch_target,
                condition_tlu: False,
                training_epoch: float_epoch,
                big_batch_training: big_batch_training_val,
                all_correct_epoch: (all_correct_val * float_epoch),
                all_correct: all_correct_val
            })

    def write_train_summary(sess, compute_nodes, batch_input, batch_target,
                            float_epoch, all_correct_val, step):
        # Run computing train loss, accuracy
        train_loss, train_accuracy, merged_summary_op_val = sess.run(
            compute_nodes,
            feed_dict={
                inputs: batch_input,
                targets: batch_target,
                condition_tlu: False,
                training_epoch: float_epoch,
                big_batch_training: big_batch_training_val,
                all_correct_epoch: (all_correct_val * float_epoch),
                all_correct: all_correct_val
            })

        ##print("epoch: {}, step: {}, train_loss: {}, train_accuracy: {}".format(epoch, step, train_loss, train_accuracy))
        #train_summary_writer.add_summary(merged_summary_op_val, step)

        return (train_loss, train_accuracy)

    def write_dev_summary(sess, compute_nodes, float_epoch, all_correct_val,
                          step):

        dev_loss, dev_accuracy, merged_summary_op_val, dev_op_wrong_val, per_digit_accuracy_val, per_digit_wrong_val = sess.run(
            compute_nodes,
            feed_dict={
                inputs: input_dev,
                targets: target_dev,
                condition_tlu: False,
                training_epoch: float_epoch,
                big_batch_training: big_batch_training_val,
                all_correct_epoch: (all_correct_val * float_epoch),
                all_correct: all_correct_val
            })

        ##print("└ epoch: {}, step: {}, dev_loss: {}, dev_accuracy: {}, op_wrong: {}".format(epoch, step, dev_loss, dev_accuracy, op_wrong_val))
        #dev_summary_writer.add_summary(merged_summary_op_val, step)

        return (dev_loss, dev_accuracy, dev_op_wrong_val,
                per_digit_accuracy_val, per_digit_wrong_val)

    def write_tlu_dev_summary(sess, compute_nodes, float_epoch,
                              all_correct_val, step):
        dev_loss_tlu, dev_accuracy_tlu, merged_summary_op_val, dev_op_wrong_val_tlu, _, _ = sess.run(
            compute_nodes,
            feed_dict={
                inputs: input_dev,
                targets: target_dev,
                condition_tlu: True,
                training_epoch: float_epoch,
                big_batch_training: big_batch_training_val,
                all_correct_epoch: (all_correct_val * float_epoch),
                all_correct: all_correct_val
            })

        ##print("└ [TLU] epoch: {}, step: {}, dev_loss: {}, dev_accuracy: {}, op_wrong: {}".format(epoch, step, dev_loss_tlu, dev_accuracy_tlu, op_wrong_val_tlu))
        #tlu_summary_writer.add_summary(merged_summary_op_val, step)

        return (dev_loss_tlu, dev_accuracy_tlu, dev_op_wrong_val_tlu)

    def write_test_summary(sess, compute_nodes, float_epoch, all_correct_val,
                           step):
        test_loss, test_accuracy, merged_summary_op_val, op_wrong_val = sess.run(
            compute_nodes,
            feed_dict={
                inputs: input_test,
                targets: target_test,
                condition_tlu: False,
                training_epoch: float_epoch,
                big_batch_training: big_batch_training_val,
                all_correct_epoch: (all_correct_val * float_epoch),
                all_correct: all_correct_val
            })
        print(
            "└ epoch: {}, step: {}, test_loss: {}, test_accuracy: {}, op_wrong: {}"
            .format(epoch, step, test_loss, test_accuracy, op_wrong_val))
        #test_summary_writer.add_summary(merged_summary_op_val, step)

        return (test_loss, test_accuracy, op_wrong_val)

    def write_carry_datasets_summary(sess, compute_nodes, float_epoch,
                                     all_correct_val, step):
        value_dict = dict()
        for n_carries in carry_datasets.keys():
            carry_dataset_input = carry_datasets[n_carries]['input']
            carry_dataset_output = carry_datasets[n_carries]['output']

            carry_loss_val, carry_accuracy_val, merged_summary_op_val, carry_op_wrong_val, carry_per_digit_accuracy_val, carry_per_digit_wrong_val = sess.run(
                compute_nodes,
                feed_dict={
                    inputs: carry_dataset_input,
                    targets: carry_dataset_output,
                    condition_tlu: False,
                    training_epoch: float_epoch,
                    big_batch_training: big_batch_training_val,
                    all_correct_epoch: (all_correct_val * float_epoch),
                    all_correct: all_correct_val
                })

            value_dict[n_carries] = (carry_loss_val, carry_accuracy_val,
                                     carry_op_wrong_val,
                                     carry_per_digit_accuracy_val,
                                     carry_per_digit_wrong_val)
            #carry_datasets_summary_writers[n_carries].add_summary(merged_summary_op_val, step)

        return value_dict

    def write_embeddings_summary(sess, h1):
        # Reference: https://stackoverflow.com/questions/40849116/how-to-use-tensorboard-embedding-projector
        dir_logs = os.path.join(config.dir_saved_models(), experiment_name)
        metadata = os.path.join(dir_logs, 'metadata.tsv')
        carry_datasets = data_utils.import_carry_datasets(
            operand_bits, operator)
        input_arrays = list()
        with open(metadata, 'w') as f:
            for carries in carry_datasets.keys():
                input_arrays.append(carry_datasets[carries]['input'])
                f.write('{}\n'.format(carries))

        carry_inputs = np.concatenate(input_arrays, axis=0)

        [h1_val] = sess.run([h1],
                            feed_dict={
                                inputs: carry_inputs,
                                condition_tlu: False
                            })

        h1_var = tf.Variable(h1_val, name='h1_var')
        saver = tf.train.Saver([h1_var])
        sess.run(h1_var.initializer)
        saver.save(sess, os.path.join(dir_logs, 'h1_var.ckpt'))

        pconfig = projector.ProjectorConfig()
        pconfig.model_checkpoint_path = os.path.join(dir_logs, 'h1_var.ckpt')
        embedding = pconfig.embeddings.add()
        embedding.tensor_name = h1_var.name
        embedding.metadata_path = metadata
        projector.visualize_embeddings(tf.summary.FileWriter(dir_logs),
                                       pconfig)

    def create_carry_datasets_summary_writers(logdir, carry_datasets):
        carry_datasets_summary_writers = dict()
        for n_carries in carry_datasets.keys():
            carry_datasets_summary_writers[n_carries] = tf.summary.FileWriter(
                logdir + '/carry-{}'.format(n_carries))
        return carry_datasets_summary_writers

    def close_carry_datasets_summary_writers(carry_datasets_summary_writers):
        for n_carries in carry_datasets_summary_writers.keys():
            carry_datasets_summary_writers[n_carries].close()

    def get_all_correct_val(op_wrong_val):
        if op_wrong_val == 0:
            return True
        else:
            return False

    def is_last_batch(i_batch):
        if i_batch == (n_batch - 1):
            return True
        else:
            return False

    def decrease_dev_summary_period(dev_accuracy_val, op_wrong_val):
        # Shrink the dev-summary period as the model approaches 100% accuracy,
        # so the dev set is evaluated more often near convergence.
        nonlocal dev_summary_period

        # Preconditions
        if not decreasing_dev_summary_period:
            return
        if dev_accuracy_val < 0.999:
            return

        # If the preconditions are satisfied, ...
        if op_wrong_val <= 8:
            dev_summary_period = int(init_dev_summary_period // 128)
        elif op_wrong_val <= 16:
            dev_summary_period = int(init_dev_summary_period // 64)
        elif op_wrong_val <= 32:
            dev_summary_period = int(init_dev_summary_period // 32)
        elif op_wrong_val <= 64:
            dev_summary_period = int(init_dev_summary_period // 16)
        elif op_wrong_val <= 128:
            dev_summary_period = int(init_dev_summary_period // 8)

        if op_wrong_val > 512:
            dev_summary_period = init_dev_summary_period

    ############################################################################
    # Running point.

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = str_device_num  # 0, 1
    os.environ[
        'TF_CPP_MIN_LOG_LEVEL'] = '3'  # Disable all TF debug logs (GPU info will not be printed when run from bash)

    # Import datasets
    (train_ratio, dev_ratio, test_ratio) = config.dataset_ratio()
    (input_train, input_dev, input_test, target_train, target_dev,
     target_test) = data_utils.import_op_dataset(operator,
                                                 operand_bits,
                                                 train_ratio=train_ratio,
                                                 dev_ratio=dev_ratio,
                                                 test_ratio=test_ratio)

    if operator in config.operators_list():
        carry_datasets = data_utils.import_carry_datasets(
            operand_bits, operator)

    # If the training dataset takes all examples, then the dev and test datasets are the same as the training one.
    if dev_ratio == 0.0 and test_ratio == 0.0:
        input_dev = input_train
        target_dev = target_train
        input_test = input_train
        target_test = target_train
    if dev_ratio == 0.0 and test_ratio != 0.0:
        input_dev = input_test
        target_dev = target_test

    # Constants
    NN_INPUT_DIM = input_train.shape[1]
    NN_OUTPUT_DIM = target_train.shape[1]

    # Hyperparameters - training
    batch_size = config.batch_size()
    big_batch_size = config.big_batch_size()
    n_epoch = config.n_epoch()
    learning_rate = config.learning_rate()
    all_correct_stop = config.all_correct_stop()
    big_batch_saturation = config.big_batch_saturation()
    if big_batch_saturation:
        all_correct_stop = False

    # Hyperparameters - model
    activation = config.activation()  # tf.nn.sigmoid, tf.nn.tanh, tf.nn.relu
    str_activation = utils.get_str_activation(activation)
    h_layer_dims = [hidden_units]  # h_layer_dims[0]: dim of h1 layer
    last_size = NN_OUTPUT_DIM

    # Variables determined by other variables
    train_size = input_train.shape[0]
    n_batch = train_size // batch_size

    # Print periods
    train_summary_period = n_batch // 4  # 4 times per epoch
    init_dev_summary_period = n_batch  # n_batch: print at every epoch
    dev_summary_period = init_dev_summary_period
    decreasing_dev_summary_period = config.decreasing_dev_summary_period()

    # Weight initialization
    ## https://www.tensorflow.org/api_docs/python/tf/contrib/layers/variance_scaling_initializer
    if activation == tf.nn.relu:
        init_factor = 2.0
    if activation == tf.nn.sigmoid:
        init_factor = 1.0
    if activation == tf.nn.tanh:
        init_factor = 1.0

    fan_in_1 = NN_INPUT_DIM
    fan_in_2 = h_layer_dims[0]

    ############################################################################
    # Creating a computational graph.

    # Initializing parameters to learn.
    with tf.name_scope('parameter'):
        W1 = tf.Variable(tf.truncated_normal(
            (NN_INPUT_DIM, h_layer_dims[0]),
            stddev=np.sqrt(init_factor / fan_in_1)),
                         name="W1")
        b1 = tf.Variable(tf.zeros((h_layer_dims[0])), name="b1")
        W2 = tf.Variable(tf.truncated_normal(
            (h_layer_dims[0], NN_OUTPUT_DIM),
            stddev=np.sqrt(init_factor / fan_in_2)),
                         name="W2")
        b2 = tf.Variable(tf.zeros((NN_OUTPUT_DIM)), name="b2")

    # Setting the input and target output.
    inputs = tf.placeholder(tf.float32,
                            shape=(None, input_train.shape[1]),
                            name='inputs')  # None for mini-batch size
    targets = tf.placeholder(tf.float32,
                             shape=(None, target_train.shape[1]),
                             name='targets')

    condition_tlu = tf.placeholder(tf.int32, shape=(), name="tlu_condition")
    is_tlu_hidden = tf.greater(condition_tlu, tf.constant(0, tf.int32))
    #is_tlu_hidden = tf.constant(condition_tlu == True, dtype=tf.bool) # https://github.com/pkmital/tensorflow_tutorials/issues/36

    # NN structure
    with tf.name_scope('layer1'):
        h1_logits = tf.add(tf.matmul(inputs, W1), b1)
        h1 = tf.cond(
            is_tlu_hidden, lambda: utils.tf_tlu(h1_logits, name='h1_tlu'),
            lambda: activation(h1_logits, name='h1')
        )  # https://stackoverflow.com/questions/35833011/how-to-add-if-condition-in-a-tensorflow-graph / https://www.tensorflow.org/versions/r1.7/api_docs/python/tf/cond
    with tf.name_scope('layer2'):
        last_logits = tf.add(tf.matmul(h1, W2), b2)
        sigmoid_outputs = tf.sigmoid(last_logits)
    predictions = utils.tf_tlu(sigmoid_outputs, name='predictions')

    # Loss: objective function
    with tf.name_scope('loss'):
        loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=targets, logits=last_logits
        )  # https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits
        loss = tf.reduce_mean(loss)
        if config.l1_coef() != 0:
            loss = loss \
                + config.l1_coef() / (2 * batch_size) * (tf.reduce_sum(tf.abs(W1)) + tf.reduce_sum(tf.abs(W2)))
            #    + config.l1_coef() / (2 * batch_size) * (tf.reduce_sum(tf.abs(tf.abs(W1) - 1)) + tf.reduce_sum(tf.abs(tf.abs(W2) - 1)))
        if config.l2_coef() != 0:
            loss = loss \
                + config.l2_coef() / (2 * batch_size) * (tf.reduce_sum(tf.square(W1)) + tf.reduce_sum(tf.square(W2)))

    # Get measures:
    # [1] operation measures (accuracy, n_wrong, n_correct)
    # [2] mean digits accuracy (mean_digits_accuracy)
    # [3] per digit accuracy (per_digit_accuracy)
    (op_accuracy, op_wrong, op_correct, digits_mean_accuracy,
     digits_mean_wrong, digits_mean_correct, per_digit_accuracy,
     per_digit_wrong,
     per_digit_correct) = utils.get_measures(targets, predictions)

    # Training, optimization
    train_op = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(loss)
    init = tf.global_variables_initializer()

    training_epoch = tf.placeholder(tf.float32, shape=None)
    all_correct_epoch = tf.placeholder(tf.float32, shape=None)
    big_batch_training = tf.placeholder(tf.int32, shape=None)
    all_correct = tf.placeholder(tf.int32, shape=None)

    # Summary: Scalar
    ## Measures
    tf.summary.scalar('loss', loss)

    with tf.name_scope('operation'):
        tf.summary.scalar('accuracy', op_accuracy)
        tf.summary.scalar('wrong', op_wrong)

    with tf.name_scope('digits'):
        tf.summary.scalar('mean_accuracy', digits_mean_accuracy)
        tf.summary.scalar('mean_wrong', digits_mean_wrong)

    with tf.name_scope('per_digit'):
        for i in range(NN_OUTPUT_DIM):
            tf.summary.scalar('digit-{}/accuracy'.format(i + 1),
                              per_digit_accuracy[-(i + 1)])
            tf.summary.scalar('digit-{}/wrong'.format(i + 1),
                              per_digit_wrong[-(i + 1)])
            # add per_digit_correct

    tf.summary.scalar('epoch', training_epoch)
    tf.summary.scalar('all_correct_epoch', all_correct_epoch)
    tf.summary.scalar('big_batch_training', big_batch_training)
    tf.summary.scalar('all_correct', all_correct)
    tf.summary.scalar('condition_tlu', condition_tlu)

    # Summary: Histogram
    with tf.name_scope('layer1'):
        tf.summary.histogram('weight', W1)
        tf.summary.histogram('bias', b1)
        tf.summary.histogram('activation', h1)
    with tf.name_scope('layer2'):
        tf.summary.histogram('weight', W2)
        tf.summary.histogram('bias', b2)
        tf.summary.histogram('activation', sigmoid_outputs)

    # Merge summary operations
    merged_summary_op = tf.summary.merge_all()

    run_info = utils.init_run_info(NN_OUTPUT_DIM)

    # Experiment info
    run_info['experiment_name'] = experiment_name

    # Problem info
    run_info['operator'] = operator
    run_info['operand_bits'] = operand_bits
    run_info['result_bits'] = target_train.shape[1]

    # Network info
    run_info['network_input_dimension'] = input_train.shape[1]
    run_info['network_output_dimension'] = target_train.shape[1]
    run_info['hidden_activation'] = str_activation
    run_info['hidden_dimensions'] = h_layer_dims

    # Dataset info
    run_info['train_set_size'] = input_train.shape[0]
    run_info['dev_set_size'] = input_dev.shape[0]
    run_info['test_set_size'] = input_test.shape[0]

    # Optimizer info
    run_info['batch_size'] = batch_size
    run_info['optimizer'] = train_op.name
    run_info['learning_rate'] = learning_rate
    run_info['all_correct_stop'] = all_correct_stop

    run_id = datetime.now().strftime('%Y%m%d%H%M%S')
    run_info['run_id'] = run_id

    # Train logging
    logdir = '{}/{}/{}_{}bit_{}_{}_h{}_run-{}/'.format(
        config.dir_logs(), experiment_name, operator, operand_bits,
        nn_model_type, str_activation, h_layer_dims, run_id)

    #train_summary_writer = tf.summary.FileWriter(logdir + '/train', graph=tf.get_default_graph())
    #dev_summary_writer = tf.summary.FileWriter(logdir + '/dev')
    #if tlu_on:
    #    tlu_summary_writer = tf.summary.FileWriter(logdir + '/tlu')
    #test_summary_writer = tf.summary.FileWriter(logdir + '/test')
    #if operator in config.operators_list():
    #    carry_datasets_summary_writers = create_carry_datasets_summary_writers(logdir, carry_datasets)

    # Model saving
    #dir_saved_model = '{}/{}/{}_{}bit_{}_{}_h{}/run-{}/'.format(
    #    config.dir_saved_models(), experiment_name, operator, operand_bits, nn_model_type, str_activation, h_layer_dims, run_id)
    #utils.create_dir(dir_saved_model)

    #model_saver = tf.train.Saver()
    #init_all_correct_model_saver = tf.train.Saver()

    # Compute nodes
    train_compute_nodes = [loss, op_accuracy, merged_summary_op]
    dev_compute_nodes = [
        loss, op_accuracy, merged_summary_op, op_wrong, per_digit_accuracy,
        per_digit_wrong
    ]
    test_compute_nodes = [loss, op_accuracy, merged_summary_op, op_wrong]

    # Session configuration
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    print("Run ID: {}".format(run_id))
    print(logdir)
    #print(dir_saved_model)

    with tf.Session(config=tf_config) as sess:
        sess.run(init)

        float_epoch = 0.0
        all_correct_val = False
        big_batch_training_val = False
        init_all_correct_model_saved = False

        for epoch in range(n_epoch):
            input_train, target_train = utils.shuffle_np_arrays(
                input_train, target_train)

            if big_batch_saturation and all_correct_val:
                big_batch_training_val = True
                batch_size = big_batch_size

            for i_batch in range(n_batch):
                # Get mini-batch
                batch_input, batch_target = utils.get_batch(
                    i_batch, batch_size, input_train, target_train)

                # Initial state evaluation: no training yet
                if epoch == 0 and i_batch == 0:
                    step = 0
                    float_epoch = 0.0

                    write_train_summary(sess, train_compute_nodes, batch_input,
                                        batch_target, float_epoch,
                                        all_correct_val, step)
                    write_dev_summary(sess, dev_compute_nodes, float_epoch,
                                      all_correct_val, step)
                    if tlu_on:
                        write_tlu_dev_summary(sess, dev_compute_nodes,
                                              float_epoch, all_correct_val,
                                              step)

                # Set step, float_epoch
                ## 1 <= (i_batch + 1) <= n_batch
                step = n_batch * epoch + (i_batch + 1)
                float_epoch = epoch + float(i_batch + 1) / n_batch

                # Training operation ##################################################################
                train(sess, batch_input, batch_target, float_epoch,
                      all_correct_val)

                # training set summary writer###########################################################
                if step % train_summary_period == 0:
                    (train_loss, train_accuracy) = write_train_summary(
                        sess, train_compute_nodes, batch_input, batch_target,
                        float_epoch, all_correct_val, step)

                # Development loss evaluation
                # After dev_summary_period batches are trained
                if (step % dev_summary_period == 0) or is_last_batch(i_batch):
                    # dev set summary writer#############################################################
                    dev_run_outputs = (
                        dev_loss_val, dev_accuracy_val, dev_op_wrong_val,
                        per_digit_accuracy_val,
                        per_digit_wrong_val) = write_dev_summary(
                            sess, dev_compute_nodes, float_epoch,
                            all_correct_val, step)

                    # carry datasets summary writer #####################################################
                    if operator in config.operators_list():
                        carry_run_outputs = write_carry_datasets_summary(
                            sess, dev_compute_nodes, float_epoch,
                            all_correct_val, step)

                    # TLU-dev summary writer#############################################################
                    # tlu_on
                    if tlu_on:
                        dev_tlu_run_outputs = (
                            dev_loss_tlu_val, dev_accuracy_tlu_val,
                            dev_op_wrong_tlu_val) = write_tlu_dev_summary(
                                sess, dev_compute_nodes, float_epoch,
                                all_correct_val, step)
                    else:
                        dev_tlu_run_outputs = None

                    # Write running information################################
                    if operator in config.operators_list():
                        run_info = utils.write_run_info(
                            run_info, float_epoch, dev_run_outputs,
                            dev_tlu_run_outputs, carry_run_outputs)
                    else:
                        run_info = utils.write_run_info(
                            run_info, float_epoch, dev_run_outputs,
                            dev_tlu_run_outputs)

                    # Write the logs of measures################################
                    #utils.write_measures(run_info, float_epoch,
                    #                        dev_run_outputs, dev_tlu_run_outputs)

                    #if is_last_batch(i_batch):
                    # After one epoch is trained
                    # Save the trained model ################################################
                    #model_saver.save(sess, '{}/dev-{}.ckpt'.format(dir_saved_model, run_id))
                    ##print("Model saved.")
                    # decrease_dev_summary_period

                    decrease_dev_summary_period(dev_accuracy_val,
                                                dev_op_wrong_val)

                    # If there is no wrong operation, then ...
                    all_correct_val = get_all_correct_val(dev_op_wrong_val)

                    # If the model is trained with 100% accuracy,
                    if all_correct_val and (not init_all_correct_model_saved):
                        # Save the model.
                        model_name = 'epoch{}-batch{}'.format(
                            float_epoch, i_batch)
                        #init_all_correct_model_saver.save(sess, '{}/{}-init-all-correct.ckpt'.format(
                        #    dir_saved_model, model_name))
                        #write_embeddings_summary(sess, h1)
                        init_all_correct_model_saved = True

                    if all_correct_val and all_correct_stop:
                        break  # Break the batch for-loop

            # End of one epoch
            if all_correct_val and all_correct_stop:
                break  # Break the epoch for-loop

        # End of all epochs

        # Test loss evaluation
        # Run computing test loss, accuracy
        # test set summary writer#############################################################
        (test_loss, test_accuracy,
         test_op_wrong_val) = write_test_summary(sess, test_compute_nodes,
                                                 float_epoch, all_correct_val,
                                                 step)

        #model_saver.save(sess, '{}/{}.ckpt'.format(dir_saved_model, run_id))
        print("Model saved.")

    # Write running information################################
    if operator in config.operators_list():
        run_info = utils.write_run_info(run_info,
                                        float_epoch,
                                        dev_run_outputs,
                                        dev_tlu_run_outputs,
                                        carry_run_outputs,
                                        final=True)
    else:
        run_info = utils.write_run_info(run_info,
                                        float_epoch,
                                        dev_run_outputs,
                                        dev_tlu_run_outputs,
                                        final=True)

    #train_summary_writer.close()
    #dev_summary_writer.close()
    #if tlu_on:
    #    tlu_summary_writer.close()
    #test_summary_writer.close()
    #if operator in config.operators_list():
    #    close_carry_datasets_summary_writers(carry_datasets_summary_writers)

    print("The training is over.")
Example #19
    def train(self, epoch):
        self.net.train()
        total_train_loss = 0
        total_num = 0
        total_train_correct = 0

        cur_lr = cf.learning_rate(self.lr, epoch)
        self.optimizer = optim.SGD(self.net.parameters(),
                                   lr=cur_lr,
                                   momentum=0.9,
                                   weight_decay=5e-4)

        if self.show_log:
            print('\n=> Training Epoch #%d, LR=%.4f' % (epoch, cur_lr))

        self.train_batch_loss_list = []
        self.train_batch_acc_list = []
        for batch_idx, (inputs, targets) in enumerate(self.train_loader):
            # inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = inputs.half().cuda(), targets.cuda()

            self.optimizer.zero_grad()
            outputs = self.net(inputs)  # Forward Propagation
            loss = self.criterion(outputs, targets)  # Loss
            loss.backward()  # Backward Propagation
            self.optimizer.step()  # Optimizer update

            # loss
            train_loss = loss.item()
            total_train_loss += train_loss
            self.train_batch_loss_list.append(train_loss)

            # accuracy
            _, predicted = torch.max(outputs.data, 1)
            total_num += targets.size(0)
            train_correct = predicted.eq(targets.data).cpu().sum().item()
            total_train_correct += train_correct
            train_acc = train_correct / targets.size(0)
            self.train_batch_acc_list.append(train_acc)

            # update visdom
            if self.show_vis:
                self.vis.line(np.array(self.train_batch_loss_list),
                              X=np.arange(len(self.train_batch_loss_list)),
                              win='train_batch_loss',
                              opts={'title': 'train_batch_loss'})
                self.vis.line(np.array(self.train_batch_acc_list),
                              X=np.arange(len(self.train_batch_acc_list)),
                              win='train_batch_acc',
                              opts={'title': 'train_batch_acc'})

            # update output
            if self.show_log:
                sys.stdout.write('\r')
                sys.stdout.write(
                    '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%'
                    % (epoch, self.num_epochs, batch_idx + 1,
                       (len(self.train_set) // self.batch_size) + 1,
                       train_loss, 100 * train_acc))
                sys.stdout.flush()

        # total loss and accuracy
        epoch_average_train_loss = total_train_loss / len(self.train_loader)
        epoch_average_train_acc = total_train_correct / total_num

        self.train_epoch_loss_list.append(epoch_average_train_loss)
        self.train_epoch_acc_list.append(epoch_average_train_acc)

        # l2 norm of all parameters
        square_params_sum = 0
        for param in self.net.parameters():
            square_params_sum += (param**2).sum()
        params_l2_norm = (square_params_sum**0.5).item()
        self.params_l2_norm_list.append(params_l2_norm)

        # update visdom
        if self.show_vis:
            self.vis.line(np.array(self.train_epoch_loss_list),
                          X=np.arange(len(self.train_epoch_loss_list)),
                          win='train_epoch_loss',
                          opts={'title': 'train_epoch_loss'})
            self.vis.line(np.array(self.train_epoch_acc_list),
                          X=np.arange(len(self.train_epoch_acc_list)),
                          win='train_epoch_acc',
                          opts={'title': 'train_epoch_acc'})
            self.vis.line(np.array(self.params_l2_norm_list),
                          X=np.arange(len(self.params_l2_norm_list)),
                          win='params_l2_norm',
                          opts={'title': 'params_l2_norm'})
Example #20
def train(epoch):
    # Each hook receives a gradient tensor and zeroes the rows belonging to
    # channels marked as unimportant for the corresponding residual block,
    # so those channels stop being updated during training.
    def make_channel_zero_hook(layer_key):
        def hook(grad):
            grad[unimportant_channels[layer_key]] = 0
        return hook

    base = net.module if use_cuda else net
    for layer_name in ("layer1", "layer2", "layer3"):
        layer = getattr(base, layer_name)
        for block_idx in range(4):
            block = layer[block_idx]
            hook = make_channel_zero_hook("%s.%d" % (layer_name, block_idx))
            block.conv1.weight.register_hook(hook)
            block.conv1.bias.register_hook(hook)
            block.bn2.weight.register_hook(hook)
            block.bn2.bias.register_hook(hook)

    net.train()
    net.training = True
    train_loss = 0
    correct = 0
    total = 0
    optimizer = optim.SGD(net.parameters(),
                          lr=cf.learning_rate(args.lr, epoch),
                          momentum=0.9,
                          weight_decay=5e-4)

    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)  # Forward Propagation
        loss = criterion(outputs, targets)  # Loss
        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        #print(net.layer1[0].conv1.weight)

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%' %
            (epoch, num_epochs, batch_idx + 1,
             (len(trainset) // batch_size) + 1, loss.item(),
             100. * correct / total))
        sys.stdout.flush()
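
The hooks above assume a dictionary unimportant_channels mapping block names such as "layer1.0" to channel indices; the snippet does not show how it is built. A plausible sketch, assuming the channels with the smallest batch-norm scales are treated as unimportant (the helper name and prune_ratio are hypothetical, not part of the original script):

import torch

def build_unimportant_channels(model, prune_ratio=0.5):
    # Hypothetical helper: for every residual block, mark the channels with
    # the smallest |bn2.weight| as unimportant so their gradients can be
    # zeroed by the hooks registered in train().
    unimportant = {}
    for layer_name in ("layer1", "layer2", "layer3"):
        layer = getattr(model, layer_name)
        for block_idx, block in enumerate(layer):
            scales = block.bn2.weight.detach().abs()
            k = int(scales.numel() * prune_ratio)
            unimportant["%s.%d" % (layer_name, block_idx)] = \
                torch.argsort(scales)[:k]
    return unimportant

# e.g. unimportant_channels = build_unimportant_channels(net.module if use_cuda else net)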
def train(net, dataloader, optimizer, epoch, num_classes):
    criterion = nn.CrossEntropyLoss()
    net.train()
    hard_loss_sum = 0
    soft_loss_sum = 0
    loss_sum = 0
    correct = 0
    total = 0

    print('\n=> [%s] Training Epoch #%d, lr=%.4f' %
          (model_name, epoch, cf.learning_rate(args.lr, epoch)))
    log_file.write('\n=> [%s] Training Epoch #%d, lr=%.4f\n' %
                   (model_name, epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs, targets) in enumerate(dataloader):
        inputs = inputs.to(device)
        targets = targets.to(device)
        # obtain soft_target by forwarding data in test mode
        if epoch >= args.distill_from and args.distill > 0:
            soft_target = F.softmax(
                torch.randn(inputs.size(0), num_classes, device=device) *
                args.rand_std,
                dim=1)

        net.train()
        optimizer.zero_grad()
        outputs = net(inputs)  # Forward Propagation
        loss = criterion(outputs, targets)  # Loss
        hard_loss_sum = hard_loss_sum + loss.item() * targets.size(0)

        # compute distillation loss
        if epoch >= args.distill_from and args.distill > 0:
            heat_output = outputs
            heat_soft_target = soft_target

            distill_loss = F.kl_div(F.log_softmax(heat_output, 1),
                                    F.softmax(heat_soft_target),
                                    size_average=False) / targets.size(0)
            soft_loss_sum = soft_loss_sum + distill_loss.item() * targets.size(
                0)

            loss = loss + args.distill * distill_loss

        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        loss_sum = loss_sum + loss.item() * targets.size(0)
        _, predicted = torch.max(outputs.detach(), 1)
        total += targets.size(0)
        correct += predicted.eq(targets.detach()).long().sum().item()

        if math.isnan(loss.item()):
            print('@@@@@@@nan@@@@@@@@@@@@')
            log_file.write('@@@@@@@@@@@nan @@@@@@@@@@@@@\n')
            sys.exit(0)

        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d]\tLoss: %.4g Acc@1: %.2f%% Hard: %.4g Soft: %.4g'
            % (epoch, args.num_epochs, batch_idx + 1,
               (len(trainset) // args.bs) + 1, loss_sum / total, 100. *
               correct / total, hard_loss_sum / total, soft_loss_sum / total))
        sys.stdout.flush()
    log_file.write(
        '| Epoch [%3d/%3d] \tLoss: %.4f Acc@1: %.2f%% Hard: %.4f Soft: %.4f' %
        (epoch, args.num_epochs, loss_sum / total, 100. * correct / total,
         hard_loss_sum / total, soft_loss_sum / total))
        net = VGGNetDropConnect(num_classes, args.drop_p, args.drop_last_only,
                                args.feat_dim)
    else:
        print('Error : Network should be either [ResNet34]')
        sys.exit(0)

    net.init_weights()
    net.to(device)

    # Training
    print('\n[Phase 3] : Training model')
    print('| Training Epochs = ' + str(args.num_epochs))
    print('| Initial Learning Rate = ' + str(args.lr))

    optimizer = optim.SGD(net.parameters(),
                          lr=cf.learning_rate(args.lr, 1),
                          momentum=0.9,
                          weight_decay=args.wd)

    elapsed_time = 0
    for epoch in range(1, args.num_epochs + 1):
        start_time = time.time()
        set_learning_rate(optimizer, cf.learning_rate(args.lr, epoch))
        train(net, trainloader, optimizer, epoch)
        test(net, testloader, epoch)

        epoch_time = time.time() - start_time
        elapsed_time += epoch_time
        print('| Elapsed time : %d:%02d:%02d' % (cf.get_hms(elapsed_time)))
        log_file.write('| Elapsed time : %d:%02d:%02d\n' %
                       (cf.get_hms(elapsed_time)))
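
set_learning_rate is called in the loop above but not defined in the snippet; a minimal sketch of the usual implementation (an assumption, not the original code):

def set_learning_rate(optimizer, lr):
    # Update the learning rate of every parameter group in place.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr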
Example #23
def train(epoch):
    net.train()

    fake = netG(fixed_noise)
    torchvision.utils.save_image(fake.data,
                                 '%s/gan_samples_epoch_%03d.png' %
                                 (args.out_folder, epoch),
                                 normalize=True)

    train_loss = 0
    entropy_loss = 0
    correct = 0
    total = 0
    temp1_accum = 0
    temp2_accum = 0
    sigmoid_sum_loss = 0
    sharing_node_loss = 0
    num_classes = args.num_classes
    # optimizer = optim.SGD(net.parameters(), lr=cf.learning_rate(args.lr, epoch), nesterov=True, momentum=0.9, weight_decay=5e-4)
    optimizer = optim.Adam(net.parameters(),
                           lr=cf.learning_rate(args.lr, epoch),
                           betas=(0.5, 0.999),
                           weight_decay=5e-4)
    if args.gan:
        # optimizerD = optim.SGD(netD.parameters(), lr=cf.learning_rate(1e-5, epoch), nesterov=True, momentum=0.9, weight_decay=5e-4)
        # optimizerG = optim.SGD(netG.parameters(), lr=cf.learning_rate(1e-3, epoch), nesterov=True, momentum=0.9, weight_decay=5e-4)
        optimizerD = optim.Adam(netD.parameters(),
                                lr=cf.learning_rate(1e-6, epoch),
                                betas=(0.5, 0.999))
        optimizerG = optim.Adam(netG.parameters(),
                                lr=cf.learning_rate(2e-5, epoch),
                                betas=(0.5, 0.999))

    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs, targets) in enumerate(trainloader):

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
        inputs, targets = Variable(inputs), Variable(targets)

        optimizer.zero_grad()
        outputs = net(inputs)

        if args.gan:
            gan_target = torch.FloatTensor(targets.size()).fill_(0)
            uniform_dist = torch.Tensor(inputs.size(0),
                                        args.num_classes).fill_(
                                            (1. / args.num_classes))
            uniform_dist = Variable(uniform_dist)

            if use_cuda:
                gan_target, uniform_dist = gan_target.cuda(
                ), uniform_dist.cuda()

            ###########################
            # (1) Update D network    #
            ###########################
            # train with real
            gan_target.fill_(real_label)
            targetv = Variable(gan_target)
            optimizerD.zero_grad()
            output = netD(inputs)
            errD_real = gan_criterion(output, targetv)
            errD_real.backward()
            D_x = output.data.mean()

            # train with fake
            noise = torch.FloatTensor(inputs.size(0), nz, 1,
                                      1).normal_(0, 1)
            if use_cuda:
                noise = noise.cuda()
            noise = Variable(noise)
            fake = netG(noise)
            targetv = Variable(gan_target.fill_(fake_label))
            output = netD(fake.detach())
            errD_fake = 1.0 * gan_criterion(output, targetv)
            errD_fake.backward()
            D_G_z1 = output.data.mean()
            errD = errD_real + errD_fake
            optimizerD.step()

            ###########################
            # (2) Update G network    #
            ###########################
            optimizerG.zero_grad()
            # Original GAN loss
            targetv = Variable(gan_target.fill_(real_label))
            output = netD(fake)
            errG = 1.0 * gan_criterion(output, targetv)
            D_G_z2 = output.data.mean()

            # minimize the true distribution
            KL_fake_output = F.log_softmax(net(fake)[:, :num_classes])
            errG_KL = F.kl_div(KL_fake_output, uniform_dist) * args.num_classes

            # targetv = Variable(gan_target.fill_(fake_label))
            # KL_fake_output = F.sigmoid(net(fake)[:,:num_classes])
            # errG_KL = criterion2(KL_fake_output, targetv)

            generator_loss = errG + 1.0 * errG_KL
            generator_loss.backward()
            optimizerG.step()

            # KL divergence
            noise = torch.FloatTensor(inputs.size(0), nz, 1,
                                      1).normal_(0, 1)
            if use_cuda:
                noise = noise.cuda()
            noise = Variable(noise)
            fake = netG(noise)
            # KL_fake_output = F.sigmoid(net(fake)[:,:num_classes])
            # KL_loss_fake = 1.0 * criterion2(KL_fake_output, )*args.num_classes
            KL_fake_output = F.log_softmax(net(fake)[:, :num_classes])
            KL_loss_fake = 1.0 * F.kl_div(KL_fake_output,
                                          uniform_dist) * args.num_classes

            background_node = outputs[:, -1]

            fake_node_bce_loss = args.fake_node_bce_beta * criterion2(
                F.sigmoid(background_node), targetv)

            # Forward Propagation

        num_sampling = num_classes
        if args.loss == 'ce' and args.sampling_rate != 1.:
            num_sampling = int(num_classes * args.sampling_rate)
            full_output = outputs
            outputs, targets = sampling_for_loss(outputs,
                                                 targets,
                                                 num_sampling,
                                                 num_classes=num_classes,
                                                 sharing=False)

        loss = criterion(outputs[:, :num_sampling], targets)  # Loss
        ce_loss = torch.zeros((1)).cuda()
        unknown_node_loss = torch.zeros((1)).cuda()
        temp1_accum = loss.detach().cpu() * (
            1. / (batch_idx + 1.)) + temp1_accum * (batch_idx /
                                                    (batch_idx + 1.))

        if args.loss == 'bce':
            loss = torch.zeros(1).cuda()
        if 'bce' in args.loss:
            # if num_classes > 30 :
            #     num_sampling = int(num_classes * args.sampling_rate)
            #     outputs, targets = sampling_for_loss(outputs, targets, num_sampling)
            #     new_bce_targets = target_transform_for_elementwise_bce(targets, num_sampling)
            #     temp2 = args.bce_scale * criterion2(F.sigmoid(outputs[:,:num_sampling]), new_bce_targets).cuda()
            # else:
            num_sampling = int(num_classes * args.sampling_rate)
            if args.sampling_rate != 1.:
                full_output = outputs
                outputs, targets = sampling_for_loss(outputs,
                                                     targets,
                                                     num_sampling,
                                                     num_classes=num_classes)
                new_bce_targets = target_transform_for_elementwise_bce(
                    targets, num_sampling).cuda()
                temp2 = args.bce_scale * criterion2(
                    F.sigmoid(outputs[:, :num_sampling]), new_bce_targets)
            else:
                if args.gan:
                    # if False:
                    bce_targets = target_transform_for_elementwise_bce(
                        targets, num_classes + 1).cuda()
                    temp2 = args.bce_scale * criterion2(
                        F.sigmoid(outputs), bce_targets).cuda()
                else:
                    bce_targets = target_transform_for_elementwise_bce(
                        targets, num_classes).cuda()
                    temp2 = args.bce_scale * criterion2(
                        F.sigmoid(outputs[:, :num_classes]),
                        bce_targets).cuda()

            temp2_accum = temp2.detach().cpu() * (
                1. / (batch_idx + 1.)) + temp2_accum * (batch_idx /
                                                        (batch_idx + 1.))

            if args.sigmoid_sum is not None:

                # sigmoid_sum = torch.sum(F.sigmoid(full_output[:,:num_classes]), dim=1)
                sigmoid_sum = torch.sum(F.sigmoid(
                    full_output[:, :num_sampling]),
                                        dim=1)
                sigmoid_sum_loss = F.mse_loss(
                    sigmoid_sum,
                    args.sigmoid_sum * torch.ones_like(sigmoid_sum))
                loss += 0.5 * sigmoid_sum_loss

            loss += temp2

        if args.gan:
            loss += (KL_loss_fake + fake_node_bce_loss)

        entropy_loss = args.ent * entropy(outputs)

        if args.sharing is not None:
            classification_target = Variable(
                torch.zeros(targets.size(0)).long()).cuda()
            output_target_select = outputs[:, :num_sampling].gather(
                dim=1, index=targets.unsqueeze(1))
            output_target_sharing_concat = torch.cat(
                (output_target_select.view(
                    -1, 1), outputs[:, num_sampling].view(-1, 1)), 1)

            ce_loss = args.sharing * F.cross_entropy(
                F.softmax(output_target_sharing_concat), classification_target)

            mask = torch.ones(outputs[:, :num_sampling].size()).byte().cuda()
            for i in range(mask.size(0)):
                mask[i, targets[i]] = torch.zeros(1)

            output_for_entropy_except_target_node = torch.masked_select(
                outputs[:, :num_sampling], mask)
            output_for_entropy_except_target_node = output_for_entropy_except_target_node.view(
                outputs[:, :num_sampling].size(0), -1)
            output_for_entropy_except_target_node = \
                torch.cat((output_for_entropy_except_target_node.view(targets.size(0),-1,1), outputs[:,num_sampling].view(-1,1,1).expand(targets.size(0),num_sampling-1,1)),2)
            # entropy_loss = args.ent * entropy(output_for_entropy_except_target_node)
            entropy_loss = args.ent * sharing_entropy(
                output_for_entropy_except_target_node)
            loss += ce_loss + entropy_loss

        # if args.ent != 0 and args.sharing is None:
        #     loss += entropy_loss

        train_loss += loss.item()
        max_logit, predicted = torch.max(outputs[:, :num_sampling].data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        if args.unknown_is_True != False:
            # weight = 0.01 * epoch
            weight = 0.1

            select1 = (F.sigmoid(max_logit) < 0.5)
            select2 = (F.sigmoid(outputs[:, targets]) < 0.1)
            select = (select1 + select2) >= 1
            if args.sepa_unknown_sharing:
                output_gather = F.sigmoid(outputs[:, num_classes +
                                                  1].masked_select(select))
            else:
                output_gather = F.sigmoid(
                    outputs[:, num_classes].masked_select(select))
            node_target = torch.ones(output_gather.size()).cuda()
            if output_gather.size(0) > 0:

                # if weight >= 0.1 :
                #     weight = 0.1
                sharing_node_loss = weight * criterion2(
                    output_gather, node_target)
                loss += sharing_node_loss

                if args.sepa_unknown_sharing:
                    select_unknown = (select1 + select2) < 1
                    unknown_output_gather = F.sigmoid(
                        outputs[:,
                                num_classes + 1].masked_select(select_unknown))
                    if unknown_output_gather.size(0) > 0:
                        unknown_node_target = torch.zeros(
                            unknown_output_gather.size()).cuda()
                        unknown_node_loss = weight * criterion2(
                            unknown_output_gather, unknown_node_target)
                        loss += unknown_node_loss
            else:
                pass
            # for i in range(outputs.size(0)):

        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        sys.stdout.write('\r')

        if args.gan:
            sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tD_x:%.2f D_G_z1:%.2f D_G_z2:%.2f BCE : %.4f Ent_los : %.4f Sha_los : %.4f node_loss : %.4f kl_fake : %.4f Acc@1: %.3f%%'
                        %(epoch, num_epochs, batch_idx+1,(len(trainset)//batch_size)+1, \
                            D_x, D_G_z1, D_G_z2,
                            temp2_accum, entropy_loss, ce_loss, fake_node_bce_loss.item(), KL_loss_fake.item(), float(100.00*float(correct)/float(total))))

        # if not args.sepa_unknown_sharing :
        #     if args.sigmoid_sum is not None :
        #         sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f CE_loss : %.4f BCE_loss : %.4f Ent_loss : %.4f Sha_loss : %.4f node_loss : %.4f sum_loss : %.4f Acc@1: %.3f%%'
        #                 %(epoch, num_epochs, batch_idx+1,
        #                     (len(trainset)//batch_size)+1, loss.item(), temp1_accum, temp2_accum, entropy_loss, ce_loss, sharing_node_loss, sigmoid_sum_loss, float(100.00*float(correct)/float(total))))
        #     else :
        #         sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f CE_loss : %.4f BCE_loss : %.4f Ent_loss : %.4f Sha_loss : %.4f node_loss : %.4f Acc@1: %.3f%%'
        #                 %(epoch, num_epochs, batch_idx+1,
        #                     (len(trainset)//batch_size)+1, loss.item(), temp1_accum, temp2_accum, entropy_loss, ce_loss, sharing_node_loss, float(100.00*float(correct)/float(total))))
        # else :
        #     sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f CE_loss : %.4f BCE_loss : %.4f Ent_loss : %.4f \
        #         Sha_loss : %.4f node_loss : %.4f unknown_loss : %.4f Acc@1: %.3f%%'
        #         %(epoch, num_epochs, batch_idx+1,(len(trainset)//batch_size)+1,
        #             loss.item(), temp1_accum, temp2_accum, entropy_loss, ce_loss, \
        #                 sharing_node_loss, unknown_node_loss,float(100.00*float(correct)/float(total))))

        sys.stdout.flush()
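
The regularization terms above call entropy(...) and sharing_entropy(...), which are not defined in the snippet. A minimal sketch of a softmax-entropy penalty of the kind those terms appear to assume (the implementation is a guess; sharing_entropy presumably applies the same idea to the concatenated sharing-node logits):

import torch.nn.functional as F

def entropy(logits):
    # Mean entropy of the softmax distribution over the last dimension.
    p = F.softmax(logits, dim=-1)
    log_p = F.log_softmax(logits, dim=-1)
    return -(p * log_p).sum(dim=-1).mean()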
Example #24
def train(epoch):
    compare_grad_vs_approx = False
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    optimizer = optim.SGD(net.parameters(), lr=cf.learning_rate(args.lr, epoch), momentum=0.9, weight_decay=5e-4)
    if compare_grad_vs_approx:
        num_approx_layers = len(get_approx_layers(net))
        avg_mean = torch.zeros(num_approx_layers, len(trainloader))
        avg_mse = torch.zeros(num_approx_layers, len(trainloader))
        avg_std = torch.zeros(num_approx_layers, len(trainloader))
        max_diff = torch.zeros(num_approx_layers, len(trainloader))

    print('\n=> Training Epoch #%d, LR=%.4f' %(epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        #with torch.autograd.profiler.profile(use_cuda=True) as prof:
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda() # GPU settings
        
        if compare_grad_vs_approx:
            # get gradients with non-approximate calculations:
            acc_grads = []
            for layer in get_approx_layers(net):
                layer.eval() 
            optimizer.zero_grad()
            inputs, targets = Variable(inputs), Variable(targets)
            outputs = net(inputs)               # Forward Propagation
            loss = criterion(outputs, targets)  # Loss
            loss.backward()  # Backward Propagation
            for layer in get_approx_layers(net):
                for n,p in layer.named_parameters():
                    if ('weight' in n):
                        acc_grads.append(p.grad.clone())
            
            approx_grads = []
            net.train()
            optimizer.zero_grad()
            inputs, targets = Variable(inputs), Variable(targets)
            outputs = net(inputs)               # Forward Propagation
            loss = criterion(outputs, targets)  # Loss
            loss.backward()  # Backward Propagation
            for layer in get_approx_layers(net):
                for n,p in layer.named_parameters():
                    if ('weight' in n):
                        approx_grads.append(p.grad.clone())
            
            optimizer.step() # Optimizer update
            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()

            sys.stdout.write('\n')
            sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.2f%%'
                    %(epoch, num_epochs, batch_idx+1,
                        (len(trainset)//batch_size)+1, loss.item(), 100.*correct.float()/total))
            
            #print('approx_grads:')
            #print (approx_grads)
            #print ('acc_grads') 
            #print (acc_grads) 
            #print('mean {}'.format(torch.mean(avg_mean,dim=1)))
            #print('relative MSE {}'.format(torch.mean(avg_mse,dim=1)))
            #print('std {}'.format(torch.mean(avg_std,dim=1)))
            for i, (approx_grad,acc_grad) in enumerate(zip(approx_grads,acc_grads)):
                #print('index {}'.format(i))
                #print('mean {}'.format((approx_grad-acc_grad).flatten().mean()))
                avg_mean[i,batch_idx] = (approx_grad-acc_grad).flatten().mean()
                #print('relative MSE {}'.format((approx_grad-acc_grad).norm()/acc_grad.norm()))
                avg_mse[i,batch_idx] = (approx_grad-acc_grad).norm()/acc_grad.norm()
                #print('std {}'.format((approx_grad-acc_grad).flatten().std()))
                avg_std[i,batch_idx] = (approx_grad-acc_grad).flatten().std()
                #print('max diff {}'.format((approx_grad-acc_grad).norm(p=float('inf'))))
                max_diff[i,batch_idx] = (approx_grad-acc_grad).norm(p=float('inf'))
            sys.stdout.flush()
        
        else:
            optimizer.zero_grad()
            inputs, targets = Variable(inputs), Variable(targets)
            outputs = net(inputs)               # Forward Propagation
            loss = criterion(outputs, targets)  # Loss
            loss.backward()  # Backward Propagation
            optimizer.step() # Optimizer update

            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()

            sys.stdout.write('\n')
            sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.2f%%'
                    %(epoch, num_epochs, batch_idx+1,
                        (len(trainset)//batch_size)+1, loss.item(), 100.*correct.float()/total))
            sys.stdout.flush()
            #print(batch_idx)
            #print(prof.key_averages())
            #exit()
            #print(net.module.layer1[1].conv1.weight[0][0])
    
    if compare_grad_vs_approx:
        torch.set_printoptions(linewidth=100000)
        print()
        print('mean {}'.format(torch.mean(avg_mean,dim=1)))
        print('relative MSE {}'.format(torch.mean(avg_mse,dim=1)))
        print('std {}'.format(torch.mean(avg_std,dim=1)))
        print('max diff {}'.format(torch.norm(max_diff,p=float('inf'),dim=1)))
        print('avg max diff {}'.format(torch.mean(max_diff,dim=1)))
        torch.set_printoptions(profile="default")
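
get_approx_layers(net) above is assumed to return the layers that perform approximate computation; it is not shown in the snippet. A plausible sketch, assuming those layers are instances of a dedicated module class (ApproxConv2d is a hypothetical name):

def get_approx_layers(model):
    # Hypothetical helper: collect every module of the approximate-layer
    # class so its gradients can be compared against the exact ones.
    return [m for m in model.modules() if isinstance(m, ApproxConv2d)]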
Example #25
    print('| Building net type [' + args.net + ']...')
    if args.net == 'vgg16':
        net = VGGNet(num_classes, args.drop_p, False, args.feat_dim, args.conv == 5)
    else:
        print('Error : Network should be either [ResNet34]')
        sys.exit(0)

    net.init_weights()
    net.to(device)

    # Training
    print('\n[Phase 3] : Training model')
    print('| Training Epochs = ' + str(args.num_epochs))
    print('| Initial Learning Rate = ' + str(args.lr))

    optimizer = optim.SGD(net.parameters(), lr=cf.learning_rate(args.lr, 1), momentum=0.9, weight_decay=args.wd)

    elapsed_time = 0
    for epoch in range(1, args.num_epochs + 1):
        start_time = time.time()
        set_learning_rate(optimizer, cf.learning_rate(args.lr, epoch))
        train(net, trainloader, optimizer, epoch)
        test(net, testloader, epoch)

        epoch_time = time.time() - start_time
        elapsed_time += epoch_time
        print('| Elapsed time : %d:%02d:%02d' %(cf.get_hms(elapsed_time)))
        log_file.write('| Elapsed time : %d:%02d:%02d\n' %(cf.get_hms(elapsed_time)))
        log_file.flush()

    print('\n[Phase 4] : Testing model')
Example #26
    def train(self):
        elapsed_time = 0

        for self.curr_epoch in range(1, self.num_epochs + 1):
            self.model.train()
            self.model.training = True
            self.optimizer = optim.SGD(self.model.parameters(),
                                       lr=cf.learning_rate(
                                           self.learning_rate,
                                           self.curr_epoch),
                                       momentum=0.9,
                                       weight_decay=5e-4)
            train_loss = 0
            train_correct = 0
            total = 0
            time_start = time.time()

            print('\n=> Training Epoch #%d, LR=%.4f' %
                  (self.curr_epoch,
                   cf.learning_rate(self.learning_rate, self.curr_epoch)))
            for self.curr_batch, (x, y) in enumerate(self.train_loader):
                x, y = x.to(self.device), y.to(self.device)
                # perturb data during noisy training
                if self.training_type == 'noisy':
                    x = self.adversary.perturb(x, self.device, self.variance)
                x, y = Variable(x), Variable(y)
                self.optimizer.zero_grad()
                outputs = self.model(x)
                total += y.size(0)
                loss = self.criterion(outputs, y)
                train_loss += loss
                _, pred = torch.max(outputs.data, 1)
                train_correct += pred.eq(y.data).cpu().sum()
                loss.backward()
                self.optimizer.step()

                # add training on adversarial perturbation during adv training
                if self.training_type == 'adversarial':
                    delta = self.adversary.get_adversarial_examples(
                        self.model, x, y).to(self.device)
                    x, y = x.to(self.device), y.to(self.device)
                    x, y = Variable(x), Variable(y)
                    outcome = self.model(x + delta)

                    _, pred = torch.max(outcome.data, 1)
                    train_correct += pred.eq(y.data).cpu().sum()
                    total += y.size(0)
                    loss = self.criterion(outcome, y)
                    train_loss += loss
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()

                sys.stdout.write('\r')
                sys.stdout.write(
                    '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%'
                    % (self.curr_epoch, self.num_epochs, self.curr_batch,
                       (len(self.train_dataset) // self.train_batch_size) + 1,
                       train_loss.item(), 100. * train_correct / total))
                sys.stdout.flush()

            train_acc = 100. * train_correct / total

            with torch.no_grad():
                # testing
                self.model.eval()
                self.training = False
                test_loss = 0.
                test_correct = 0
                total = 0
                for self.curr_batch, (x, y) in enumerate(self.test_loader):
                    x_var, y_var = Variable(x), Variable(y)
                    x_var, y_var = x_var.to(self.device), y_var.to(self.device)
                    outcome = self.model(x_var)
                    loss = self.criterion(outcome, y_var)
                    test_loss += loss
                    _, pred = torch.max(outcome.data, 1)
                    test_correct += pred.eq(y_var.data).cpu().sum()
                    total += y_var.size(0)

                test_acc = 100. * test_correct / total
                print(
                    "\n| Validation Epoch #%d\t\t\tLoss: %.4f Acc@1: %.2f%%" %
                    (self.curr_epoch, test_loss.item(), test_acc))

            time_epoch = time.time() - time_start
            elapsed_time += time_epoch
            print('| Elapsed time : %d:%02d:%02d' % (cf.get_hms(elapsed_time)))
            self.write_tb(train_loss.item(), train_acc, test_loss.item(),
                          test_acc)
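
The adversary object used above (perturb for noisy training, get_adversarial_examples for adversarial training) is not part of the snippet. A minimal FGSM-style sketch of what such an object could look like; the class name, epsilon value, and loss choice are assumptions:

import torch
import torch.nn as nn

class SimpleAdversary:
    def __init__(self, epsilon=8.0 / 255):
        self.epsilon = epsilon

    def perturb(self, x, device, variance):
        # Additive Gaussian noise with the requested variance.
        return x + torch.randn_like(x) * variance ** 0.5

    def get_adversarial_examples(self, model, x, y):
        # One FGSM step: a perturbation in the sign of the loss gradient.
        x_adv = x.clone().detach().requires_grad_(True)
        loss = nn.CrossEntropyLoss()(model(x_adv), y)
        grad = torch.autograd.grad(loss, x_adv)[0]
        return self.epsilon * grad.sign()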
Example #27
def main():
    # enable eager execution
    global optimizer
    optimizer = tf.keras.optimizers.SGD(config.learning_rate())
    print('Loading and preparing data', end='')
    (images, labels), (t_images,
                       t_labels) = tf.keras.datasets.mnist.load_data()
    images = np.asarray(images[:config.train_n()]).astype(
        np.float32) * 2.0 / 255.0 - 1.0
    t_images = np.asarray(t_images[:config.test_n()]).astype(
        np.float32) * 2.0 / 255.0 - 1.0
    labels = np.asarray(labels[:config.train_n()])
    t_labels = np.asarray(t_labels[:config.test_n()])
    # images = np.concatenate((images, np.zeros(list(np.shape(images)[: -1]) + [2], np.float32)), axis = -1)
    # t_images = np.concatenate((t_images, np.zeros(list(np.shape(t_images)[: -1]) + [2], np.float32)), axis = -1)
    print('\rGenerating random walks for creating time-series data', end='')
    print(
        '\rData Loaded' +
        '                                                                                    '
    )
    model = network.network(config.window_size()**2 + 2, config.lstm_layers(),
                            config.fc_layers())
    positions = []
    moves = []
    for _ in range(config.no_walks()):
        temp = get_random_walk(28,
                               28,
                               max_length=config.time_steps(),
                               window_size=config.window_size())
        positions.append(temp[0])
        moves.append(temp[1])
    moves = np.asarray(moves)
    positions = np.asarray(positions)
    for epoch in range(config.epochs()):
        test_results = {'loss': 0, 'success': 0}
        train_results = {'loss': 0, 'success': 0}
        # train
        for L in tqdm(range(0,
                            len(images) - len(images) % config.batch_size(),
                            config.batch_size()),
                      desc='Epoch {}: Training'.format(epoch)):
            mini_batch_images = images[L:L + config.batch_size()]
            mini_batch_labels = labels[L:L + config.batch_size()]
            random_indices = np.random.randint(0, config.no_walks(),
                                               [config.batch_size()])
            mini_batch_moves = moves[random_indices]
            mini_batch_positions = positions[random_indices]
            train(mini_batch_images, mini_batch_moves, mini_batch_positions,
                  mini_batch_labels, model)
        # test over training set
        sys.stdout.write("\033[F")
        sys.stdout.write("\033[K")
        sys.stdout.flush()
        for L in tqdm(range(0,
                            len(images) - len(images) % config.batch_size(),
                            config.batch_size()),
                      desc='Epoch {}: Testing train_set'.format(epoch)):
            mini_batch_images = images[L:L + config.batch_size()]
            mini_batch_labels = labels[L:L + config.batch_size()]
            random_indices = np.random.randint(0, config.train_random_walks(),
                                               [config.batch_size()])
            mini_batch_moves = moves[random_indices]
            mini_batch_positions = positions[random_indices]
            # train(mini_batch_images, mini_batch_moves, mini_batch_positions, mini_batch_labels, model)
            training_set_results = forward_prop(mini_batch_images,
                                                mini_batch_moves,
                                                mini_batch_positions,
                                                mini_batch_labels, model)
            for key in train_results:
                train_results[key] += training_set_results[key]
        train_results['success'] = (train_results['success'] * 100.0 /
                                    len(images)).numpy().round(2)
        train_results['loss'] = (
            train_results['loss'] /
            (len(images) // config.batch_size())).numpy().round(6)
        # test over test set
        sys.stdout.write("\033[F")
        sys.stdout.write("\033[K")
        sys.stdout.flush()
        for L in tqdm(range(
                0,
                len(t_images) - len(t_images) % config.batch_size(),
                config.batch_size()),
                      desc='Epoch {}: Testing test_set'.format(epoch)):
            mini_batch_images = t_images[L:L + config.batch_size()]
            mini_batch_labels = t_labels[L:L + config.batch_size()]
            random_indices = np.random.randint(config.train_random_walks(),
                                               config.no_walks(),
                                               [config.batch_size()])
            mini_batch_moves = moves[random_indices]
            mini_batch_positions = positions[random_indices]
            # train(mini_batch_images, mini_batch_moves, mini_batch_positions, mini_batch_labels, model)
            test_set_results = forward_prop(mini_batch_images,
                                            mini_batch_moves,
                                            mini_batch_positions,
                                            mini_batch_labels, model)
            for key in test_results:
                test_results[key] += test_set_results[key]
        test_results['success'] = (test_results['success'] * 100.0 /
                                   len(t_images)).numpy().round(2)
        test_results['loss'] = (
            test_results['loss'] /
            (len(t_images) // config.batch_size())).numpy().round(6)
        sys.stdout.write("\033[F")
        sys.stdout.write("\033[K")
        print('Epoch {:5d}: '.format(epoch), train_results, test_results)
        save_weights(model.trainable_variables, epoch, test_results['success'],
                     './weights')
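
train(...) and forward_prop(...) above are not defined in the snippet. A minimal sketch of what the training step could look like with the global optimizer and tf.GradientTape; the model call signature and the loss are assumptions:

def train(mini_batch_images, mini_batch_moves, mini_batch_positions,
          mini_batch_labels, model):
    # One SGD step on a mini-batch of glimpse sequences (a sketch only).
    with tf.GradientTape() as tape:
        logits = model(mini_batch_images, mini_batch_moves,
                       mini_batch_positions)
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tf.cast(mini_batch_labels, tf.int32), logits=logits))
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss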
Example #28
def train(epoch):
    global quan_cor
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=5e-4)
    if epoch > 100:
        optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] * 0.1
    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs_value, targets) in enumerate(trainloader):
        if use_cuda:
            inputs_value, targets = inputs_value.cuda(), targets.cuda(
            )  # GPU settings
        optimizer.zero_grad()
        inputs_value, targets = Variable(inputs_value), Variable(targets)
        outputs = net.forward(inputs_value)  # Forward Propagation
        loss = criterion(outputs, targets)  # Loss
        loss.backward()  # Backward Propagation
        '''lenet
        #layer1
        flag = net.index1
        grad_value=torch.zeros((M,)).cuda()
        for m in range(M):
            inx=torch.zeros(flag.size()).cuda()
            inx = torch.where((flag !=(m+1)),inx, torch.Tensor([1]).cuda())
            grad_weight_ste = (net.conv1.weight.grad*inx).data.clamp_(-1,1)
            grad_weight_ste = torch.where((abs(grad_weight_ste)!=1), grad_weight_ste, torch.Tensor([0]).cuda()) 
            grad_value[m]= torch.sum(grad_weight_ste)
        net.levels1.grad=grad_value.cuda()
        #layer2
        flag = net.index2
        grad_value=torch.zeros((M,)).cuda()
        for m in range(M):
            inx=torch.zeros(flag.size()).cuda()
            inx = torch.where((flag !=(m+1)),inx, torch.Tensor([1]).cuda())
            grad_weight_ste = (net.conv2.weight.grad*inx).data.clamp_(-1,1)
            grad_weight_ste = torch.where((abs(grad_weight_ste)!=1), grad_weight_ste, torch.Tensor([0]).cuda()) 
            grad_value[m]= torch.sum(grad_weight_ste)
        net.levels2.grad=grad_value.cuda()
        #layer 3
        flag = net.index3
        grad_value=torch.zeros((M,)).cuda()
        for m in range(M):
            inx=torch.zeros(flag.size()).cuda()
            inx = torch.where((flag !=(m+1)),inx, torch.Tensor([1]).cuda())
            grad_weight_ste = (net.fc1.weight.grad*inx).data.clamp_(-1,1)
            grad_weight_ste = torch.where((abs(grad_weight_ste)!=1), grad_weight_ste, torch.Tensor([0]).cuda()) 
            grad_value[m]= torch.sum(grad_weight_ste)
        net.levels3.grad=grad_value.cuda()
        #layer 4
        flag = net.index4
        grad_value=torch.zeros((M,)).cuda()
        for m in range(M):
            inx=torch.zeros(flag.size()).cuda()
            inx = torch.where((flag !=(m+1)),inx, torch.Tensor([1]).cuda())
            grad_weight_ste = (net.fc2.weight.grad*inx).data.clamp_(-1,1)
            grad_weight_ste = torch.where((abs(grad_weight_ste)!=1), grad_weight_ste, torch.Tensor([0]).cuda()) 
            grad_value[m]= torch.sum(grad_weight_ste)
        net.levels4.grad=grad_value.cuda()
        # layer5
        flag = net.index5
        grad_value=torch.zeros((M,)).cuda()
        for m in range(M):
            inx=torch.zeros(flag.size()).cuda()
            inx = torch.where((flag !=(m+1)),inx, torch.Tensor([1]).cuda())
            
            grad_weight_ste = (net.fc3.weight.grad*inx).data.clamp_(-1,1)
        #     print(grad_weight_ste[0,:])
            grad_weight_ste = torch.where((abs(grad_weight_ste)!=1), grad_weight_ste, torch.Tensor([0]).cuda()) 
            grad_value[m]= torch.sum(grad_weight_ste)
        net.levels5.grad=grad_value.cuda()
        '''
        # For each quantized fully connected layer, accumulate the clipped
        # straight-through gradients of the weights assigned to each level and
        # use the per-level sums as the gradient of that layer's level tensor.
        for i in (1, 2, 3):
            flag = getattr(net, 'index%d' % i)
            weight_grad = getattr(net, 'fc%d' % i).weight.grad
            grad_value = torch.zeros((M, )).cuda()
            for m in range(M):
                inx = torch.zeros(flag.size()).cuda()
                inx = torch.where((flag != (m + 1)), inx,
                                  torch.Tensor([1]).cuda())
                grad_weight_ste = (weight_grad * inx).data.clamp_(-1, 1)
                grad_weight_ste = torch.where((abs(grad_weight_ste) != 1),
                                              grad_weight_ste,
                                              torch.Tensor([0]).cuda())
                grad_value[m] = torch.sum(grad_weight_ste)
            getattr(net, 'levels%d' % i).grad = grad_value.cuda()
        #         for p in list(net.parameters()):
        #             if hasattr(p,'org'):
        #                 p.data.copy_(p.org)
        #         net.fc1.weight.requires_grad = False
        #         net.fc2.weight.requires_grad = False
        #         net.fc3.weight.requires_grad = False
        optimizer.step()  # Optimizer update

        sort, _ = torch.sort(net.levels1.data)
        net.levels1.data = sort.cuda()
        lev = torch.sum(sort) * 0.5
        net.partitions1.data[0] = lev

        sort, _ = torch.sort(net.levels2.data)
        net.levels2.data = sort.cuda()
        lev = torch.sum(sort) * 0.5
        net.partitions2.data[0] = lev

        sort, _ = torch.sort(net.levels3.data)
        net.levels3.data = sort.cuda()
        lev = torch.sum(sort) * 0.5
        net.partitions3.data[0] = lev

        #         sort, _ = torch.sort(net.levels4.data)
        #         net.levels4.data = sort.cuda()
        #         sort, _ = torch.sort(net.levels5.data)
        #         net.levels5.data = sort.cuda()

        train_loss += loss.data
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%' %
            (epoch, num_epochs, batch_idx + 1,
             (len(trainset) // batch_size) + 1, loss.data,
             100. * correct / total))
        sys.stdout.flush()
    acc = correct
    diagnostics_to_write = {
        'Epoch': epoch,
        'Loss': loss.data,
        'Accuracy': 100 * correct / total
    }
    with open(logfile, 'a') as lf:
        lf.write(str(diagnostics_to_write))
    if correct >= quan_cor:
        quan_cor = correct
        print('Top1:', quan_cor, '%')
        np.savez('nonbayes_param' + str(M) + '.npz',
                 par0=net.partitions1.data.cpu(),
                 lev0=net.levels1.data.cpu(),
                 par1=net.partitions2.data.cpu(),
                 lev1=net.levels2.data.cpu(),
                 par2=net.partitions3.data.cpu(),
                 lev2=net.levels3.data.cpu(),
                 par3=net.partitions4.data.cpu(),
                 lev3=net.levels4.data.cpu(),
                 par4=net.partitions5.data.cpu(),
                 lev4=net.levels5.data.cpu())
        state = {
            'net': net if use_cuda else net,  # both branches store the same object
            'correct': correct,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        save_point = './checkpoint/nonBayes_quan' + args.dataset + os.sep
        if not os.path.isdir(save_point):
            os.mkdir(save_point)
        torch.save(state, save_point + file_name + str(M) + '.t7')
    print('\n')
    print('layer1\t', net.levels1.data)
    print('layer2\t', net.levels2.data)
    print('layer3\t', net.levels3.data)
    print('layer4\t', net.levels4.data)
    print('layer5\t', net.levels5.data)
Example #29
def train(epoch):
    net.train()
    train_loss = 0
    entropy_loss = 0
    correct = 0
    total = 0
    temp1_accum = 0
    temp2_accum = 0
    sigmoid_sum_loss = 0
    sharing_node_loss = 0
    ce_loss = torch.zeros((1)).cuda()
    entropy_loss = torch.zeros((1)).cuda()
    num_classes = args.num_classes
    optimizer = optim.SGD(net.parameters(),
                          lr=cf.learning_rate(args.lr, epoch),
                          nesterov=True,
                          momentum=0.9,
                          weight_decay=5e-4)
    # optimizer = optim.Adam(net.parameters(), lr=cf.learning_rate(args.lr, epoch), betas=(0.5,0.999), weight_decay=5e-4)

    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)  # Forward Propagation

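        # With CE loss and a sampling rate below 1, the loss is computed on a
        # subset of the class logits chosen by sampling_for_loss (a helper
        # defined elsewhere in this source); the full logits are kept in
        # full_output for later use.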
        num_sampling = num_classes
        if args.loss == 'ce' and args.sampling_rate != 1.:
            num_sampling = int(num_classes * args.sampling_rate)
            full_output = outputs
            outputs, targets = sampling_for_loss(outputs,
                                                 targets,
                                                 num_sampling,
                                                 num_classes=num_classes,
                                                 sharing=False)

        loss = criterion_CE(outputs[:, :num_sampling], targets)  # Loss
        if args.loss == 'ce' and args.ce_entropy != 0.:
            entropy_loss = args.ce_entropy * entropy(outputs)
            loss += entropy_loss

        unknown_node_loss = torch.zeros((1)).cuda()
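        # temp1_accum keeps a running mean of the CE loss over batches:
        # new_mean = loss / (b + 1) + old_mean * b / (b + 1), with b = batch_idx.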
        temp1_accum = loss.detach().cpu() * (
            1. / (batch_idx + 1.)) + temp1_accum * (batch_idx /
                                                    (batch_idx + 1.))

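        # Element-wise BCE branch: integer targets are turned into one-hot
        # vectors by target_transform_for_elementwise_bce (defined elsewhere in
        # this source) and the BCE term is scaled by args.bce_scale. A minimal
        # sketch of the one-hot transform this branch appears to assume
        # (hypothetical; the original implementation may differ):
        #
        #     def target_transform_for_elementwise_bce(targets, num_classes):
        #         one_hot = torch.zeros(targets.size(0), num_classes,
        #                               device=targets.device)
        #         one_hot.scatter_(1, targets.view(-1, 1), 1.0)
        #         return one_hot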
        if args.loss == 'bce':
            loss = torch.zeros(1).cuda()
        if 'bce' in args.loss:
            # if num_classes > 30 :
            #     num_sampling = int(num_classes * args.sampling_rate)
            #     outputs, targets = sampling_for_loss(outputs, targets, num_sampling)
            #     new_bce_targets = target_transform_for_elementwise_bce(targets, num_sampling)
            #     temp2 = args.bce_scale * criterion_BCE(F.sigmoid(outputs[:,:num_sampling]), new_bce_targets).cuda()
            # else:
            num_sampling = int(num_classes * args.sampling_rate)
            if args.sampling_rate != 1.:
                full_output = outputs
                outputs, targets = sampling_for_loss(outputs,
                                                     targets,
                                                     num_sampling,
                                                     num_classes=num_classes)
                new_bce_targets = target_transform_for_elementwise_bce(
                    targets, num_sampling).cuda()
                temp2 = args.bce_scale * criterion_BCE(
                    F.sigmoid(outputs[:, :num_sampling]), new_bce_targets)
            else:
                bce_targets = target_transform_for_elementwise_bce(
                    targets, num_classes).cuda()
                temp2 = args.bce_scale * criterion_BCE(
                    F.sigmoid(outputs[:, :num_classes]), bce_targets).cuda()
            temp2_accum = temp2.detach().cpu() * (
                1. / (batch_idx + 1.)) + temp2_accum * (batch_idx /
                                                        (batch_idx + 1.))

            if args.sigmoid_sum is not None:

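                # Penalize the sum of per-class sigmoid outputs toward
                # args.sigmoid_sum with an MSE term, weighted by 0.5.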
                # sigmoid_sum = torch.sum(F.sigmoid(full_output[:,:num_classes]), dim=1)
                sigmoid_sum = torch.sum(F.sigmoid(
                    full_output[:, :num_sampling]),
                                        dim=1)
                sigmoid_sum_loss = F.mse_loss(
                    sigmoid_sum,
                    args.sigmoid_sum * torch.ones_like(sigmoid_sum))
                loss += 0.5 * sigmoid_sum_loss

            loss += temp2

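        # Sharing-node terms: the target-class logit is concatenated with the
        # extra sharing logit (index num_sampling) and a 2-way cross-entropy
        # pushes the target above the sharing node; sharing_entropy is then
        # applied to the remaining non-target logits, each paired with the
        # sharing node.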
        if args.sharing is not None:
            classification_target = Variable(
                torch.zeros(targets.size(0)).long()).cuda()
            output_target_select = outputs[:, :num_sampling].gather(
                dim=1, index=targets.unsqueeze(1))
            output_target_sharing_concat = torch.cat(
                (output_target_select.view(
                    -1, 1), outputs[:, num_sampling].view(-1, 1)), 1)

            ce_loss = args.sharing * F.cross_entropy(
                F.softmax(output_target_sharing_concat), classification_target)

            mask = torch.ones(outputs[:, :num_sampling].size()).byte().cuda()
            for i in range(mask.size(0)):
                mask[i, targets[i]] = 0

            output_for_entropy_except_target_node = torch.masked_select(
                outputs[:, :num_sampling], mask)
            output_for_entropy_except_target_node = output_for_entropy_except_target_node.view(
                outputs[:, :num_sampling].size(0), -1)
            output_for_entropy_except_target_node = \
                torch.cat((output_for_entropy_except_target_node.view(targets.size(0),-1,1), outputs[:,num_sampling].view(-1,1,1).expand(targets.size(0),num_sampling-1,1)),2)
            # entropy_loss = args.ent * entropy(output_for_entropy_except_target_node)
            entropy_loss = args.ent * sharing_entropy(
                output_for_entropy_except_target_node)
            loss += ce_loss + entropy_loss

        # if args.ent != 0 and args.sharing is None:
        #     loss += entropy_loss

        train_loss += loss.item()
        max_logit, predicted = torch.max(outputs[:, :num_sampling].data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

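        # Low-confidence handling: when the sigmoid of the predicted or the
        # target-class logit falls below 0.5, the sharing node (or, with
        # args.sepa_unknown_sharing, a separate unknown node at index
        # num_classes + 1) is pushed toward 1 with BCE; with the separate node,
        # it is also pushed toward 0 for the remaining confident samples. The
        # weight ramps with the epoch (0.01 * epoch) and is capped at 0.1.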
        if args.unknown_is_True:
            weight = 0.01 * epoch
            # weight = 0.1
            if weight >= 0.1:
                weight = 0.1

            select1 = (F.sigmoid(max_logit) < 0.5)
            # sigmoid of each sample's target-class logit
            select2 = (F.sigmoid(
                outputs.gather(1, targets.unsqueeze(1)).squeeze(1)) < 0.5)
            select = (select1 + select2) >= 1
            if args.sepa_unknown_sharing:
                output_gather = F.sigmoid(outputs[:, num_classes +
                                                  1].masked_select(select))
            else:
                output_gather = F.sigmoid(
                    outputs[:, num_classes].masked_select(select))
            node_target = torch.ones(output_gather.size()).cuda()
            if output_gather.size(0) > 0:

                # if weight >= 0.1 :
                #     weight = 0.1
                sharing_node_loss = weight * criterion_BCE(
                    output_gather, node_target)
                loss += sharing_node_loss

                if args.sepa_unknown_sharing:
                    select_unknown = (select1 + select2) < 1
                    unknown_output_gather = F.sigmoid(
                        outputs[:,
                                num_classes + 1].masked_select(select_unknown))
                    if unknown_output_gather.size(0) > 0:
                        unknown_node_target = torch.zeros(
                            unknown_output_gather.size()).cuda()
                        unknown_node_loss = weight * criterion_BCE(
                            unknown_output_gather, unknown_node_target)
                        loss += unknown_node_loss
            else:
                pass
            # for i in range(outputs.size(0)):

        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        sys.stdout.write('\r')
        if not args.sepa_unknown_sharing:
            if args.sigmoid_sum is not None:
                sys.stdout.write(
                    '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f CE_loss : %.4f BCE_loss : %.4f Ent_loss : %.4f Sha_loss : %.4f node_loss : %.4f sum_loss : %.4f Acc@1: %.3f%%'
                    % (epoch, num_epochs, batch_idx + 1,
                       (len(trainset) // batch_size) + 1, loss.item(),
                       temp1_accum, temp2_accum, entropy_loss, ce_loss,
                       sharing_node_loss, sigmoid_sum_loss,
                       float(100.00 * float(correct) / float(total))))
            else:
                sys.stdout.write(
                    '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f CE_loss : %.4f BCE_loss : %.4f Ent_loss : %.4f Sha_loss : %.4f node_loss : %.4f Acc@1: %.3f%%'
                    % (epoch, num_epochs, batch_idx + 1,
                       (len(trainset) // batch_size) + 1, loss.item(),
                       temp1_accum, temp2_accum, entropy_loss, ce_loss,
                       sharing_node_loss,
                       float(100.00 * float(correct) / float(total))))
        else:
            sys.stdout.write(
                '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f CE_loss : %.4f BCE_loss : %.4f Ent_loss : %.4f Sha_loss : %.4f node_loss : %.4f unknown_loss : %.4f Acc@1: %.3f%%'
                % (epoch, num_epochs, batch_idx + 1,
                   (len(trainset) // batch_size) + 1, loss.item(),
                   temp1_accum, temp2_accum, entropy_loss, ce_loss,
                   sharing_node_loss, unknown_node_loss,
                   float(100.00 * float(correct) / float(total))))

        sys.stdout.flush()