Example #1
def train(epoch):
    global acc, acc_train
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    if epoch > lr_dec_start and lr_dec_start >= 0:
        frac = (epoch - lr_dec_start) // lr_dec_every
        dec_frac = lr_dec_rate ** frac #0.9 ** e/5
        curr_lr = lr * dec_frac         #0.9 ** e/5
        utils.set_lr(optimizer, curr_lr)
    else:
        curr_lr = lr
    # print(1)
    for batch_idx, (inputs, targets) in enumerate(tr_loader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        train_loss += float(loss.item())
        _, pred = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += pred.eq(targets.data).cpu().sum()
        # print(2)
        if epoch == total_epoch - 1:
            class_mat[0] = calcl_mat(pred.cpu().numpy(), targets.data.cpu().numpy(), class_mat[0])
    print("training... batch:{}   loss={}    acc={}%({}/{})".format(batch_idx, train_loss/(batch_idx+1), 100.*correct/total,correct,total))
    acc_train = 100.*correct / total
    hists[0].append(train_loss/(batch_idx+1))
    hists[1].append(acc_train)
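Every example on this page adjusts the optimizer's learning rate through a utils.set_lr helper whose definition is not included. A minimal sketch of what such a helper typically looks like (an assumption about utils, not its actual code):

def set_lr(optimizer, lr):
    # Overwrite the learning rate of every parameter group in-place.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
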
Example #2
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate ** frac
        current_lr = opt.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        
        if opt.mixup:
            inputs, targets_a, targets_b, lam = utils.mixup_data(inputs, targets, 0.6, True)
            inputs, targets_a, targets_b = map(Variable, (inputs, targets_a, targets_b))
        else:
            inputs, targets = Variable(inputs), Variable(targets)
        
        outputs = net(inputs)
        
        if opt.mixup:
            loss = utils.mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        else:
            loss = criterion(outputs, targets)
        
        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        train_loss += loss.item()
        
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        
        if opt.mixup:
            correct += (lam * predicted.eq(targets_a.data).cpu().sum().float()
                    + (1 - lam) * predicted.eq(targets_b.data).cpu().sum().float())
        else:
            correct += predicted.eq(targets.data).cpu().sum()
       
        utils.progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
            % (train_loss/(batch_idx+1), 100.*float(correct)/float(total), correct, total))

    Train_acc = 100.*float(correct)/float(total)
    
    return train_loss/(batch_idx+1), Train_acc
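Example #2 relies on utils.mixup_data and utils.mixup_criterion, which are not shown. A sketch following the standard mixup recipe (a convex combination of the batch with a shuffled copy of itself), assuming the same signatures as the calls above:

import numpy as np
import torch

def mixup_data(x, y, alpha=0.6, use_cuda=True):
    # Sample the mixing coefficient from Beta(alpha, alpha) and mix the
    # batch with a randomly permuted copy of itself.
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0))
    if use_cuda:
        index = index.cuda()
    mixed_x = lam * x + (1 - lam) * x[index]
    return mixed_x, y, y[index], lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    # The loss is the same convex combination applied to both label sets.
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)
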
Example #3
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate ** frac
        current_lr = opt.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        bs, c, h, w = np.shape(inputs)
        inputs = torch.Tensor(inputs)
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        conTensor1 = net1.features(inputs).view(bs, -1)
        conTensor2 = layers(inputs, net2).view(bs, -1)
        if use_cuda:
            conTensor1, conTensor2 = conTensor1.cuda(), conTensor2.cuda()
        resTensor = torch.cat((conTensor1, conTensor2), 1)
        #print(resTensor.shape)

        resTensor = Variable(resTensor)
        outputs = netClassifier(resTensor)
        #outputs_avg = outputs.view(bs, ncrops, -1).mean(1)
        #print(outputs.shape,"train output")
        loss = criterion(outputs, targets)
        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        train_loss += loss.data.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        '''
        utils.progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
            % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
        '''

    Train_acc = float(100. * correct) / float(total)
    #if ((epoch + 1) % 10 == 0):
    Ta = float(Train_acc)
    temp_train_acc.append(Ta)
    temp_train_loss.append(train_loss)
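The snippets likewise call utils.clip_gradient(optimizer, 0.1) right before optimizer.step(); that helper is also not shown. A plausible minimal version (an assumption) clamps each gradient element to a fixed range:

def clip_gradient(optimizer, grad_clip):
    # Clamp every gradient element to [-grad_clip, grad_clip].
    for group in optimizer.param_groups:
        for param in group['params']:
            if param.grad is not None:
                param.grad.data.clamp_(-grad_clip, grad_clip)
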
Example #4
def train(epoch):
    print('\nEpoch: %d' % epoch)
    print("Start Training!")
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    # learning rate decay
    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        current_lr = opt.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr
    print('learning_rate: %s' % str(current_lr))

    # Iterate: number of iterations = 28708 (training-set size) / batch_size = number of batches; compute loss and accuracy
    # batch_idx counts the iterations
    iter_num = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()

        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)  # forward pass
        loss = criterion(outputs, targets)
        loss.backward()  # backward pass
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        # train_loss += loss.data[0]
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)  # add batch_size samples each iteration
        correct += predicted.eq(targets.data).cpu().sum()

        Train_acc = int(correct) / int(
            total)  # i.e. the accuracy at the last iteration, taken as the accuracy of the whole epoch (the full dataset has passed through the network)

        # print the loss and the training-set accuracy
        # utils.progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
        #     % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))

        iter_num += 1

    train_loss = train_loss / iter_num
    train_loss_list.append(train_loss)
    train_acc_list.append(Train_acc)
    print("Train Accuracy:", Train_acc * 100, "%")
    print("Train Loss:", train_loss)
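All of these examples share the same step-decay schedule: current_lr = opt.lr * learning_rate_decay_rate ** ((epoch - learning_rate_decay_start) // learning_rate_decay_every). A small illustration with made-up values:

lr, start, every, rate = 0.01, 80, 5, 0.9
for epoch in (80, 85, 90, 100):
    frac = max(epoch - start, 0) // every
    print(epoch, lr * rate ** frac)
# 80 -> 0.01, 85 -> 0.009, 90 -> 0.0081, 100 -> 0.006561
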
Example #5
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    global lr1
    net.train()
    train_loss = 0
    f_loss = 0.0
    correct = 0
    total = 0
    current_lr1 = 0.0

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        decay_factor1 = 0.95**frac
        current_lr = opt.lr * decay_factor
        current_lr1 = lr1 * decay_factor1
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
        utils.set_lr(optimzer4center, current_lr1)
    else:
        current_lr = opt.lr
        current_lr1 = lr1
    print('learning_rate: %s' % str(current_lr))
    print('learning_rate1: %s' % str(current_lr1))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        optimzer4center.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        ip1, outputs = net(inputs)
        loss = nllloss(outputs,
                       targets) + loss_weight * centerloss(targets, ip1)
        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        optimzer4center.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        f_loss = float(train_loss) / float(batch_idx + 1)
        utils.progress_bar(
            batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss / (batch_idx + 1),
             100.0 * float(correct) / float(total), correct, total))

    Train_acc = 100.0 * float(correct) / float(total)
    training_loss.append(f_loss)
    training_acc.append(Train_acc)
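Example #5 adds a center loss (weighted by loss_weight) on the feature vector ip1 and steps a second optimizer (optimzer4center) for the loss module's own parameters. The centerloss module is not shown; below is a minimal sketch of a center-loss module with the (labels, features) call order used above, where num_classes and feat_dim are assumed constructor arguments:

import torch
import torch.nn as nn

class CenterLoss(nn.Module):
    # Learnable per-class centers; the loss is the mean squared distance
    # between each feature vector and the center of its class.
    def __init__(self, num_classes, feat_dim):
        super().__init__()
        self.centers = nn.Parameter(torch.randn(num_classes, feat_dim))

    def forward(self, labels, features):
        centers_batch = self.centers.index_select(0, labels.long())
        return ((features - centers_batch) ** 2).sum(dim=1).mean() / 2.0
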
Example #6
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    conf_mat = np.zeros((NUM_CLASSES, NUM_CLASSES))

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        current_lr = opt.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        train_loss += loss.item()

        conf_mat += utils.confusion_matrix(outputs, targets, NUM_CLASSES)

        acc = sum([conf_mat[i, i]
                   for i in range(conf_mat.shape[0])]) / conf_mat.sum()
        uacc_per_class = [
            conf_mat[i, i] / conf_mat[i].sum()
            for i in range(conf_mat.shape[0])
        ]
        unweighted_acc = sum(uacc_per_class) / len(uacc_per_class)

        prec_per_class = [
            conf_mat[i, i] / conf_mat[:, i].sum()
            for i in range(conf_mat.shape[0])
        ]
        average_precision = sum(prec_per_class) / len(prec_per_class)

        utils.progress_bar(
            batch_idx, len(trainloader),
            'Loss: %.3f | Acc: %.3f%% | unweighted_Acc: %.3f%%' %
            (train_loss / (batch_idx + 1), 100. * acc, 100. * unweighted_acc))

    Train_acc = 100. * acc
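Example #6 accumulates a per-batch confusion matrix through utils.confusion_matrix(outputs, targets, NUM_CLASSES) and reads per-class recall off its rows and per-class precision off its columns. A sketch of such a helper (an assumption: rows are the true class, columns the predicted class, matching the indexing above):

import numpy as np
import torch

def confusion_matrix(outputs, targets, num_classes):
    # Count (true class, predicted class) pairs for one batch.
    mat = np.zeros((num_classes, num_classes))
    _, preds = torch.max(outputs.data, 1)
    for t, p in zip(targets.view(-1).tolist(), preds.view(-1).tolist()):
        mat[int(t), int(p)] += 1
    return mat
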
Example #7
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        current_lr = opt.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # *********************process the weights including binarization*********************
        #print("!!!bin!!!\r\n")
        bin_op.binarization()

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        #***********************************************
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        #utils.clip_gradient(optimizer, 0.1)

        # ****************restore weights*****************
        bin_op.restore()
        bin_op.updateBinaryGradWeight()

        if opt.sr:
            updateBN()

        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        utils.progress_bar(
            batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss /
             (batch_idx + 1), 100. * float(correct) / total, correct, total))

    Train_acc = 100. * float(correct) / total
Example #8
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        current_lr = opt.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        bs, c, h, w = np.shape(inputs)
        spl = int(bs / 2)
        input1 = inputs[0:spl]
        label1 = targets[0:spl]
        input2 = inputs[spl:bs]
        label2 = targets[spl:bs]
        input1 = perturb(input1, label1, 0.2, net)
        input1 = torch.Tensor(input1)
        inputs = np.vstack((input1, input2))
        inputs = torch.Tensor(inputs)
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        train_loss += loss.data.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        '''
        utils.progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
            % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
        '''

    Train_acc = 100. * correct / total
    if ((epoch + 1) % 10 == 0):
        Ta = float(Train_acc)
        temp_train_acc.append(Ta)
        temp_train_loss.append(train_loss)
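Example #8 perturbs half of every batch with a perturb(input, label, eps, net) helper before training on it; the helper is not shown. A hypothetical FGSM-style sketch with that argument order (cross-entropy is an assumed choice of loss):

import torch
import torch.nn.functional as F

def perturb(inputs, labels, eps, net):
    # One-step FGSM: shift each input by eps in the direction of the sign of the loss gradient.
    inputs = inputs.clone().detach().requires_grad_(True)
    loss = F.cross_entropy(net(inputs), labels)
    loss.backward()
    return (inputs + eps * inputs.grad.sign()).detach()
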
Example #9
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        current_lr = opt.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        ori = inputs

        # generate adversarial samples
        with ctx_noparamgrad_and_eval(net):
            inputs = adversary.perturb(inputs, targets)

        # concatenate with clean samples
        inputs = torch.cat((ori, inputs), 0)
        targets = torch.cat((targets, targets), 0)

        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        utils.progress_bar(
            batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss /
             (batch_idx + 1), 100. * correct / total, correct, total))

    Train_acc = 100. * correct / total
    Train_loss = train_loss / len(trainloader.dataset)
    return Train_acc, Train_loss
Example #10
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    err0 = 0.005  # for FGSM

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        current_lr = opt.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(1), targets.cuda(1)
        optimizer.zero_grad()
        inputs, targets = Variable(inputs,
                                   requires_grad=True), Variable(targets)
        outputs = net(inputs)
        loss = alpha * criterion(outputs, targets)
        loss.backward()
        train_loss += loss.data.item()
        del loss
        inputs_prime = inputs + err0 * torch.sign(inputs.grad)
        del inputs
        outputs_prime = net(inputs_prime)
        loss2 = (1 - alpha) * criterion(outputs_prime, targets)
        loss2.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        train_loss += loss2.data.item()
        del loss2
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum().numpy()

    utils.progress_bar(
        len(trainloader), len(trainloader),
        'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
        (train_loss / (batch_idx + 1), 100. * correct / total, correct, total))

    Train_acc = 100. * correct / total
Example #11
def train(epoch):
    if detail:
        print('\nEpoch: %d' % epoch)
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        current_lr = opt.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr
    if detail:
        print('learning_rate: %s' % str(current_lr))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()

        # train_loss += loss.data[0]
        train_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        if detail:
            utils.progress_bar(
                batch_idx, len(trainloader),
                'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                (train_loss /
                 (batch_idx + 1), 100. * correct / total, correct, total))

    Train_acc = 100. * correct / total
Example #12
def train(epoch, start_time):
    t = datetime.datetime.now()
    t_show = datetime.datetime.strftime(t, '%Y-%m-%d %H:%M:%S')
    print('Epoch: %d\t%s\t%s' %
          (epoch, t_show, utils.cal_run_time(start_time)))
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        current_lr = opt.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr
    print('learning_rate: %.6f' % current_lr)

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        utils.progress_bar(
            batch_idx, len(train_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss /
             (batch_idx + 1), 100 * float(correct) / total, correct, total))

    Train_acc = 100 * float(correct) / total
Example #13
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc ##Global variable to store training accuracy.
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    ## Learning rate decay tells us how fast we decrease the size of our learning updates. As noted previously, it is ideal that the algorithm does not stay too suspicious of itself for too long, as that
    ## might result in drastic changes which actually worsen the accuracy! As the algorithm becomes more advanced and knowledgeable, it should trust its decisions more.
    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate ** frac
        current_lr = opt.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (inputs, targets) in enumerate(trainloader): ##A 'batch' is the number of pictures handled in each iteration; an epoch consists of many batches.
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad() ##Don't worry about optimizers for now...
        inputs, targets = Variable(inputs), Variable(targets) ##Grab the inputs and labels (targets)...
        outputs = net(inputs) ##Based on the input, determine the output of the algorithm...
        loss = criterion(outputs, targets) ## Calculate the 'loss' (how far we are off) based on the difference between the network's output and the target...
        loss.backward() ##Perform backward propagation: based on the loss, it can be determined what should be changed in the weights and biases of the algorithm.
        utils.clip_gradient(optimizer, 0.1) ##Clip the 'gradient': the gradient explains in which 'direction' the weights should be changed to minimize the loss (according to the calculations from the current point)
        ##                                    and how large the effect of a small change in the individual weights and biases would presumably be on the loss; clipping keeps any single update from becoming too large.
        optimizer.step() ##Based on the gradient and using the optimizer algorithm, a 'step' is taken. This implies that all weights and biases are changed in such a way as to minimize the loss based on the gradient.
        ##                 The gradient only tells you something about the direction and magnitude of change IN A POINT --> this step is based on extrapolation. You might be able to see that the step size
        ##                 (which depends on the learning rate) therefore tells us something about how much we dare to extrapolate at a certain point in the process, and why we lower this step size
        ##                 as the number of epochs increases.
        train_loss += loss.item() ##Add the loss value of this run to the total loss during training
        _, predicted = torch.max(outputs.data, 1) ##Take the index of the largest output value as the predicted class for each sample
        total += targets.size(0) ##Count how many samples have been processed so far
        correct += predicted.eq(targets.data).cpu().sum() ##Count how many predictions match their labels

        utils.progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' ##Show a progress bar that also indicates the current loss and average accuracy of the system.
            % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))

    Train_acc = 100.*correct/total
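A tiny standalone illustration of the bookkeeping commented above (all values made up): the predicted class is the index of the largest logit, total counts samples, and correct counts matches.

import torch

outputs = torch.tensor([[0.1, 2.0, -1.0],
                        [1.5, 0.2,  0.3]])   # logits for 2 samples, 3 classes
targets = torch.tensor([1, 2])
_, predicted = torch.max(outputs, 1)          # tensor([1, 0])
correct = predicted.eq(targets).sum().item()  # 1
total = targets.size(0)                       # 2
print(100. * correct / total)                 # 50.0
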
Example #14
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate ** frac
        current_lr = opt.lr * decay_factor  ## exponential decay: the learning rate shrinks automatically as the number of epochs grows
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()  # backward pass
        utils.clip_gradient(optimizer, 0.1)  # gradient clipping
        optimizer.step()  # update the parameters with SGD
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)  # take the maximum of each row and its index
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        print('Epoch: %d | Batch_index: %d | Loss: %.3f | Acc: %.3f%% (%d/%d) '
              %(epoch,batch_idx,train_loss/(batch_idx+1), 100.*correct/total, correct, total))

        # utils.progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
        #     % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
        # per-batch loss; the training set contains 28709/128 batches
    Train_acc = 100.*correct/total
Example #15
def train(epoch):

    print('\nEpoch: %d' % epoch)
    if epoch > 80:
        frac = (epoch - 80) // 5
        decay_factor = 0.9**frac
        current_lr = opt.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr

    print('learning_rate: %s' % str(current_lr))
    train_loss = 0
    total_loss = 0
    total = 0
    correct = 0
    for train_x, train_y in train_data:
        train_x, train_y = train_x.cuda(), train_y.cuda()
        train_x, train_y = Variable(train_x), Variable(train_y)
        optimizer.zero_grad()

        output = model(train_x)
        loss = criterion(output, train_y)
        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()

        train_loss += loss.item()

        _, predicted = torch.max(output.data, 1)

        total += train_y.size(0)
        correct += predicted.eq(train_y.data).cpu().sum().item()

    train_acc = 100 * correct / total
    print(train_acc)
    print("Train_acc : %0.3f" % train_acc)
    if int(train_acc) == 2000:
        torch.save(model.state_dict(), './raf_train.t7')
Example #16
    print('Start Triplet Similarity Training.')
    errors_real_D = []
    errors_fake_D = []
    errors_D = []
    errors_G = []
    losses_triplet = []
    loss_record = 0.
    best_loss = 10000.
    margin_change_count = 0
    gen_iter = 0
    bat_start = 0
    for it in range(n_iters):
        print('\rTriplet iter %05d' % (it + 1), end='')
        if it in triplet_lrs.keys():
            set_lr(triplet_lrs[it], optimizer)
        if it in triplet_margins.keys():
            margin = triplet_margins[it]
        ''' Triplet Similarity Measuring '''
        anchor, positive, negative = grab_triplet_batch(
            X, Y, person_cam_index, datareader.num_class, batch_size)

        x = Variable(torch.from_numpy(anchor.astype(float)).float().cuda())
        anc_feat = model.extract(x)

        x = Variable(torch.from_numpy(positive.astype(float)).float().cuda())
        pos_feat = model.extract(x)

        x = Variable(torch.from_numpy(negative.astype(float)).float().cuda())
        neg_feat = model.extract(x)
Example #17
        weight_saver.restore()
    # Progress bar
    tpbar = tqdm(unit="batches", ncols=100, total=args.num_iterations)
    interval_cost = 0.0
    # Declare lists for logging metrics
    if args.logfile is not None:
        train_result = []
        test_result = []
        err_result = []
    # Iterating over the training set
    for step in range(args.num_iterations):
        feed_dict = fill_feed_dict(input_or_ph_ops=input_ops_train,
                                   dataset=train_set,
                                   learning_rate_ph=lr_ph,
                                   learning_rate_val=set_lr(
                                       base_lr, step, learning_schedule,
                                       gamma),
                                   step=step)
        # Mean batch cost
        output = train_function(feed_dict=feed_dict)

        # Update progress bar
        tpbar.update(1)
        tpbar.set_description("Training {:0.4f}".format(output[()]))
        interval_cost += output[()]
        # Every epoch print test set metrics
        if (step + 1) % args.iter_interval == 0 and step > 0:
            # Call loop_eval to calculate metric over test set
            eval_losses = loop_eval(valid_set, input_ops_valid, metric_names,
                                    eval_function, en_top5)
            tqdm.write(
Example #18
    def run(self, total_steps):
        """ Runs PPO

        Args:
            total_steps (int): total number of environment steps to run for
        """
        N = self.num_workers
        T = self.worker_steps
        E = self.opt_epochs
        A = self.venv.action_space.n

        while self.taken_steps < total_steps:
            progress = self.taken_steps / total_steps

            obs, rewards, masks, actions, steps = self.interact()
            ob_shape = obs.size()[2:]

            ep_reward = self.test()
            self.reward_histr.append(ep_reward)
            self.steps_histr.append(self.taken_steps)

            # statistic logic
            group_size = len(self.steps_histr) // self.plot_points
            if self.plot_reward and len(self.steps_histr) % (
                    self.plot_points * 10) == 0 and group_size >= 10:
                x_means, _, y_means, y_stds = \
                    mean_std_groups(np.array(self.steps_histr), np.array(self.reward_histr), group_size)
                fig = plt.figure()
                fig.set_size_inches(8, 6)
                plt.ticklabel_format(axis='x', style='sci', scilimits=(-2, 6))
                plt.errorbar(x_means,
                             y_means,
                             yerr=y_stds,
                             ecolor='xkcd:blue',
                             fmt='xkcd:black',
                             capsize=5,
                             elinewidth=1.5,
                             mew=1.5,
                             linewidth=1.5)
                plt.title('Training progress')
                plt.xlabel('Total steps')
                plt.ylabel('Episode reward')
                plt.savefig(self.plot_path, dpi=200)
                plt.clf()
                plt.close()
                plot_timer = 0

            # TEMP upgrade to support recurrence

            # compute advantages, returns with GAE
            obs_ = obs.view(((T + 1) * N, ) + ob_shape)
            obs_ = Variable(obs_)
            _, values = self.policy(obs_)
            values = values.view(T + 1, N, 1)
            advantages, returns = gae(rewards, masks, values, self.gamma,
                                      self.lambd)

            self.policy_old.load_state_dict(self.policy.state_dict())
            for e in range(E):
                self.policy.zero_grad()

                MB = steps // self.minibatch_steps

                b_obs = Variable(obs[:T].view((steps, ) + ob_shape))
                b_rewards = Variable(rewards.view(steps, 1))
                b_masks = Variable(masks.view(steps, 1))
                b_actions = Variable(actions.view(steps, 1))
                b_advantages = Variable(advantages.view(steps, 1))
                b_returns = Variable(returns.view(steps, 1))

                b_inds = np.arange(steps)
                np.random.shuffle(b_inds)

                for start in range(0, steps, self.minibatch_steps):
                    mb_inds = b_inds[start:start + self.minibatch_steps]
                    mb_inds = cuda_if(
                        torch.from_numpy(mb_inds).long(), self.cuda)
                    mb_obs, mb_rewards, mb_masks, mb_actions, mb_advantages, mb_returns = \
                        [arr[mb_inds] for arr in [b_obs, b_rewards, b_masks, b_actions, b_advantages, b_returns]]

                    mb_pis, mb_vs = self.policy(mb_obs)
                    mb_pi_olds, mb_v_olds = self.policy_old(mb_obs)
                    mb_pi_olds, mb_v_olds = mb_pi_olds.detach(
                    ), mb_v_olds.detach()

                    losses = self.objective(self.clip_func(progress), mb_pis,
                                            mb_vs, mb_pi_olds, mb_v_olds,
                                            mb_actions, mb_advantages,
                                            mb_returns)
                    policy_loss, value_loss, entropy_loss = losses
                    loss = policy_loss + value_loss * self.value_coef + entropy_loss * self.entropy_coef

                    set_lr(self.optimizer, self.lr_func(progress))
                    self.optimizer.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.policy.parameters(),
                                                   self.max_grad_norm)
                    self.optimizer.step()

            self.taken_steps += steps
            print(self.taken_steps)
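Example #18 estimates advantages and returns with a gae(rewards, masks, values, gamma, lambd) call whose implementation is not included. A hypothetical sketch of Generalized Advantage Estimation consistent with that call, assuming rewards and masks are shaped (T, N, 1) and values is shaped (T + 1, N, 1):

import torch

def gae(rewards, masks, values, gamma, lambd):
    # Backward recursion: delta_t = r_t + gamma * V(s_{t+1}) * mask_t - V(s_t),
    # and A_t = delta_t + gamma * lambda * mask_t * A_{t+1}.
    T = rewards.size(0)
    advantages = torch.zeros_like(rewards)
    running = torch.zeros_like(rewards[0])
    for t in reversed(range(T)):
        delta = rewards[t] + gamma * values[t + 1] * masks[t] - values[t]
        running = delta + gamma * lambd * masks[t] * running
        advantages[t] = running
    returns = advantages + values[:T]
    return advantages, returns
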
Example #19
def main():
  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

  np.random.seed(args.seed)
  #torch.cuda.set_device(args.gpu)
  cudnn.benchmark = True
  torch.manual_seed(args.seed)
  cudnn.enabled=True
  torch.cuda.manual_seed(args.seed)
  logging.info('gpu device num = %d' % args.ngpu)
  logging.info("args = %s", args)

  genotype = eval("genotypes.%s" % args.arch)
  model = Network(args.init_channels, CLASSES, args.layers, args.auxiliary, genotype, args.residual_wei, args.shrink_channel)
  if args.parallel:
    model = nn.DataParallel(model).cuda()
    #model = nn.parallel.DistributedDataParallel(model).cuda()
  else:
    model = model.cuda()
  
  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

  criterion = nn.CrossEntropyLoss()
  criterion = criterion.cuda()
  criterion_smooth = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
  criterion_smooth = criterion_smooth.cuda()

  optimizer = torch.optim.SGD(
    #model.parameters(),
    utils.set_group_weight(model, args.bn_no_wd, args.bias_no_wd),
    args.learning_rate,
    momentum=args.momentum,
    weight_decay=args.weight_decay
    )

  resume = os.path.join(args.save, 'checkpoint.pth.tar')
  if os.path.exists(resume):
    print("=> loading checkpoint %s" % resume)
    #checkpoint = torch.load(resume)
    checkpoint = torch.load(resume, map_location = lambda storage, loc: storage.cuda(0))
    args.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    #optimizer.load_state_dict(checkpoint['optimizer'])
    optimizer.state_dict()['state'] = checkpoint['optimizer']['state']
    print('=> loaded checkpoint epoch %d' % args.start_epoch)
    if args.start_epoch >= args.epochs:
        print('training finished')
        sys.exit(0)

  traindir = os.path.join(args.data, 'train')
  validdir = os.path.join(args.data, 'val')
  normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
  train_data = dset.ImageFolder(
    traindir,
    transforms.Compose([
      transforms.RandomResizedCrop(args.image_size),
      transforms.RandomHorizontalFlip(),
      transforms.ColorJitter(
        brightness=0.4,
        contrast=0.4,
        saturation=0.4,
        hue=0.1),
      transforms.ToTensor(),
      normalize,
    ]))
  valid_data = dset.ImageFolder(
    validdir,
    transforms.Compose([
      transforms.Resize(int((256.0 / 224) * args.image_size)),
      transforms.CenterCrop(args.image_size),
      transforms.ToTensor(),
      normalize,
    ]))

  train_queue = torch.utils.data.DataLoader(
    train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=nworker)

  valid_queue = torch.utils.data.DataLoader(
    valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=nworker)

  best_acc_top1 = 0
  for epoch in range(args.start_epoch, args.epochs):
    if args.lr_strategy == 'cos':
      lr = utils.set_lr(optimizer, epoch, args.epochs, args.learning_rate)
    #elif args.lr_strategy == 'step':
    #  scheduler.step()
    #  lr = scheduler.get_lr()[0]
    logging.info('epoch %d lr %e', epoch, lr)
    if args.parallel:
      model.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
    else:
      model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
    train_acc, train_obj = train(train_queue, model, criterion_smooth, optimizer, epoch)
    logging.info('train_acc %f', train_acc)

    utils.save_checkpoint({
      'epoch': epoch + 1,
      'state_dict': model.state_dict(),
      'best_acc_top1': train_acc,
      'optimizer' : optimizer.state_dict(),
      }, False, args.save)

    #if epoch >= args.early_stop:
    #  break

  valid_acc_top1, valid_acc_top5, valid_obj = infer(valid_queue, model, criterion)
  logging.info('valid_acc_top1 %f', valid_acc_top1)
  logging.info('valid_acc_top5 %f', valid_acc_top5)
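Example #19 calls utils.set_lr with a four-argument signature (optimizer, epoch, total epochs, base learning rate) and uses the returned value as the epoch's learning rate under the 'cos' strategy. A hypothetical cosine-annealing variant consistent with that call (named differently to make clear it is not the repository's actual helper):

import math

def set_lr_cosine(optimizer, epoch, total_epochs, base_lr):
    # Anneal from base_lr down to 0 over total_epochs with a half cosine.
    lr = 0.5 * base_lr * (1 + math.cos(math.pi * epoch / total_epochs))
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr
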
Example #20
def run(net,
        loader,
        optimizer,
        scheduler,
        tracker,
        train=False,
        has_answers=True,
        prefix='',
        epoch=0):
    """ Run an epoch over the given loader """
    assert not (train and not has_answers)
    if train:
        net.train()
        tracker_class, tracker_params = tracker.MovingMeanMonitor, {
            'momentum': 0.99
        }
    else:
        net.eval()
        tracker_class, tracker_params = tracker.MeanMonitor, {}
        answ = []
        idxs = []
        accs = []

    # set learning rate decay policy
    if epoch < len(config.gradual_warmup_steps
                   ) and config.schedule_method == 'warm_up':
        utils.set_lr(optimizer, config.gradual_warmup_steps[epoch])
        utils.print_lr(optimizer, prefix, epoch)
    elif (epoch in config.lr_decay_epochs
          ) and train and config.schedule_method == 'warm_up':
        utils.decay_lr(optimizer, config.lr_decay_rate)
        utils.print_lr(optimizer, prefix, epoch)
    else:
        utils.print_lr(optimizer, prefix, epoch)

    loader = tqdm(loader, desc='{} E{:03d}'.format(prefix, epoch), ncols=0)
    loss_tracker = tracker.track('{}_loss'.format(prefix),
                                 tracker_class(**tracker_params))
    acc_tracker = tracker.track('{}_acc'.format(prefix),
                                tracker_class(**tracker_params))

    for v, q, a, b, idx, v_mask, q_mask, q_len in loader:
        var_params = {
            'requires_grad': False,
        }
        v = Variable(v.cuda(), **var_params)
        q = Variable(q.cuda(), **var_params)
        a = Variable(a.cuda(), **var_params)
        b = Variable(b.cuda(), **var_params)
        q_len = Variable(q_len.cuda(), **var_params)
        v_mask = Variable(v_mask.cuda(), **var_params)
        q_mask = Variable(q_mask.cuda(), **var_params)

        out = net(v, b, q, v_mask, q_mask, q_len)
        if has_answers:
            answer = utils.process_answer(a)
            loss = utils.calculate_loss(answer, out, method=config.loss_method)
            acc = utils.batch_accuracy(out, answer).data.cpu()

        if train:
            optimizer.zero_grad()
            loss.backward()
            # print gradient
            if config.print_gradient:
                utils.print_grad([(n, p) for n, p in net.named_parameters()
                                  if p.grad is not None])
            # clip gradient
            clip_grad_norm_(net.parameters(), config.clip_value)
            optimizer.step()
            if (config.schedule_method == 'batch_decay'):
                scheduler.step()
        else:
            # store information about evaluation of this minibatch
            _, answer = out.data.cpu().max(dim=1)
            answ.append(answer.view(-1))
            if has_answers:
                accs.append(acc.view(-1))
            idxs.append(idx.view(-1).clone())

        if has_answers:
            loss_tracker.append(loss.item())
            acc_tracker.append(acc.mean())
            fmt = '{:.4f}'.format
            loader.set_postfix(loss=fmt(loss_tracker.mean.value),
                               acc=fmt(acc_tracker.mean.value))

    if not train:
        answ = list(torch.cat(answ, dim=0))
        if has_answers:
            accs = list(torch.cat(accs, dim=0))
        else:
            accs = []
        idxs = list(torch.cat(idxs, dim=0))
        #print('{} E{:03d}:'.format(prefix, epoch), ' Total num: ', len(accs))
        #print('{} E{:03d}:'.format(prefix, epoch), ' Average Score: ', float(sum(accs) / len(accs)))
        return answ, accs, idxs
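Example #20 warms the learning rate up with explicit per-epoch values (utils.set_lr with config.gradual_warmup_steps[epoch]) and afterwards decays it multiplicatively with utils.decay_lr. A plausible one-liner for the decay helper (an assumption):

def decay_lr(optimizer, decay_rate):
    # Multiply every parameter group's learning rate by decay_rate.
    for group in optimizer.param_groups:
        group['lr'] *= decay_rate
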
Example #21
    # Progress bar
    tpbar = tqdm(unit="batches", ncols=100, total=args.num_iterations)
    # Set interval cost to 0.0
    interval_cost = 0.0
    # Declare lists for logging metrics
    if (args.logfile is not None):
        train_result = []
        test_result = []
        err_result = []
    # Iterating over the training set
    for step, data in enumerate(train_set):
        data['iteration'] = step
        # Dictionary for training
        feed_dict = {input_ph[k]: data[k] for k in input_ph.keys()}
        # Learning Schedule
        feed_dict[lr_ph] = set_lr(base_lr, step, learning_schedule, gamma)
        # Mean batch cost
        output = train_function(feed_dict=feed_dict)
        # Update progress bar
        tpbar.update(1)
        tpbar.set_description("Training {:0.4f}".format(output[()]))
        interval_cost += output[()]
        # Every epoch print test set metrics
        if (step + 1) % args.iter_interval == 0 and step > 0:
            # Call loop_eval to calculate metric over test set
            eval_losses = loop_eval(valid_set, input_ph, metric_names,
                                    eval_function, en_top5)
            tqdm.write(
                "Interval {interval} Iteration {iteration} complete. "
                "Avg Train Cost {cost:0.4f} Test Metrics:{tcost}".format(
                    interval=step // args.iter_interval,
Example #22
def train(args, model_id, tb):
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    train_data = MedicalEasyEnsembleDataloader(args.train_data, args.class_id,
                                               args.batch_size, True,
                                               args.num_workers)
    val_data = MedicalEasyEnsembleDataloader(args.val_data, args.class_id,
                                             args.batch_size, False,
                                             args.num_workers)
    if os.path.exists(args.w2v_file):
        embedding = utils.load_embedding(args.w2v_file,
                                         vocab_size=args.vocab_size,
                                         embedding_size=args.embedding_size)
    else:
        embedding = None
    if args.model_type == 'lstm':
        model = models.LSTMModel(args, embedding)
    elif args.model_type == 'conv':
        model = models.ConvModel(args, embedding)
    elif args.model_type == 'char':
        model = models.CharCNNModel(args, embedding)
    elif args.model_type == 'base':
        model = models.BaseModel(args, embedding)
    else:
        raise NotImplementedError
    if os.path.isfile(
            os.path.join(args.checkpoint_path, str(args.class_id),
                         "%s_%s" % (args.model_type, args.type_suffix),
                         "model_%d.pth" % model_id)):
        print("Load %d class %s type %dth model from previous step" %
              (args.class_id, args.model_type, model_id))
        model.load_state_dict(
            torch.load(
                os.path.join(args.checkpoint_path, str(args.class_id),
                             "%s_%s" % (args.model_type, args.type_suffix),
                             "model_%d.pth" % model_id)))
    iteration = 0
    model = model.cuda(args.device)
    model.train()
    optimizer = utils.build_optimizer(args, model)
    loss_func = MultiBceLoss()
    cur_worse = 1000
    bad_times = 0
    for epoch in range(args.epochs):
        if epoch >= args.start_epoch:
            factor = (epoch - args.start_epoch) // args.decay_every
            decay_factor = args.decay_rate**factor
            current_lr = args.lr * decay_factor
            utils.set_lr(optimizer, current_lr)
        # if epoch != 0 and epoch % args.sample_every == 0:
        #     train_data.re_sample()
        for i, data in enumerate(train_data):
            tmp = [
                _.cuda(args.device) if isinstance(_, torch.Tensor) else _
                for _ in data
            ]
            report_ids, sentence_ids, sentence_lengths, output_vec = tmp
            optimizer.zero_grad()
            loss = loss_func(model(sentence_ids, sentence_lengths), output_vec)
            loss.backward()
            train_loss = loss.item()
            optimizer.step()
            iteration += 1
            if iteration % args.print_every == 0:
                print("iter %d epoch %d loss: %.3f" %
                      (iteration, epoch, train_loss))

            if iteration % args.save_every == 0:
                torch.save(
                    model.state_dict(),
                    os.path.join(args.checkpoint_path, str(args.class_id),
                                 "%s_%s" % (args.model_type, args.type_suffix),
                                 "model_%d.pth" % model_id))
                with open(os.path.join(args.checkpoint_path,
                                       str(args.class_id), "config.json"),
                          'w',
                          encoding='utf-8') as config_f:
                    json.dump(vars(args), config_f, indent=2)
                with open(os.path.join(
                        args.checkpoint_path, str(args.class_id),
                        "%s_%s" % (args.model_type, args.type_suffix),
                        "config.json"),
                          'w',
                          encoding='utf-8') as config_f:
                    json.dump(vars(args), config_f, indent=2)
            if iteration % args.val_every == 0:
                val_loss = eval_model(model, loss_func, val_data, epoch)
                tb.add_scalar("model_%d val_loss" % model_id, val_loss,
                              iteration)
                if val_loss > cur_worse:
                    print("Bad Time Appear")
                    cur_worse = val_loss
                    bad_times += 1
                else:
                    cur_worse = val_loss
                    bad_times = 0
                if bad_times > args.patient:
                    print('Early Stop !!!!')
                    return
            if iteration % args.loss_log_every == 0:
                tb.add_scalar("model_%d train_loss" % model_id, loss.item(),
                              iteration)

    print("Training finished")
Example #23
def train(epoch):
    print('\nEpoch: %d' % epoch)
    snet.train()
    if args.model == 'VID':
        VID_NET1.train()
        VID_NET2.train()
    elif args.model == 'OFD':
        OFD_NET1.train()
        OFD_NET2.train()
    elif args.model == 'AFD':
        AFD_NET1.train()
        AFD_NET2.train()
    else:
        pass
    train_loss = 0
    train_cls_loss = 0

    conf_mat = np.zeros((NUM_CLASSES, NUM_CLASSES))
    conf_mat_a = np.zeros((NUM_CLASSES, NUM_CLASSES))
    conf_mat_b = np.zeros((NUM_CLASSES, NUM_CLASSES))

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        current_lr = args.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = args.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (img_teacher, img_student,
                    target) in enumerate(trainloader):

        if args.cuda:
            img_teacher = img_teacher.cuda(non_blocking=True)
            img_student = img_student.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        optimizer.zero_grad()

        if args.augmentation:
            img_teacher, teacher_target_a, teacher_target_b, teacher_lam = mixup_data(
                img_teacher, target, 0.6)
            img_teacher, teacher_target_a, teacher_target_b = map(
                Variable, (img_teacher, teacher_target_a, teacher_target_b))

            img_student, student_target_a, student_target_b, student_lam = mixup_data(
                img_student, target, 0.6)
            img_student, student_target_a, student_target_b = map(
                Variable, (img_student, student_target_a, student_target_b))
        else:
            img_teacher, img_student, target = Variable(img_teacher), Variable(
                img_student), Variable(target)

        rb1_s, rb2_s, rb3_s, mimic_s, out_s = snet(img_student)
        rb1_t, rb2_t, rb3_t, mimic_t, out_t = tnet(img_teacher)

        if args.augmentation:
            cls_loss = mixup_criterion(Cls_crit, out_s, student_target_a,
                                       student_target_b, student_lam)
        else:
            cls_loss = Cls_crit(out_s, target)

        kd_loss = KD_T_crit(out_t, out_s)

        if args.model == 'Fitnet':
            #FITNETS: Hints for Thin Deep Nets
            if args.stage == 'Block1':
                Fitnet1_loss = other.Fitnet(rb1_t, rb1_s).cuda()
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * Fitnet1_loss
            elif args.stage == 'Block2':
                Fitnet2_loss = other.Fitnet(rb2_t, rb2_s).cuda()
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.delta * Fitnet2_loss
            else:
                Fitnet1_loss = other.Fitnet(rb1_t, rb1_s).cuda()
                Fitnet2_loss = other.Fitnet(rb2_t, rb2_s).cuda()
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * Fitnet1_loss + args.delta * Fitnet2_loss

        elif args.model == 'AT':  # An activation-based attention transfer with the sum of absolute values raised to the power of 2.
            #Paying More Attention to Attention: Improving the Performance of Convolutional Neural Networks via Attention Transfer
            if args.stage == 'Block1':
                AT1_loss = other.AT(rb1_t, rb1_s).cuda()
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * AT1_loss
            elif args.stage == 'Block2':
                AT2_loss = other.AT(rb2_t, rb2_s).cuda()
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.delta * AT2_loss
            else:
                AT1_loss = other.AT(rb1_t, rb1_s).cuda()
                AT2_loss = other.AT(rb2_t, rb2_s).cuda()
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * AT1_loss + args.delta * AT2_loss

        elif args.model == 'NST':  # NST (poly)
            #Like What You Like: Knowledge Distill via Neuron Selectivity Transfer
            if args.stage == 'Block1':
                NST1_loss = other.NST(rb1_t, rb1_s).cuda()
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * NST1_loss
            elif args.stage == 'Block2':
                NST2_loss = other.NST(rb2_t, rb2_s).cuda()
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.delta * NST2_loss
            else:
                NST1_loss = other.NST(rb1_t, rb1_s).cuda()
                NST2_loss = other.NST(rb2_t, rb2_s).cuda()
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * NST1_loss + args.delta * NST2_loss

        elif args.model == 'PKT':  # PKT
            #Learning Deep Representations with Probabilistic Knowledge Transfer
            if args.stage == 'Block1':
                PKT1_loss = other.PKT(rb1_t, rb1_s).cuda()
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * PKT1_loss
            elif args.stage == 'Block2':
                PKT2_loss = other.PKT(rb2_t, rb2_s).cuda()
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.delta * PKT2_loss
            else:
                PKT1_loss = other.PKT(rb1_t, rb1_s).cuda()
                PKT2_loss = other.PKT(rb2_t, rb2_s).cuda()
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * PKT1_loss + args.delta * PKT2_loss

        elif args.model == 'AB':  # AB
            #Knowledge Transfer via Distillation of Activation Boundaries Formed by Hidden Neurons
            if args.stage == 'Block1':
                AB1_loss = other.AB(rb1_t, rb1_s).cuda()
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * AB1_loss
            elif args.stage == 'Block2':
                AB2_loss = other.AB(rb2_t, rb2_s).cuda()
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.delta * AB2_loss
            else:
                AB1_loss = other.AB(rb1_t, rb1_s).cuda()
                AB2_loss = other.AB(rb2_t, rb2_s).cuda()
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * AB1_loss + args.delta * AB2_loss

        elif args.model == 'CCKD':  #
            #Correlation Congruence for Knowledge Distillation
            if args.stage == 'Block1':
                CCKD1_loss = other.CCKD().cuda()(rb1_t, rb1_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * CCKD1_loss
            elif args.stage == 'Block2':
                CCKD2_loss = other.CCKD().cuda()(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.delta * CCKD2_loss
            else:
                CCKD1_loss = other.CCKD().cuda()(rb1_t, rb1_s)
                CCKD2_loss = other.CCKD().cuda()(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * CCKD1_loss + args.delta * CCKD2_loss

        elif args.model == 'RKD':  # RKD-DA
            #Relational Knowledge Disitllation
            if args.stage == 'Block1':
                RKD1_loss = other.RKD().cuda()(rb1_t, rb1_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * RKD1_loss
            elif args.stage == 'Block2':
                RKD2_loss = other.RKD().cuda()(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.delta * RKD2_loss
            else:
                RKD1_loss = other.RKD().cuda()(rb1_t, rb1_s)
                RKD2_loss = other.RKD().cuda()(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * RKD1_loss + args.delta * RKD2_loss

        elif args.model == 'SP':  # SP
            #Similarity-Preserving Knowledge Distillation
            if args.stage == 'Block1':
                SP1_loss = other.SP().cuda()(rb1_t, rb1_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * SP1_loss
            elif args.stage == 'Block2':
                SP2_loss = other.SP().cuda()(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.delta * SP2_loss
            else:
                SP1_loss = other.SP().cuda()(rb1_t, rb1_s)
                SP2_loss = other.SP().cuda()(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * SP1_loss + args.delta * SP2_loss

        elif args.model == 'VID':  # VID-I
            #Variational Information Distillation for Knowledge Transfer
            if args.stage == 'Block1':
                VID1_loss = VID_NET1(rb1_t, rb1_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * VID1_loss
            elif args.stage == 'Block2':
                VID2_loss = VID_NET2(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.delta * VID2_loss
            else:
                VID1_loss = VID_NET1(rb1_t, rb1_s)
                VID2_loss = VID_NET2(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * VID1_loss + args.delta * VID2_loss

        elif args.model == 'OFD':  # OFD
            #A Comprehensive Overhaul of Feature Distillation
            if args.stage == 'Block1':
                OFD1_loss = OFD_NET1(rb1_t, rb1_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * OFD1_loss
            elif args.stage == 'Block2':
                OFD2_loss = OFD_NET2(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.delta * OFD2_loss
            else:
                OFD1_loss = OFD_NET1(rb1_t, rb1_s)
                OFD2_loss = OFD_NET2(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * OFD1_loss + args.delta * OFD2_loss

        elif args.model == 'AFDS':  # AFDS
            #Pay Attention to Features, Transfer Learn Faster CNNs
            if args.stage == 'Block1':
                AFD1_loss = AFD_NET1(rb1_t, rb1_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * AFD1_loss
            elif args.stage == 'Block2':
                AFD2_loss = AFD_NET2(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.delta * AFD2_loss
            else:
                AFD1_loss = AFD_NET1(rb1_t, rb1_s)
                AFD2_loss = AFD_NET2(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * AFD1_loss + args.delta * AFD2_loss

        elif args.model == 'FT':  # FT
            #Paraphrasing Complex Network: Network Compression via Factor Transfer
            if args.stage == 'Block1':
                FT1_loss = other.FT().cuda()(rb1_t, rb1_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * FT1_loss
            elif args.stage == 'Block2':
                FT2_loss = other.FT().cuda()(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.delta * FT2_loss
            else:
                FT1_loss = other.FT().cuda()(rb1_t, rb1_s)
                FT2_loss = other.FT().cuda()(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * FT1_loss + args.delta * FT2_loss

        elif args.model == 'CD':  # CD+GKD+CE
            #Channel Distillation: Channel-Wise Attention for Knowledge Distillation
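            # Unlike the other branches, CD swaps the plain kd_loss for KDLossv2 (presumably the
            # guided-KD variant used alongside channel distillation), which also takes the
            # ground-truth target.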
            if args.stage == 'Block1':
                kd_loss_v2 = other.KDLossv2(args.T).cuda()(out_t, out_s,
                                                           target)
                CD1_loss = other.CD().cuda()(rb1_t, rb1_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss_v2 + args.gamma * CD1_loss
            elif args.stage == 'Block2':
                kd_loss_v2 = other.KDLossv2(args.T).cuda()(out_t, out_s,
                                                           target)
                CD2_loss = other.CD().cuda()(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss_v2 + args.delta * CD2_loss
            else:
                kd_loss_v2 = other.KDLossv2(args.T).cuda()(out_t, out_s,
                                                           target)
                CD1_loss = other.CD().cuda()(rb1_t, rb1_s)
                CD2_loss = other.CD().cuda()(rb2_t, rb2_s)
                loss = args.alpha * cls_loss + args.beta * kd_loss_v2 + args.gamma * CD1_loss + args.delta * CD2_loss

        elif args.model == 'FAKD':  # DS+TS+SA
            #FAKD: Feature-Affinity Based Knowledge Distillation for Efficient Image Super-Resolution
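            # FAKD replaces the KL-based kd_loss entirely: FAKD_DT distills the teacher/student
            # logits against the targets, and FAKD_SA matches feature-affinity (similarity) maps
            # between the corresponding residual blocks.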
            if args.stage == 'Block1':
                FAKD_DT_loss = other.FAKD_DT().cuda()(out_t, out_s, target,
                                                      NUM_CLASSES)
                FAKD_SA1_loss = other.FAKD_SA().cuda()(rb1_t, rb1_s)
                loss = args.alpha * FAKD_DT_loss + args.gamma * FAKD_SA1_loss  # No T
            elif args.stage == 'Block2':
                FAKD_DT_loss = other.FAKD_DT().cuda()(out_t, out_s, target,
                                                      NUM_CLASSES)
                FAKD_SA2_loss = other.FAKD_SA().cuda()(rb2_t, rb2_s)
                loss = args.alpha * FAKD_DT_loss + args.gamma * FAKD_SA2_loss
            else:
                FAKD_DT_loss = other.FAKD_DT().cuda()(out_t, out_s, target,
                                                      NUM_CLASSES)
                FAKD_SA1_loss = other.FAKD_SA().cuda()(rb1_t, rb1_s)
                FAKD_SA2_loss = other.FAKD_SA().cuda()(rb2_t, rb2_s)
                loss = args.alpha * FAKD_DT_loss + args.gamma * FAKD_SA1_loss + args.delta * FAKD_SA2_loss

        elif args.model == 'VKD':  # VKD
            #Robust Re-Identification by Multiple Views Knowledge Distillation
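            # VKD combines a similarity-distillation term on the teacher/student features with an
            # online triplet loss computed on the student features and the class targets.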
            if args.stage == 'Block1':
                VKD_Similarity1_loss = other.VKD_SimilarityDistillationLoss(
                ).cuda()(rb1_t, rb1_s)
                VKD_OnlineTriplet1_loss = other.VKD_OnlineTripletLoss().cuda()(
                    rb1_s, target)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * VKD_Similarity1_loss \
                                             + args.delta * VKD_OnlineTriplet1_loss
            elif args.stage == 'Block2':
                VKD_Similarity2_loss = other.VKD_SimilarityDistillationLoss(
                ).cuda()(rb2_t, rb2_s)
                VKD_OnlineTriplet2_loss = other.VKD_OnlineTripletLoss().cuda()(
                    rb2_s, target)
                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * VKD_Similarity2_loss \
                                             + args.delta * VKD_OnlineTriplet2_loss
            else:
                VKD_Similarity1_loss = other.VKD_SimilarityDistillationLoss(
                ).cuda()(rb1_t, rb1_s)
                VKD_OnlineTriplet1_loss = other.VKD_OnlineTripletLoss().cuda()(
                    rb1_s, target)

                VKD_Similarity2_loss = other.VKD_SimilarityDistillationLoss(
                ).cuda()(rb2_t, rb2_s)
                VKD_OnlineTriplet2_loss = other.VKD_OnlineTripletLoss().cuda()(
                    rb2_s, target)

                loss = args.alpha * cls_loss + args.beta * kd_loss + args.gamma * VKD_Similarity1_loss \
                           + args.delta * VKD_OnlineTriplet1_loss  + args.gamma * VKD_Similarity2_loss \
                                             + args.delta * VKD_OnlineTriplet2_loss

        elif args.model == 'RAD':  # RAD:  Resolution-Adapted Distillation
            # Efficient Low-Resolution Face Recognition via Bridge Distillation
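            # a plain squared-error penalty between the teacher and student mimic embeddings,
            # summed over the batch and feature dimensions; no temperature or KD term is added.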
            distance = mimic_t - mimic_s
            RAD_loss = torch.pow(distance, 2).sum(dim=(0, 1), keepdim=False)
            loss = RAD_loss + cls_loss
        else:
            raise Exception('Invalid model name...')

        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        train_loss += loss.item()
        train_cls_loss += cls_loss.item()

        if args.augmentation:
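            # With mixup, accuracy/mAP/F1 are computed against both mixed target sets and then
            # blended with the mixup coefficient (student_lam) further below.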
            conf_mat_a += losses.confusion_matrix(out_s, student_target_a,
                                                  NUM_CLASSES)
            acc_a = sum([conf_mat_a[i, i] for i in range(conf_mat_a.shape[0])
                         ]) / conf_mat_a.sum()
            precision_a = np.array([
                conf_mat_a[i, i] / (conf_mat_a[i].sum() + 1e-10)
                for i in range(conf_mat_a.shape[0])
            ])
            recall_a = np.array([
                conf_mat_a[i, i] / (conf_mat_a[:, i].sum() + 1e-10)
                for i in range(conf_mat_a.shape[0])
            ])
            mAP_a = sum(precision_a) / len(precision_a)
            F1_score_a = (2 * precision_a * recall_a /
                          (precision_a + recall_a + 1e-10)).mean()

            conf_mat_b += losses.confusion_matrix(out_s, student_target_b,
                                                  NUM_CLASSES)
            acc_b = sum([conf_mat_b[i, i] for i in range(conf_mat_b.shape[0])
                         ]) / conf_mat_b.sum()
            precision_b = np.array([
                conf_mat_b[i, i] / (conf_mat_b[i].sum() + 1e-10)
                for i in range(conf_mat_b.shape[0])
            ])
            recall_b = np.array([
                conf_mat_b[i, i] / (conf_mat_b[:, i].sum() + 1e-10)
                for i in range(conf_mat_b.shape[0])
            ])
            mAP_b = sum(precision_b) / len(precision_b)
            F1_score_b = (2 * precision_b * recall_b /
                          (precision_b + recall_b + 1e-10)).mean()

            acc = student_lam * acc_a + (1 - student_lam) * acc_b
            mAP = student_lam * mAP_a + (1 - student_lam) * mAP_b
            F1_score = student_lam * F1_score_a + (1 -
                                                   student_lam) * F1_score_b

        else:
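            # Without mixup: accumulate the confusion matrix over the epoch; per-class precision
            # comes from row sums, recall from column sums, mAP is the mean precision, and the
            # F1 score is the mean per-class harmonic mean of precision and recall.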
            conf_mat += losses.confusion_matrix(out_s, target, NUM_CLASSES)
            acc = sum([conf_mat[i, i]
                       for i in range(conf_mat.shape[0])]) / conf_mat.sum()
            precision = [
                conf_mat[i, i] / (conf_mat[i].sum() + 1e-10)
                for i in range(conf_mat.shape[0])
            ]
            mAP = sum(precision) / len(precision)

            recall = [
                conf_mat[i, i] / (conf_mat[:, i].sum() + 1e-10)
                for i in range(conf_mat.shape[0])
            ]
            precision = np.array(precision)
            recall = np.array(recall)
            f1 = 2 * precision * recall / (precision + recall + 1e-10)
            F1_score = f1.mean()

        #utils.progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% | mAP: %.3f%% | F1: %.3f%%'
        #% (train_loss/(batch_idx+1), 100.*acc, 100.* mAP, 100.* F1_score))

    return train_cls_loss / (batch_idx + 1), 100. * acc, 100. * mAP, 100. * F1_score
Пример #24
0
def train_model(model,
                dataloaders,
                criterion,
                optimizer,
                start_epoch,
                num_epochs=args.epochs):
    '''
    Train the model.
    model: model to train
    dataloaders: dict of dataloaders, e.g. {'train': ..., 'val': ...}
    criterion: loss function
    optimizer: optimizer used for training
    start_epoch: epoch to resume from
    num_epochs: total number of epochs to train
    Returns: best model (by validation accuracy), val_acc_history
    '''
    since = time.time()
    val_acc_history = []
    lr = args.lr
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    learning_rate_decay_start = args.lr_decay_start
    learning_rate_decay_every = args.lr_decay_every
    learning_rate_decay_rate = args.lr_decay_rate
    for epoch in range(start_epoch, num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print("-" * 10)
        if epoch > learning_rate_decay_start and learning_rate_decay_every > 0:
            frac = (epoch -
                    learning_rate_decay_start) // learning_rate_decay_every
            decay_factor = learning_rate_decay_rate**frac
            current_lr = lr * decay_factor
            set_lr(optimizer, current_lr)
            print("Learning rate: ", current_lr)
        for phase in ["train", "val"]:
            if phase == "train":
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0
            for inputs, labels in dataloaders[phase]:
                t = inputs.size(0)
                if phase == "val":
                    bs, ncrops, c, h, w = np.shape(inputs)
                    inputs = inputs.view(-1, c, h, w)  #(bs*n_crops, c, h, w)
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    if phase == "val":
                        outputs = outputs.view(bs, ncrops, -1).mean(1)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)
                    if phase == 'train':
                        loss.backward()
                        clip_gradient(optimizer, 0.1)
                        optimizer.step()
                running_loss += loss.item() * t
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / (dataloader_length[phase])
            epoch_acc = running_corrects.double() / (dataloader_length[phase])
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss,
                                                       epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)
        #save_checkpoint(epoch, best_model_wts, optimizer)
        print()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)
    return model, val_acc_history
Пример #25
0
def train(args):
    logging.info("Create train_loader and val_loader.........")
    train_loader_kwargs = {
        'question_pt': args.train_question_pt,
        'vocab_json': args.vocab_json,
        'feature_h5': args.train_feature_h5,
        'batch_size': args.batch_size,
        'num_workers': 4,
        'shuffle': True
    }
    train_loader = CLEVRDataLoader(**train_loader_kwargs)
    if args.val:
        val_loader_kwargs = {
            'question_pt': args.val_question_pt,
            'vocab_json': args.vocab_json,
            'feature_h5': args.val_feature_h5,
            'batch_size': args.batch_size,
            'num_workers': 2,
            'shuffle': False
        }
        val_loader = CLEVRDataLoader(**val_loader_kwargs)

    logging.info("Create model.........")
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model_kwargs = {
        'vocab': train_loader.vocab,
        'dim_word': args.dim_word,
        'dim_hidden': args.hidden_size,
        'dim_vision': args.dim_vision,
        'state_size': args.state_size,
        'mid_size': args.mid_size,
        'dropout_prob': args.dropout,
        'glimpses': args.glimpses,
        'dim_edge': args.dim_edge
    }
    model_kwargs_tosave = { k:v for k,v in model_kwargs.items() if k != 'vocab' }
    model = Net(**model_kwargs)

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model).to(device)  # Support multiple GPUS
    else:
        model = model.to(device)
    logging.info(model)
    ################################################################

    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adamax(parameters, args.lr, weight_decay=0)

    start_epoch = 0
    if args.restore:
        print("Restore checkpoint and optimizer...")
        ckpt = os.path.join(args.save_dir, 'model.pt')
        ckpt = torch.load(ckpt, map_location={'cuda:0': 'cpu'})
        start_epoch = 4
        if torch.cuda.device_count() > 1:
            model.module.load_state_dict(ckpt['state_dict'])
        else:
            model.load_state_dict(ckpt['state_dict'])
        # optimizer.load_state_dict(ckpt['optimizer'])
    # scheduler = optim.lr_scheduler.ExponentialLR(optimizer, 0.5**(1 / args.lr_halflife))
    # scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[8, 12, 15, 17, 19, 22], gamma=0.5)
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5)
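    # linear warm-up: the first len(gradual_warmup_steps) epochs use a fraction of the base lr,
    # after which the MultiStepLR schedule above takes over (see the epoch loop below).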
    gradual_warmup_steps = [0.25 * args.lr, 0.5 * args.lr, 0.75 * args.lr, 1.0 * args.lr]
    criterion = nn.CrossEntropyLoss().to(device)
    last_acc = 0.
    logging.info("Start training........")
    for epoch in range(start_epoch, args.num_epoch):
        model.train()
        if epoch < len(gradual_warmup_steps):
            utils.set_lr(optimizer, gradual_warmup_steps[epoch])
        else:
            scheduler.step()
        for p in optimizer.param_groups:
            lr_rate = p['lr']
            logging.info("Learning rate: %6f" % (lr_rate))
        for i, batch in enumerate(train_loader):
            progress = epoch+i/len(train_loader)
            orig_idx, image_idx, answers, *batch_input = [todevice(x, device) for x in batch]
            batch_input = [x.detach() for x in batch_input]
            logits, loss_time = model(*batch_input)
            ##################### loss #####################
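            # cross-entropy on the answers plus an auxiliary "time" loss returned by the model,
            # down-weighted by a factor of 0.01 before being added to the total loss.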
            ce_loss = criterion(logits, answers)
            loss_time = 0.01 * loss_time.mean()
            loss = ce_loss + loss_time
            ################################################
            optimizer.zero_grad()
            loss.backward() 
            nn.utils.clip_grad_value_(parameters, clip_value=0.25)
            optimizer.step()
            if (i+1) % (len(train_loader) // 20) == 0:
                logging.info("Progress %.3f  ce_loss = %.3f  time_loss = %.3f" % (progress, ce_loss.item(), loss_time.item()))
            del answers, batch_input, logits
            torch.cuda.empty_cache()
        # save_checkpoint(epoch, model, optimizer, model_kwargs_tosave, os.path.join(args.save_dir, 'model.pt')) 
        logging.info(' >>>>>> save to %s <<<<<<' % (args.save_dir))
        if args.val:
            if epoch % 1 == 0:  # validate every epoch
                valid_acc = validate(model, val_loader, device)
                logging.info('\n ~~~~~~ Valid Accuracy: %.4f ~~~~~~~\n' % valid_acc)
                if valid_acc >= last_acc:
                    last_acc = valid_acc
                    save_checkpoint(epoch, model, optimizer, model_kwargs_tosave, os.path.join(args.save_dir, 'model.pt'))
Пример #26
0
    infos['iter'] = 0
    infos['epoch'] = 0

iteration = infos['iter']
start_epoch = infos['epoch']
for e in range(start_epoch, start_epoch + 100):
    # Training with cross-entropy
    # learning rate decay
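    # from epoch 3 onwards the base learning rate is multiplied by the decay rate, with one
    # extra decay step every learning_rate_decay_every epochs; earlier epochs keep the base rate.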
    if e >= 3:
        current_lr = opt.learning_rate * (opt.learning_rate_decay_rate**int(
            (e - 3) // opt.learning_rate_decay_every + 1))
    else:
        current_lr = opt.learning_rate
    if e == 20:
        break
    set_lr(optimizer, current_lr)
    running_loss = 0.
    re_sort_net.train()
    with tqdm(desc='Epoch %d - train' % e,
              ncols=150,
              unit='it',
              total=len(dataloader_train)) as pbar:
        for it, (keys, values) in enumerate(dataloader_train):
            detections = keys  # b_s, 100, feat
            det_seqs_v, det_seqs_sr, control_verb, \
                gt_seqs_v, gt_seqs_sr, _, _, captions = values

            optimizer.zero_grad()
            # batch_verb, batch_det_sr, batch_gt_sr
            index = 0
            for i in range(detections.size(0)):  # batch
Пример #27
0
    if args.resume_path is not None:
        resume_ckpt = torch.load(args.resume_path)
        model.load_state_dict(resume_ckpt['state_dict'])
        # average.load_state_dict(resume_ckpt['average'])
        classifier.load_state_dict(resume_ckpt['classifier'])
        args.begin_epoch = resume_ckpt['epoch'] + 1
        best_acc = resume_ckpt['best_acc']
        optimizer.load_state_dict(resume_ckpt['optimizer'])

        print('==> Resume training...')
        print('best acc is: {}'.format(best_acc))
        del resume_ckpt
        torch.cuda.empty_cache()

        set_lr(optimizer, 0.03)
        resume = True
    else:
        print('==> Train from scratch...')
        resume = False
        best_acc = 0
        print('==> loading pre-trained model and NCE')
        ckpt = torch.load(args.pretrain_path)
        try:
            model.load_state_dict(ckpt['state_dict'])
        except Exception:
            print(
                '=> [Warning]: checkpoint weights do not match the model; using non-strict load =='
            )
            model = neq_load_customized(model, ckpt['state_dict'])
        # average.load_state_dict(ckpt['average'])
Пример #28
0
def run(net,
        loader,
        optimizer,
        scheduler,
        tracker,
        train=False,
        prefix='',
        epoch=0):
    """ Run an epoch over the given loader """
    if train:
        net.train()
        # tracker_class, tracker_params = tracker.MovingMeanMonitor, {'momentum': 0.99}
    else:
        net.eval()

    tracker_class, tracker_params = tracker.MeanMonitor, {}

    # set learning rate decay policy
    if epoch < len(config.gradual_warmup_steps
                   ) and config.schedule_method == 'warm_up':
        utils.set_lr(optimizer, config.gradual_warmup_steps[epoch])

    elif (epoch in config.lr_decay_epochs
          ) and train and config.schedule_method == 'warm_up':
        utils.decay_lr(optimizer, config.lr_decay_rate)

    utils.print_lr(optimizer, prefix, epoch)

    loader = tqdm(loader, desc='{} E{:03d}'.format(prefix, epoch), ncols=0)
    loss_tracker = tracker.track('{}_loss'.format(prefix),
                                 tracker_class(**tracker_params))
    acc_tracker = tracker.track('{}_acc'.format(prefix),
                                tracker_class(**tracker_params))

    for v, q, a, b, idx, v_mask, q_mask, q_len in loader:
        var_params = {
            'requires_grad': False,
        }
        v = Variable(v.cuda(), **var_params)
        q = Variable(q.cuda(), **var_params)
        a = Variable(a.cuda(), **var_params)
        b = Variable(b.cuda(), **var_params)
        q_len = Variable(q_len.cuda(), **var_params)
        v_mask = Variable(v_mask.cuda(), **var_params)
        q_mask = Variable(q_mask.cuda(), **var_params)

        out = net(v, b, q, v_mask, q_mask, q_len)

        answer = utils.process_answer(a)
        loss = utils.calculate_loss(answer, out, method=config.loss_method)
        acc = utils.batch_accuracy(out, answer).data.cpu()

        if train:
            optimizer.zero_grad()
            loss.backward()
            # clip gradient
            clip_grad_norm_(net.parameters(), config.clip_value)
            optimizer.step()
            if config.schedule_method == 'batch_decay':
                scheduler.step()

        loss_tracker.append(loss.item())
        acc_tracker.append(acc.mean())
        fmt = '{:.4f}'.format
        loader.set_postfix(loss=fmt(loss_tracker.mean.value),
                           acc=fmt(acc_tracker.mean.value))

    return acc_tracker.mean.value, loss_tracker.mean.value
Пример #29
0
def train(epoch):
    print('\nEpoch: %d' % epoch)
    snet.train()
    decoder.train()
    train_loss = 0
    train_cls_loss = 0

    conf_mat = np.zeros((NUM_CLASSES, NUM_CLASSES))
    conf_mat_a = np.zeros((NUM_CLASSES, NUM_CLASSES))
    conf_mat_b = np.zeros((NUM_CLASSES, NUM_CLASSES))

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        current_lr = args.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = args.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (img_teacher, img_student,
                    target) in enumerate(trainloader):

        if args.cuda:
            img_teacher = img_teacher.cuda(non_blocking=True)
            img_student = img_student.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        optimizer.zero_grad()

        if args.augmentation:
            img_teacher, teacher_target_a, teacher_target_b, teacher_lam = mixup_data(
                img_teacher, target, 0.6)
            img_teacher, teacher_target_a, teacher_target_b = map(
                Variable, (img_teacher, teacher_target_a, teacher_target_b))

            img_student, student_target_a, student_target_b, student_lam = mixup_data(
                img_student, target, 0.6)
            img_student, student_target_a, student_target_b = map(
                Variable, (img_student, student_target_a, student_target_b))
        else:
            img_teacher, img_student, target = Variable(img_teacher), Variable(
                img_student), Variable(target)

        rb1_s, rb2_s, rb3_s, mimic_s, out_s = snet(img_student)
        rb1_t, rb2_t, rb3_t, mimic_t, out_t = tnet(img_teacher)

        if args.augmentation:
            cls_loss = mixup_criterion(Cls_crit, out_s, student_target_a,
                                       student_target_b, student_lam)
        else:
            cls_loss = Cls_crit(out_s, target)

        kd_loss = KD_T_crit(out_t, out_s)
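        # Combine the losses according to the chosen distillation scheme: KD keeps only the
        # 0.2/0.8 cls/KD split, DE adds a decoder-based style reconstruction of the teacher
        # image, AS adds an absolute-difference similarity term on the block-2 features, and
        # SSDEAS drops the cls/KD terms and keeps only the decoder and similarity losses.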

        if args.distillation == 'KD':
            loss = 0.2 * cls_loss + 0.8 * kd_loss
        elif args.distillation == 'DE':
            new_rb1_s = decoder(rb1_s)
            decoder_loss = losses.styleLoss(img_teacher, new_rb1_s.cuda(),
                                            MSE_crit)
            loss = 0.2 * cls_loss + 0.8 * kd_loss + 0.1 * decoder_loss
        elif args.distillation == 'AS':
            rb2_loss = losses.Absdiff_Similarity(rb2_t, rb2_s).cuda()
            loss = 0.2 * cls_loss + 0.8 * kd_loss + 0.9 * rb2_loss
        elif args.distillation == 'DEAS':
            new_rb1_s = decoder(rb1_s)
            decoder_loss = losses.styleLoss(img_teacher, new_rb1_s.cuda(),
                                            MSE_crit)
            rb2_loss = losses.Absdiff_Similarity(rb2_t, rb2_s).cuda()
            loss = 0.2 * cls_loss + 0.8 * kd_loss + 0.1 * decoder_loss + 0.9 * rb2_loss
        elif args.distillation == 'SSDEAS':
            new_rb1_s = decoder(rb1_s)
            decoder_loss = losses.styleLoss(img_teacher, new_rb1_s.cuda(),
                                            MSE_crit)
            rb2_loss = losses.Absdiff_Similarity(rb2_t, rb2_s).cuda()
            loss = 0 * cls_loss + 0 * kd_loss + 0.1 * decoder_loss + 0.9 * rb2_loss
        else:
            raise Exception('Invalid distillation name...')

        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        train_loss += loss.item()
        train_cls_loss += cls_loss.item()

        if args.augmentation:
            conf_mat_a += losses.confusion_matrix(out_s, student_target_a,
                                                  NUM_CLASSES)
            acc_a = sum([conf_mat_a[i, i] for i in range(conf_mat_a.shape[0])
                         ]) / conf_mat_a.sum()
            precision_a = np.array([
                conf_mat_a[i, i] / (conf_mat_a[i].sum() + 1e-10)
                for i in range(conf_mat_a.shape[0])
            ])
            recall_a = np.array([
                conf_mat_a[i, i] / (conf_mat_a[:, i].sum() + 1e-10)
                for i in range(conf_mat_a.shape[0])
            ])
            mAP_a = sum(precision_a) / len(precision_a)
            F1_score_a = (2 * precision_a * recall_a /
                          (precision_a + recall_a + 1e-10)).mean()

            conf_mat_b += losses.confusion_matrix(out_s, student_target_b,
                                                  NUM_CLASSES)
            acc_b = sum([conf_mat_b[i, i] for i in range(conf_mat_b.shape[0])
                         ]) / conf_mat_b.sum()
            precision_b = np.array([
                conf_mat_b[i, i] / (conf_mat_b[i].sum() + 1e-10)
                for i in range(conf_mat_b.shape[0])
            ])
            recall_b = np.array([
                conf_mat_b[i, i] / (conf_mat_b[:, i].sum() + 1e-10)
                for i in range(conf_mat_b.shape[0])
            ])
            mAP_b = sum(precision_b) / len(precision_b)
            F1_score_b = (2 * precision_b * recall_b /
                          (precision_b + recall_b + 1e-10)).mean()

            acc = student_lam * acc_a + (1 - student_lam) * acc_b
            mAP = student_lam * mAP_a + (1 - student_lam) * mAP_b
            F1_score = student_lam * F1_score_a + (1 -
                                                   student_lam) * F1_score_b

        else:
            conf_mat += losses.confusion_matrix(out_s, target, NUM_CLASSES)
            acc = sum([conf_mat[i, i]
                       for i in range(conf_mat.shape[0])]) / conf_mat.sum()
            precision = [
                conf_mat[i, i] / (conf_mat[i].sum() + 1e-10)
                for i in range(conf_mat.shape[0])
            ]
            mAP = sum(precision) / len(precision)

            recall = [
                conf_mat[i, i] / (conf_mat[:, i].sum() + 1e-10)
                for i in range(conf_mat.shape[0])
            ]
            precision = np.array(precision)
            recall = np.array(recall)
            f1 = 2 * precision * recall / (precision + recall + 1e-10)
            F1_score = f1.mean()

        #utils.progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% | mAP: %.3f%% | F1: %.3f%%'
        #% (train_loss/(batch_idx+1), 100.*acc, 100.* mAP, 100.* F1_score))

    return train_cls_loss / (batch_idx + 1), 100. * acc, 100. * mAP, 100. * F1_score
Пример #30
0
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0

    conf_mat = np.zeros((NUM_CLASSES, NUM_CLASSES))
    conf_mat_a = np.zeros((NUM_CLASSES, NUM_CLASSES))
    conf_mat_b = np.zeros((NUM_CLASSES, NUM_CLASSES))

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        current_lr = args.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = args.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()

        if args.augmentation:
            inputs, targets_a, targets_b, lam = mixup_data(
                inputs, targets, 0.6)
            inputs, targets_a, targets_b = map(Variable,
                                               (inputs, targets_a, targets_b))
        else:
            inputs, targets = Variable(inputs), Variable(targets)

        _, _, _, _, outputs = net(inputs)

        if args.augmentation:
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b,
                                   lam)
        else:
            loss = criterion(outputs, targets)

        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        train_loss += loss.item()

        if args.augmentation:
            conf_mat_a += losses.confusion_matrix(outputs, targets_a,
                                                  NUM_CLASSES)
            acc_a = sum([conf_mat_a[i, i] for i in range(conf_mat_a.shape[0])
                         ]) / conf_mat_a.sum()
            precision_a = np.array([
                conf_mat_a[i, i] / (conf_mat_a[i].sum() + 1e-10)
                for i in range(conf_mat_a.shape[0])
            ])
            recall_a = np.array([
                conf_mat_a[i, i] / (conf_mat_a[:, i].sum() + 1e-10)
                for i in range(conf_mat_a.shape[0])
            ])
            mAP_a = sum(precision_a) / len(precision_a)
            F1_score_a = (2 * precision_a * recall_a /
                          (precision_a + recall_a + 1e-10)).mean()

            conf_mat_b += losses.confusion_matrix(outputs, targets_b,
                                                  NUM_CLASSES)
            acc_b = sum([conf_mat_b[i, i] for i in range(conf_mat_b.shape[0])
                         ]) / conf_mat_b.sum()
            precision_b = np.array([
                conf_mat_b[i, i] / (conf_mat_b[i].sum() + 1e-10)
                for i in range(conf_mat_b.shape[0])
            ])
            recall_b = np.array([
                conf_mat_b[i, i] / (conf_mat_b[:, i].sum() + 1e-10)
                for i in range(conf_mat_b.shape[0])
            ])
            mAP_b = sum(precision_b) / len(precision_b)
            F1_score_b = (2 * precision_b * recall_b /
                          (precision_b + recall_b + 1e-10)).mean()

            acc = lam * acc_a + (1 - lam) * acc_b
            mAP = lam * mAP_a + (1 - lam) * mAP_b
            F1_score = lam * F1_score_a + (1 - lam) * F1_score_b

        else:
            conf_mat += losses.confusion_matrix(outputs, targets, NUM_CLASSES)
            acc = sum([conf_mat[i, i]
                       for i in range(conf_mat.shape[0])]) / conf_mat.sum()
            precision = [
                conf_mat[i, i] / (conf_mat[i].sum() + 1e-10)
                for i in range(conf_mat.shape[0])
            ]
            mAP = sum(precision) / len(precision)

            recall = [
                conf_mat[i, i] / (conf_mat[:, i].sum() + 1e-10)
                for i in range(conf_mat.shape[0])
            ]
            precision = np.array(precision)
            recall = np.array(recall)
            f1 = 2 * precision * recall / (precision + recall + 1e-10)
            F1_score = f1.mean()

        #utils.progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% | mAP: %.3f%% | F1: %.3f%%'
        #% (train_loss/(batch_idx+1), 100.*acc, 100.* mAP, 100.* F1_score))

    return train_loss / (batch_idx + 1), 100. * acc, 100. * mAP, 100. * F1_score