Example #1
def train(Config,
          model,
          epoch_num,
          start_epoch,
          optimizer,
          exp_lr_scheduler,
          data_loader,
          save_dir,
          data_size=448,
          savepoint=500,
          checkpoint=1000):
    # savepoint: save without evaluation
    # checkpoint: save with evaluation

    step = 0
    eval_train_flag = False
    rec_loss = []
    checkpoint_list = []

    train_batch_size = data_loader['train'].batch_size
    train_epoch_step = data_loader['train'].__len__()
    train_loss_recorder = LossRecord(train_batch_size)

    if savepoint > train_epoch_step:
        savepoint = 1 * train_epoch_step
        checkpoint = savepoint

    date_suffix = dt()
    log_file = open(
        os.path.join(
            Config.log_folder,
            'formal_log_r50_dcl_%s_%s.log' % (str(data_size), date_suffix)),
        'a')

    add_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()
    get_focal_loss = FocalLoss()
    get_angle_loss = AngleLoss()

    for epoch in range(start_epoch, epoch_num - 1):
        exp_lr_scheduler.step(epoch)
        model.train(True)

        save_grad = []
        for batch_cnt, data in enumerate(data_loader['train']):
            step += 1
            loss = 0
            model.train(True)
            if Config.use_backbone:
                inputs, labels, img_names = data
                inputs = Variable(inputs.cuda())
                labels = Variable(torch.from_numpy(np.array(labels)).cuda())

            if Config.use_dcl:
                inputs, labels, labels_swap, swap_law, img_names = data

                inputs = Variable(inputs.cuda())
                labels = Variable(torch.from_numpy(np.array(labels)).cuda())
                labels_swap = Variable(
                    torch.from_numpy(np.array(labels_swap)).cuda())
                swap_law = Variable(
                    torch.from_numpy(np.array(swap_law)).float().cuda())

            optimizer.zero_grad()

            # a short (final) batch cannot form full swap pairs, so pass the
            # unswapped images explicitly
            if inputs.size(0) < 2 * train_batch_size:
                outputs = model(inputs, inputs[0:-1:2])
            else:
                outputs = model(inputs, None)

            if Config.use_focal_loss:
                ce_loss = get_focal_loss(outputs[0], labels)
            else:
                ce_loss = get_ce_loss(outputs[0], labels)

            if Config.use_Asoftmax:
                fetch_batch = labels.size(0)
                if batch_cnt % (train_epoch_step // 5) == 0:
                    angle_loss = get_angle_loss(outputs[3],
                                                labels[0:fetch_batch:2],
                                                decay=0.9)
                else:
                    angle_loss = get_angle_loss(outputs[3],
                                                labels[0:fetch_batch:2])
                loss += angle_loss

            loss += ce_loss

            alpha_ = 1
            beta_ = 1
            gamma_ = 0.01 if Config.dataset == 'STCAR' or Config.dataset == 'AIR' else 1
            if Config.use_dcl:
                swap_loss = get_ce_loss(outputs[1], labels_swap) * beta_
                loss += swap_loss
                law_loss = add_loss(outputs[2], swap_law) * gamma_
                loss += law_loss

            loss.backward()
            torch.cuda.synchronize()

            optimizer.step()
            torch.cuda.synchronize()

            if Config.use_dcl:
                print(
                    'step: {:-8d} / {:d} loss=ce_loss+swap_loss+law_loss: {:6.4f} = {:6.4f} + {:6.4f} + {:6.4f} '
                    .format(step, train_epoch_step,
                            loss.detach().item(),
                            ce_loss.detach().item(),
                            swap_loss.detach().item(),
                            law_loss.detach().item()),
                    flush=True)
            if Config.use_backbone:
                print(
                    'step: {:-8d} / {:d} loss=ce_loss: {:6.4f} = {:6.4f} '
                    .format(step, train_epoch_step,
                            loss.detach().item(),
                            ce_loss.detach().item()),
                    flush=True)
            rec_loss.append(loss.detach().item())

            train_loss_recorder.update(loss.detach().item())

            # evaluation & save
            if step % checkpoint == 0:
                rec_loss = []
                print(32 * '-', flush=True)
                print(
                    'step: {:d} / {:d} global_step: {:8.2f} train_epoch: {:04d} rec_train_loss: {:6.4f}'
                    .format(step, train_epoch_step,
                            1.0 * step / train_epoch_step, epoch,
                            train_loss_recorder.get_val()),
                    flush=True)
                print('current lr:%s' % exp_lr_scheduler.get_lr(), flush=True)
                if eval_train_flag:
                    trainval_acc1, trainval_acc2, trainval_acc3 = eval_turn(
                        model, data_loader['trainval'], 'trainval', epoch,
                        log_file)
                    if abs(trainval_acc1 - trainval_acc3) < 0.01:
                        eval_train_flag = False

                val_acc1, val_acc2, val_acc3 = eval_turn(
                    model, data_loader['val'], 'val', epoch, log_file)

                save_path = os.path.join(
                    save_dir, 'weights_%d_%d_%.4f_%.4f.pth' %
                    (epoch, batch_cnt, val_acc1, val_acc3))
                torch.cuda.synchronize()
                torch.save(model.state_dict(), save_path)
                print('saved model to %s' % (save_path), flush=True)
                torch.cuda.empty_cache()

            # save only
            elif step % savepoint == 0:
                train_loss_recorder.update(rec_loss)
                rec_loss = []
                save_path = os.path.join(
                    save_dir, 'savepoint_weights-%d-%s.pth' % (step, dt()))

                checkpoint_list.append(save_path)
                if len(checkpoint_list) == 6:
                    os.remove(checkpoint_list[0])
                    del checkpoint_list[0]
                torch.save(model.state_dict(), save_path)
                torch.cuda.empty_cache()

    log_file.close()
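
The DCL objective assembled above is a weighted sum of three terms: the classification loss, the adversarial swap loss, and an L1 "law" (region-alignment) loss. A minimal, self-contained sketch of that composition, with hypothetical shapes (8 samples, 200 classes, 7x7 = 49 region positions) standing in for the real model outputs:

import torch
import torch.nn as nn

# hypothetical stand-ins for the model outputs
cls_logits = torch.randn(8, 200)       # outputs[0]: class scores
swap_logits = torch.randn(8, 2)        # outputs[1]: original vs. destructed
law_pred = torch.randn(8, 49)          # outputs[2]: predicted region order
labels = torch.randint(0, 200, (8,))
labels_swap = torch.randint(0, 2, (8,))
swap_law = torch.randn(8, 49)

beta_, gamma_ = 1.0, 1.0
ce_loss = nn.CrossEntropyLoss()(cls_logits, labels)
swap_loss = nn.CrossEntropyLoss()(swap_logits, labels_swap) * beta_
law_loss = nn.L1Loss()(law_pred, swap_law) * gamma_
loss = ce_loss + swap_loss + law_loss
print(float(loss))
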
Example #2
def train(Config,
          model,
          epoch_num,
          start_epoch,
          optimizer,
          exp_lr_scheduler,
          data_loader,
          save_dir,
          data_size=448,
          savepoint=5000,
          checkpoint=5000):
    # savepoint: save without evaluation
    # checkpoint: save with evaluation

    eval_train_flag = False
    rec_loss = []
    checkpoint_list = []

    train_batch_size = data_loader['train'].batch_size
    train_epoch_step = data_loader['train'].__len__()
    train_loss_recorder = LossRecord(train_batch_size)

    if savepoint > train_epoch_step:
        savepoint = 1 * train_epoch_step
        checkpoint = savepoint

    date_suffix = dt()
    log_file = open(
        os.path.join(
            Config.log_folder,
            'formal_log_r50_dcl_%s_%s.log' % (str(data_size), date_suffix)),
        'a')

    add_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()
    get_ce_sig_loss = nn.BCELoss()
    get_focal_loss = FocalLoss()
    get_angle_loss = AngleLoss()
    step = 0

    for epoch in range(start_epoch, epoch_num - 1):
        exp_lr_scheduler.step(epoch)
        model.train(True)

        save_grad = []
        for batch_cnt, data in enumerate(data_loader['train']):
            step += 1
            loss = 0
            model.train(True)

            if Config.use_backbone:
                inputs, labels, img_names = data
                inputs = Variable(inputs.cuda())
                # multi-hot labels as floats for BCELoss
                labels = Variable(torch.FloatTensor(np.array(labels)).cuda())

            if Config.use_dcl:
                inputs, labels, labels_swap, swap_law, law_index, img_names = data

                inputs = Variable(inputs.cuda())

                # multi-hot labels as floats for BCELoss
                labels = Variable(torch.FloatTensor(np.array(labels)).cuda())

                labels_swap = Variable(
                    torch.LongTensor(np.array(labels_swap)).cuda())
                swap_law = Variable(
                    torch.LongTensor(np.array(swap_law)).float().cuda())

            optimizer.zero_grad()

            if inputs.size(0) < 2 * train_batch_size:
                outputs = model(inputs, inputs[0:-1:2])
            else:
                outputs = model(inputs, law_index)

            # even indices are the unswapped images; the index tensor assumes a
            # fixed batch of 10
            idx_unswap = torch.tensor([0, 2, 4, 6, 8], dtype=torch.long).cuda()
            unswap_label = torch.index_select(labels, dim=0, index=idx_unswap)

            if Config.use_focal_loss:
                ce_loss = get_focal_loss(outputs[0], labels)
            else:
                # BCE over the unswapped half of the batch (classification, batch x 200)
                ce_loss = get_ce_sig_loss(outputs[0], unswap_label)

            if Config.use_Asoftmax:
                fetch_batch = labels.size(0)
                if batch_cnt % (train_epoch_step // 5) == 0:
                    angle_loss = get_angle_loss(outputs[3],
                                                labels[0:fetch_batch:2],
                                                decay=0.9)
                else:
                    angle_loss = get_angle_loss(outputs[3],
                                                labels[0:fetch_batch:2])
                loss += angle_loss

            alpha_ = 1
            loss += ce_loss * alpha_

            beta_ = 0.1
            gamma_ = 0.01 if Config.dataset == 'STCAR' or Config.dataset == 'AIR' else 1

            if Config.use_dcl:
                # adversarial swap classification (batch x 2)
                swap_loss = get_ce_loss(outputs[1], labels_swap) * beta_
                loss += swap_loss
                # region-alignment term (batch x 49); L1 loss is the mean
                # absolute element-wise difference between input and target
                law_loss = add_loss(outputs[2], swap_law) * gamma_
                loss += law_loss

            loss.backward()
            torch.cuda.synchronize()

            optimizer.step()
            torch.cuda.synchronize()

            if Config.use_dcl:
                print(
                    'epoch:{:d}, globalstep: {:-8d},  {:d} / {:d} \n loss=ce_l+swap_l+law_l: {:6.4f} = {:6.4f} + {:6.4f} + {:6.4f} '
                    .format(epoch, step, batch_cnt, train_epoch_step,
                            loss.detach().item(),
                            ce_loss.detach().item(),
                            swap_loss.detach().item(),
                            law_loss.detach().item()),
                    flush=True)
            if Config.use_backbone:
                print(
                    'step: {:-8d} / {:d} loss=ce_loss: {:6.4f} = {:6.4f} '
                    .format(step, train_epoch_step,
                            loss.detach().item(),
                            ce_loss.detach().item()),
                    flush=True)
            rec_loss.append(loss.detach().item())

            train_loss_recorder.update(loss.detach().item())

            # evaluation & save
            if step % checkpoint == 0:
                rec_loss = []
                print(32 * '-', flush=True)
                print(
                    'step: {:d} / {:d} global_step: {:8.2f} train_epoch: {:04d} rec_train_loss: {:6.4f}'
                    .format(step, train_epoch_step,
                            1.0 * step / train_epoch_step, epoch,
                            train_loss_recorder.get_val()),
                    flush=True)
                print('current lr:%s' % exp_lr_scheduler.get_lr(), flush=True)

                val_acc = eval_turn(Config, model, data_loader['trainval'],
                                    'val', epoch, log_file)

                # if val_acc >0.9:
                #     checkpoint = 500
                #     savepoint = 500
                # save_path = os.path.join(save_dir, 'weights_%d_%d_%.4f_%.4f.pth'%(epoch, batch_cnt, val_acc1, val_acc3))
                save_path = os.path.join(
                    save_dir,
                    'weights_%d_%d_%.4f.pth' % (epoch, batch_cnt, val_acc))

                torch.cuda.synchronize()
                torch.save(model.state_dict(), save_path)
                print('saved model to %s' % (save_path), flush=True)
                torch.cuda.empty_cache()

            # save only
            elif step % savepoint == 0:
                train_loss_recorder.update(rec_loss)
                rec_loss = []
                save_path = os.path.join(
                    save_dir, 'savepoint_weights-%d-%s.pth' % (step, dt()))

                checkpoint_list.append(save_path)
                if len(checkpoint_list) == 6:
                    os.remove(checkpoint_list[0])
                    del checkpoint_list[0]
                torch.save(model.state_dict(), save_path)
                torch.cuda.empty_cache()

    log_file.close()
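
Example #2 above computes BCELoss against only the unswapped half of each batch, selected through a hard-coded even-index tensor. A small sketch of the same selection, assuming the loader interleaves (original, swapped) pairs so the even positions hold the originals:

import torch

batch = torch.arange(10).float().unsqueeze(1)   # stand-in for a batch of 10 samples
idx_unswap = torch.arange(0, batch.size(0), 2)  # even indices, without hard-coding the batch size
unswapped = torch.index_select(batch, dim=0, index=idx_unswap)
print(unswapped.squeeze(1))  # tensor([0., 2., 4., 6., 8.])
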
Example #3
def eval_turn(Config, model, data_loader, val_version, epoch_num, log_file):

    model.train(False)

    val_corrects1 = 0
    val_corrects2 = 0
    val_corrects3 = 0
    val_size = data_loader.__len__()
    item_count = data_loader.total_item_len
    t0 = time.time()
    get_l1_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()

    val_batch_size = data_loader.batch_size
    val_epoch_step = data_loader.__len__()
    num_cls = data_loader.num_cls

    val_loss_recorder = LossRecord(val_batch_size)
    val_celoss_recorder = LossRecord(val_batch_size)
    print('evaluating %s ...' % val_version, flush=True)
    with torch.no_grad():
        for batch_cnt_val, data_val in enumerate(data_loader):
            inputs = Variable(data_val[0].cuda())
            labels = Variable(
                torch.from_numpy(np.array(data_val[1])).long().cuda())
            outputs = model(inputs)
            loss = 0

            ce_loss = get_ce_loss(outputs[0], labels).item()
            loss += ce_loss

            val_loss_recorder.update(loss)
            val_celoss_recorder.update(ce_loss)

            if Config.use_dcl and Config.cls_2xmul:
                outputs_pred = outputs[0] + outputs[1][:, 0:num_cls] + outputs[
                    1][:, num_cls:2 * num_cls]
            else:
                outputs_pred = outputs[0]
            top3_val, top3_pos = torch.topk(outputs_pred, 3)


            batch_corrects1 = torch.sum((top3_pos[:, 0] == labels)).data.item()
            val_corrects1 += batch_corrects1
            batch_corrects2 = torch.sum((top3_pos[:, 1] == labels)).data.item()
            val_corrects2 += (batch_corrects2 + batch_corrects1)
            batch_corrects3 = torch.sum((top3_pos[:, 2] == labels)).data.item()
            val_corrects3 += (batch_corrects3 + batch_corrects2 +
                              batch_corrects1)

        val_acc1 = val_corrects1 / item_count
        val_acc2 = val_corrects2 / item_count
        val_acc3 = val_corrects3 / item_count

        log_file.write(val_version + '\t' + str(val_loss_recorder.get_val()) +
                       '\t' + str(val_celoss_recorder.get_val()) + '\t' +
                       str(val_acc1) + '\t' + str(val_acc3) + '\n')

        t1 = time.time()
        since = t1 - t0
        print('--' * 30, flush=True)
        print(
            '% 3d %s %s %s-loss: %.4f ||%s-acc@1: %.4f %s-acc@2: %.4f %s-acc@3: %.4f loss: %8.4f ||time: %d'
            % (epoch_num, val_version, dt(), val_version,
               val_loss_recorder.get_val(init=True), val_version, val_acc1,
               val_version, val_acc2, val_version, val_acc3, loss, since),
            flush=True)
        print('--' * 30, flush=True)

    return val_acc1, val_acc2, val_acc3
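
The eval_turn above accumulates top-1/2/3 hits by re-adding the lower-rank counts at each position, which is an unrolled form of cumulative top-k accuracy. The same computation in compact form (a sketch, not the project's code):

import torch

def topk_accuracy(logits, labels, ks=(1, 2, 3)):
    # fraction of samples whose true label appears among the top-k predictions
    _, topk_pos = torch.topk(logits, max(ks), dim=1)
    hits = (topk_pos == labels.unsqueeze(1))   # batch x max(ks) booleans
    return [hits[:, :k].any(dim=1).float().mean().item() for k in ks]

logits = torch.randn(32, 200)
labels = torch.randint(0, 200, (32,))
print(topk_accuracy(logits, labels))  # three floats, acc@1 <= acc@2 <= acc@3
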
Example #4
def eval_turn(Config, model, data_loader, val_version, epoch_num, log_file):

    model.train(False)

    val_corrects1 = 0
    val_corrects2 = 0
    val_corrects3 = 0
    val_size = data_loader.__len__()
    item_count = data_loader.total_item_len
    t0 = time.time()
    get_l1_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()

    val_batch_size = data_loader.batch_size
    val_epoch_step = data_loader.__len__()
    num_cls = data_loader.num_cls

    val_loss_recorder = LossRecord(val_batch_size)
    val_celoss_recorder = LossRecord(val_batch_size)
    print('evaluating %s ...' % val_version, flush=True)
    scores = []
    test_labels = []
    with torch.no_grad():
        for batch_cnt_val, data_val in enumerate(data_loader):
            inputs = Variable(data_val[0].cuda())
            labels = Variable(
                torch.from_numpy(np.array(data_val[1])).long().cuda())
            preds = model(inputs)
            preds = F.softmax(preds[0], dim=1)
            preds = preds.cpu().data.numpy()

            # keep the positive-class probability (binary task)
            preds = preds[:, 1].squeeze()

            labels = list(labels.squeeze())
            preds = list(preds)

            scores = scores + preds
            test_labels = test_labels + labels


        # calculate tpr
        fpr_list = [0.01, 0.005, 0.001]
        threshold_list = get_thresholdtable_from_fpr(scores, test_labels,
                                                     fpr_list)
        tpr_list = get_tpr_from_threshold(scores, test_labels, threshold_list)

        # show results
        print(
            '========================================================================='
        )
        print('TPR@FPR=10E-3: {}\n'.format(tpr_list[0]))
        print('TPR@FPR=5E-3: {}\n'.format(tpr_list[1]))
        print('TPR@FPR=10E-4: {}\n'.format(tpr_list[2]))
        print(
            '========================================================================='
        )

        log_file.write(val_version + '\t' + str(tpr_list[0]) + '\t' +
                       str(tpr_list[1]) + '\t' + str(tpr_list[2]) + '\n')

        t1 = time.time()
        since = t1 - t0
        print('--' * 30, flush=True)
        # print('% 3d %s %s %s-loss: %.4f ||%s-acc@1: %.4f %s-acc@2: %.4f %s-acc@3: %.4f ||time: %d' % (epoch_num, val_version, dt(), val_version, val_loss_recorder.get_val(init=True), val_version, val_acc1,val_version, val_acc2, val_version, val_acc3, since), flush=True)
        print(
            'TPR@FPR=10E-3: %.4f || TPR@FPR=5E-3: %.4f || TPR@FPR=10E-4: %.4f ||time: %d'
            % (tpr_list[0], tpr_list[1], tpr_list[2], since),
            flush=True)
        print('--' * 30, flush=True)

    return tpr_list[0], tpr_list[1], tpr_list[2]
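
The TPR@FPR numbers above come from get_thresholdtable_from_fpr and get_tpr_from_threshold, whose implementations are not shown here. A plausible sketch of what they compute, assuming binary labels with 1 = positive: pick the score threshold that admits the target fraction of negatives, then measure the fraction of positives above it. The helper names below are hypothetical stand-ins:

import numpy as np

def thresholds_from_fpr(scores, labels, fpr_list):
    # hypothetical stand-in for get_thresholdtable_from_fpr: the k-th highest
    # negative score lets ~k negatives through (FPR = k / #negatives)
    neg = np.sort(np.asarray(scores)[np.asarray(labels) == 0])[::-1]
    return [neg[max(int(fpr * len(neg)) - 1, 0)] for fpr in fpr_list]

def tpr_from_thresholds(scores, labels, thresholds):
    # hypothetical stand-in for get_tpr_from_threshold
    pos = np.asarray(scores)[np.asarray(labels) == 1]
    return [float((pos >= t).mean()) for t in thresholds]

scores = np.random.rand(1000)
labels = (np.random.rand(1000) > 0.5).astype(int)
ths = thresholds_from_fpr(scores, labels, [0.01, 0.005, 0.001])
print(tpr_from_thresholds(scores, labels, ths))
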
Example #5
def train(Config,
          model,
          epoch_num,
          start_epoch,
          optimizer,
          exp_lr_scheduler,
          data_loader,
          save_dir,
          sw,
          data_size=448,
          savepoint=500,
          checkpoint=1000):
    # savepoint: save without evaluation
    # checkpoint: save with evaluation

    best_prec1 = 0.

    step = 0
    eval_train_flag = False
    rec_loss = []
    checkpoint_list = []

    train_batch_size = data_loader['train'].batch_size
    train_epoch_step = data_loader['train'].__len__()
    train_loss_recorder = LossRecord(train_batch_size)

    if savepoint > train_epoch_step:
        savepoint = 1 * train_epoch_step
        checkpoint = savepoint

    date_suffix = dt()
    # log_file = open(os.path.join(Config.log_folder, 'formal_log_r50_dcl_%s_%s.log'%(str(data_size), date_suffix)), 'a')

    add_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()
    get_loss1 = Loss_1()
    get_focal_loss = FocalLoss()
    get_angle_loss = AngleLoss()

    for epoch in range(start_epoch, epoch_num - 1):
        optimizer.step()
        exp_lr_scheduler.step(epoch)
        model.train(True)

        save_grad = []
        for batch_cnt, data in enumerate(data_loader['train']):
            step += 1
            loss = 0
            model.train(True)
            if Config.use_backbone:
                inputs, labels, img_names = data
                inputs = Variable(inputs.cuda())
                labels = Variable(torch.from_numpy(np.array(labels)).cuda())

            if Config.use_dcl:
                if Config.multi:
                    inputs, labels, labels_swap, swap_law, blabels, clabels, tlabels, img_names = data
                else:
                    inputs, labels, labels_swap, swap_law, img_names = data
                inputs = Variable(inputs.cuda())
                labels = Variable(torch.from_numpy(np.array(labels)).cuda())
                labels_swap = Variable(
                    torch.from_numpy(np.array(labels_swap)).cuda())
                swap_law = Variable(
                    torch.from_numpy(np.array(swap_law)).float().cuda())
                if Config.multi:
                    blabels = Variable(
                        torch.from_numpy(np.array(blabels)).cuda())
                    clabels = Variable(
                        torch.from_numpy(np.array(clabels)).cuda())
                    tlabels = Variable(
                        torch.from_numpy(np.array(tlabels)).cuda())

            optimizer.zero_grad()

            # visualize the input image in TensorBoard
            # sw.add_image('attention_image', inputs[0])

            if inputs.size(0) < 2 * train_batch_size:
                outputs = model(inputs, inputs[0:-1:2])
            else:
                outputs = model(inputs, None)
            if Config.multi:
                if Config.use_loss1:
                    b_loss, pro_b = get_loss1(outputs[2], blabels)
                    # link the brand label to the vehicle model via brand_prob
                    t_loss, _ = get_loss1(outputs[4],
                                          tlabels,
                                          brand_prob=pro_b)
                    s_loss, pro_s = get_loss1(outputs[0],
                                              labels,
                                              brand_prob=pro_b)
                    c_loss, _ = get_loss1(outputs[3], clabels)
                    ce_loss = b_loss + t_loss + s_loss + c_loss * 0.2
                else:
                    ce_loss = get_ce_loss(outputs[0], labels) + get_ce_loss(
                        outputs[0], blabels) + get_ce_loss(
                            outputs[0], clabels) + get_ce_loss(
                                outputs[0], tlabels)
            else:
                if Config.use_focal_loss:
                    ce_loss = get_focal_loss(outputs[0], labels)
                else:
                    if Config.use_loss1:
                        # Loss_1 combines the two losses internally
                        ce_loss_1, pro = get_loss1(outputs[0], labels)
                        ce_loss = 0
                    else:
                        ce_loss = get_ce_loss(outputs[0], labels)

            if Config.use_Asoftmax:
                fetch_batch = labels.size(0)
                if batch_cnt % (train_epoch_step // 5) == 0:
                    angle_loss = get_angle_loss(outputs[3],
                                                labels[0:fetch_batch:2],
                                                decay=0.9)
                else:
                    angle_loss = get_angle_loss(outputs[3],
                                                labels[0:fetch_batch:2])
                loss += angle_loss

            loss += ce_loss

            alpha_ = 1
            beta_ = 1
            # gamma_ = 0.01 if Config.dataset == 'STCAR' or Config.dataset == 'AIR' else 1
            gamma_ = 0.01
            if Config.use_dcl:
                if Config.use_focal_loss:
                    swap_loss = get_focal_loss(outputs[1], labels_swap) * beta_
                else:
                    if Config.use_loss1:
                        swap_loss, _ = get_loss1(outputs[1],
                                                 labels_swap,
                                                 brand_prob=pro_s)
                    else:
                        swap_loss = get_ce_loss(outputs[1],
                                                labels_swap) * beta_
                loss += swap_loss
                if not Config.no_loc:
                    law_loss = add_loss(outputs[2], swap_law) * gamma_
                    loss += law_loss

            loss.backward()
            torch.cuda.synchronize()

            optimizer.step()
            torch.cuda.synchronize()

            if Config.use_dcl:
                if Config.multi:
                    print(
                        'step: {:-8d} / {:d}  loss: {:6.4f}  ce_loss: {:6.4f} swap_loss: {:6.4f} '
                        .format(step, train_epoch_step,
                                loss.detach().item(),
                                ce_loss.detach().item(),
                                swap_loss.detach().item()),
                        flush=True)
                elif Config.no_loc:
                    print(
                        'step: {:-8d} / {:d} loss=ce_loss+swap_loss+law_loss: {:6.4f} = {:6.4f} + {:6.4f} '
                        .format(step, train_epoch_step,
                                loss.detach().item(),
                                ce_loss.detach().item(),
                                swap_loss.detach().item()),
                        flush=True)
                else:
                    print(
                        'step: {:-8d} / {:d} loss=ce_loss+swap_loss+law_loss: {:6.4f} = {:6.4f} + {:6.4f} + {:6.4f} '
                        .format(step, train_epoch_step,
                                loss.detach().item(),
                                ce_loss.detach().item(),
                                swap_loss.detach().item(),
                                law_loss.detach().item()),
                        flush=True)
            if Config.use_backbone:
                print(
                    'step: {:-8d} / {:d} loss=ce_loss: {:6.4f} = {:6.4f} '
                    .format(step, train_epoch_step,
                            loss.detach().item(),
                            ce_loss.detach().item()),
                    flush=True)
            rec_loss.append(loss.detach().item())

            train_loss_recorder.update(loss.detach().item())

            # evaluation & save
            if step % checkpoint == 0:
                rec_loss = []
                print(32 * '-', flush=True)
                print(
                    'step: {:d} / {:d} global_step: {:8.2f} train_epoch: {:04d} rec_train_loss: {:6.4f}'
                    .format(step, train_epoch_step,
                            1.0 * step / train_epoch_step, epoch,
                            train_loss_recorder.get_val()),
                    flush=True)
                print('current lr:%s' % exp_lr_scheduler.get_lr(), flush=True)
                if Config.multi:
                    val_acc_s, val_acc_b, val_acc_c, val_acc_t = eval_turn(
                        Config, model, data_loader['val'], 'val', epoch)
                    is_best = val_acc_s > best_prec1
                    best_prec1 = max(val_acc_s, best_prec1)
                    filename = 'weights_%d_%d_%.4f_%.4f.pth' % (
                        epoch, batch_cnt, val_acc_s, val_acc_b)
                    save_checkpoint(model.state_dict(), is_best, save_dir,
                                    filename)
                    sw.add_scalar("Train_Loss/Total_loss",
                                  loss.detach().item(), epoch)
                    sw.add_scalar("Train_Loss/b_loss",
                                  b_loss.detach().item(), epoch)
                    sw.add_scalar("Train_Loss/t_loss",
                                  t_loss.detach().item(), epoch)
                    sw.add_scalar("Train_Loss/s_loss",
                                  s_loss.detach().item(), epoch)
                    sw.add_scalar("Train_Loss/c_loss",
                                  c_loss.detach().item(), epoch)
                    sw.add_scalar("Accurancy/val_acc_s", val_acc_s, epoch)
                    sw.add_scalar("Accurancy/val_acc_b", val_acc_b, epoch)
                    sw.add_scalar("Accurancy/val_acc_c", val_acc_c, epoch)
                    sw.add_scalar("Accurancy/val_acc_t", val_acc_t, epoch)
                    sw.add_scalar("learning_rate",
                                  exp_lr_scheduler.get_lr()[1], epoch)
                else:
                    val_acc1, val_acc2, val_acc3 = eval_turn(
                        Config, model, data_loader['val'], 'val', epoch)
                    is_best = val_acc1 > best_prec1
                    best_prec1 = max(val_acc1, best_prec1)
                    filename = 'weights_%d_%d_%.4f_%.4f.pth' % (
                        epoch, batch_cnt, val_acc1, val_acc3)
                    save_checkpoint(model.state_dict(), is_best, save_dir,
                                    filename)
                    sw.add_scalar("Train_Loss", loss.detach().item(), epoch)
                    sw.add_scalar("Val_Accurancy", val_acc1, epoch)
                    sw.add_scalar("learning_rate",
                                  exp_lr_scheduler.get_lr()[1], epoch)
                torch.cuda.empty_cache()

            # save only
            elif step % savepoint == 0:
                train_loss_recorder.update(rec_loss)
                rec_loss = []
                save_path = os.path.join(
                    save_dir, 'savepoint_weights-%d-%s.pth' % (step, dt()))

                checkpoint_list.append(save_path)
                if len(checkpoint_list) == 6:
                    os.remove(checkpoint_list[0])
                    del checkpoint_list[0]
                torch.save(model.state_dict(), save_path)
                torch.cuda.empty_cache()
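
Example #5 persists weights through save_checkpoint with an is_best flag instead of calling torch.save directly. That helper is not shown here; a common pattern it likely follows is sketched below, under the assumption that it saves the latest weights and keeps a copy as a fixed best-model file when validation improves:

import os
import shutil
import torch

def save_checkpoint(state, is_best, save_dir, filename):
    # sketch of the assumed behavior, not the project's actual helper
    path = os.path.join(save_dir, filename)
    torch.save(state, path)
    if is_best:
        # keep an easy-to-find copy of the best weights so far
        shutil.copyfile(path, os.path.join(save_dir, 'model_best.pth'))
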
Example #6
def eval_turn(Config, model, data_loader, val_version, epoch_num, log_file):

    model.train(False)

    val_corrects1 = 0
    val_corrects2 = 0
    val_corrects3 = 0
    val_size = data_loader.__len__()
    item_count = data_loader.total_item_len
    t0 = time.time()
    get_l1_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()
    get_ce_sig_loss = nn.BCELoss()

    val_batch_size = data_loader.batch_size
    val_epoch_step = data_loader.__len__()
    num_cls = data_loader.num_cls

    val_loss_recorder = LossRecord(val_batch_size)
    val_celoss_recorder = LossRecord(val_batch_size)
    print('evaluating %s ...' % val_version, flush=True)

    eval_t = {}  # results dict; assigning through locals() is unreliable
    sum_fbeta = 0
    y_pred, Y_test = [], []
    test_file = open("./result_log/val.log", "a+")

    with torch.no_grad():
        for batch_cnt_val, data_val in enumerate(data_loader):

            inputs, labels, labels_swap, swap_law, law_index, img_names = data_val

            labels_tensor_ = Variable(
                torch.FloatTensor(np.array(labels)).cuda())

            # even indices are the unswapped images (fixed batch of 10)
            idx_unswap = torch.tensor([0, 2, 4, 6, 8], dtype=torch.long).cuda()
            labels_tensor = torch.index_select(labels_tensor_,
                                               dim=0,
                                               index=idx_unswap)

            labels_npy = np.array(labels_tensor.cpu())
            labels_ = labels_npy.astype(np.uint8)

            inputs = Variable(inputs.cuda())

            outputs = model(inputs, law_index)
            loss = 0

            ce_loss = get_ce_sig_loss(outputs[0], labels_tensor).item()
            loss += ce_loss

            val_loss_recorder.update(loss)
            val_celoss_recorder.update(ce_loss)

            if Config.use_dcl and Config.cls_2xmul:
                outputs_pred = outputs[0] + outputs[1][:, 0:num_cls] + outputs[
                    1][:, num_cls:2 * num_cls]
            else:
                outputs_pred = outputs[0]

            # label ranking AP works on raw scores, so no normalization is needed
            predict_mul_ = outputs_pred.cpu().numpy()

            temp_fbeta = label_ranking_average_precision_score(
                labels_, predict_mul_)

            # threshold at 0.5: entries above 0.5 become 1, the rest 0
            predict_multensor = torch.ge(outputs_pred, 0.5)
            predict_mul = predict_multensor.cpu().numpy()

            sum_fbeta = sum_fbeta + temp_fbeta
            ave_num = batch_cnt_val + 1

            y_pred.extend(predict_mul[:])
            Y_test.extend(labels_[:])

        ave_acc = sum_fbeta / ave_num

        y_pred_ = np.array(y_pred)
        Y_test_ = np.array(Y_test)

        log_file.write(val_version + '\t' + str(val_loss_recorder.get_val()) +
                       '\t' + str(val_celoss_recorder.get_val()) + '\t' +
                       str(ave_acc) + '\n')

        t1 = time.time()
        since = t1 - t0
        print('--' * 30, flush=True)
        print('% 3d %s %s %s-loss: %.4f ||%s-ave@acc: %.4f ||time: %d' %
              (epoch_num, val_version, dt(), val_version,
               val_loss_recorder.get_val(init=True), val_version, ave_acc,
               since),
              flush=True)
        print('--' * 30, flush=True)

        eval_t['metrics_' + str(0.5)] = evaluate_test(predictions=y_pred_,
                                                      labels=Y_test_)

        metrics = eval_t['metrics_' + str(0.5)]
        output = "=> Test : Coverage = {}".format(epoch_num)
        output += "=> Test : Coverage = {}\n Average Precision = {}\n Micro Precision = {}\n Micro Recall = {}\n Micro F Score = {}\n".format(
            metrics['coverage'], ave_acc, metrics['micro_precision'],
            metrics['micro_recall'], metrics['micro_f1'])
        output += "=> Test : Macro Precision = {}\n Macro Recall = {}\n Macro F Score = {}\n ranking_loss = {}\n hamming_loss = {}\n\n".format(
            metrics['macro_precision'], metrics['macro_recall'],
            metrics['macro_f1'], metrics['ranking_loss'],
            metrics['hamming_loss'])
        # output += "\n=> Test : ma-False_positive_rate(FPR) = {}, mi-False_positive_rate(FPR) = {}\n".format(metrics['ma-FPR'],metrics['mi-FPR'])
        print(output)
        test_file.write(output)
        test_file.close()

    return ave_acc
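
The ave_acc above averages scikit-learn's label_ranking_average_precision_score over batches; for multi-hot targets it rewards ranking every true label above all false ones, which is why raw scores can be passed without normalization. Minimal usage:

import numpy as np
from sklearn.metrics import label_ranking_average_precision_score

y_true = np.array([[1, 0, 0], [0, 0, 1]])                 # multi-hot ground truth
y_score = np.array([[0.75, 0.5, 1.0], [1.0, 0.2, 0.1]])   # model scores
print(label_ranking_average_precision_score(y_true, y_score))  # ~0.416
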
Example #7
def train(Config,
          model,
          epoch_num,
          start_epoch,
          optimizer,
          exp_lr_scheduler,
          data_loader,
          save_dir,
          data_ver='all',
          data_size=448,
          savepoint=500,
          checkpoint=1000):

    step = 0
    eval_train_flag = False
    rec_loss = []
    checkpoint_list = []

    train_batch_size = data_loader['train'].batch_size
    train_epoch_step = data_loader['train'].__len__()
    train_loss_recorder = LossRecord(train_batch_size)
    bitempered_layer = BiTemperedLayer(t1=0.9, t2=1.05)
    bitempered_loss = BiTemperedLoss()

    add_loss = nn.L1Loss()
    get_focal_loss = FocalLoss()
    get_angle_loss = AngleLoss()
    get_ce_loss = nn.CrossEntropyLoss()
    get_l2_loss = nn.MSELoss()

    for epoch in range(start_epoch, epoch_num - 1):
        exp_lr_scheduler.step(epoch)
        model.train(True)

        save_grad = []

        for batch_cnt, data in enumerate(data_loader['train']):
            step += 1
            loss = 0
            model.train(True)

            inputs, labels, img_names = data
            inputs = inputs.cuda()
            labels = torch.from_numpy(np.array(labels)).cuda()

            optimizer.zero_grad()

            outputs = model(inputs)
            # one-hot targets over 50030 classes for a soft loss; acc_loss is
            # defined elsewhere in the project
            labels_onehot = torch.zeros(outputs.shape[0],
                                        50030).cuda().scatter_(
                                            1, labels.unsqueeze_(1), 1)
            ce_loss = acc_loss(labels_onehot, F.softmax(outputs, 1))
            loss += ce_loss

            loss.backward()
            #torch.cuda.synchronize()

            optimizer.step()
            #torch.cuda.synchronize()

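
Example #7 builds one-hot targets with scatter_ and feeds softmax probabilities into acc_loss, which is not defined in the snippet. A self-contained sketch with a hypothetical soft cross-entropy standing in for it:

import torch
import torch.nn.functional as F

def soft_ce(target_dist, probs, eps=1e-7):
    # hypothetical stand-in for acc_loss: cross-entropy between a target
    # distribution and predicted probabilities
    return -(target_dist * torch.log(probs + eps)).sum(dim=1).mean()

logits = torch.randn(4, 10, requires_grad=True)
labels = torch.tensor([1, 3, 5, 7])
onehot = torch.zeros(4, 10).scatter_(1, labels.unsqueeze(1), 1.0)
loss = soft_ce(onehot, F.softmax(logits, dim=1))
loss.backward()
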
Example #8
def eval_turn(Config, model, data_loader, val_version, epoch_num):

    model.train(False)

    val_corrects1 = 0
    val_corrects2 = 0
    val_corrects3 = 0
    val_corrects_s = 0
    val_corrects_b = 0
    val_corrects_c = 0
    val_corrects_t = 0
    val_size = data_loader.__len__()
    item_count = data_loader.total_item_len
    t0 = time.time()
    get_l1_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()

    val_batch_size = data_loader.batch_size
    val_epoch_step = data_loader.__len__()
    num_cls = data_loader.num_cls

    val_loss_recorder = LossRecord(val_batch_size)
    val_celoss_recorder = LossRecord(val_batch_size)
    print('evaluating %s ...'%val_version, flush=True)
    with torch.no_grad():
        for batch_cnt_val, data_val in enumerate(data_loader):
            inputs = Variable(data_val[0].cuda())
            labels = Variable(torch.from_numpy(np.array(data_val[1])).long().cuda())
            outputs = model(inputs)
            loss = 0

            ce_loss = get_ce_loss(outputs[0], labels).item()
            loss += ce_loss

            val_loss_recorder.update(loss)
            val_celoss_recorder.update(ce_loss)
            if Config.multi:
                if Config.no_loc:
                    blabels = Variable(torch.from_numpy(np.array(data_val[2])).long().cuda())
                    clabels = Variable(torch.from_numpy(np.array(data_val[3])).long().cuda())
                    tlabels = Variable(torch.from_numpy(np.array(data_val[4])).long().cuda())
                    s_pred = outputs[0]
                    b_pred = outputs[2]
                    c_pred = outputs[3]
                    t_pred = outputs[4]
                    s_pred_confidence, s_pred_predicted = torch.max(s_pred, 1)
                    b_pred_confidence, b_pred_predicted = torch.max(b_pred, 1)
                    c_pred_confidence, c_pred_predicted = torch.max(c_pred, 1)
                    t_pred_confidence, t_pred_predicted = torch.max(t_pred, 1)

                    print('{:s} eval_batch: {:-6d} / {:d} loss: {:8.4f}'.format(val_version, batch_cnt_val,
                                                                                val_epoch_step, loss), flush=True)

                    batch_corrects_s = torch.sum((s_pred_predicted == labels)).data.item()
                    batch_corrects_b = torch.sum((b_pred_predicted == blabels)).data.item()
                    batch_corrects_c = torch.sum((c_pred_predicted == clabels)).data.item()
                    batch_corrects_t = torch.sum((t_pred_predicted == tlabels)).data.item()

                    val_corrects_s += batch_corrects_s
                    val_corrects_b += batch_corrects_b
                    val_corrects_c += batch_corrects_c
                    val_corrects_t += batch_corrects_t
            else:
                # outputs_pred = outputs[0] + outputs[1][:,0:num_cls] + outputs[1][:,num_cls:2*num_cls]
                outputs_pred = outputs[0]
                top3_val, top3_pos = torch.topk(outputs_pred, 3)

                print('{:s} eval_batch: {:-6d} / {:d} loss: {:8.4f}'.format(val_version, batch_cnt_val, val_epoch_step, loss), flush=True)

                batch_corrects1 = torch.sum((top3_pos[:, 0] == labels)).data.item()
                val_corrects1 += batch_corrects1
                batch_corrects2 = torch.sum((top3_pos[:, 1] == labels)).data.item()
                val_corrects2 += (batch_corrects2 + batch_corrects1)
                batch_corrects3 = torch.sum((top3_pos[:, 2] == labels)).data.item()
                val_corrects3 += (batch_corrects3 + batch_corrects2 + batch_corrects1)



        if Config.multi:
            if Config.no_loc:
                val_acc_s = val_corrects_s/item_count
                val_acc_b = val_corrects_b/item_count
                val_acc_c = val_corrects_c/item_count
                val_acc_t = val_corrects_t/item_count
                t1 = time.time()
                since = t1-t0
                print('--'*30, flush=True)
                print('% 3d %s %s %s-loss: %.4f ||%s-acc@S: %.4f %s-acc@C: %.4f %s-acc@T: %.4f ||time: %d' % (epoch_num, val_version, dt(), val_version, val_loss_recorder.get_val(init=True), val_version, val_acc_s, val_version, val_acc_c, val_version, val_acc_t, since), flush=True)
                print('--' * 30, flush=True)
                return val_acc_s, val_acc_b, val_acc_c, val_acc_t
        else:
            val_acc1 = val_corrects1 / item_count
            val_acc2 = val_corrects2 / item_count
            val_acc3 = val_corrects3 / item_count


            t1 = time.time()
            since = t1-t0
            print('--'*30, flush=True)
            print('% 3d %s %s %s-loss: %.4f ||%s-acc@1: %.4f %s-acc@2: %.4f %s-acc@3: %.4f ||time: %d' % (epoch_num, val_version, dt(), val_version, val_loss_recorder.get_val(init=True), val_version, val_acc1,val_version, val_acc2, val_version, val_acc3, since), flush=True)
            print('--' * 30, flush=True)

            return val_acc1, val_acc2, val_acc3
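
In the multi-head branch above, each output head is scored independently with torch.max. That bookkeeping can be written once and reused per head; a short sketch with hypothetical heads:

import torch

def top1_correct(logits, labels):
    # count of samples whose argmax prediction equals the label
    _, predicted = torch.max(logits, 1)
    return (predicted == labels).sum().item()

# hypothetical heads, mirroring the s/b/c/t labels in the example
logits = {h: torch.randn(16, 10) for h in 'sbct'}
labels = {h: torch.randint(0, 10, (16,)) for h in 'sbct'}
correct = {h: top1_correct(logits[h], labels[h]) for h in 'sbct'}
print(correct)
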
Example #9
def eval_turn(Config,
              model,
              data_loader,
              val_version,
              epoch_num,
              log_file,
              efd=None):
    model.train(False)
    val_corrects1 = 0
    val_corrects2 = 0
    val_corrects3 = 0
    bmy_correct = 0
    bm_correct = 0
    bb_correct = 0  # brand accuracy derived from the bmy head
    val_size = data_loader.__len__()
    item_count = data_loader.total_item_len
    t0 = time.time()
    get_l1_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()

    val_batch_size = data_loader.batch_size
    val_epoch_step = data_loader.__len__()
    num_cls = data_loader.num_cls

    val_loss_recorder = LossRecord(val_batch_size)
    val_celoss_recorder = LossRecord(val_batch_size)
    print('evaluating %s ...' % val_version, flush=True)
    # map fine-grained (brand-model-year) ids to brand/model records
    bmy_id_bm_vo_dict = WxsDsm.get_bmy_id_bm_vo_dict()
    with torch.no_grad():
        for batch_cnt_val, data_val in enumerate(data_loader):
            inputs = Variable(data_val[0].cuda())
            print('eval_model.eval_turn inputs: {0};'.format(inputs.shape))
            brand_labels = Variable(
                torch.from_numpy(np.array(data_val[1])).long().cuda())
            bmy_labels = Variable(
                torch.from_numpy(np.array(data_val[-1])).long().cuda())
            img_files = data_val[-2]
            outputs = model(inputs)
            loss = 0

            ce_loss = get_ce_loss(outputs[0], brand_labels).item()
            loss += ce_loss

            val_loss_recorder.update(loss)
            val_celoss_recorder.update(ce_loss)

            if Config.use_dcl and Config.cls_2xmul:
                outputs_pred = outputs[0] + outputs[1][:, 0:num_cls] + outputs[
                    1][:, num_cls:2 * num_cls]
            else:
                outputs_pred = outputs[0]
            top3_val, top3_pos = torch.topk(outputs_pred, 3)

            print('{:s} eval_batch: {:-6d} / {:d} loss: {:8.4f}'.format(
                val_version, batch_cnt_val, val_epoch_step, loss),
                  flush=True)

            batch_corrects1 = torch.sum(
                (top3_pos[:, 0] == brand_labels)).data.item()
            val_corrects1 += batch_corrects1
            batch_corrects2 = torch.sum(
                (top3_pos[:, 1] == brand_labels)).data.item()
            val_corrects2 += (batch_corrects2 + batch_corrects1)
            batch_corrects3 = torch.sum(
                (top3_pos[:, 2] == brand_labels)).data.item()
            val_corrects3 += (batch_corrects3 + batch_corrects2 +
                              batch_corrects1)
            # model-year (bmy) accuracy
            outputs_bmy = outputs[-1]
            bmy_top5_val, bmy_top5_pos = torch.topk(outputs_bmy, 5)
            batch_bmy_correct = torch.sum(
                (bmy_top5_pos[:, 0] == bmy_labels)).data.item()
            bmy_correct += batch_bmy_correct
            bb_correct = 0
            # vehicle-model accuracy: a hit when the predicted and ground-truth
            # bmy ids map to the same model_id
            batch_bm_correct = 0
            for im in range(bmy_top5_pos.shape[0]):
                pred_bmy_id = bmy_top5_pos[im][0].item()
                gt_bmy_id = bmy_labels[im].item()
                if pred_bmy_id in bmy_id_bm_vo_dict and gt_bmy_id in bmy_id_bm_vo_dict:
                    pred_bm_vo = bmy_id_bm_vo_dict[pred_bmy_id]
                    gt_bm_vo = bmy_id_bm_vo_dict[gt_bmy_id]
                    if pred_bm_vo['model_id'] == gt_bm_vo['model_id']:
                        batch_bm_correct += 1
            bm_correct += batch_bm_correct
            # write samples with a wrong top-1 brand prediction to top1_error_samples
            if efd is not None:
                for idx in range(top3_pos.shape[0]):
                    if top3_pos[idx][0] != brand_labels[idx]:
                        efd.write('{0}*{1}*{2}\n'.format(
                            img_files[idx], brand_labels[idx],
                            top3_pos[idx][0]))
            '''
            # 
            pred_size = top3_pos[:, 0].shape[0]
            batch_bb_correct = 0
            for idx in range(pred_size):
                pred_bmy = fgvc_id_brand_dict[int(top3_pos[idx][0])]
                pred_brand = pred_bmy.split('_')[0]
                gt_bmy = fgvc_id_brand_dict[int(labels[idx])]
                gt_brand = gt_bmy.split('_')[0]
                if pred_brand == gt_brand:
                    batch_bb_correct += 1
            bb_correct += batch_bb_correct
            brand_correct = 0
            '''

        val_acc1 = val_corrects1 / item_count
        val_acc2 = val_corrects2 / item_count
        val_acc3 = val_corrects3 / item_count
        bmy_acc = bmy_correct / item_count
        bm_acc = bm_correct / item_count
        bb_acc = bb_correct / item_count

        log_file.write(val_version + '\t' + str(val_loss_recorder.get_val()) +
                       '\t' + str(val_celoss_recorder.get_val()) + '\t' +
                       str(val_acc1) + '\t' + str(val_acc3) + '\n')

        t1 = time.time()
        since = t1 - t0
        print('--' * 30, flush=True)
        print(
            '% 3d %s %s %s-loss: %.4f || brand: %s-acc@1: %.4f %s-acc@2: %.4f %s-acc@3: %.4f; model: %.4f; year: %.4f; ||time: %d'
            % (epoch_num, val_version, dt(), val_version,
               val_loss_recorder.get_val(init=True), val_version, val_acc1,
               val_version, val_acc2, val_version, val_acc3, bm_acc, bmy_acc,
               since),
            flush=True)
        print('--' * 30, flush=True)

    return val_acc1, val_acc2, val_acc3
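
Example #9 scores the fine-grained brand-model-year (bmy) head at the coarser vehicle-model level by mapping both the predicted and ground-truth bmy ids to a model_id and comparing those. A sketch of that lookup with a toy mapping:

# toy mapping: fine-grained bmy id -> record with its parent model id
bmy_id_bm_vo_dict = {
    0: {'model_id': 100}, 1: {'model_id': 100}, 2: {'model_id': 200},
}

def same_model(pred_bmy_id, gt_bmy_id, mapping):
    # a hit at the vehicle-model level: both ids resolve to the same model_id
    return (pred_bmy_id in mapping and gt_bmy_id in mapping and
            mapping[pred_bmy_id]['model_id'] == mapping[gt_bmy_id]['model_id'])

print(same_model(0, 1, bmy_id_bm_vo_dict))  # True: different years, same model
print(same_model(0, 2, bmy_id_bm_vo_dict))  # False
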
Example #10
File: train_model.py  Project: yt7589/dcl
def train(Config,
          model,
          epoch_num,
          start_epoch,
          optimizer,
          exp_lr_scheduler,
          data_loader,
          save_dir,
          data_size=448,
          savepoint=500,
          checkpoint=1000):
    # savepoint: save without evaluation
    # checkpoint: save with evaluation
    bmy_weight = 1.0  # 1.5; weight of the bmy (brand-model-year) branch in the loss
    step = 0
    eval_train_flag = False
    rec_loss = []
    checkpoint_list = []

    steps = np.array([], dtype=int)  # np.int is deprecated in recent NumPy
    train_accs = np.array([], dtype=np.float32)
    test_accs = np.array([], dtype=np.float32)
    ce_losses = np.array([], dtype=np.float32)
    ce_loss_mu = -1
    ce_loss_std = 0.0

    train_batch_size = data_loader['train'].batch_size
    train_epoch_step = data_loader['train'].__len__()
    train_loss_recorder = LossRecord(train_batch_size)

    if savepoint > train_epoch_step:
        savepoint = 1 * train_epoch_step
        checkpoint = savepoint

    date_suffix = dt()
    log_file = open(
        os.path.join(
            Config.log_folder,
            'formal_log_r50_dcl_%s_%s.log' % (str(data_size), date_suffix)),
        'a')

    add_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()
    get_focal_loss = FocalLoss()
    get_angle_loss = AngleLoss()

    for epoch in range(start_epoch, epoch_num - 1):
        model.train(True)
        save_grad = []
        for batch_cnt, data in enumerate(data_loader['train']):
            step += 1
            loss = 0
            model.train(True)
            if Config.use_backbone:
                inputs, brand_labels, img_names, bmy_labels = data
                inputs = Variable(inputs.cuda())
                brand_labels = Variable(
                    torch.from_numpy(np.array(brand_labels)).cuda())
                bmy_labels = Variable(
                    torch.from_numpy(np.array(bmy_labels)).cuda())

            if Config.use_dcl:
                inputs, brand_labels, brand_labels_swap, swap_law, img_names, bmy_labels = data
                org_brand_labels = brand_labels
                inputs = Variable(inputs.cuda())
                brand_labels = Variable(
                    torch.from_numpy(np.array(brand_labels)).cuda())
                bmy_labels = Variable(
                    torch.from_numpy(np.array(bmy_labels)).cuda())
                brand_labels_swap = Variable(
                    torch.from_numpy(np.array(brand_labels_swap)).cuda())
                swap_law = Variable(
                    torch.from_numpy(np.array(swap_law)).float().cuda())

            optimizer.zero_grad()

            if inputs.size(0) < 2 * train_batch_size:
                outputs = model(inputs, inputs[0:-1:2])
            else:
                outputs = model(inputs, None)

            if Config.use_focal_loss:
                ce_loss_brand = get_focal_loss(outputs[0], brand_labels)
                ce_loss_bmy = get_focal_loss(outputs[-1], bmy_labels)
            else:
                ce_loss_brand = get_ce_loss(outputs[0], brand_labels)
                ce_loss_bmy = get_ce_loss(outputs[-1], bmy_labels)
            ce_loss = ce_loss_brand + bmy_weight * ce_loss_bmy

            if Config.use_Asoftmax:
                fetch_batch = brand_labels.size(0)
                if batch_cnt % (train_epoch_step // 5) == 0:
                    angle_loss = get_angle_loss(outputs[3],
                                                brand_labels[0:fetch_batch:2],
                                                decay=0.9)
                else:
                    angle_loss = get_angle_loss(outputs[3],
                                                brand_labels[0:fetch_batch:2])
                loss += angle_loss

            loss += ce_loss
            ce_loss_val = ce_loss.detach().item()
            ce_losses = np.append(ce_losses, ce_loss_val)

            alpha_ = 1
            beta_ = 1
            gamma_ = 0.01 if Config.dataset == 'STCAR' or Config.dataset == 'AIR' else 1
            if Config.use_dcl:
                swap_loss = get_ce_loss(outputs[1], brand_labels_swap) * beta_
                loss += swap_loss
                law_loss = add_loss(outputs[2], swap_law) * gamma_
                loss += law_loss

            loss.backward()
            torch.cuda.synchronize()
            optimizer.step()
            exp_lr_scheduler.step(epoch)
            torch.cuda.synchronize()

            if Config.use_dcl:
                if ce_loss_mu > 0 and ce_loss_val > ce_loss_mu + 3.0 * ce_loss_std:
                    # log this batch: an unusually high loss may indicate
                    # mislabeled samples
                    print('suspicious batch: loss={0}; threshold={1};'.format(
                        ce_loss_val, ce_loss_mu + 3.0 * ce_loss_std))
                    with open(
                            './logs/abnormal_samples_{0}_{1}_{2}.txt'.format(
                                epoch, step, ce_loss_val), 'a+') as fd:
                        error_batch_len = len(img_names)
                        for i in range(error_batch_len):
                            fd.write('{0} <=> {1};\r\n'.format(
                                org_brand_labels[i * 2], img_names[i]))
                print('epoch{}: step: {:-8d} / {:d} loss=ce_loss+'
                      'swap_loss+law_loss: {:6.4f} = {:6.4f} '
                      '+ {:6.4f} + {:6.4f} brand_loss: {:6.4f}'.format(
                          epoch, step % train_epoch_step, train_epoch_step,
                          loss.detach().item(), ce_loss_val,
                          swap_loss.detach().item(),
                          law_loss.detach().item(),
                          ce_loss_brand.detach().item()),
                      flush=True)

            if Config.use_backbone:
                print('epoch{}: step: {:-8d} / {:d} '
                      'loss=ce_loss: {:6.4f} = {:6.4f} '.format(
                          epoch, step % train_epoch_step, train_epoch_step,
                          loss.detach().item(),
                          ce_loss.detach().item()),
                      flush=True)
            rec_loss.append(loss.detach().item())

            train_loss_recorder.update(loss.detach().item())

            # evaluation & save
            if step % checkpoint == 0:
                rec_loss = []
                print(32 * '-', flush=True)
                print(
                    'step: {:d} / {:d} global_step: {:8.2f} train_epoch: {:04d} rec_train_loss: {:6.4f}'
                    .format(step, train_epoch_step,
                            1.0 * step / train_epoch_step, epoch,
                            train_loss_recorder.get_val()),
                    flush=True)
                print('current lr:%s' % exp_lr_scheduler.get_lr(), flush=True)
                '''
                if eval_train_flag:
                    trainval_acc1, trainval_acc2, trainval_acc3 = eval_turn(Config, model, data_loader['trainval'], 'trainval', epoch, log_file)
                    if abs(trainval_acc1 - trainval_acc3) < 0.01:
                        eval_train_flag = False
                '''
                print('##### validate dataset #####')
                trainval_acc1, trainval_acc2, trainval_acc3 = eval_turn(
                    Config, model, data_loader['val'], 'val', epoch, log_file)
                print('##### test dataset #####')
                # test-set evaluation is disabled here; reuse the validation
                # accuracies so the bookkeeping below keeps working
                val_acc1, val_acc2, val_acc3 = \
                    trainval_acc1, trainval_acc2, trainval_acc3
                steps = np.append(steps, step)
                train_accs = np.append(train_accs, trainval_acc1)
                test_accs = np.append(test_accs, val_acc1)

                save_path = os.path.join(
                    save_dir, 'weights_%d_%d_%.4f_%.4f.pth' %
                    (epoch, batch_cnt, val_acc1, val_acc3))
                torch.cuda.synchronize()
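                # save in the legacy (non-zipfile) format so that
                # PyTorch < 1.6 can still load the weights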
                torch.save(model.state_dict(),
                           save_path,
                           _use_new_zipfile_serialization=False)
                print('saved model to %s' % (save_path), flush=True)
                torch.cuda.empty_cache()
                # Report the CE-loss statistics, then reset the accumulators
                ce_loss_mu = ce_losses.mean()
                ce_loss_std = ce_losses.std()
                print('Cross entropy loss: mu={0}; std={1}; range:{2}~{3};'.
                      format(ce_loss_mu, ce_loss_std,
                             ce_loss_mu - 3.0 * ce_loss_std,
                             ce_loss_mu + 3.0 * ce_loss_std))
                ce_losses = np.array([], dtype=np.float32)
                if train_accs.shape[0] > 30:
                    np.savetxt('./logs/steps1.txt', (steps, ))
                    np.savetxt('./logs/train_accs1.txt', (train_accs, ))
                    np.savetxt('./logs/test_accs1.txt', (test_accs, ))
                    steps = np.array([], dtype=np.int64)  # np.int was removed in NumPy 1.24
                    train_accs = np.array([], dtype=np.float32)
                    test_accs = np.array([], dtype=np.float32)

            # save only
            elif step % savepoint == 0:
                train_loss_recorder.update(rec_loss)
                rec_loss = []
                save_path = os.path.join(
                    save_dir, 'savepoint_weights-%d-%s.pth' % (step, dt()))

                checkpoint_list.append(save_path)
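                # rolling retention: keep only the 5 most recent savepoints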
                if len(checkpoint_list) == 6:
                    os.remove(checkpoint_list[0])
                    del checkpoint_list[0]
                torch.save(model.state_dict(),
                           save_path,
                           _use_new_zipfile_serialization=False)
                torch.cuda.empty_cache()

    log_file.close()
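
The suspicious-batch check above boils down to a 3-sigma outlier rule over per-batch CE losses. A minimal, self-contained sketch of that idea follows; the names (flag_suspicious, the toy loss values) are illustrative assumptions, not part of the original code.

import numpy as np

def flag_suspicious(loss_history, new_loss, k=3.0):
    # flag new_loss if it exceeds mean + k * std of the history
    if len(loss_history) < 2:
        return False  # too little data for a stable estimate
    mu = float(np.mean(loss_history))
    std = float(np.std(loss_history))
    return new_loss > mu + k * std

losses = []
for batch_loss in [0.9, 1.1, 1.0, 0.95, 4.2]:  # toy per-batch CE losses
    if flag_suspicious(losses, batch_loss):
        print('suspicious batch: loss=%.2f' % batch_loss)
    losses.append(batch_loss)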
Example #11
0
def eval_turn(Config, model, data_loader, val_version, epoch_num, log_file):

    model.train(False)

    val_corrects1 = 0
    val_corrects2 = 0
    val_corrects3 = 0
    val_size = data_loader.__len__()
    item_count = data_loader.total_item_len
    t0 = time.time()
    get_l1_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()
    # sigmoid cross-entropy for the multi-label targets used below
    get_sigmoid_ce = nn.BCEWithLogitsLoss()

    val_batch_size = data_loader.batch_size
    val_epoch_step = data_loader.__len__()
    num_cls = data_loader.num_cls

    val_loss_recorder = LossRecord(val_batch_size)
    val_celoss_recorder = LossRecord(val_batch_size)
    print('evaluating %s ...' % val_version, flush=True)

    sum_fbeta = 0

    with torch.no_grad():
        for batch_cnt_val, data_val in enumerate(data_loader):

            # inputs = Variable(data_val[0].cuda())
            # labels = Variable(torch.LongTensor(np.array(data_val[1])).long().cuda())

            inputs, labels, labels_swap, swap_law, img_names = data_val
            labels_npy = np.array(labels)

            labels_tensor = Variable(torch.FloatTensor(labels_npy).cuda())

            labels_ = labels_npy.astype(np.uint8)

            inputs = Variable(inputs.cuda())

            outputs = model(inputs)
            loss = 0

            # ce_loss = get_ce_loss(outputs[0], labels).item()
            ce_loss = get_sigmoid_ce(outputs[0], labels_tensor).item()
            loss += ce_loss

            val_loss_recorder.update(loss)
            val_celoss_recorder.update(ce_loss)

            if Config.use_dcl and Config.cls_2xmul:
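                # the adversarial head emits 2 * num_cls scores
                # (class x {original, shuffled}); fold both halves
                # back into the per-class prediction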
                outputs_pred = outputs[0] + outputs[1][:, 0:num_cls] + outputs[
                    1][:, num_cls:2 * num_cls]
            else:
                outputs_pred = outputs[0]
            # MAP here is ranking-based, so the scores need no normalization
            # predict_multensor = torch.ge(outputs_pred, 0.5)  # would set scores > 0.5 to 1, others to 0 (thresholding)
            predict_mul = outputs_pred.cpu().numpy()

            temp_fbeta = label_ranking_average_precision_score(
                labels_, predict_mul)
            #################################################################  dy modify    Micro precision
            # cor_sum = 0
            # num_sum =0

            # for j in range(10):

            #     query_col = labels_[j,:]
            #     label_col = predict_mul[j,:]

            #     index = np.where(label_col > 0.5)
            #     index_ = index[0]
            #     number_=index_.size

            #     query_binary = query_col[index]
            #     query_label = label_col[index]

            #     batch_corrects1 = np.count_nonzero(query_binary == query_label)

            #     cor_sum = cor_sum + batch_corrects1
            #     num_sum = num_sum + number_

            # temp_fbeta = cor_sum/num_sum
            ##################################################################

            sum_fbeta = sum_fbeta + temp_fbeta
            ave_num = batch_cnt_val + 1

            # top3_val, top3_pos = torch.topk(outputs_pred, 3)

            # print('{:s} eval_batch: {:-6d} / {:d} loss: {:8.4f}'.format(val_version, batch_cnt_val, val_epoch_step, loss), flush=True)

        #     batch_corrects1 = torch.sum((top3_pos[:, 0] == labels)).data.item()
        #     val_corrects1 += batch_corrects1

        #     batch_corrects2 = torch.sum((top3_pos[:, 1] == labels)).data.item()
        #     val_corrects2 += (batch_corrects2 + batch_corrects1)
        #     batch_corrects3 = torch.sum((top3_pos[:, 2] == labels)).data.item()
        #     val_corrects3 += (batch_corrects3 + batch_corrects2 + batch_corrects1)

        # val_acc1 = val_corrects1 / item_count
        # val_acc2 = val_corrects2 / item_count
        # val_acc3 = val_corrects3 / item_count

        # log_file.write(val_version  + '\t' +str(val_loss_recorder.get_val())+'\t' + str(val_celoss_recorder.get_val()) + '\t' + str(val_acc1) + '\t' + str(val_acc3) + '\n')

        # t1 = time.time()
        # since = t1-t0
        # print('--'*30, flush=True)
        # print('% 3d %s %s %s-loss: %.4f ||%s-acc@1: %.4f %s-acc@2: %.4f %s-acc@3: %.4f ||time: %d' % (epoch_num, val_version, dt(), val_version, val_loss_recorder.get_val(init=True), val_version, val_acc1,val_version, val_acc2, val_version, val_acc3, since), flush=True)
        # print('--' * 30, flush=True)

    # return val_acc1, val_acc2, val_acc3

        ave_acc = sum_fbeta / ave_num
        log_file.write(val_version + '\t' + str(val_loss_recorder.get_val()) +
                       '\t' + str(val_celoss_recorder.get_val()) + '\t' +
                       str(ave_acc) + '\n')

        t1 = time.time()
        since = t1 - t0
        print('--' * 30, flush=True)
        print('% 3d %s %s %s-loss: %.4f ||%s-ave@acc: %.4f ||time: %d' %
              (epoch_num, val_version, dt(), val_version,
               val_loss_recorder.get_val(init=True), val_version, ave_acc,
               since),
              flush=True)
        print('--' * 30, flush=True)

    return ave_acc
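
For reference, a tiny sketch of the metric eval_turn() averages: sklearn's label_ranking_average_precision_score takes binary multi-hot targets and real-valued scores, and depends only on the ranking of the scores, which is why the raw logits in outputs_pred can be passed without a sigmoid or thresholding. The arrays below are toy data, not from the original.

import numpy as np
from sklearn.metrics import label_ranking_average_precision_score

y_true = np.array([[1, 0, 0],
                   [0, 1, 1]], dtype=np.uint8)  # multi-hot labels
y_score = np.array([[2.3, -1.0, 0.2],
                    [-0.5, 1.7, 0.9]])          # raw logits
print(label_ranking_average_precision_score(y_true, y_score))  # -> 1.0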