def train_model(model, criterion_xent, criterion_htri, optimizer, trainloader, use_gpu , optimizer_center , criterion_center_loss, criterion_osm_caa, beta_ratio):
    """Run one training epoch with a combined identity/triplet/center/OSM-CAA loss.

    Args:
        model: network returning ``(logits, features)`` for an image batch.
        criterion_xent: cross-entropy (identity) loss on the logits.
        criterion_htri: triplet loss; called with the same features/pids for
            anchor/positive/negative — presumably mines triplets internally
            (TODO confirm against the criterion's implementation).
        optimizer: optimizer over the model parameters.
        trainloader: iterable of ``(imgs, pids, _)`` batches.
        use_gpu: move tensors to CUDA when True.
        optimizer_center: separate optimizer for the center-loss centers.
        criterion_center_loss: center-loss module; its ``centers`` buffer is
            also fed (transposed) to the OSM-CAA loss.
        criterion_osm_caa: OSM-CAA loss taking ``(features, pids, centers.T)``.
        beta_ratio: mixing weight between triplet and OSM-CAA terms.

    Returns:
        Tuple ``(epoch-average total loss, last-batch ide loss,
        last-batch triplet loss, last-batch osm loss)`` — note the per-term
        values come from the final batch only.
    """
    model.train()
    losses = AverageMeter()
    # "cetner" is a typo for "center"; the name is kept so the code stays untouched.
    cetner_loss_weight = 0.0005
    for batch_idx, (imgs, pids, _) in enumerate(trainloader):
        if use_gpu:
            imgs, pids   = imgs.cuda(), pids.cuda() 
        imgs, pids = Variable(imgs), Variable(pids)
        outputs, features  = model(imgs)
        ide_loss = criterion_xent(outputs , pids)
        triplet_loss = criterion_htri(features, features, features, pids, pids, pids)
        center_loss = criterion_center_loss(features, pids)
        # hosm_loss = criterion_osm_caa(features, pids , model.module.classifier.classifier.weight.t()) 
        # OSM-CAA uses the center-loss centers as class proxies (transposed).
        hosm_loss = criterion_osm_caa(features, pids , criterion_center_loss.centers.t() ) 
        
        loss = ide_loss + (1-beta_ratio )* triplet_loss  + center_loss * cetner_loss_weight + beta_ratio * hosm_loss 
        optimizer.zero_grad()
        optimizer_center.zero_grad()
        loss.backward()
        optimizer.step()
        # Undo the cetner_loss_weight scaling on the center gradients so the
        # centers are stepped by the *unweighted* center loss.
        for param in criterion_center_loss.parameters():
            param.grad.data *= (1./cetner_loss_weight)
        optimizer_center.step()
        losses.update(loss.data.item(), pids.size(0))
    return (losses.avg , ide_loss.item() , triplet_loss.item() , hosm_loss.item())
Exemplo n.º 2
0
def predict_set(nets, dataloader, runtime_params):
    """Run ``nets[0]`` over ``dataloader`` and collect landmark predictions.

    Args:
        nets: sequence of models; only the first is used.
        dataloader: yields ``(landmarks, imgs, img_paths)`` batches.
        runtime_params: dict with at least ``'run_type'`` and ``'debug'``;
            if any value equals ``'half'``, inputs are cast to half precision.

    Returns:
        ``(gt_landmarks, pred_landmarks, names)`` where both landmark arrays
        have shape ``(N, 28, 2)`` and ``names`` lists image paths in order.
    """
    run_type = runtime_params['run_type']
    progbar = Progbar(len(dataloader.dataset), stateful_metrics=['run-type'])
    batch_time = AverageMeter()
    names = []
    # Accumulate per-batch arrays and concatenate once at the end; seeding
    # np.concatenate with an empty 1-D array (as before) fails for 2-D outputs.
    pred_batches = []
    gt_batches = []
    with torch.no_grad():
        for i, (landmarks, imgs, img_paths) in enumerate(dataloader):
            s_time = time.time()
            imgs = imgs.cuda()

            names.extend(img_paths)

            net = nets[0]
            if 'half' in runtime_params.values():
                output = net(imgs.half())
            else:
                output = net(imgs)
            pred_batches.append(output.cpu().numpy())
            gt_batches.append(landmarks.data.numpy())
            progbar.add(imgs.size(0), values=[
                ('run-type', run_type)
            ])  # ,('batch_time', batch_time.val)])
            batch_time.update(time.time() - s_time)
            # Debug mode: stop after the second batch (i is truthy from 1 on).
            if runtime_params['debug'] and i:
                break
    pred_landmarks = np.concatenate(pred_batches, axis=0).reshape((-1, 28, 2))
    gt_landmarks = np.concatenate(gt_batches, axis=0).reshape((-1, 28, 2))
    assert gt_landmarks.shape == pred_landmarks.shape
    # Bug fix: the original returned gt_landmarks twice, discarding predictions.
    return gt_landmarks, pred_landmarks, names
def valid_trainer(model, valid_loader, criterion):
    """Validate a model returning (logits, reconstruction); the loss adds an
    autoencoder depth term via the module-level ``loss_autoencoder``.

    Returns (average validation loss, gt labels, predicted probabilities).
    """
    model.eval()
    meter = AverageMeter()
    prob_chunks, label_chunks = [], []

    with torch.no_grad():
        for step, (imgs, gt_label, gt_depth, imgname) in enumerate(tqdm(valid_loader)):
            imgs, gt_label, gt_depth = imgs.cuda(), gt_label.cuda(), gt_depth.cuda()
            # Keep the raw labels (including -1 markers) for the caller,
            # then zero the markers before computing the loss.
            label_chunks.append(gt_label.cpu().numpy())
            gt_label[gt_label == -1] = 0

            logits, decoded = model(imgs)
            batch_loss = criterion(logits, gt_label) + loss_autoencoder(
                decoded, gt_depth)
            prob_chunks.append(torch.sigmoid(logits).cpu().numpy())
            meter.update(to_scalar(batch_loss))

    avg_loss = meter.avg
    all_labels = np.concatenate(label_chunks, axis=0)
    all_probs = np.concatenate(prob_chunks, axis=0)
    return avg_loss, all_labels, all_probs
Exemplo n.º 4
0
def valid_trainer(model, valid_loader, criterion):
    """Validate a single-output classifier.

    Args:
        model: network mapping images to logits.
        valid_loader: yields ``(imgs, gt_label, imgname)`` batches.
        criterion: loss on (logits, labels).

    Returns:
        ``(average validation loss, gt labels (N, C), predicted probs (N, C))``.
    """
    model.eval()
    loss_meter = AverageMeter()

    preds_probs = []
    gt_list = []
    with torch.no_grad():
        for step, (imgs, gt_label, imgname) in enumerate(tqdm(valid_loader)):
            imgs = imgs.cuda()
            gt_label = gt_label.cuda()
            # Record labels before mapping the -1 "unknown" marker to 0.
            gt_list.append(gt_label.cpu().numpy())
            gt_label[gt_label == -1] = 0
            valid_logits = model(imgs)
            valid_loss = criterion(valid_logits, gt_label)
            valid_probs = torch.sigmoid(valid_logits)
            preds_probs.append(valid_probs.cpu().numpy())
            loss_meter.update(to_scalar(valid_loss))

    valid_loss = loss_meter.avg
    # Fixed typo in the log message ("losss" -> "loss").
    print(f'valid loss: {valid_loss}')

    gt_label = np.concatenate(gt_list, axis=0)
    preds_probs = np.concatenate(preds_probs, axis=0)
    return valid_loss, gt_label, preds_probs
def valid_trainer(model, valid_loader, criterion):
    """Validation pass for a model consuming (imgs, depth) pairs.

    Returns (average validation loss, gt labels, predicted probabilities).
    """
    model.eval()
    meter = AverageMeter()
    prob_chunks, label_chunks = [], []

    with torch.no_grad():
        for imgs, depth, gt_label, imgname in tqdm(valid_loader):
            imgs = imgs.cuda()
            gt_label = gt_label.cuda()
            # Keep raw labels first, then zero the -1 markers for the loss.
            label_chunks.append(gt_label.cpu().numpy())
            gt_label[gt_label == -1] = 0

            logits = model(imgs, depth)
            batch_loss = criterion(logits, gt_label)
            prob_chunks.append(torch.sigmoid(logits).cpu().numpy())
            meter.update(to_scalar(batch_loss))

    avg_loss = meter.avg
    all_labels = np.concatenate(label_chunks, axis=0)
    all_probs = np.concatenate(prob_chunks, axis=0)
    return avg_loss, all_labels, all_probs
Exemplo n.º 6
0
def batch_trainer(epoch, model, train_loader, criterion, optimizer):
    """One training epoch for a multi-head model with max-voted predictions.

    Returns (average train loss, gt labels, predicted probabilities).
    """
    model.train()
    epoch_start = time.time()
    meter = AverageMeter()

    n_batches = len(train_loader)
    label_chunks, prob_chunks = [], []
    log_every = 20

    # NOTE: the learning rate is read from the *second* param group.
    lr = optimizer.param_groups[1]['lr']

    for step, (imgs, gt_label, imgname) in enumerate(train_loader):
        step_start = time.time()
        imgs, gt_label = imgs.cuda(), gt_label.cuda()

        feat_map, output = model(imgs)
        # Sum the criterion over every head.
        train_loss = sum(criterion(head, gt_label) for head in output)

        # Maximum voting across the five heads.
        voted = output[0]
        for k in range(1, 5):
            voted = torch.max(voted, output[k])

        optimizer.zero_grad()
        train_loss.backward()
        clip_grad_norm_(model.parameters(),
                        max_norm=10.0)  # make larger learning rate works
        optimizer.step()

        meter.update(to_scalar(train_loss))
        label_chunks.append(gt_label.cpu().numpy())
        prob_chunks.append(torch.sigmoid(voted).detach().cpu().numpy())

        if (step + 1) % log_every == 0 or (step + 1) % n_batches == 0:
            print(
                f'{time_str()}, Step {step}/{n_batches} in Ep {epoch}, {time.time() - step_start:.2f}s ',
                f'train_loss:{meter.val:.4f}')

    print(
        f'Epoch {epoch}, LR {lr}, Train_Time {time.time() - epoch_start:.2f}s, Loss: {meter.avg:.4f}'
    )

    return meter.avg, np.concatenate(label_chunks, axis=0), np.concatenate(prob_chunks, axis=0)
def batch_trainer(epoch, model, train_loader, criterion, optimizer, loss):
    """One training epoch for a four-level model with a weighted multi-level loss.

    Args:
        epoch: epoch index (logging only).
        model: network returning four logit tensors.
        train_loader: yields ``(imgs, gt_label, imgname)`` batches.
        criterion: per-level loss function.
        optimizer: model optimizer.
        loss: loss-scheme name; only ``'Multi_Level_Loss'`` is supported.

    Returns:
        ``(average train loss, gt labels (N, C), predicted probs (N, C))``.

    Raises:
        ValueError: if ``loss`` names an unsupported scheme.
    """
    model.train()
    epoch_time = time.time()
    loss_meter = AverageMeter()

    batch_num = len(train_loader)
    gt_list = []
    preds_probs = []

    lr = optimizer.param_groups[0]['lr']

    for step, (imgs, gt_label, imgname) in enumerate(train_loader):

        batch_time = time.time()
        imgs, gt_label = imgs.cuda(), gt_label.cuda()

        train_logit_1, train_logit_2, train_logit_3, train_logit_4 = model(
            imgs)

        if loss == 'Multi_Level_Loss':
            # Deeper levels get larger weights; the deepest is unweighted.
            train_loss = (0.1 * criterion(train_logit_1, gt_label)
                          + 0.3 * criterion(train_logit_2, gt_label)
                          + 0.7 * criterion(train_logit_3, gt_label)
                          + criterion(train_logit_4, gt_label))
        else:
            # Bug fix: an unknown scheme previously fell through and
            # train_loss.backward() raised UnboundLocalError.
            raise ValueError(f"unsupported loss scheme: {loss!r}")

        train_loss.backward()
        clip_grad_norm_(model.parameters(),
                        max_norm=10.0)  # make larger learning rate works
        optimizer.step()
        optimizer.zero_grad()
        loss_meter.update(to_scalar(train_loss))

        gt_list.append(gt_label.cpu().numpy())
        # Only the deepest level's probabilities are reported.
        train_probs = torch.sigmoid(train_logit_4)
        preds_probs.append(train_probs.detach().cpu().numpy())
        log_interval = 20

        if (step + 1) % log_interval == 0 or (step +
                                              1) % len(train_loader) == 0:
            print(
                f'{time_str()}, Step {step}/{batch_num} in Ep {epoch}, {time.time() - batch_time:.2f}s ',
                f'train_loss:{loss_meter.val:.4f}')

    train_loss = loss_meter.avg

    gt_label = np.concatenate(gt_list, axis=0)
    preds_probs = np.concatenate(preds_probs, axis=0)

    print(
        f'Epoch {epoch}, LR {lr}, Train_Time {time.time() - epoch_time:.2f}s, Loss: {loss_meter.avg:.4f}'
    )

    return train_loss, gt_label, preds_probs
Exemplo n.º 8
0
def train_val(model,
              optimizer,
              train_loader,
              test_loader,
              epoch,
              margin=1.0,
              use_ohem=False,
              log_interval=100,
              test_interval=2000,
              is_cuda=True):
    """Train one epoch of a triplet-embedding model, periodically testing
    and saving feature weights.

    Args:
        model: embedding network (wrapped so ``model.module.feat_model`` exists).
        optimizer: model optimizer.
        train_loader: yields ``(anchor, positive, negative, target)`` batches.
        test_loader: loader passed to ``best_test`` at each test interval.
        epoch: epoch index used in log lines and checkpoint names.
        margin: triplet margin.
        use_ohem: enable online hard example mining in the triplet loss.
        log_interval: batches between loss log lines.
        test_interval: batches between test/checkpoint passes.
        is_cuda: move inputs to CUDA when True.
    """
    loss = AverageMeter()
    batch_num = len(train_loader)
    # Hoisted out of the loop: margin/use_ohem are constant per call, so the
    # loss layer need not be reconstructed every batch.
    triploss_layer = TripletMarginLoss(margin, use_ohem=use_ohem)
    for batch_idx, (data_a, data_p, data_n, target) in enumerate(train_loader):
        model.train()
        if is_cuda:
            data_a = data_a.cuda()
            data_p = data_p.cuda()
            data_n = data_n.cuda()
            #target = target.cuda()
        data_a = Variable(data_a)
        data_p = Variable(data_p)
        data_n = Variable(data_n)
        target = Variable(target)

        optimizer.zero_grad()
        out_a = model(data_a)
        out_p = model(data_p)
        out_n = model(data_n)

        trip_loss = triploss_layer(out_a, out_p, out_n)

        trip_loss.backward()
        optimizer.step()

        # Bug fix: ``trip_loss.data[0]`` indexes a 0-dim tensor and raises on
        # modern PyTorch; ``.item()`` is the supported scalar accessor (and
        # matches the rest of this file).
        loss.update(trip_loss.item())
        if (batch_idx + 1) % log_interval == 0:
            logging('Train-Epoch:{:04d}\tbatch:{:06d}/{:06d}\tloss:{:.04f}'\
                    .format(epoch, batch_idx+1, batch_num, trip_loss.item()))
        if (batch_idx + 1) % test_interval == 0:
            threshlod, accuracy, mean_d_a_p, mean_d_a_n = best_test(
                model, test_loader)
            logging(
                'Test-T-A Epoch {:04d}-{:06d} accuracy: {:.04f} threshold: {:.05} ap_mean: {:.04f} an_mean: {:.04f}'
                .format(epoch, batch_idx + 1, accuracy, threshlod, mean_d_a_p,
                        mean_d_a_n))
            cutoff = len(model.module.feat_model._modules)
            model_name = 'models/epoch_{:04d}-{:06d}_feat.weights'.format(
                epoch, batch_idx + 1)
            save_weights(model.module.feat_model, model_name, cutoff)
            logging('save model: {:s}'.format(model_name))
def valid_trainer(model, valid_loader, criterion):
    """Validate a four-output model; loss and probabilities come from the
    fourth (deepest) head only.

    Returns (average validation loss, gt labels, predicted probabilities).
    """
    model.eval()
    meter = AverageMeter()
    prob_chunks, label_chunks = [], []

    with torch.no_grad():
        for imgs, gt_label, imgname in tqdm(valid_loader):
            imgs = imgs.cuda()
            gt_label = gt_label.cuda()

            head_1, head_2, head_3, head_4 = model(imgs)

            # Keep raw labels first, then zero the -1 markers for the loss.
            label_chunks.append(gt_label.cpu().numpy())
            gt_label[gt_label == -1] = 0

            batch_loss = criterion(head_4, gt_label)
            probs = torch.sigmoid(head_4)
            prob_chunks.append(probs.detach().cpu().numpy())
            meter.update(to_scalar(batch_loss))

    avg_loss = meter.avg
    all_labels = np.concatenate(label_chunks, axis=0)
    all_probs = np.concatenate(prob_chunks, axis=0)
    return avg_loss, all_labels, all_probs
Exemplo n.º 10
0
def train_epoch(current_epoch, loss_functions, model, optimizer, scheduler,
                train_data_loader, summary_writer, conf, local_rank):
    """Train a segmentation model for one epoch.

    Tracks running loss and rounded mIoU, supports epoch- and step-based LR
    schedules, optional fp16 via apex ``amp``, and (on rank 0) logs LRs and
    the epoch loss to ``summary_writer``.
    """
    losses = AverageMeter()
    mious = AverageMeter()

    iterator = tqdm(train_data_loader)
    model.train()
    # Epoch-mode schedulers step once, before the batch loop.
    if conf["optimizer"]["schedule"]["mode"] == "epoch":
        scheduler.step(current_epoch)
    for i, sample in enumerate(iterator):
        imgs = sample["image"].cuda()
        masks = sample["mask"].cuda().float()
        masks_orig = sample["mask_orig"].cuda().float()
        out_mask = model(imgs)
        # Metric only -- no gradients needed for softmax/argmax/mIoU.
        with torch.no_grad():
            pred = torch.softmax(out_mask, dim=1)
            argmax = torch.argmax(pred, dim=1)
            ious = miou_round(argmax, masks_orig).item()

        mious.update(ious, imgs.size(0))

        mask_loss = loss_functions["mask_loss"](out_mask, masks.contiguous())
        loss = mask_loss
        losses.update(loss.item(), imgs.size(0))
        iterator.set_description(
            "epoch: {}; lr {:.7f}; Loss ({loss.avg:.4f}); miou ({miou.avg:.4f}); "
            .format(current_epoch,
                    scheduler.get_lr()[-1],
                    loss=losses,
                    miou=mious))
        optimizer.zero_grad()
        if conf['fp16']:
            # apex amp scales the loss to avoid fp16 gradient underflow.
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        # NOTE(review): amp.master_params is called even on the non-fp16 path --
        # presumably amp is always initialized here; confirm.
        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 1)
        optimizer.step()
        torch.cuda.synchronize()

        # Step-mode schedulers advance once per batch, on a global step index.
        if conf["optimizer"]["schedule"]["mode"] in ("step", "poly"):
            scheduler.step(i + current_epoch * len(train_data_loader))

    # Only rank 0 writes TensorBoard scalars in distributed runs.
    if local_rank == 0:
        for idx, param_group in enumerate(optimizer.param_groups):
            lr = param_group['lr']
            summary_writer.add_scalar('group{}/lr'.format(idx),
                                      float(lr),
                                      global_step=current_epoch)
        summary_writer.add_scalar('train/loss',
                                  float(losses.avg),
                                  global_step=current_epoch)
Exemplo n.º 11
0
def main():
    """Evaluate the track-1 model on the validation set and save results.

    Loads the checkpoint, runs self-ensembled inference on each .mat file in
    the validation directory, computes MRAE against ground truth, writes the
    result .mat files, and prints the average MRAE.
    """
    args.n_resgroups = 5
    args.n_resblocks = 3
    args.n_feats = 64
    args.n_reduction = 16

    data_path = './data/valid/lr3'
    gt_path = './data/valid/hr'
    result_path = './track1_valid_data/'
    var_name = 'data'
    if not os.path.exists(result_path):
        os.makedirs(result_path)

    model_path = './model/track1_model.pkl'
    save_point = torch.load(model_path)
    model_param = save_point['state_dict']
    model = make_model(args)
    model.load_state_dict(model_param)

    model = model.cuda()
    model.eval()

    mrae = AverageMeter()

    for mat_name in sorted(os.listdir(data_path)):
        mat_path_name = os.path.join(data_path, mat_name)
        f = h5py.File(mat_path_name, 'r')
        input_data = f.get(var_name)
        input_data = np.array(input_data)

        # Map the input filename to its ground-truth name by replacing the
        # last 8 characters with '_tr1.mat'.
        mat_name = mat_name[:-8] + '_tr1.mat'
        mat_path_name = os.path.join(gt_path, mat_name)
        f = h5py.File(mat_path_name, 'r')
        target = f.get(var_name)
        target = np.array(target)
        target = np.transpose(target, [2, 1, 0])

        # Inputs are 16-bit; scale to [0, 1].
        input_data = input_data / 65535

        img_res = self_ensemble(model, input_data, target)
        MRAEs = cal_mrae(target, img_res)

        # Redundant second rename removed: mat_name already ends with
        # '_tr1.mat' (exactly 8 chars), so re-applying the slice was a no-op.
        mat_dir = os.path.join(result_path, mat_name)
        save_matv73(mat_dir, var_name, img_res)

        mrae.update(MRAEs)
        print(mat_name)
        print(img_res.shape)
        print(MRAEs)
    print(mrae.avg)
def batch_trainer(epoch, model, train_loader, criterion, optimizer, loss):
    """One training epoch with BCE classification plus an autoencoder depth loss.

    Args:
        epoch: epoch index (logging only).
        model: network returning ``(logits, reconstruction)`` for the images.
        train_loader: yields ``(imgs, gt_label, gt_depth, imgname)`` batches.
        criterion: classification loss on the logits.
        optimizer: model optimizer.
        loss: loss-scheme name; only ``'BCE_LOSS'`` is supported.

    Returns:
        ``(average train loss, gt labels (N, C), predicted probs (N, C))``.

    Raises:
        ValueError: if ``loss`` names an unsupported scheme.
    """
    model.train()
    epoch_time = time.time()
    loss_meter = AverageMeter()

    batch_num = len(train_loader)
    gt_list = []
    preds_probs = []

    lr = optimizer.param_groups[0]['lr']
    for step, (imgs, gt_label, gt_depth, imgname) in enumerate(train_loader):
        batch_time = time.time()
        imgs, gt_label, gt_depth = imgs.cuda(), gt_label.cuda(), gt_depth.cuda(
        )
        train_logits, reconstruction = model(imgs)
        if loss == 'BCE_LOSS':
            # Classification loss plus the autoencoder's depth reconstruction term.
            train_loss = criterion(train_logits, gt_label) + loss_autoencoder(
                reconstruction, gt_depth)
        else:
            # Bug fix: an unknown scheme previously fell through to
            # train_loss.backward() and raised UnboundLocalError.
            raise ValueError(f"unsupported loss scheme: {loss!r}")
        train_loss.backward()
        clip_grad_norm_(model.parameters(),
                        max_norm=10.0)  # make larger learning rate works
        optimizer.step()
        optimizer.zero_grad()
        loss_meter.update(to_scalar(train_loss))

        gt_list.append(gt_label.cpu().numpy())
        train_probs = torch.sigmoid(train_logits)
        preds_probs.append(train_probs.detach().cpu().numpy())

        log_interval = 20
        if (step + 1) % log_interval == 0 or (step +
                                              1) % len(train_loader) == 0:
            print(
                f'{time_str()}, Step {step}/{batch_num} in Ep {epoch}, {time.time() - batch_time:.2f}s ',
                f'train_loss:{loss_meter.val:.4f}')

    train_loss = loss_meter.avg

    gt_label = np.concatenate(gt_list, axis=0)
    preds_probs = np.concatenate(preds_probs, axis=0)

    print(
        f'Epoch {epoch}, LR {lr}, Train_Time {time.time() - epoch_time:.2f}s, Loss: {loss_meter.avg:.4f}'
    )

    return train_loss, gt_label, preds_probs
Exemplo n.º 13
0
def train_epoch(current_epoch, loss_function, model, optimizer, scheduler,
                train_data_loader, summary_writer, conf, local_rank, debug):
    """Train a landmark-regression model for one epoch with progress display.

    Supports step-/poly-/epoch-mode LR schedules, optional fp16 via apex
    ``amp``, and stops early at ``max_iter`` (or immediately in debug mode).
    """
    # Meters storing running averages (translated from the original comment).
    progbar = Progbar(len(train_data_loader.dataset),
                      stateful_metrics=['epoch', 'config', 'lr'])
    batch_time = AverageMeter()
    end = time.time()
    losses = AverageMeter()
    max_iters = conf['optimizer']['schedule']['params']['max_iter']
    print("training epoch {}".format(current_epoch))
    model.train()

    for i, (landmarks, imgs, img_path) in enumerate(train_data_loader):
        numm = imgs.shape[0]
        optimizer.zero_grad()
        # Flatten any leading crop/clip dimension into the batch dimension.
        imgs = imgs.reshape((-1, imgs.size(-3), imgs.size(-2), imgs.size(-1)))
        imgs = Variable(imgs, requires_grad=True).cuda()

        landmarks = landmarks.cuda().float()
        output = model(imgs)

        loss = loss_function(output, landmarks)

        losses.update(loss.item(), imgs.size(0))
        summary_writer.add_scalar('train/loss',
                                  loss.item(),
                                  global_step=i + current_epoch * max_iters)
        summary_writer.add_scalar('train/lr',
                                  float(scheduler.get_lr()[-1]),
                                  global_step=i + current_epoch * max_iters)

        if conf['fp16']:
            # apex amp scales the loss to avoid fp16 gradient underflow.
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        # NOTE(review): amp.master_params is called even on the non-fp16 path --
        # presumably amp is always initialized here; confirm.
        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 1)
        optimizer.step()
        torch.cuda.synchronize()

        batch_time.update(time.time() - end)
        end = time.time()

        if conf["optimizer"]["schedule"]["mode"] in ("step", "poly"):
            scheduler.step(i + current_epoch * max_iters)
            # Stop at the schedule's iteration budget, or after one batch in debug.
            if (i == max_iters - 1) or debug:
                break
        progbar.add(numm,
                    values=[('epoch', current_epoch), ('loss', losses.avg),
                            ("lr", float(scheduler.get_lr()[-1]))])

    # Epoch-mode schedulers step once, after the batch loop.
    if conf["optimizer"]["schedule"]["mode"] == "epoch":
        scheduler.step(current_epoch)
    # Only rank 0 writes TensorBoard scalars in distributed runs.
    if local_rank == 0:
        for idx, param_group in enumerate(optimizer.param_groups):
            lr = param_group['lr']
            summary_writer.add_scalar('group{}/lr'.format(idx),
                                      float(lr),
                                      global_step=current_epoch)
def extract(test_loader, model):
    """Run ``model`` over every batch of ``test_loader`` and stack the outputs.

    Returns a single 2-D numpy array of all per-batch features, in order.
    """
    timer = AverageMeter(10)
    model.eval()
    chunks = []
    with torch.no_grad():
        last = time.time()
        for batch in test_loader:
            # compute output
            out = model(batch)
            chunks.append(out.data.cpu().numpy())
            # measure elapsed time
            timer.update(time.time() - last)
            last = time.time()

    return np.vstack(chunks)
Exemplo n.º 15
0
def run_epoch(phase, epoch, data_loader, model_loss, opt, optimizer,
              losses_stat):
    """Run one full epoch ('train' or eval) of the combined model+loss module.

    Args:
        phase: 'train' enables backprop; anything else runs in eval mode.
        epoch: epoch index (logging only).
        data_loader: yields dict batches; the 'meta' key stays on CPU.
        model_loss: module returning ``(output, loss, loss_stats)``.
        opt: options object with ``device`` (and ``gups`` in eval mode).
        optimizer: optimizer used only in the train phase.
        losses_stat: iterable of loss-stat keys to average.

    Returns:
        Dict mapping each stat key to its epoch-average value.
    """
    avg_loss_stats = {k: AverageMeter() for k in losses_stat}
    if phase == 'train':
        model_loss.train()
    else:
        # NOTE(review): 'gups' looks like a typo for 'gpus' -- confirm against
        # how ``opt`` is constructed before renaming.
        if len(opt.gups) > 1:
            model_loss = model_loss.module
        model_loss.eval()
        torch.cuda.empty_cache()
    for iter_id, batch in enumerate(data_loader):

        for k in batch:
            if k != 'meta':
                batch[k] = batch[k].to(device=opt.device, non_blocking=True)
        output, loss, loss_stats = model_loss(batch)
        loss = loss.mean()
        if phase == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Bug fix: iter_id is an int; concatenating it to a str raised TypeError.
        message = phase + ' | ' + ' epoch : ' + str(
            epoch) + ' | iter : ' + str(iter_id) + " | "
        for kw in loss_stats.items():
            message += kw[0] + ' : ' + str(kw[1]) + ' | '
        print(message)

        for k in avg_loss_stats:
            # NOTE(review): 'iinput' may be a typo for 'input' -- verify the
            # batch keys before changing.
            avg_loss_stats[k].update(loss_stats[k].mean().item(),
                                     batch['iinput'].size(0))
    # Bug fix: the return was inside the loop, so only the first batch ever ran;
    # the averages are now returned after the whole epoch.
    return {k: v.avg for k, v in avg_loss_stats.items()}
def valid_trainer(model, valid_loader, criterion):
    """Validate a two-output model; the *second* output drives both the loss
    and the reported probabilities.

    Returns (average validation loss, gt labels, predicted probabilities).
    """
    model.eval()
    meter = AverageMeter()
    prob_chunks, label_chunks = [], []

    with torch.no_grad():
        for imgs, gt_label, imgname in tqdm(valid_loader):
            imgs = imgs.cuda()
            gt_label = gt_label.cuda()
            # Keep raw labels first, then zero the -1 markers for the loss.
            label_chunks.append(gt_label.cpu().numpy())
            gt_label[gt_label == -1] = 0

            logits_1, logits_2 = model(imgs)
            batch_loss = criterion(logits_2, gt_label)
            prob_chunks.append(torch.sigmoid(logits_2).cpu().numpy())
            meter.update(to_scalar(batch_loss))

    avg_loss = meter.avg
    all_labels = np.concatenate(label_chunks, axis=0)
    all_probs = np.concatenate(prob_chunks, axis=0)
    return avg_loss, all_labels, all_probs
Exemplo n.º 17
0
def valid_trainer(model, valid_loader, criterion):
    """Validate ``model`` while a GradCam helper is attached.

    NOTE(review): this pass deliberately does NOT use torch.no_grad() -- the
    commented-out history suggests gradients were needed for Grad-CAM; confirm
    before re-enabling no_grad.

    Returns (average validation loss, gt labels, predicted probabilities).
    """
    model.eval()
    # Kept even though unused below: constructing GradCam presumably registers
    # hooks on 'layer4' -- confirm before removing.
    grad_cam = GradCam(model=model,
                       target_layer_names=["layer4"],
                       use_cuda=True)

    loss_meter = AverageMeter()

    preds_probs = []
    gt_list = []
    # Dead 'if True:' wrapper removed; the loop now sits at function level.
    for step, (imgs, gt_label, imgname) in enumerate(tqdm(valid_loader)):
        imgs = imgs.cuda()
        gt_label = gt_label.cuda()
        # Record labels before mapping the -1 "unknown" marker to 0.
        gt_list.append(gt_label.cpu().numpy())
        gt_label[gt_label == -1] = 0
        valid_logits = model(imgs)

        valid_loss = criterion(valid_logits, gt_label)
        valid_probs = torch.sigmoid(valid_logits)
        preds_probs.append(valid_probs.detach().cpu().numpy())
        loss_meter.update(to_scalar(valid_loss))
    valid_loss = loss_meter.avg

    gt_label = np.concatenate(gt_list, axis=0)
    preds_probs = np.concatenate(preds_probs, axis=0)
    return valid_loss, gt_label, preds_probs
Exemplo n.º 18
0
    def train(self, epoch):
        """Train the landmark model for one epoch, log scalars, then evaluate.

        Args:
            epoch: epoch index used for logging and evaluation.
        """
        self.scheduler.step()
        self.model.train()
        landmark_loss_ = AverageMeter()

        for batch_idx, sample in enumerate(self.train_loader):
            image = sample['image']
            gt_landmarks = sample['landmarks']
            image, gt_landmarks = image.to(self.device), gt_landmarks.to(
                self.device)

            pred_landmarks = self.model(image)
            loss = self.lossfn(pred_landmarks, gt_landmarks)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Bug fix: storing the loss *tensor* kept each batch's autograd
            # graph alive for the whole epoch; record a plain float instead.
            landmark_loss_.update(loss.item(), image.size(0))
            if batch_idx % 20 == 0:
                print(
                    "Train Epoch: {:03} [{:05}/{:05} ({:03.0f}%)]\tLoss:{:.6f} LR: {:.7f}"
                    .format(epoch, batch_idx * len(sample['image']),
                            len(self.train_loader.dataset),
                            100. * batch_idx / len(self.train_loader),
                            loss.item(), self.optimizer.param_groups[0]['lr']))

        self.scalar_info['loss'] = landmark_loss_.avg
        self.scalar_info['lr'] = self.scheduler.get_lr()[0]

        if self.logger is not None:
            for tag, value in list(self.scalar_info.items()):
                self.logger.scalar_summary(tag, value, self.run_count)
            self.scalar_info = {}

        self.run_count += 1

        print("|===>Loss: {:.4f}".format(landmark_loss_.avg))

        # Evaluate on the last batch's images/landmarks.
        self.evaluate(epoch, image, gt_landmarks, pred_landmarks)
Exemplo n.º 19
0
def valid_trainer(epoch, model, valid_loader, criterion):
    """Validation with max-voting across five output heads.

    ``epoch`` is accepted for signature compatibility but unused.
    Returns (average validation loss, gt labels, predicted probabilities).
    """
    model.eval()
    meter = AverageMeter()
    prob_chunks, label_chunks = [], []

    with torch.no_grad():
        for imgs, gt_label, imgname in tqdm(valid_loader):
            imgs = imgs.cuda()
            gt_label = gt_label.cuda()
            # Keep raw labels first, then zero the -1 markers for the loss.
            label_chunks.append(gt_label.cpu().numpy())
            gt_label[gt_label == -1] = 0

            output = model(imgs)

            # Sum the criterion over every head.
            batch_loss = sum(criterion(head, gt_label) for head in output)

            # Maximum voting across the five heads.
            voted = output[0]
            for k in range(1, 5):
                voted = torch.max(voted, output[k])

            prob_chunks.append(torch.sigmoid(voted).detach().cpu().numpy())
            meter.update(to_scalar(batch_loss))

    avg_loss = meter.avg
    all_labels = np.concatenate(label_chunks, axis=0)
    all_probs = np.concatenate(prob_chunks, axis=0)
    return avg_loss, all_labels, all_probs
Exemplo n.º 20
0
def validate(val_queue, model):
    """Compute top-1 / top-5 accuracy of ``model`` over ``val_queue``.

    Returns (top-1 average, top-5 average).
    """
    top1, top5 = AverageMeter(), AverageMeter()
    model.eval()

    for batch in tqdm.tqdm(val_queue):
        inputs = batch[0].cuda(non_blocking=True)
        target = batch[1].cuda(non_blocking=True)

        with torch.no_grad():
            logits = model(inputs)

        prec1, prec5 = accuracy(logits, target, topk=(1, 5))
        count = inputs.size(0)
        top1.update(prec1.data.item(), count)
        top5.update(prec5.data.item(), count)

    return top1.avg, top5.avg
Exemplo n.º 21
0
    def train(self, epoch):
        """Train the classifier for one epoch.

        Args:
            epoch: epoch index (logging only).

        Returns:
            ``(average classification loss, average accuracy)``.
        """
        cls_loss_ = AverageMeter()
        accuracy_ = AverageMeter()

        self.model.train()

        for batch_idx, (data, gt_label) in enumerate(self.train_loader):
            data, gt_label = data.to(self.device), gt_label.to(self.device)

            cls_pred = self.model(data)
            # compute the loss
            cls_loss = self.lossfn.cls_loss(gt_label, cls_pred)
            accuracy = self.compute_accuracy(cls_pred, gt_label)

            self.optimizer.zero_grad()
            cls_loss.backward()
            self.optimizer.step()

            # Bug fix: storing the loss *tensor* kept every batch's autograd
            # graph alive for the whole epoch; record a plain float instead.
            cls_loss_.update(cls_loss.item(), data.size(0))
            # NOTE(review): compute_accuracy's return type is not visible here;
            # if it is a tensor, consider .item() as well.
            accuracy_.update(accuracy, data.size(0))

            if batch_idx % 50 == 0:

                print(
                    'Train Epoch: {} [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}\tTrain Accuracy: {:.6f}'
                    .format(epoch, batch_idx * len(data),
                            len(self.train_loader.dataset),
                            100. * batch_idx / len(self.train_loader),
                            cls_loss.item(), accuracy))

        self.scalar_info['cls_loss'] = cls_loss_.avg
        self.scalar_info['accuracy'] = accuracy_.avg
        self.scalar_info['lr'] = self.lr

        print("|===>Loss: {:.4f}   Train Accuracy: {:.6f} ".format(
            cls_loss_.avg, accuracy_.avg))

        return cls_loss_.avg, accuracy_.avg
Exemplo n.º 22
0
def main():
    """Restore the track-1 model and run it over every ``.mat`` file in
    ``data_path``, writing each reconstructed result to ``result_path``.

    Reads HDF5 ``.mat`` inputs (dataset named ``var_name``), rescales them
    from uint16 range to [0, 1], runs ``self_ensemble`` inference, and saves
    the outputs in MATLAB v7.3 format with a ``_tr1.mat`` suffix.
    """
    args.n_resgroups = 5
    args.n_resblocks = 3
    args.n_feats = 64
    args.n_reduction = 16

    data_path = './data/final_test/lr3'
    #    data_path = './data/valid/lr3'
    result_path = './track1_test_data/'
    var_name = 'data'
    if not os.path.exists(result_path):
        os.makedirs(result_path)

    model_path = './model/track1_model.pkl'
    save_point = torch.load(model_path)
    model_param = save_point['state_dict']
    model = make_model(args)
    model.load_state_dict(model_param)

    model = model.cuda()
    model.eval()

    mrae = AverageMeter()

    for mat_name in sorted(os.listdir(data_path)):
        mat_path_name = os.path.join(data_path, mat_name)
        # BUGFIX: close the HDF5 file once the array is materialised — the
        # original leaked one open file handle per input file.
        with h5py.File(mat_path_name, 'r') as f:
            input_data = np.array(f.get(var_name))

        # inputs are stored in uint16 range; rescale to [0, 1]
        input_data = input_data / 65535

        s_time = time.time()
        img_res = self_ensemble(model, input_data)
        e_time = time.time()
        # BUGFIX: the original printed s_time - e_time, a negative duration.
        print(e_time - s_time)
        mat_name = mat_name[:-8] + '_tr1.mat'
        mat_dir = os.path.join(result_path, mat_name)
        save_matv73(mat_dir, var_name, img_res)

        print(mat_name)
        print(img_res.shape)
Exemplo n.º 23
0
def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` and return (top1, top5, loss) averages.

    In distributed mode the per-batch loss and accuracies are all-reduced
    across workers before being accumulated; logging happens only on rank 0.
    """
    loss_meter = AverageMeter()
    acc1_meter = AverageMeter()
    acc5_meter = AverageMeter()
    model.eval()

    for step, batch in enumerate(val_loader):
        images = batch[0].cuda(non_blocking=True)
        labels = batch[1].cuda(non_blocking=True)

        with torch.no_grad():
            logits = model(images)
            loss = criterion(logits, labels)

        acc1, acc5 = accuracy(logits, labels, topk=(1, 5))
        if not args.distributed:
            batch_loss = loss.data
        else:
            batch_loss = reduce_tensor(loss.data)
            acc1 = reduce_tensor(acc1)
            acc5 = reduce_tensor(acc5)

        n = images.size(0)
        loss_meter.update(batch_loss.item(), n)
        acc1_meter.update(acc1.item(), n)
        acc5_meter.update(acc5.item(), n)

        if args.local_rank == 0 and step % args.print_freq == 0:
            duration = 0 if step == 0 else time.time() - duration_start
            duration_start = time.time()
            logging.info(
                'VALIDATE Step: %03d Objs: %e R1: %f R5: %f Duration: %ds',
                step, loss_meter.avg, acc1_meter.avg, acc5_meter.avg, duration)

    return acc1_meter.avg, acc5_meter.avg, loss_meter.avg
Exemplo n.º 24
0
def train(train_loader, model, criterion, optimizer):
    """Train ``model`` for one epoch over ``train_loader``.

    Supports optional NVIDIA apex/amp mixed precision (``args.opt_level``)
    and optional gradient clipping (``args.grad_clip``). Accuracy metrics are
    only accumulated every ``args.print_freq`` steps to avoid per-iteration
    host<->device synchronization.

    Returns:
        Tuple ``(top1_avg, loss_avg)`` of the (sampled) running averages.
    """
    objs = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    model.train()

    end = time.time()
    for step, data in enumerate(train_loader):
        data_time.update(time.time() - end)
        x = data[0].cuda(non_blocking=True)
        target = data[1].cuda(non_blocking=True)

        # forward
        batch_start = time.time()
        logits = model(x)
        loss = criterion(logits, target)

        # backward
        optimizer.zero_grad()
        if args.opt_level is not None:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        if args.grad_clip > 0:
            # BUGFIX: amp.master_params is only valid when amp is initialised
            # (opt_level set). In the plain FP32 path clip the optimizer's
            # own parameters instead.
            if args.opt_level is not None:
                clip_params = amp.master_params(optimizer)
            else:
                clip_params = [p for group in optimizer.param_groups
                               for p in group['params']]
            torch.nn.utils.clip_grad_norm_(clip_params, args.grad_clip)
        optimizer.step()
        batch_time.update(time.time() - batch_start)

        if step % args.print_freq == 0:
            # For better performance, don't accumulate these metrics every iteration,
            # since they may incur an allreduce and some host<->device syncs.
            prec1, prec5 = accuracy(logits, target, topk=(1, 5))
            if args.distributed:
                reduced_loss = reduce_tensor(loss.data)
                prec1 = reduce_tensor(prec1)
                prec5 = reduce_tensor(prec5)
            else:
                reduced_loss = loss.data
            objs.update(reduced_loss.item(), x.size(0))
            top1.update(prec1.item(), x.size(0))
            top5.update(prec5.item(), x.size(0))
            torch.cuda.synchronize()

            duration = 0 if step == 0 else time.time() - duration_start
            duration_start = time.time()
            if args.local_rank == 0:
                logging.info(
                    'TRAIN Step: %03d Objs: %e R1: %f R5: %f Duration: %ds BTime: %.3fs DTime: %.4fs',
                    step, objs.avg, top1.avg, top5.avg, duration,
                    batch_time.avg, data_time.avg)
        end = time.time()

    return top1.avg, objs.avg
Exemplo n.º 25
0
    def train(self, epoch):
        """Train the detection net (classification + box regression) for one epoch.

        Args:
            epoch: current epoch index, used only for progress printing.

        Returns:
            Tuple ``(avg_cls_loss, avg_box_offset_loss, avg_total_loss,
            avg_accuracy)`` averaged over the epoch.
        """
        cls_loss_ = AverageMeter()
        box_offset_loss_ = AverageMeter()
        total_loss_ = AverageMeter()
        accuracy_ = AverageMeter()

        # NOTE(review): the scheduler is stepped at the start of the epoch in
        # the original schedule; keep this order to preserve the LR curve.
        self.scheduler.step()
        self.model.train()

        for batch_idx, (data, target) in enumerate(self.train_loader):
            gt_label = target['label']
            gt_bbox = target['bbox_target']
            data, gt_label, gt_bbox = data.to(self.device), gt_label.to(
                self.device), gt_bbox.to(self.device).float()

            cls_pred, box_offset_pred = self.model(data)
            # compute the loss
            cls_loss = self.lossfn.cls_loss(gt_label, cls_pred)
            box_offset_loss = self.lossfn.box_loss(gt_label, gt_bbox,
                                                   box_offset_pred)

            total_loss = cls_loss + box_offset_loss
            accuracy = self.compute_accuracy(cls_pred, gt_label)

            self.optimizer.zero_grad()
            total_loss.backward()
            self.optimizer.step()

            # BUGFIX: store Python floats, not tensors — keeping the loss
            # tensors in the meters retains every batch's autograd graph for
            # the whole epoch and leaks GPU memory.
            cls_loss_.update(cls_loss.item(), data.size(0))
            box_offset_loss_.update(box_offset_loss.item(), data.size(0))
            total_loss_.update(total_loss.item(), data.size(0))
            accuracy_.update(accuracy.item(), data.size(0))

            print(
                'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.6f}'
                .format(epoch, batch_idx * len(data),
                        len(self.train_loader.dataset),
                        100. * batch_idx / len(self.train_loader),
                        total_loss.item(), accuracy.item()))

        self.scalar_info['cls_loss'] = cls_loss_.avg
        self.scalar_info['box_offset_loss'] = box_offset_loss_.avg
        self.scalar_info['total_loss'] = total_loss_.avg
        self.scalar_info['accuracy'] = accuracy_.avg
        self.scalar_info['lr'] = self.scheduler.get_lr()[0]

        if self.logger is not None:
            for tag, value in list(self.scalar_info.items()):
                self.logger.scalar_summary(tag, value, self.run_count)
            self.scalar_info = {}
        self.run_count += 1

        print("|===>Loss: {:.4f}".format(total_loss_.avg))
        return cls_loss_.avg, box_offset_loss_.avg, total_loss_.avg, accuracy_.avg
Exemplo n.º 26
0
    def train(self, epoch):
        """Train the two-stage part-attention classifier for one epoch, then
        evaluate it on the validation loader.

        model_1 produces features ``x`` and an attention ``mask``; part crops
        are extracted from the mask (no grad) and fed with ``x`` into model_2
        for the final classification. Both optimizers are stepped per batch.

        Args:
            epoch: current epoch index; controls snapshot visualisation and
                progress printing.

        Returns:
            Tuple ``(avg_cls_loss, avg_train_accuracy, avg_valid_accuracy)``.
        """
        cls_loss_ = AverageMeter()
        accuracy_ = AverageMeter()
        accuracy_valid_ = AverageMeter()

        # Training pass: feed the training set through both models.
        self.scheduler_1.step()
        self.scheduler_2.step()
        self.model_1.train()
        self.model_2.train()

        for batch_idx, (data, gt_label) in enumerate(self.train_loader):

            data, gt_label = data.to(self.device), gt_label.to(self.device)
            x, mask = self.model_1(data)

            # test
            # print(self.model_1.alexnet_1.conv1[0].weight.data)
            # print(self.model_2.channelgroup_2.group[0].weight.data[5][5:10])
            # print(self.model_3.Classify_1.conv1[0].weight.data)

            # test

            # Part extraction is kept out of the autograd graph: crops are
            # computed on CPU via numpy, then moved back to the device.
            with torch.no_grad():
                parts = part_box(mask)
                img_parts, parts = get_part(data.cpu(),
                                            parts)  # (1, 64, 48, 48)
                img_parts = torch.from_numpy(img_parts).view(
                    img_parts.shape[0], 1, 48,
                    48).to(self.device)  # view(64, 1, 48, 48)

                # Snapshot visualisations at selected epochs, once per epoch.
                if (epoch == 1 or epoch == 5 or epoch == 10
                        or epoch == 15) and batch_idx == 1:
                    self.show_image_grid(data, img_parts, parts, epoch)
                    self.show_mask(mask, epoch)
                    print('save image and parts in result: ' +
                          self.config.save_path)
                    print('epoch: ' + str(epoch))
                    print('batch_idx: ' + str(batch_idx))

            cls_pred = self.model_2(img_parts, x)

            # compute the loss
            cls_loss = self.lossfn.cls_loss(gt_label, cls_pred)
            accuracy = self.compute_accuracy(cls_pred, gt_label)

            # NOTE(review): `epoch >= 0` is always true, so the update always
            # runs — presumably a leftover switch for freezing early epochs.
            if epoch >= 0:
                self.optimizer_1.zero_grad()
                self.optimizer_2.zero_grad()
                cls_loss.backward()
                self.optimizer_1.step()
                self.optimizer_2.step()

            cls_loss_.update(cls_loss.item(), data.size(0))
            accuracy_.update(accuracy, data.size(0))

            if batch_idx % 2000 == 1:
                print('batch_idx: ', batch_idx)
                print('Cls loss: ', cls_loss.item())

        # Validation pass: feed the validation set through both models.
        with torch.no_grad():
            self.model_1.eval()
            self.model_2.eval()

            for batch_idx, (data, gt_label) in enumerate(self.valid_loader):
                data, gt_label = data.to(self.device), gt_label.to(self.device)

                x, mask = self.model_1(data)

                parts = part_box(mask)
                img_parts, parts = get_part(data.cpu(),
                                            parts)  # (4, 64, 48, 48)

                img_parts = torch.from_numpy(img_parts).view(
                    img_parts.shape[0], 1, 48, 48).to(self.device)

                cls_pred = self.model_2(img_parts, x)

                accuracy_valid = self.compute_accuracy(cls_pred, gt_label)
                accuracy_valid_.update(accuracy_valid, data.size(0))

            # Record metrics for the (currently disabled) logger below.
            self.scalar_info['cls_loss'] = cls_loss_.avg
            self.scalar_info['accuracy'] = accuracy_.avg
            self.scalar_info['lr'] = self.scheduler_1.get_lr()[0]

            # if self.logger is not None:
            #     for tag, value in list(self.scalar_info.items()):
            #         self.logger.scalar_summary(tag, value, self.run_count)
            #     self.scalar_info = {}
            # self.run_count += 1

        print(
            "\r\nEpoch: {}|===>Train Loss: {:.8f}   Train Accuracy: {:.6f}   valid Accuracy: {:.6f}\r\n"
            .format(epoch, cls_loss_.avg, accuracy_.avg, accuracy_valid_.avg))

        return cls_loss_.avg, accuracy_.avg, accuracy_valid_.avg
Exemplo n.º 27
0
def train(epoch):
    """Train the pointcloud+image GRU detector for one epoch over the
    module-level ``train_loader`` / ``model`` / ``optimizer`` / ``criterion``.

    Args:
        epoch: current epoch index, used only for progress printing.

    Returns:
        Average training loss for the epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()
    end = time.time()

    for batch_idx, (data, img_data, labels) in enumerate(train_loader):

        data_time.update(time.time() - end)  # measure data loading time
        B = data.shape[0]  # Batch size
        N = data.shape[1]  # Num of points in PointCloud

        data, labels, img_data = data.float(), labels.float(), img_data.float()

        if use_cuda:
            labels, data, img_data = labels.cuda(), data.cuda(), img_data.cuda(
            )

        img_data = img_data.unsqueeze(1)

        hidden = torch.zeros(
            1, B, 512).cuda()  # initialising the hidden variable for GRU

        optimizer.zero_grad()

        output = model(data, img_data, hidden, seq_len)  # (B,4)
        loss = criterion(output, labels, seq_len)

        loss.backward()
        # BUGFIX: gradient clipping must happen between backward() and
        # step(). The original clipped *after* optimizer.step(), so the
        # clipping never affected the applied update.
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        hidden = hidden.detach()
        losses.update(loss.item(), B)

        pred = output[0]
        prec1 = binary_accuracy(pred[0], labels[:, 0, 8])
        top1.update(prec1, B)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch,
                      batch_idx,
                      len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1))

    return losses.avg
Exemplo n.º 28
0
def validate():
    """Evaluate the GRU detector on ``valid_loader``.

    Besides loss and binary accuracy, composes the two predicted 2D affine
    transforms (localization and box refinement) per sequence step, thresholds
    detections by score, and accumulates true-positive / cosine-similarity
    statistics per region.

    Returns:
        Tuple ``(avg_loss, recall, AOS)`` where recall is the mean of the
        true-positive indicators and AOS the mean cosine similarity.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    TP = torch.zeros(0)  # True Positives
    CS = torch.zeros(0)  # Cosine Similarity

    # switch to evaluate mode
    model.eval()
    # if args.evaluate:
    #     model.train()
    with torch.no_grad():
        end = time.time()

        for batch_idx, (data, img_data, labels) in enumerate(valid_loader):

            B = data.shape[0]  # Batch size
            N = data.shape[1]  # Num of points in PointCloud

            data, labels, img_data = data.float(), labels.float(
            ), img_data.float()
            # labels = labels.permute(1,0,2) #(seq,B,5)

            if use_cuda:
                labels, data, img_data = labels.cuda(), data.cuda(
                ), img_data.cuda()

            img_data = img_data.unsqueeze(1)
            hidden = torch.zeros(
                1, B, 512).cuda()  # initialising the hidden variable for GRU

            # NOTE(review): zero_grad and clip_grad_norm_ below are no-ops
            # inside torch.no_grad() evaluation; kept for fidelity.
            optimizer.zero_grad()

            output = model(data, img_data, hidden, seq_len)  # (B,4)
            loss = criterion(output, labels, seq_len)

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            hidden = hidden.detach()
            losses.update(loss.item(), B)

            pred = output[0]
            prec1 = binary_accuracy(pred[1], labels[:, 1, 8])
            top1.update(prec1, B)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            ###################################### Final Evaluation ####################################################

            score_seq, loc_seq, box_seq = output

            # Build a flattened 3x3 2D rigid transform (rotation c/s +
            # translation tx/ty) per (seq, batch) from the localization head.
            trans_mat_1 = torch.eye(3).view(1, -1).repeat(seq_len, B, 1).cuda()
            trans_mat_1[:, :, 0] = loc_seq[:, :, 0]  # c
            trans_mat_1[:, :, 1] = -loc_seq[:, :, 1]  # -s
            trans_mat_1[:, :, 3] = loc_seq[:, :, 1]  # s
            trans_mat_1[:, :, 4] = loc_seq[:, :, 0]  # c
            trans_mat_1[:, :, 2] = loc_seq[:, :, 2]  #tx
            trans_mat_1[:, :, 5] = loc_seq[:, :, 3]  #ty
            trans_mat_1 = trans_mat_1.view(seq_len * B, 3, 3)

            # Same construction for the box-refinement head.
            trans_mat_2 = torch.eye(3).view(1, -1).repeat(seq_len, B, 1).cuda()
            trans_mat_2[:, :, 0] = box_seq[:, :, 0]  # c
            trans_mat_2[:, :, 1] = -box_seq[:, :, 1]  # -s
            trans_mat_2[:, :, 3] = box_seq[:, :, 1]  # s
            trans_mat_2[:, :, 4] = box_seq[:, :, 0]  # c
            trans_mat_2[:, :, 2] = box_seq[:, :, 2]  #tx
            trans_mat_2[:, :, 5] = box_seq[:, :, 3]  #ty
            trans_mat_2 = trans_mat_2.view(seq_len * B, 3, 3)

            # Compose the two transforms batch-wise, then read back the
            # (cos, sin, tx, ty) entries of the product.
            resultant_trans = torch.bmm(trans_mat_1, trans_mat_2)
            resultant_trans = resultant_trans.view(seq_len, B, 9)

            final_trans_params = resultant_trans[:, :, [0, 3, 2, 5]]
            # z offsets from the two heads simply add.
            z = (loc_seq[:, :, 4] + box_seq[:, :, 4]).view(seq_len, B, -1)
            final_trans_params = torch.cat((final_trans_params, z), 2)

            loc = final_trans_params[:, :, 2:5]
            theta = torch.atan2(final_trans_params[:, :, 1],
                                final_trans_params[:, :, 0])
            size = box_seq[:, :, 5:]

            # Threshold per-step detections and score them against the
            # ground-truth cars for each batch element.
            for a in range(B):
                car_list = check_for_car(labels[a])

                detections = []

                for i in range(seq_len):

                    trans_params = torch.cat(
                        (loc[i, a], theta[i, a].view(1), size[i, a]), 0)

                    if ((score_seq[i, a] > 0.7)):
                        detections.append(trans_params.cpu().numpy())

                TP_region, CS_region = eval_detect_in_region(
                    car_list, detections)
                TP = torch.cat((TP, TP_region), 0)
                CS = torch.cat((CS, CS_region), 0)

            if batch_idx % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'.format(
                          batch_idx,
                          len(valid_loader),
                          batch_time=batch_time,
                          loss=losses,
                          top1=top1))

        print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1))

        # Guard against an empty accumulator (no detections evaluated).
        if (TP.nelement() == 0):
            recall = 0
        else:
            recall = TP.mean()

        if (CS.nelement() == 0):
            AOS = 0
        else:
            AOS = CS.mean()
        print("recall: ", recall)
        print("AOS: ", AOS)

    return losses.avg, recall, AOS
Exemplo n.º 29
0
def train(model, params):
    """Full training driver: set up data, optimizer, scheduler and logging,
    then train ``model`` (a ModelWithLoss wrapper) for ``params.num_epochs``
    epochs with checkpointing, resume support, gradient accumulation and
    per-epoch validation.

    Args:
        model: wrapper whose forward(images, labels) returns (preds, loss);
            the underlying net lives at ``model.model`` (or
            ``model.module.model`` under DataParallel).
        params: configuration object exposing attributes such as work_dir,
            data_root, lr, gpus, num_epochs, grad_accu_steps, resume_path.
    """
    # helper function to print and save logs
    def print_log(string, print_time = True):
        if print_time:
            curr_time = time.asctime(time.localtime(time.time()))
            string = "[ " + curr_time + " ] " + string
        print(string)
        log_file = os.path.join(params.work_dir, "train_log.txt")
        with open(log_file, "a+") as log:
            log.write(string + "\n")
    
    # helper function to save checkpoints
    def save_checkpoint(best = False):
        if isinstance(model, nn.DataParallel):
            model_state_dict = model.module.model.state_dict()
        else:
            model_state_dict = model.model.state_dict()
        ckpt_dict = {
            "epoch": e,
            "step": step,
            "model_state_dict": model_state_dict,
            "optimizer_state_dict": optimizer.state_dict(),
            "scheduler_state_dict": scheduler.state_dict(),
            "best_accuracy": best_accuracy
        }
        ckpt_dir = os.path.join(params.work_dir, "checkpoints")
        os.makedirs(ckpt_dir, exist_ok = True)
        if best:
            torch.save(ckpt_dict, os.path.join(ckpt_dir, "best.pth"))
        else:
            torch.save(ckpt_dict, os.path.join(ckpt_dir, f"epoch_{e}_step_{step}.pth"))

    # set up our project working directory
    os.makedirs(params.work_dir, exist_ok = True)
    # and save our training configuration
    with open(os.path.join(params.work_dir, "train_args.yaml"), "w+") as f:
        yaml.dump(params.params, f)

    # print out the settings for training
    print_log("Below are the training settings", print_time = False)
    for k, v in params.params.items():
        print_log(f"{k} : {v}", print_time = False)
    

    # tensorboard summary writer
    writer = SummaryWriter(os.path.join(params.work_dir, "events"))


    # initiating dataset and loader
    train_dir = os.path.join(params.data_root, "train")
    train_set = TrainDataset(
        imdir = train_dir,
        input_size = params.input_size,
        color_jitter = params.color_jitter,
        resize_scale = params.resize_scale,
        ratio = params.ratio,
        interpolation = params.interpolation,
        horizontal_flip = params.horizontal_flip,
        mean = params.mean,
        std = params.std,
        fname = True
    )

    train_loader = DataLoader(
        train_set, 
        batch_size = params.train_bs, 
        num_workers = params.num_workers,
        shuffle = True
    )


    # we will use center crop to evaluate the model's accuracy every epoch
    val_dir = os.path.join(params.data_root, "val")
    val_set = EvalDataset(
        imdir = val_dir,
        input_size = params.input_size,
        mean = params.mean,
        std = params.std,
        rescale_sizes = params.test_rescales,
        center_square = False,
        crop = "center",
        horizontal_flip = False
    )

    val_loader = DataLoader(
        val_set,
        batch_size = params.test_bs,
        shuffle = False,
        num_workers = params.num_workers
    )
    
    # GPU(s) or CPU usage
    if params.gpus:
        assert len(params.gpus) >= 1, "Please provide at least one gpu id for gpu training"
        if len(params.gpus) == 1:
            device = torch.device(f"cuda:{params.gpus[0]}")
            model = model.to(device)
            print_log(f"Training model on cuda: {params.gpus[0]}")
        else:
            device = torch.device(f"cuda:{params.gpus[0]}")

            # for parallelism, the model on the default gpu is still the one being updated
            # however, we replicate it to the other gpu every forward and backward pass
            # for gradient computation on the data that we allocated to those gpus
            model = model.to(torch.device(f"cuda:{params.gpus[0]}")) # it seems params.gpus must be like [0, 1] instead of [1, 0]
            model = nn.DataParallel(model, params.gpus)
            print_log(f"Data Parallelism is used across cuda: {params.gpus}")

    else:
        # the model stays on cpu
        print_log("Using cpu for training")
    

    # define optimizer
    # add in separate bn parameters
    if params.weight_decay is not None:
        # add_weight_decay separate bias and weight and bias in batchnorm from other parameters
        # because bias terms and and weight and bias in bn should not be decayed towards zero-norm
        # check here https://discuss.pytorch.org/t/weight-decay-in-the-optimizers-is-a-bad-idea-especially-with-batchnorm/16994/2
        param_groups = add_weight_decay(model, params.weight_decay)
    else:
        param_groups = model.parameters()

    # it is recommended to construct an optimizer after you have done the model.cuda(),
    # as some optimizer might create buffers of the type same as the model parameters.
    # since we will put our model to gpu, it is better that the model parameters have the type cuda 
    # instead of cpu before optimizer construction.
    optimizer = torch.optim.SGD(param_groups, lr = params.lr, weight_decay = params.weight_decay, momentum = params.momentum, nesterov=params.nesterov)

    # Let's define a learning rate scheduler that helps us reduce the learning rate by 10 times if our model's performance
    # on the validation set ceases to increase for 6 epochs
    # The mode should be min, so that it stores the min previous validation loss and compares that to the new validation loss
    # that we will provide when calling scheduler.step(<new_value>).
    # You may use "max" and validation accuracy too.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode = "min", factor = 0.1, patience = 6)

    # resume from previous training
    if params.resume_path:
        ckpt_dict = torch.load(params.resume_path)
        model_state_dict = ckpt_dict["model_state_dict"]
        if isinstance(model, nn.DataParallel):
            model.module.model.load_state_dict(model_state_dict)
        else:
            model.model.load_state_dict(model_state_dict)
        optimizer.load_state_dict(ckpt_dict["optimizer_state_dict"])
        scheduler.load_state_dict(ckpt_dict["scheduler_state_dict"])
        start_epoch = ckpt_dict["epoch"]
        step = ckpt_dict["step"]
        if step != 0 and step % len(train_loader) == 0:
            # if we have finished the whole epoch last time before we saved the checkpoint
            # we move on to the next epoch
            start_epoch += 1
        best_accuracy = ckpt_dict["best_accuracy"]
        print_log(f"Loaded checkpoint {params.resume_path}")
        print_log(f"Resuming from epoch {start_epoch} step {step}")
    else:
        start_epoch = 0
        step = 0
        best_accuracy = 0


    batch_loss = AverageMeter()
    batch_accu = AverageMeter()
    try:
        # the try clause is for the except below where if we use ctrl-c/cmd-c to stop the program
        # it will save a checkpoint before exiting
        for e in range(start_epoch, params.num_epochs):
            model.train()
            progress_bar = tqdm(range(len(train_loader)))
            step_in_last_epoch = step % len(train_loader)
            
            loader = iter(train_loader)
            for i in progress_bar:
                # If we saved weights and stopped training halfway in an epoch, let's finish the remaining data in 
                # that epoch before moving on.
                # As the train_loader will be shuffled, we cannot really train the model on the data 
                # we left behind last time. However, it is easier for us to track training, as with these few lines
                # of code, we can align the stored epoch number correctly with the number of images trained (suppose the batch size is
                # the same, so the number of images trained per step is the same)
                if i < step_in_last_epoch:
                    progress_bar.update()
                    continue

                if i == len(train_loader) - step_in_last_epoch:
                    # if we have finished the equivalent amount of what we left behind last time
                    # stop this epoch and move on to the next
                    break

                
                data = next(loader)
                images = data['image']
                labels = data['label']
                
                # sending images and labels to gpus
                if params.gpus and len(params.gpus) == 1:
                    # if we are using one gpu
                    images = images.to(device)
                    labels = labels.to(device)
                # else:
                    # if the model is on cpu, then nothing needs to be done
                    # if multiple gpus are used, the data will be scattered to the corresponding gpus
                    # inside the nn.DataParallel class directly from CPU. Nothing needs to be done here.


                # forward pass the images to get prediction and loss
                # remember now our model is an instance of the wrapper ModelWithLoss.
                # It computes the loss inside its own forward() method.
                # Do not write as model(images = images, labels = labels) with DataParallel, as
                # they will then be counted as kwargs instead of tensor inputs
                preds, loss = model(images, labels)

                # compute accuracy for the training batch
                # Note: for the last batch or batch of odd number, the dataparallel may skip the remainder
                # when dividing the batch evenly among the gpus, resulting in different dimension between
                # preds and labels. Therefore, we need to take labels[:len(preds)]
                batch_accu.update(compute_accuracy(preds, labels[:len(preds)].view((-1, 1)))[0])
                
                # if we use data parallelism, the loss will be a vector with elements corresponding
                # to the loss on each gpu
                # it does not hurt if we are not using data parallelism
                loss = loss.mean()
                # compute dloss/dx for every parameter x that has requires_grad = True
                # and add this dloss/dx to the parameter's gradient
                # Initially, the parameters' gradients are all zero, loss.backward() adds the newly computed gradient
                # to the existing gradient. It will accumulate unless we call optimizer.zero_grad() to clear them.
                loss /= params.grad_accu_steps # see comments right below
                loss.backward()

                batch_loss.update(loss.item())
                
                step += 1
                # params.grad_accu_steps specifies for how many mini-batches we wish to accumulate gradients.
                # It is a work-around if we cannot fit a desirable size of mini-batch in GPU, we can simply accumulate
                # 2 or 3 batches' gradient before we call optimizer.step() (backpropagation).
                # However, this work-around has a difference when you have batch normalization layers,
                # as the running averages/variances of these are computed as exponential moving average. So
                # the running averages/variances statistics may deviate from using a larger batch.
                if step % params.grad_accu_steps == 0:
                    # Backpropagation to update parameters
                    optimizer.step()
                    # Set the gradients to zero, so that we can accumulate gradients from fresh
                    optimizer.zero_grad()
                
                if step % params.logging_interval == 0:
                    print_log(f"Epoch {e} Step {step}: Average loss is {batch_loss.avg:.4f} Training accuracy is {batch_accu.avg:.4f}")
                    for j, param_group in enumerate(optimizer.param_groups):
                        print_log(f"lr_{j} is {param_group['lr']}")
                    batch_loss.reset()
                    batch_accu.reset()
                
                writer.add_scalars("accuracy", {"train": batch_accu.val}, step)
                writer.add_scalars("loss", {"train": batch_loss.val}, step)
                for j, param_group in enumerate(optimizer.param_groups):
                    writer.add_scalar(f"lr/lr_{j}", param_group["lr"], step)
                # update the information of the 
                progress_bar.set_description(f"Epoch {e}/{params.num_epochs} Step {step} Loss: {batch_loss.avg:.4f} Accuracy: {batch_accu.avg:.4f}")

            if (e + 1) % params.saving_interval == 0:
                save_checkpoint()

            # evaluate our model on the validation set
            # remember, our evaluate function can take care of ModelWithLoss wrapper
            if params.gpus and len(params.gpus) == 1:
                accu_meters, loss_meter = evaluate(model, val_loader, topk = (1, ), device = device)
            else:
                # dataparallel or cpu, let the data stay on cpu, see relevants comments above during training
                accu_meters, loss_meter = evaluate(model, val_loader, topk = (1, ))

            accuracy = accu_meters[0].avg
            print_log(f"Accuracy is {accuracy:.4f}, loss is {loss_meter.avg:.4f} for Epoch {e} Step {step} ")
            writer.add_scalars("accuracy", {"val": accuracy}, step)
            writer.add_scalars("loss", {"val": loss_meter.avg}, step)

            # update learning rate scheduler
            scheduler.step(loss_meter.avg)
            # scheduler.step(accuracy)
            
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                save_checkpoint(best = True)

            

    except KeyboardInterrupt:
        print_log("KeyboardInterrupt: Saving a checkpoint")
        save_checkpoint()
Exemplo n.º 30
0
def train_one_epoch(model, data_queue, opt, gm, epoch, args):
    """Run a single training epoch of the detector.

    Pulls mini-batches from ``data_queue``, performs forward/backward under
    the MegEngine gradient manager ``gm``, applies the optimizer update, and
    (on rank 0 only) accumulates loss/timing meters and logs every
    ``model.cfg.log_interval`` steps.
    """

    def _step(image, im_info, gt_boxes):
        # Forward + backward inside the gradient-manager scope, then apply
        # the parameter update and clear accumulated gradients.
        with gm:
            losses = model(image=image, im_info=im_info, gt_boxes=gt_boxes)
            gm.backward(losses["total_loss"])
            collected = list(losses.values())
        opt.step().clear_grad()
        return collected

    loss_meter = AverageMeter(record_len=model.cfg.num_losses)
    timing_meter = AverageMeter(record_len=2)
    log_every = model.cfg.log_interval
    steps_per_epoch = model.cfg.nr_images_epoch // (
        args.batch_size * dist.get_world_size())

    for cur_step in range(steps_per_epoch):
        adjust_learning_rate(opt, epoch, cur_step, model.cfg, args)

        # Time the data fetch and the train step separately.
        fetch_start = time.time()
        batch = next(data_queue)
        fetch_end = time.time()

        train_start = time.time()
        losses = _step(image=mge.tensor(batch["data"]),
                       im_info=mge.tensor(batch["im_info"]),
                       gt_boxes=mge.tensor(batch["gt_boxes"]))
        train_end = time.time()

        timing_meter.update([train_end - train_start, fetch_end - fetch_start])

        # Only rank 0 accumulates and logs metrics.
        if dist.get_rank() == 0:
            prefix = "e%d, %d/%d, lr:%f, "
            per_loss = ", ".join(
                "{}:%f".format(name) for name in model.cfg.losses_keys)
            suffix = ", train_time:%.3fs, data_time:%.3fs"
            template = prefix + per_loss + suffix
            loss_meter.update([loss.numpy() for loss in losses])
            if cur_step % log_every == 0:
                logger.info(template, epoch, cur_step, steps_per_epoch,
                            opt.param_groups[0]["lr"], *loss_meter.average(),
                            *timing_meter.average())
                loss_meter.reset()
                timing_meter.reset()
Exemplo n.º 31
0
    def train(self, epoch):
        """Run one training epoch of the MTCNN-style network.

        Iterates over ``self.train_loader``, computes the joint loss
        (classification + 0.5 * bbox regression + landmark regression),
        back-propagates and steps the optimizer per batch, then pushes the
        epoch-averaged metrics to ``self.logger``.

        Args:
            epoch: Epoch index, used only for progress printing.

        Returns:
            Tuple of epoch averages:
            ``(cls_loss, box_offset_loss, landmark_loss, total_loss, accuracy)``.
        """
        cls_loss_ = AverageMeter()
        box_offset_loss_ = AverageMeter()
        landmark_loss_ = AverageMeter()
        total_loss_ = AverageMeter()
        accuracy_ = AverageMeter()

        # NOTE(review): stepping the scheduler before the optimizer is the
        # pre-PyTorch-1.1 ordering; on >=1.1 this skips the first lr value.
        # Left unchanged to preserve the existing lr schedule — confirm the
        # target torch version before moving it after the epoch loop.
        self.scheduler.step()
        self.model.train()

        for batch_idx, (data, target) in enumerate(self.train_loader):
            gt_label = target['label']
            gt_bbox = target['bbox_target']
            gt_landmark = target['landmark_target']
            data, gt_label, gt_bbox, gt_landmark = data.to(self.device), gt_label.to(
                self.device), gt_bbox.to(self.device).float(), gt_landmark.to(self.device).float()

            cls_pred, box_offset_pred, landmark_offset_pred = self.model(data)
            # Individual loss terms; box loss is down-weighted by 0.5 below.
            cls_loss = self.lossfn.cls_loss(gt_label, cls_pred)
            box_offset_loss = self.lossfn.box_loss(
                gt_label, gt_bbox, box_offset_pred)
            landmark_loss = self.lossfn.landmark_loss(gt_label, gt_landmark, landmark_offset_pred)

            total_loss = cls_loss + box_offset_loss * 0.5 + landmark_loss
            accuracy = self.compute_accuracy(cls_pred, gt_label)

            self.optimizer.zero_grad()
            total_loss.backward()
            self.optimizer.step()

            # FIX: store plain Python floats instead of tensors. Feeding the
            # loss tensors straight into the meters keeps references to GPU
            # memory (and autograd bookkeeping) alive for the whole epoch.
            batch_size = data.size(0)
            total_loss_val = total_loss.item()
            accuracy_val = accuracy.item()
            cls_loss_.update(cls_loss.item(), batch_size)
            box_offset_loss_.update(box_offset_loss.item(), batch_size)
            landmark_loss_.update(landmark_loss.item(), batch_size)
            total_loss_.update(total_loss_val, batch_size)
            accuracy_.update(accuracy_val, batch_size)

            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.6f}'.format(
                epoch, batch_idx * len(data), len(self.train_loader.dataset),
                100. * batch_idx / len(self.train_loader), total_loss_val, accuracy_val))

        # Epoch-level scalars for the external logger.
        self.scalar_info['cls_loss'] = cls_loss_.avg
        self.scalar_info['box_offset_loss'] = box_offset_loss_.avg
        self.scalar_info['landmark_loss'] = landmark_loss_.avg
        self.scalar_info['total_loss'] = total_loss_.avg
        self.scalar_info['accuracy'] = accuracy_.avg
        self.scalar_info['lr'] = self.scheduler.get_lr()[0]

        if self.logger is not None:
            for tag, value in list(self.scalar_info.items()):
                self.logger.scalar_summary(tag, value, self.run_count)
            self.scalar_info = {}
        self.run_count += 1

        print("|===>Loss: {:.4f}".format(total_loss_.avg))
        return cls_loss_.avg, box_offset_loss_.avg, landmark_loss_.avg, total_loss_.avg, accuracy_.avg