Example #1
def build_reid_train_loader(cfg):
    train_transforms = build_transforms(cfg, is_train=True)

    logger = logging.getLogger(__name__)
    train_items = list()
    for d in cfg.DATASETS.NAMES:
        logger.info('preparing training set {}'.format(d))
        dataset = DATASET_REGISTRY.get(d)(cfg)
        train_items.extend(dataset.train)

    train_set = BlackreidDataset(train_items, train_transforms, mode='train', relabel=True)

    num_workers = cfg.DATALOADER.NUM_WORKERS
    batch_size = cfg.SOLVER.IMS_PER_BATCH
    num_instance = cfg.DATALOADER.NUM_INSTANCE

    if cfg.DATALOADER.PK_SAMPLER:
        data_sampler = samplers.RandomIdentitySampler(train_set.img_items, batch_size, num_instance)
    else:
        data_sampler = samplers.TrainingSampler(len(train_set))
    batch_sampler = torch.utils.data.sampler.BatchSampler(data_sampler, batch_size, True)

    train_loader = torch.utils.data.DataLoader(
        train_set,
        num_workers=num_workers,
        batch_sampler=batch_sampler,
        collate_fn=fast_batch_collator,
    )
    return data_prefetcher(cfg, train_loader)
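
A note on the data_prefetcher used throughout these examples: none of the snippets define it, and its signature varies per repository (Example #1 passes cfg, Example #2 passes an index list). Below is a minimal sketch of the common APEX-style pattern they all assume: a side CUDA stream copies the next batch to the GPU while the current one is being processed, and next() returns None fields once the loader is exhausted. The two-tensor layout here is an assumption; Example #4's four-tensor variant simply keeps four fields instead of two.

import torch

class data_prefetcher:
    def __init__(self, loader):
        self.loader = iter(loader)
        self.stream = torch.cuda.Stream()  # side stream for async host-to-device copies
        self.preload()

    def preload(self):
        try:
            self.next_input, self.next_target = next(self.loader)
        except StopIteration:
            self.next_input = self.next_target = None
            return
        with torch.cuda.stream(self.stream):
            # non_blocking=True only overlaps when the DataLoader uses pin_memory=True
            self.next_input = self.next_input.cuda(non_blocking=True)
            self.next_target = self.next_target.cuda(non_blocking=True)

    def next(self):
        # make the default stream wait for the prefetch copies to finish
        torch.cuda.current_stream().wait_stream(self.stream)
        inp, tgt = self.next_input, self.next_target
        if inp is not None:
            inp.record_stream(torch.cuda.current_stream())
        if tgt is not None:
            tgt.record_stream(torch.cuda.current_stream())
        self.preload()
        return inp, tgt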
Example #2
    def get_train_loader(self):
        opt = self.opt
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.Resize(opt.pretrained_image_size),
            torchvision.transforms.ToTensor(),
        ])

        train_dataset = PFADataset(age_group=opt.age_group,
                                   max_iter=opt.max_iter,
                                   batch_size=opt.batch_size *
                                   len(opt.device_ids),
                                   dataset_name=opt.dataset_name,
                                   source=opt.source,
                                   transforms=transforms)
        train_sampler = tordata.distributed.DistributedSampler(train_dataset,
                                                               shuffle=False)

        train_loader = tordata.DataLoader(dataset=train_dataset,
                                          batch_size=opt.batch_size,
                                          drop_last=True,
                                          num_workers=opt.num_workers,
                                          pin_memory=True,
                                          sampler=train_sampler)
        # source_img, true_img, source_label, target_label, true_label, true_age, mean_age
        return data_prefetcher(train_loader, [0, 1])
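
The [0, 1] argument is not defined anywhere in this snippet; judging by the comment on the return line, it plausibly selects which positions of the yielded tuple (source_img and true_img) get copied to the GPU. A sketch of such an index-selective prefetcher, under that assumption:

import torch

class data_prefetcher:
    def __init__(self, loader, cuda_indices):
        self.loader = iter(loader)
        self.cuda_indices = set(cuda_indices)  # tuple positions to copy to the GPU
        self.stream = torch.cuda.Stream()
        self.preload()

    def preload(self):
        try:
            batch = next(self.loader)
        except StopIteration:
            self.batch = None
            return
        with torch.cuda.stream(self.stream):
            self.batch = [t.cuda(non_blocking=True) if i in self.cuda_indices else t
                          for i, t in enumerate(batch)]

    def next(self):
        torch.cuda.current_stream().wait_stream(self.stream)
        batch = self.batch  # a list of tensors, or None when the loader is exhausted
        self.preload()
        return batch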
Example #3
def build_reid_test_loader(cfg, dataset_name):
    test_transforms = build_transforms(cfg, is_train=False)

    logger = logging.getLogger(__name__)
    logger.info('preparing test set {}'.format(dataset_name))
    dataset = DATASET_REGISTRY.get(dataset_name)(cfg)
    test_items = dataset.query + dataset.gallery

    test_set = BlackreidDataset(test_items, test_transforms, mode='test', relabel=False)

    num_workers = cfg.DATALOADER.NUM_WORKERS
    batch_size = cfg.TEST.IMS_PER_BATCH
    data_sampler = samplers.InferenceSampler(len(test_set))
    batch_sampler = torch.utils.data.BatchSampler(data_sampler, batch_size, False)
    test_loader = DataLoader(
        test_set,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        collate_fn=fast_batch_collator)
    return data_prefetcher(cfg, test_loader), len(dataset.query)
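
fast_batch_collator is likewise external to these snippets; in fastreid-style code it replaces default_collate with a preallocate-and-copy loop. A simplified sketch of that idea (the exact type branches are an assumption):

import torch

def fast_batch_collator(batched_inputs):
    elem = batched_inputs[0]
    if isinstance(elem, torch.Tensor):
        # preallocate the output once and copy each sample in,
        # avoiding default_collate's recursive stacking
        out = torch.zeros((len(batched_inputs), *elem.size()), dtype=elem.dtype)
        for i, tensor in enumerate(batched_inputs):
            out[i] += tensor
        return out
    if isinstance(elem, dict):
        return {key: fast_batch_collator([d[key] for d in batched_inputs])
                for key in elem}
    if isinstance(elem, (int, float)):
        return torch.tensor(batched_inputs)
    return batched_inputs  # strings and anything else pass through as a list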
Example #4
def train(train_loader, val_loader, model, optimizer, criterion, lr_scheduler,
          device, opt):
    '''
    Model training loop.
    :param train_loader: DataLoader over the training set
    :param val_loader: DataLoader over the validation set
    :param model: network to train
    :param optimizer: optimizer
    :param criterion: loss function (weighted binary cross-entropy)
    :param lr_scheduler: LRScheduler
    :param device: torch.device to run on
    :param opt: options object
    :return: None
    '''
    total_step = len(train_loader)
    best_acc = -1
    losses = AverageMeter()
    batch_time = AverageMeter()
    end = time.time()
    print_freq = 20
    iter_per_epoch = len(train_loader)
    iter_sum = iter_per_epoch * opt.epochs
    fast_train = hasattr(opt, 'fast_train')
    writer = SummaryWriter(opt.model_save_path)
    iter_used = 0  # initialized here so the epoch-end logging is safe even if print_freq is never reached

    for epoch in range(opt.epochs):
        model.train()

        prefetcher = data_prefetcher(train_loader)
        datas, ages, sexs, labels = prefetcher.next()
        i = 0
        while datas is not None:
            i += 1
            lr_scheduler.update(i, epoch)
            # for i, (datas, ages, sexs, labels) in enumerate(train_loader):
            datas = datas.to(device)
            ages = ages.to(device)
            sexs = sexs.to(device)
            labels = labels.to(device)
            # Forward pass
            outputs = model(datas, ages, sexs)
            loss = criterion(outputs, labels)
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Update tensorboard
            batch_time.update(time.time() - end)
            losses.update(loss.item(), datas.size(0))
            # Print
            if (i + 1) % print_freq == 0:
                iter_used = epoch * iter_per_epoch + i
                used_time = batch_time.sum
                total_time = used_time / iter_used * iter_sum
                used_time = str(datetime.timedelta(seconds=used_time))
                total_time = str(datetime.timedelta(seconds=total_time))
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, LR:{:.5f}, Time[{:.7s}/{:.7s}]'
                    .format(epoch + 1, opt.epochs, i + 1, total_step,
                            loss.item(), optimizer.param_groups[0]['lr'],
                            used_time, total_time),
                    flush=True)
                writer.add_scalar('Learning_rate',
                                  optimizer.param_groups[0]['lr'], iter_used)
                writer.add_scalar('Train/Avg_Loss', losses.avg, iter_used)
            end = time.time()
            datas, ages, sexs, labels = prefetcher.next()

        if not fast_train:
            # acc in train set
            acc_train = val(train_loader, model, device)
            print('Train Accuracy: {} %'.format(acc_train), flush=True)
            writer.add_scalar('Train/F1_Score', acc_train, iter_used)
            # acc in validation set
            acc_val = val(val_loader, model, device)
            if acc_val > best_acc:
                # Save the model checkpoint
                best_acc = acc_val
                if epoch > int(opt.epochs * 0.8):
                    save_name = args.model + '_e{}.ckpt'.format(epoch)
                    save_path = opt.model_save_path + save_name
                    torch.save(model.state_dict(), save_path)
            print('Validation Accuracy: {} %'.format(acc_val), flush=True)
            writer.add_scalar('Validation/F1_Score', acc_val, iter_used)
        else:
            if epoch > int(opt.epochs * 0.8):
                acc_val = val(val_loader, model, device)
                if acc_val > best_acc:
                    best_acc = acc_val
                    save_name = args.model + '_e{}.ckpt'.format(epoch)
                    save_path = opt.model_save_path + save_name
                    torch.save(model.state_dict(), save_path)
    return
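
AverageMeter is the usual running-average helper from the PyTorch ImageNet example; the loop above relies on its val/avg/sum/count fields (losses.avg, batch_time.sum). A minimal version for reference:

class AverageMeter:
    """Tracks the latest value plus a running sum, count, and average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = self.avg = self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count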
Example #5
def train():
    model, pe = make_model(class_num)
    if opt["previous_stage"] is None:
        for name, param in model.named_parameters():
            if param.dim() > 1:
                init.kaiming_normal_(param)
            elif 'weight' in name:
                init.constant_(param, 1)
            else:
                init.constant_(param, 0)
    else:
        model.load_state_dict(torch.load("./" + opt["model_name"] + opt["previous_stage"] + ".pkl"))
    model = model.cuda()
    pe = pe.cuda()
    criterion = torch.nn.CrossEntropyLoss(ignore_index=0).cuda()
    if opt["optimizer"] == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=opt["lr"])
    elif opt["optimizer"] == "SGD":
        optimizer = optim.SGD(model.parameters(), lr=opt["lr"], momentum=0.9)
        lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, patience=3, verbose=True)
    else:
        optimizer = adabound.AdaBound(model.parameters())
    if opt["previous_stage"] is not None:
        optimizer.load_state_dict(torch.load("./" + opt["model_name"] + opt["previous_stage"] + opt["optimizer"] + ".pkl"))
        if opt["optimizer"] == "SGD":
            lr_scheduler.load_state_dict(torch.load("./" + opt["model_name"] + opt["previous_stage"] + opt["optimizer"] + "_lrScheduler.pkl"))
    TrainSet = dataset("/data1/luofuyou/ReCTS_lmdb", split_ratio=0.8, img_size=opt["img_size"])
    num_batch = int(TrainSet.len / opt["batch_size"])
    num_iter = opt["num_epoch"] * num_batch
    print(f"TrainSet_len:{TrainSet.len}")
    print("batch_size:%d;num_batch:%d;num_epoch:%d;iter:%d;optimizer:%s;lr:%.2f;train_stage:%s" % (
        opt["batch_size"], num_batch, opt["num_epoch"], num_iter, opt["optimizer"],
        optimizer.param_groups[0]["lr"], opt["train_stage"]))
    ValidateSet = dataset("/data1/luofuyou/ReCTS_lmdb", bias=0.01, img_size=opt["img_size"])
    print(f"ValidateSet_len:{ValidateSet.len}")
    totalTimeS = time.time()
    mask = make_mask(opt["max_len"] + 2)
    if torch.cuda.device_count() > 1:
        updater = MultiGPUTrainer(model, criterion, optimizer, pe, mask)
    else:
        updater = OneGPUTrainer(model, criterion, optimizer, pe, mask)
    for epoch in range(1, opt["num_epoch"] + 1):
        torch.cuda.empty_cache()
        model.train()
        total_loss = torch.zeros(1).cuda()
        loader = DataLoader(TrainSet, batch_size=opt["batch_size"], shuffle=True,
                            num_workers=8, pin_memory=True, drop_last=True)
        prefetcher = data_prefetcher(loader)
        start = time.time()
        X, Y = prefetcher.next()
        while X is not None:
            loss = updater.update(X, Y)
            total_loss += loss
            X, Y = prefetcher.next()
        end = time.time()
        print(f"epoch:{epoch};avg_loss:{total_loss.item() / num_batch};time consumed:{time_interval(end - start)}")
        torch.save(model.state_dict(), opt["model_name"] + opt["train_stage"] + ".pkl")

        CCR = test(model, pe, ValidateSet, opt["max_len"], make_mask, class_num, converter, error_analysis=False)
        torch.save(optimizer.state_dict(), opt["model_name"] + opt["train_stage"] + opt["optimizer"] + ".pkl")
        if opt["optimizer"] == "SGD":
            lr_scheduler.step(total_loss.item() / num_batch)
            torch.save(lr_scheduler.state_dict(), opt["model_name"] + opt["train_stage"] + opt["optimizer"] + "_lrScheduler.pkl")

    totalTimeE = time.time()
    print(f"the training has finished;time consumed is {time_interval(totalTimeE - totalTimeS)}")
    TrainSet.env.close()
    ValidateSet.env.close()
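
make_mask is not shown either; for a Transformer-style decoder fed sequences of max_len + 2 tokens ([GO] and [s] included), it is typically the causal mask that stops position i from attending to later positions. A sketch under that assumption (the exact boolean convention depends on the attention implementation):

import torch

def make_mask(size):
    # True on and below the diagonal: position i may attend to positions <= i
    return torch.tril(torch.ones(size, size)).bool()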
Example #6
def train():
    model = LSTMBase(opt)
    if opt["train_stage"] == 1.0:
        for name, param in model.named_parameters():
            if 'localization_fc2' in name:
                print(f'Skip {name} as it is already initialized')
                continue
            try:
                if 'bias' in name:
                    init.constant_(param, 0.0)
                elif 'weight' in name:
                    init.kaiming_normal_(param)
            except Exception as e:  # for batchnorm.
                if 'weight' in name:
                    param.data.fill_(1)
                continue
    else:
        model.load_state_dict(
            torch.load("./LSTMBase_params_{}.pkl".format(opt["train_stage"] -
                                                         1.0)))
    criterion = torch.nn.CrossEntropyLoss(ignore_index=0).cuda()
    optimizer = optim.Adadelta(model.parameters(),
                               lr=opt["lr"],
                               rho=opt["rho"],
                               eps=opt["eps"])
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids).cuda()
    else:
        model = model.cuda()
    model.train()
    ReCTS = dataset("/data1/luofuyou/ReCTS_lmdb",
                    split_ratio=0.96,
                    img_size=opt["img_size"])
    num_batch = int(ReCTS.len / opt["batch_size"])
    num_iter = opt["num_epoch"] * num_batch
    print(
        "dataSet_len:%d;batch size:%d;num_batch:%d;num_epoch:%d;iter:%d;lr=%f"
        % (ReCTS.len, opt["batch_size"], num_batch, opt["num_epoch"], num_iter,
           opt["lr"]))
    totalTimeS = time.time()
    p = 0
    for epoch in range(1, opt["num_epoch"] + 1):
        torch.cuda.empty_cache()
        total_loss = 0.0
        dataLoader = DataLoader(ReCTS,
                                batch_size=opt["batch_size"],
                                shuffle=True,
                                num_workers=8,
                                pin_memory=True,
                                drop_last=True)
        prefetcher = data_prefetcher(dataLoader)
        epochTimeS = time.time()
        X, Y = prefetcher.next()
        while X is not None:
            try:
                text = converter.encode(Y, opt["batch_max_length"])
                preds = model(X, text)
                target = text[:, 1:]  # without [GO] Symbol
                cost = criterion(preds.view(-1, preds.shape[-1]),
                                 target.contiguous().view(-1))
                optimizer.zero_grad()
                cost.backward()
                clip_grad_norm_(model.parameters(), opt["grad_clip"])
                optimizer.step()
                total_loss += cost.item()
                X, Y = prefetcher.next()
            except Exception as e:
                # likely a guard for labels longer than batch_max_length:
                # grow the limit and retry the same batch
                print(e)
                opt["batch_max_length"] += 4
        epochTimeE = time.time()
        print("epoch:%d;avg_loss:%f;time consumed:%s" %
              (epoch, total_loss / num_batch,
               time_interval(epochTimeE - epochTimeS)))
        #if epoch % int(0.6*opt["num_epoch"]) == 0 or epoch % int(0.8*opt["num_epoch"]) == 0:
        #    p += 1
        #    lr = opt["lr"] * np.power(0.1, p)
        #    optimizer.param_groups[0]["lr"] = lr
        if torch.cuda.device_count() > 1:
            torch.save(model.module.state_dict(),
                       "LSTMBase_params_{}.pkl".format(opt["train_stage"]))
        else:
            torch.save(model.state_dict(),
                       "LSTMBase_params_{}.pkl".format(opt["train_stage"]))

    ReCTS.env.close()
    totalTimeE = time.time()
    print("the training has finished;the total time consumed is %s" %
          time_interval(totalTimeE - totalTimeS))
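
converter.encode/decode is the attention-style label converter familiar from deep-text-recognition-benchmark-style code: encode pads strings into index tensors with [GO] (index 0, matching ignore_index=0 above) and a trailing [s]; decode maps indices back to strings that the caller trims at '[s]'. A minimal sketch under that assumption:

import torch

class LabelConverter:
    def __init__(self, charset):
        self.chars = ['[GO]', '[s]'] + list(charset)
        self.index = {c: i for i, c in enumerate(self.chars)}

    def encode(self, texts, max_len):
        # one leading [GO] plus a trailing [s] explains the max_len + 2 width
        out = torch.zeros(len(texts), max_len + 2, dtype=torch.long)  # 0 == [GO] pad
        for i, t in enumerate(texts):
            idx = [self.index[c] for c in t] + [self.index['[s]']]
            out[i, 1:1 + len(idx)] = torch.tensor(idx)
        return out

    def decode(self, indices):
        return [''.join(self.chars[int(i)] for i in row) for row in indices]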
Example #7
def train(args):
    start_epoch = 0
    data_loader = DataLoader(dataset=FaceDetectSet(416, True),
                             batch_size=args.batch,
                             shuffle=True,
                             num_workers=16)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    model = MSSD()
    print("add graph")
    writer.add_graph(model, torch.zeros((1, 3, 416, 416)))
    print("add graph over")
    if args.pretrained and os.path.exists(MODEL_SAVE_PATH):
        print("loading ...")
        state = torch.load(MODEL_SAVE_PATH)
        model.load_state_dict(state['net'])
        start_epoch = state['epoch']
        print("loading over")
    model = torch.nn.DataParallel(model, device_ids=[0, 1])  # multi-GPU
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-5)
    scheduler = StepLR(optimizer, step_size=args.step, gamma=args.gama)
    train_loss = 0
    loss_func = MLoss().to(device)
    to_pil_img = tfs.ToPILImage()
    to_tensor = tfs.ToTensor()

    for epoch in range(start_epoch, start_epoch + args.epoes):
        model.train()
        prefetcher = data_prefetcher(data_loader)
        img_tensor, label_tensor = prefetcher.next()
        last_img_tensor = img_tensor
        last_label_tensor = label_tensor
        optimizer.zero_grad()
        i_batch = 0
        while img_tensor is not None:
            last_img_tensor = img_tensor
            last_label_tensor = label_tensor
            output = model(img_tensor)
            loss = loss_func(output, label_tensor)
            if loss is None:
                img_tensor, label_tensor = prefetcher.next()
                continue
            loss.backward()
            # gradient accumulation: step the optimizer every `mini_batch` iterations
            if i_batch % args.mini_batch == 0:
                optimizer.step()
                optimizer.zero_grad()

            train_loss = loss.item()
            global_step = epoch * len(data_loader) + i_batch
            progress_bar(i_batch, len(data_loader),
                         'loss: %f, epoch: %d' % (train_loss, epoch))
            writer.add_scalar("loss", train_loss, global_step=global_step)
            img_tensor, label_tensor = prefetcher.next()
            i_batch += 1

        #save one pic and output
        pil_img = to_pil_img(last_img_tensor[0].cpu())
        bboxes = tensor2bbox(output[0], 416, [52, 26, 13], thresh=0.5)
        # bboxes = nms(bboxes, 0.6, 0.5)
        draw = ImageDraw.Draw(pil_img)
        for bbox in bboxes:
            draw.text((bbox[1] - bbox[3] / 2, bbox[2] - bbox[4] / 2 - 10),
                      str(round(bbox[0].item(), 2)),
                      fill=(255, 0, 0))
            draw.rectangle((bbox[1] - bbox[3] / 2, bbox[2] - bbox[4] / 2,
                            bbox[1] + bbox[3] / 2, bbox[2] + bbox[4] / 2),
                           outline=(0, 255, 0))
            draw.rectangle(
                (bbox[1] - bbox[3] / 2 + 1, bbox[2] - bbox[4] / 2 + 1,
                 bbox[1] + bbox[3] / 2 - 1, bbox[2] + bbox[4] / 2 - 1),
                outline=(0, 255, 0))
        writer.add_image("img: " + str(epoch), to_tensor(pil_img))
        scheduler.step()

        if epoch % 10 == 0:
            print('Saving..')
            state = {
                'net': model.module.state_dict(),
                'epoch': epoch,
            }
            torch.save(state, "./data/mssd_face_detect" + str(epoch) + ".pt")

    if not os.path.isdir('data'):
        os.mkdir('data')
    print('Saving..')
    state = {
        'net': model.module.state_dict(),
        'epoch': epoch,
    }
    torch.save(state, MODEL_SAVE_PATH)
    writer.close()
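
writer here is a module-level SummaryWriter (Example #4 constructs its own from opt.model_save_path); progress_bar and tensor2bbox are project helpers not shown. A minimal setup sketch for the writer, with a hypothetical log directory:

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/mssd_face_detect')  # hypothetical log dir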
Example #8
def train():
    model, pe = make_model(class_num)
    criterion = torch.nn.CrossEntropyLoss(ignore_index=0).cuda()
    if opt["optimizer"] == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=opt["lr"])
    elif opt["optimizer"] == "SGD":
        optimizer = optim.SGD(model.parameters(), lr=opt["lr"], momentum=0.9)
        lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, patience=5, verbose=True)
    else:
        optimizer = adabound.AdaBound(model.parameters())
    if opt["previous_stage"] is None:
        for param in model.parameters():
            if param.dim() > 1:
                init.kaiming_normal_(param)
            else:
                init.constant_(param,0)
    else:
        model.load_state_dict(torch.load("./"+opt["model_name"]+opt["previous_stage"]+".pkl"))
        optimizer.load_state_dict(torch.load("./"+opt["model_name"]+opt["previous_stage"]+opt["optimizer"]+".pkl"))
        if opt["optimizer"] == "SGD":
            lr_scheduler.load_state_dict(torch.load("./"+opt["model_name"]+opt["previous_stage"]+opt["optimizer"]+"_lrScheduler.pkl"))
    model = model.cuda()
    pe = pe.cuda()
    TrainSet = dataset("/data1/luofuyou/ReCTS_lmdb", split_ratio=0.99, img_size=opt["img_size"])
    num_batch = int(TrainSet.len / opt["batch_size"])
    num_iter = opt["num_epoch"] * num_batch
    print(f"TrainSet_len:{TrainSet.len}")
    print("batch_size:%d;num_batch:%d;num_epoch:%d;iter:%d;optimizer:%s;lr:%.2f;train_stage:%s" % (
        opt["batch_size"], num_batch, opt["num_epoch"], num_iter, opt["optimizer"],
        opt["lr"], opt["train_stage"]))
    ValidateSet = dataset("/data1/luofuyou/ReCTS_lmdb", bias=0.99, img_size=opt["img_size"])
    print(f"ValidateSet_len:{ValidateSet.len}")
    totalTimeS = time.time()
    updater = OneGPUTrainer(model, criterion, optimizer, pe)
    for epoch in range(1, opt["num_epoch"] + 1):
        model.train()
        total_loss = torch.zeros(1).cuda()
        loader = DataLoader(TrainSet, batch_size=opt["batch_size"], shuffle=True,
                            num_workers=8, pin_memory=True, drop_last=True)
        prefetcher = data_prefetcher(loader)
        start = time.time()
        X, Y = prefetcher.next()
        while X is not None:
            loss = updater.update(X, Y)
            total_loss += loss
            X, Y = prefetcher.next()
        end = time.time()
        print(f"epoch:{epoch};avg_loss:{total_loss.item() / num_batch};time consumed:{time_interval(end - start)}")
        torch.cuda.empty_cache()
        torch.save(model.state_dict(), opt["model_name"] + opt["train_stage"] + ".pkl")

        loader = DataLoader(ValidateSet, batch_size=44, shuffle=False,
                            num_workers=8, pin_memory=True, drop_last=False)
        prefetcher = data_prefetcher(loader)
        CCR = 0.0
        start = time.time()
        X, Y = prefetcher.next()
        while X is not None:
            preds = model(X, pe)
            text_indexs = torch.argmax(preds, dim=-1)
            texts = converter.decode(text_indexs)
            for text, label in zip(texts, Y):
                text = text[:text.find('[s]')]
                try:
                    NED = 1 - editdistance.distance(text, label) / max(len(text), len(label))
                    CCR += NED
                except ZeroDivisionError:  # both prediction and label are empty
                    pass
            X, Y = prefetcher.next()
        CCR /= ValidateSet.len
        CCR = round(100 * CCR, 2)
        end = time.time()
        print(f"CCR:{CCR}%;time consumed:{time_interval(end - start)}")
        torch.cuda.empty_cache()
        torch.save(optimizer.state_dict(), opt["model_name"] + opt["train_stage"] + opt["optimizer"] + ".pkl")
        if opt["optimizer"] == "SGD":
            lr_scheduler.step(total_loss.item() / num_batch)
            torch.save(lr_scheduler.state_dict(), opt["model_name"] + opt["train_stage"] + opt["optimizer"] + "_lrScheduler.pkl")

    totalTimeE = time.time()
    print(f"the training has finished;time consumed is {time_interval(totalTimeE - totalTimeS)}")
    TrainSet.env.close()
    ValidateSet.env.close()
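
The validation metric above is the average of 1 - normalized edit distance (edit distance divided by the longer string's length). A tiny worked check with the editdistance package; eval() is its long-standing entry point, while the distance() call above may be a newer alias or a local wrapper:

import editdistance

pred, label = 'hel1o', 'hello'
ned = 1 - editdistance.eval(pred, label) / max(len(pred), len(label))
print(round(ned, 2))  # 0.8: one substitution out of five characters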