示例#1
0
def train_model(model, optimizer, dataloader, epoch):
    """Run one training epoch and print running and epoch-average losses.

    Args:
        model: network whose output is consumed by the module-level ``criterion``.
        optimizer: optimizer stepping ``model``'s parameters.
        dataloader: yields ``(img_batch, mask_batch)`` pairs.
        epoch: epoch index, used only in the summary printout.

    Relies on module-level ``device`` and ``criterion`` (which must return a
    ``(mask_loss, size_loss, offset_loss)`` triple).
    """
    model.train()
    running_mask_loss, running_size_loss, running_offset_loss = 0.0, 0.0, 0.0
    num_batches = 0
    for batch_idx, (img_batch, mask_batch) in enumerate(dataloader):
        img_batch = img_batch.to(device)
        mask_batch = mask_batch.to(device)

        optimizer.zero_grad()
        output = model(img_batch)

        mask_loss, size_loss, offset_loss = criterion(output, mask_batch)

        loss = mask_loss + size_loss + offset_loss

        loss.backward()

        optimizer.step()

        # .item() detaches from autograd; accumulating the raw loss tensors
        # (as the original did) keeps every batch's graph alive in memory.
        running_mask_loss += mask_loss.item()
        running_size_loss += size_loss.item()
        running_offset_loss += offset_loss.item()
        num_batches = batch_idx + 1

        if batch_idx % 5 == 0:
            print(
                f'\r{running_mask_loss/(batch_idx+1):.3f} {running_size_loss/(batch_idx+1):.3f} {running_offset_loss/(batch_idx+1):.3f}',
                end='',
                flush=True)

    if num_batches == 0:
        # Empty dataloader: nothing trained, nothing to report.
        return
    print('\r', end='', flush=True)
    # Average over the true batch count. The original divided by the last
    # batch_idx (off by one) and crashed with a single-batch loader.
    print(
        f"Epoch: {epoch} mask_loss: {running_mask_loss/num_batches: .3f} "
        f"size_loss: {running_size_loss/num_batches: .3f} offset_loss: {running_offset_loss/num_batches: .3f}"
    )
def train(model, optimizer, epochs, trainloader, valloader):
    """Train for ``epochs`` epochs with a triangular LR schedule, running
    validation after every epoch.

    Relies on module-level ``criterion``, ``lr_list``, ``Progbar``,
    ``ajust_learning_tri`` and ``val``.
    """
    model.train()
    batches_per_epoch = len(trainloader)
    for epoch_index in range(epochs):
        pbar = Progbar(target=batches_per_epoch)
        running_loss = 0.0
        for batch_index, data in enumerate(trainloader):
            # Global step across all epochs drives the cyclic LR schedule.
            global_step = epoch_index * batches_per_epoch + batch_index
            lr = ajust_learning_tri(optimizer,
                                    global_step,
                                    step_size=batches_per_epoch * 2)
            # Move the batch onto the GPU.
            inputs, labels = data
            inputs, labels = inputs.cuda(), labels.cuda()

            outputs = model(inputs)
            cost = criterion(outputs, labels)
            # Clear stale gradients, backprop, then update the parameters.
            optimizer.zero_grad()
            cost.backward()
            optimizer.step()

            running_loss += cost.item()
            pbar.update(batch_index + 1,
                        values=[('loss', running_loss / (batch_index + 1)),
                                ('epoch:', epoch_index)])

            lr_list.append(lr)
        val(model, valloader)
示例#3
0
def evaluate(model, dataloader):
    """Compute the mean per-batch loss of ``model`` over ``dataloader``.

    Runs under ``torch.no_grad()`` so no autograd state is built.
    Relies on module-level ``criterion`` and ``_DEVICE_``.

    Returns:
        Mean loss as a Python float (0.0 for an empty loader).
    """
    model.eval()
    eval_loss = 0.0
    with torch.no_grad():
        for idx, data in enumerate(dataloader, 0):
            X, Y = data
            X = X.to(_DEVICE_)
            Y = Y.to(_DEVICE_)
            res = model(X)
            batch_loss = criterion(res, Y)
            # .item(): the original accumulated the raw tensor, which kept
            # every batch's result on-device and returned a tensor rather
            # than a float.
            eval_loss += batch_loss.item()
    if len(dataloader) == 0:
        # Avoid ZeroDivisionError on an empty loader.
        return 0.0
    return eval_loss / len(dataloader)
def val(model, valloader):
    """Evaluate classification accuracy/loss on ``valloader`` and save the model.

    Relies on module-level ``criterion``, ``Progbar`` and ``save_model``.
    """
    correct = 0  # number of correctly classified images
    total = 0  # total number of images seen
    # Defined up front: the original left `acc` unbound when valloader was
    # empty, so the save_model call below raised NameError.
    acc = 0.0
    model.eval()
    # Gradients are not needed for evaluation; disabling autograd saves
    # time and memory.
    with torch.no_grad():
        running_loss = 0.0
        pbar = Progbar(target=len(valloader))
        for i, data in enumerate(valloader):
            images, labels = data
            images = images.cuda()
            labels = labels.cuda()
            outputs = model(images)
            cost = criterion(outputs, labels)
            running_loss += cost.item()
            # Predicted class = argmax over the class dimension.
            _, predicted = torch.max(outputs, 1)
            total += labels.cpu().numpy().shape[0]
            correct += (predicted.cpu().numpy() == labels.cpu().numpy()).sum()
            acc = correct / total
            pbar.update(i + 1,
                        values=[('loss', running_loss / (i + 1)),
                                ('acc:', acc)])
        save_model(model, acc, distributed=False)
示例#5
0
    def train_one_epoch(self):
        """Train ``self.model`` for one epoch over the annotated tiles.

        Skips the epoch entirely while no photo annotations exist yet,
        then trains with a per-tile "defined" mask so undefined regions
        carry no loss, and logs tp/fp/tn/fn metrics at the end.
        Returns early (without logging) if training is switched off
        mid-epoch via ``self.check_for_instructions()``.
        """
        train_annot_dir = self.train_config['train_annot_dir']
        val_annot_dir = self.train_config['val_annot_dir']
        # Bug fix: the original tested `not [is_photo(a) for a in ls(dir)]`.
        # A list of booleans is truthy whenever the directory listing is
        # non-empty, even if NO entry is a photo. any() checks the intent:
        # proceed only when at least one photo annotation exists.
        if not any(is_photo(a) for a in ls(train_annot_dir)):
            return
        if not any(is_photo(a) for a in ls(val_annot_dir)):
            return

        if self.first_loop:
            self.first_loop = False
            self.write_message('Training started')
            self.log('Starting Training')

        train_loader = DataLoader(
            self.train_set,
            self.bs,
            shuffle=True,
            # 12 workers is good for performance
            # on 2 RTX2080 Tis
            # 0 workers is good for debugging
            num_workers=12,
            drop_last=False,
            pin_memory=True)
        epoch_start = time.time()
        self.model.train()
        # Confusion-matrix accumulators over all defined pixels this epoch.
        tps = 0
        fps = 0
        tns = 0
        fns = 0
        defined_total = 0
        loss_sum = 0
        for step, (photo_tiles, foreground_tiles,
                   defined_tiles) in enumerate(train_loader):

            self.check_for_instructions()
            photo_tiles = photo_tiles.cuda()
            foreground_tiles = foreground_tiles.cuda()
            defined_tiles = defined_tiles.cuda()
            self.optimizer.zero_grad()
            outputs = self.model(photo_tiles)
            softmaxed = softmax(outputs, 1)
            # just the foreground probability.
            foreground_probs = softmaxed[:, 1, :]
            # remove any of the predictions for which we don't have ground truth
            # Set outputs to 0 where annotation undefined so that
            # The network can predict whatever it wants without any penalty.
            outputs[:, 0] *= defined_tiles
            outputs[:, 1] *= defined_tiles
            loss = criterion(outputs, foreground_tiles)
            loss.backward()
            self.optimizer.step()
            foreground_probs *= defined_tiles
            predicted = foreground_probs > 0.5

            # we only want to calculate metrics on the
            # part of the predictions for which annotations are defined
            # so remove all predictions and foreground labels where
            # we didn't have any annotation.

            defined_list = defined_tiles.view(-1)
            preds_list = predicted.view(-1)[defined_list > 0]
            foregrounds_list = foreground_tiles.view(-1)[defined_list > 0]

            # calculate all the false positives, false negatives etc
            tps += torch.sum(
                (foregrounds_list == 1) * (preds_list == 1)).cpu().numpy()
            tns += torch.sum(
                (foregrounds_list == 0) * (preds_list == 0)).cpu().numpy()
            fps += torch.sum(
                (foregrounds_list == 0) * (preds_list == 1)).cpu().numpy()
            fns += torch.sum(
                (foregrounds_list == 1) * (preds_list == 0)).cpu().numpy()
            defined_total += torch.sum(defined_list > 0).cpu().numpy()
            loss_sum += loss.item()  # float
            sys.stdout.write(f"Training {(step+1) * self.bs}/"
                             f"{len(train_loader.dataset)} "
                             f" loss={round(loss.item(), 3)} \r")
            self.check_for_instructions()  # could update training parameter
            if not self.training:
                return

        duration = round(time.time() - epoch_start, 3)
        print('epoch train duration', duration)
        self.log_metrics(
            'train', get_metrics(tps, fps, tns, fns, defined_total, duration))
        before_val_time = time.time()
        self.validation()
        print('epoch validation duration', time.time() - before_val_time)
示例#6
0
def main():
    """Train/validate for ``num_epochs`` with gradient accumulation
    (``batch_multiplier`` mini-batches per optimizer step), optional mixup,
    per-epoch CSV logging, checkpointing, and final loss/score plots.

    Relies on module-level ``loaders_dict``, ``model``, ``optimizer``,
    ``scheduler``, ``criterion``, ``metric``, ``mixup``, ``use_mixup``,
    ``batch_multiplier``, ``num_epochs``, ``device``, ``export_model`` and
    ``export_figure``.
    """
    torch.backends.cudnn.benchmark = True
    num_train_imgs = len(loaders_dict['train'].dataset)
    num_val_imgs = len(loaders_dict['val'].dataset)
    batch_size = loaders_dict['train'].batch_size
    logs = []

    for epoch in range(num_epochs):
        t_epoch_start = time.time()
        epoch_train_loss = 0.0
        epoch_val_loss = 0.0
        epoch_train_score = 0.0
        epoch_val_score = 0.0

        print('-----------------------')
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-----------------------')

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
                optimizer.zero_grad()
            else:
                model.eval()

            count = 0
            for img_batch, label_batch in loaders_dict[phase]:
                # Bug fix: the original wrote `pahse=='train'`, which raised
                # NameError whenever use_mixup was enabled during training.
                if use_mixup and (phase == 'train'):
                    # Apply mixup with probability 1/use_mixup.
                    mixup_flag = np.random.randint(use_mixup) == 1
                    if mixup_flag:
                        img_batch, label_batch = mixup(img_batch, label_batch, alpha=1, n_classes=18)
                img_batch = img_batch.to(device, dtype=torch.float)
                label_batch = label_batch.to(device, dtype=torch.float)

                # Gradient accumulation: step once every batch_multiplier
                # batches. NOTE(review): count starts at 0, so the very
                # first iteration steps on zero gradients — harmless but
                # worth confirming against the original intent.
                if (phase == 'train') and (count == 0):
                    optimizer.step()
                    optimizer.zero_grad()
                    count = batch_multiplier

                with torch.set_grad_enabled(phase == 'train'):
                    output = torch.sigmoid(model(img_batch))
                    loss = criterion(output, label_batch)
                    # Scale so the accumulated gradient matches a full batch.
                    loss /= batch_multiplier

                    if phase == 'train':
                        loss.backward()
                        count -= 1
                        epoch_train_loss += loss.item() * batch_multiplier

                        for pred, label in zip(output, label_batch):
                            pred = pred.detach().cpu().numpy()
                            label = label.detach().cpu().numpy()
                            epoch_train_score += metric(label, pred)

                    else:
                        epoch_val_loss += loss.item() * batch_multiplier

                        for pred, label in zip(output, label_batch):
                            pred = pred.detach().cpu().numpy()
                            label = label.detach().cpu().numpy()
                            epoch_val_score += metric(label, pred)

        # Per-image averages for the epoch.
        train_loss = epoch_train_loss / num_train_imgs
        val_loss = epoch_val_loss / num_val_imgs
        train_score = epoch_train_score / num_train_imgs
        val_score = epoch_val_score / num_val_imgs

        t_epoch_finish = time.time()
        print(f'epoch: {epoch+1}')
        print(f'Epoch_Train_Loss: {train_loss:.3f}')
        print(f'Epoch_Val_Loss: {val_loss:.3f}\n')
        print(f'Epoch_Train_Score: {train_score:.3f}')
        print(f'Epoch_Val_Score: {val_score:.3f}\n')
        print('timer:  {:.3f} sec.'.format(t_epoch_finish - t_epoch_start), '\n')
        t_epoch_start = time.time()
        for g in optimizer.param_groups:
            print('lr: ', g['lr'], '\n\n')

        # Append this epoch to the running CSV log and checkpoint the model.
        log_epoch = {
            'epoch': epoch+1,
            'train_loss': train_loss,
            'val_loss': val_loss,
            'train_score': train_score,
            'val_score': val_score,
            }
        logs.append(log_epoch)
        df = pd.DataFrame(logs)
        df.to_csv(f'{export_model}/log.csv', index=False)
        torch.save(model.state_dict(), f'{export_model}/model_epoch{epoch+1}.pth')

        scheduler.step(val_loss)

    # Plot the training curves from the saved log.
    df = pd.read_csv(f'{export_model}/log.csv')
    plt.plot(df['train_loss'], label='train loss')
    plt.plot(df['val_loss'], label='val loss')
    plt.legend()
    plt.savefig(f'{export_figure}/loss.png')
    plt.close()

    plt.plot(df['train_score'], label='train score')
    plt.plot(df['val_score'], label='val score')
    plt.legend()
    plt.savefig(f'{export_figure}/score.png')
    plt.close()
示例#7
0
        # --- One training epoch over dataloader['train'] ---
        # NOTE(review): this fragment's enclosing loop/function starts
        # outside the visible source; `model`, `optimiser`, `criterion`,
        # `logger` and `_DEVICE_` are presumably defined there — confirm.
        epoch_loss = 0.0
        epoch_since = time.time()
        model.train()

        for idx, data in enumerate(dataloader['train'], 0):
            # Gradients explicitly enabled for the training pass.
            with torch.set_grad_enabled(True):
                images, detections = data

                images = images.to(_DEVICE_)
                detections = detections.to(_DEVICE_)

                optimiser.zero_grad()

                predictions = model(images)

                # .item() detaches the scalar, so only floats accumulate.
                batch_loss = criterion(predictions, detections)
                epoch_loss += batch_loss.item()

                # Progress log every 100 batches.
                if idx % 100 == 0:
                    logger.info(
                        f"\tIteration {idx+1}/{len(dataloader['train'])}: Loss = {batch_loss.item()}"
                    )

                batch_loss.backward()
                optimiser.step()

        # Mean per-batch loss and elapsed time for the epoch.
        epoch_loss = epoch_loss / len(dataloader['train'])
        epoch_elapsed = time.time() - epoch_since
        logger.info(
            f"\tAverage Train Epoch loss is {epoch_loss:.2f} [{epoch_elapsed//60:.0f}m {epoch_elapsed%60:.0f}s]"
        )