Example #1
from tqdm import tqdm  # progress bar used in the loop below


def _train(network, criterion, trainLoader, device, optimizer):
    network.train()
    epoch_loss = 0
    epoch_acc = 0
    num_data = 0
    for input_x, target_y in tqdm(trainLoader):
        input_x, target_y = input_x.to(device), target_y.to(device)
        optimizer.zero_grad()
        predict_y = network(input_x)
        loss = criterion(predict_y, target_y)
        loss.backward()
        optimizer.step()

        # Accumulate statistics for this batch.
        num_data += target_y.shape[0]
        epoch_loss += loss.item() * target_y.shape[0]  # weight the batch-mean loss by batch size
        epoch_acc += (predict_y.argmax(dim=1) == target_y).sum().item()

    mean_loss = epoch_loss / num_data
    mean_acc = epoch_acc / num_data
    return mean_loss, mean_acc
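
A minimal, hypothetical way to call _train is sketched below; the toy model, dataset, and hyperparameters are placeholders introduced here for illustration and are not part of the original example.

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Toy data and model purely for illustration.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dataset = TensorDataset(torch.randn(256, 20), torch.randint(0, 4, (256,)))
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

network = nn.Sequential(nn.Linear(20, 64), nn.ReLU(), nn.Linear(64, 4)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(network.parameters(), lr=1e-3)

for epoch in range(5):
    mean_loss, mean_acc = _train(network, criterion, train_loader, device, optimizer)
    print(f'epoch {epoch}: loss={mean_loss:.4f} acc={mean_acc:.4f}')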
Example #2
            input = input.to(device)
            target = target.to(device)

            # Forward pass of the network (call the module directly so hooks also run).
            output = net(input)

            # Gather the training stats.
            stats.training.correctSamples += torch.sum(
                snn.predict.getClass(output) == label).item()
            stats.training.numSamples += len(label)

            # Calculate loss.
            loss = error.numSpikes(output, target)

            # Reset gradients to zero.
            optimizer.zero_grad()

            # Backward pass of the network.
            loss.backward()

            # Update weights.
            optimizer.step()

            # Gather training loss stats.
            stats.training.lossSum += loss.item()

            # Display training stats.
            stats.print(epoch, i, (datetime.now() - tSt).total_seconds())

        # Testing loop.
        # Same steps as the training loop, except for the loss backpropagation and weight update.
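
The snippet above is only the body of the inner training batch loop from a SLAYER-style spiking-network script; net, error, stats, optimizer, and the loaders are defined elsewhere. A rough sketch of the surrounding structure it assumes (loop and variable names here are guesses, not taken from the original) is:

from datetime import datetime
import torch

tSt = datetime.now()
for epoch in range(num_epochs):
    # Training loop: the fragment above is the body of this inner loop.
    for i, (input, target, label) in enumerate(trainLoader):
        ...  # forward pass, loss, backward, optimizer step, stats

    # Testing loop: same forward pass and statistics, but without
    # loss.backward() or optimizer.step(), typically under torch.no_grad().
    with torch.no_grad():
        for i, (input, target, label) in enumerate(testLoader):
            ...  # forward pass, loss, stats only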
Example #3
def train(train_loader, net, criterion, optimizer, curr_epoch, writer):
    '''
    Runs the training loop for one epoch.
    train_loader: data loader for training
    net: the network
    criterion: loss function
    optimizer: optimizer
    curr_epoch: current epoch
    writer: tensorboard writer
    return: val_avg for the step function if required
    '''
    net.train()

    train_main_loss = AverageMeter()
    train_edge_loss = AverageMeter()
    train_seg_loss = AverageMeter()
    train_att_loss = AverageMeter()
    train_dual_loss = AverageMeter()
    curr_iter = curr_epoch * len(train_loader)

    for i, data in enumerate(train_loader):
        if i == 0:
            print('running....')

        inputs, mask, edge, _img_name = data

        if torch.sum(torch.isnan(inputs)) > 0:
            import pdb
            pdb.set_trace()

        batch_pixel_size = inputs.size(0) * inputs.size(2) * inputs.size(3)

        inputs, mask, edge = inputs.cuda(), mask.cuda(), edge.cuda()

        if i == 0:
            print('forward done')

        optimizer.zero_grad()

        main_loss = None
        loss_dict = None

        if args.joint_edgeseg_loss:
            loss_dict = net(inputs, gts=(mask, edge))

            if args.seg_weight > 0:
                log_seg_loss = loss_dict['seg_loss'].mean().clone().detach_()
                train_seg_loss.update(log_seg_loss.item(), batch_pixel_size)
                main_loss = loss_dict['seg_loss']

            if args.edge_weight > 0:
                log_edge_loss = loss_dict['edge_loss'].mean().clone().detach_()
                train_edge_loss.update(log_edge_loss.item(), batch_pixel_size)
                if main_loss is not None:
                    main_loss += loss_dict['edge_loss']
                else:
                    main_loss = loss_dict['edge_loss']

            if args.att_weight > 0:
                log_att_loss = loss_dict['att_loss'].mean().clone().detach_()
                train_att_loss.update(log_att_loss.item(), batch_pixel_size)
                if main_loss is not None:
                    main_loss += loss_dict['att_loss']
                else:
                    main_loss = loss_dict['att_loss']

            if args.dual_weight > 0:
                log_dual_loss = loss_dict['dual_loss'].mean().clone().detach_()
                train_dual_loss.update(log_dual_loss.item(), batch_pixel_size)
                if main_loss is not None:
                    main_loss += loss_dict['dual_loss']
                else:
                    main_loss = loss_dict['dual_loss']

        else:
            main_loss = net(inputs, gts=mask)

        main_loss = main_loss.mean()
        log_main_loss = main_loss.clone().detach_()

        train_main_loss.update(log_main_loss.item(), batch_pixel_size)

        main_loss.backward()

        optimizer.step()

        if i == 0:
            print('step 1 done')

        curr_iter += 1

        if args.local_rank == 0:
            msg = '[epoch {}], [iter {} / {}], [train main loss {:0.6f}], [seg loss {:0.6f}], [edge loss {:0.6f}], [lr {:0.6f}]'.format(
                curr_epoch, i + 1, len(train_loader), train_main_loss.avg,
                train_seg_loss.avg, train_edge_loss.avg,
                optimizer.param_groups[-1]['lr'])

            logging.info(msg)

            # Log tensorboard metrics for each iteration of the training phase
            writer.add_scalar('training/loss', (train_main_loss.val),
                              curr_iter)
            writer.add_scalar('training/lr', optimizer.param_groups[-1]['lr'],
                              curr_iter)
            if args.joint_edgeseg_loss:

                writer.add_scalar('training/seg_loss', (train_seg_loss.val),
                                  curr_iter)
                writer.add_scalar('training/edge_loss', (train_edge_loss.val),
                                  curr_iter)
                writer.add_scalar('training/att_loss', (train_att_loss.val),
                                  curr_iter)
                writer.add_scalar('training/dual_loss', (train_dual_loss.val),
                                  curr_iter)
        if i > 5 and args.test_mode:
            return
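
Example 3 tracks its losses with an AverageMeter helper that is not shown above. A minimal sketch of the usual implementation of such a class (an assumption; the project's own version may differ) is:

class AverageMeter:
    """Keeps the latest value and a running sum, count, and average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

This matches how the loop uses it: update(value, batch_pixel_size) weights each value by the number of pixels in the batch, and .val and .avg are read when logging.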
Example #4
def trainNetwork(logging_path,
                 loader,
                 bt_size,
                 eval_size,
                 is_cuda,
                 evle,
                 net,
                 loss_func,
                 optimizer,
                 num_epochs,
                 str_epochs,
                 lr,
                 arg_list=[]):
    if is_cuda > -1:
        net.cuda(is_cuda)

    print("Training for " + str(num_epochs))

    log = logger.Logger(logging_path)
    log.masterLog(net.getStructure(), loss_func, optimizer, lr)

    opt = optimizer(net, lr)

    for epoch in range(str_epochs, num_epochs + 1):
        print("Training epoch: " + str(epoch))

        tr_loss = 0.0
        tr_root_loss = 0.0
        tr_soil_loss = 0.0
        ev_loss = 0.0
        opt.zero_grad()

        bt_per_it = 1
        for bt_it in range(bt_per_it):
            #Load Data
            #bt_nbr = np.random.randint( num_bts )
            batch, teacher = loader.getBatchAndShuffle(bt_size)

            for it in range(bt_size - eval_size):
                num_slices = 2
                cut_it = int(round(batch.size()[4] / num_slices))
                cut_id = 0
                for jt in range(num_slices):
                    print("   " + str(it) + " Slice: " + str(jt))
                    start, end = cut_id, min(batch.size()[4], cut_it * (jt + 1))
                    start_t, end_t = start, min(teacher.size()[4], end - net.teacher_offset * 2)
                    cut_id = end
                    input_data = batch[:, it, :, :, start:end].unsqueeze(1)
                    teacher_data = teacher[:, it, :, :, start_t:end_t].unsqueeze(1)

                    #Train
                    output = net(input_data, loss_func.apply_sigmoid)
                    loss, root_loss, soil_loss = loss_func(
                        output, teacher_data, epoch)
                    loss /= (bt_size - eval_size) * bt_per_it * num_slices
                    loss.backward()

                    tr_loss += loss.detach()  # detach so the autograd graph is not kept alive
                    tr_root_loss += root_loss
                    tr_soil_loss += soil_loss

        #Eval
        output = net(batch[:, 3, :, :, :].unsqueeze(1))
        loss, _, _ = loss_func(output, teacher[:, 3, :, :, :].unsqueeze(1),
                               epoch)

        ev_loss += loss

        tr_root_loss /= (bt_size - eval_size) * bt_per_it * num_slices
        tr_soil_loss /= (bt_size - eval_size) * bt_per_it * num_slices
        opt.step()

        #Log
        log.logEpoch(epoch,
                     tr_loss.cpu().data.numpy(),
                     ev_loss.cpu().data.numpy(),
                     tr_root_loss.cpu().data.numpy(),
                     tr_soil_loss.cpu().data.numpy())
        if (epoch % 20 == 0):
            weights = net.getWeightsCuda()
            output = feedForward(net, loader, 0)
            log.logWeights(weights)
            teacher = loader.getTeacherNp(0, 4, loader.offset)
            f1_r = np.array([0.0, 0.0, 0.0])
            f1_s = np.array([0.0, 0.0, 0.0])
            for it in range(4):
                f1_r += evle(output[it][0, 0, :, :, :], teacher[0, it, :, :, :])
                f1_s += evle(output[it][0, 0, :, :, :], teacher[0, it, :, :, :], True)
            log.logF1Root(epoch, f1_r / 4)
            log.logF1Soil(epoch, f1_s / 4)
            if (epoch % 100 == 0):
                log.logMilestone(epoch, weights, output)
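
Example 4 calls loss.backward() once per slice and sub-batch but opt.step() only once per epoch, so gradients accumulate over many small forward/backward passes before a single weight update. Stripped of the project-specific pieces, that pattern in plain PyTorch looks roughly like the sketch below (model, criterion, optimizer, and data_loader are placeholder names, not from the original):

accumulation_steps = 4  # number of sub-batches to accumulate before one update

optimizer.zero_grad()
for step, (inputs, targets) in enumerate(data_loader):
    outputs = model(inputs)
    # Scale the loss so the accumulated gradient matches one large batch.
    loss = criterion(outputs, targets) / accumulation_steps
    loss.backward()  # gradients add up across backward() calls

    if (step + 1) % accumulation_steps == 0:
        optimizer.step()       # apply the accumulated gradient
        optimizer.zero_grad()  # clear for the next accumulation window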