Example #1
def train_epoch(
        epoch,
        net,
        train_metric,
        train_data,
        use_cuda,
        L,
        optimizer,
        # lr_scheduler,
        batch_size,
        log_interval):

    tic = time.time()
    net.train()
    train_metric.reset()
    train_loss = 0.0

    btic = time.time()
    for i, (data, target) in enumerate(train_data):
        if use_cuda:
            data = data.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
        output = net(data)
        loss = L(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        train_metric.update(labels=target, preds=output)

        if log_interval and not (i + 1) % log_interval:
            speed = batch_size * log_interval / (time.time() - btic)
            btic = time.time()
            train_accuracy_msg = report_accuracy(metric=train_metric)
            logging.info(
                "Epoch[{}] Batch [{}]\tSpeed: {:.2f} samples/sec\t{}\tlr={:.5f}"
                .format(epoch + 1, i, speed, train_accuracy_msg,
                        optimizer.param_groups[0]["lr"]))

    throughput = int(batch_size * (i + 1) / (time.time() - tic))
    logging.info(
        "[Epoch {}] speed: {:.2f} samples/sec\ttime cost: {:.2f} sec".format(
            epoch + 1, throughput,
            time.time() - tic))

    train_loss /= (i + 1)
    train_accuracy_msg = report_accuracy(metric=train_metric)
    logging.info("[Epoch {}] training: {}\tloss={:.4f}".format(
        epoch + 1, train_accuracy_msg, train_loss))

    return train_loss
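
The snippet above relies on a metric object and a report_accuracy helper that are not reproduced on this page. The driver below is a minimal, illustrative sketch: it assumes train_epoch from Example #1 is defined in the same module and substitutes hypothetical stand-ins (Accuracy, report_accuracy) plus a toy model and random data, purely so the wiring can be seen end to end.

# Illustrative driver for train_epoch (Example #1). The Accuracy class and
# report_accuracy function are hypothetical stand-ins for helpers the snippet
# expects but does not show; the model, data and hyperparameters are toy.
import logging

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

logging.basicConfig(level=logging.INFO)


class Accuracy(object):
    """Minimal metric with the reset/update/get interface used above."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.correct = 0
        self.total = 0

    def update(self, labels, preds):
        self.correct += (preds.argmax(dim=1) == labels).sum().item()
        self.total += labels.size(0)

    def get(self):
        return "accuracy", self.correct / max(self.total, 1)


def report_accuracy(metric, extended_log=False):
    """Hypothetical stand-in: format the metric as 'name=value'."""
    name, value = metric.get()
    fmt = "{}={:.6f}" if extended_log else "{}={:.4f}"
    return fmt.format(name, value)


batch_size = 32
use_cuda = torch.cuda.is_available()

# Random 10-class toy data; replace with a real dataset and loader.
x = torch.randn(256, 3, 32, 32)
y = torch.randint(0, 10, (256,))
train_data = DataLoader(TensorDataset(x, y), batch_size=batch_size, shuffle=True)

net = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10))
if use_cuda:
    net = net.cuda()

train_epoch(
    epoch=0,
    net=net,
    train_metric=Accuracy(),
    train_data=train_data,
    use_cuda=use_cuda,
    L=nn.CrossEntropyLoss(),
    optimizer=torch.optim.SGD(net.parameters(), lr=0.01),
    batch_size=batch_size,
    log_interval=4)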
Example #2
def test(net,
         test_data,
         metric,
         use_cuda,
         input_image_size,
         in_channels,
         calc_weight_count=False,
         calc_flops=False,
         calc_flops_only=True,
         extended_log=False):
    if not calc_flops_only:
        tic = time.time()
        validate(
            metric=metric,
            net=net,
            val_data=test_data,
            use_cuda=use_cuda)
        accuracy_msg = report_accuracy(
            metric=metric,
            extended_log=extended_log)
        logging.info("Test: {}".format(accuracy_msg))
        logging.info("Time cost: {:.4f} sec".format(
            time.time() - tic))

    if calc_weight_count:
        weight_count = calc_net_weight_count(net)
        if not calc_flops:
            logging.info("Model: {} trainable parameters".format(weight_count))
    if calc_flops:
        num_flops, num_macs, num_params = measure_model(net, in_channels, input_image_size)
        assert (not calc_weight_count) or (weight_count == num_params)
        stat_msg = "Params: {params} ({params_m:.2f}M), FLOPs: {flops} ({flops_m:.2f}M)," \
                   " FLOPs/2: {flops2} ({flops2_m:.2f}M), MACs: {macs} ({macs_m:.2f}M)"
        logging.info(stat_msg.format(
            params=num_params, params_m=num_params / 1e6,
            flops=num_flops, flops_m=num_flops / 1e6,
            flops2=num_flops / 2, flops2_m=num_flops / 2 / 1e6,
            macs=num_macs, macs_m=num_macs / 1e6))
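
Example #2 delegates the evaluation loop to a validate helper that is not shown on this page. Judging only from how it is called (metric, net, val_data, use_cuda), a minimal sketch could look like the following; treat it as an assumption about its behaviour, not the original implementation.

# Plausible sketch of the validate() helper used in Examples #2-#5 and #7
# (the real implementation is not shown here). It runs the model over the
# loader in eval mode and accumulates predictions into the metric.
import torch


def validate(metric, net, val_data, use_cuda):
    net.eval()
    metric.reset()
    with torch.no_grad():
        for data, target in val_data:
            if use_cuda:
                data = data.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)
            output = net(data)
            metric.update(labels=target, preds=output)

As for the statistics line, FLOPs/2 is reported next to MACs because one multiply-accumulate is conventionally counted as two floating-point operations, so the two numbers should roughly agree.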
Example #3
def test(net,
         test_data,
         metric,
         use_cuda,
         input_image_size,
         in_channels,
         calc_weight_count=False,
         calc_flops=False,
         calc_flops_only=True,
         extended_log=False,
         show_bad_samples=False):
    """
    Main test routine.

    Parameters:
    ----------
    net : Module
        Model.
    test_data : DataLoader
        Data loader.
    metric : EvalMetric
        Metric object instance.
    use_cuda : bool
        Whether to use CUDA.
    input_image_size : tuple of 2 ints
        Spatial size of the expected input image.
    in_channels : int
        Number of input channels.
    calc_weight_count : bool, default False
        Whether to calculate the number of trainable weights.
    calc_flops : bool, default False
        Whether to calculate FLOPs.
    calc_flops_only : bool, default True
        Whether to only calculate FLOPs without testing.
    extended_log : bool, default False
        Whether to log more precise accuracy values.
    show_bad_samples : bool, default False
        Whether to log the file names of misclassified samples.
    """
    if not calc_flops_only:
        tic = time.time()
        validate(
            metric=metric,
            net=net,
            val_data=test_data,
            use_cuda=use_cuda)
        accuracy_msg = report_accuracy(
            metric=metric,
            extended_log=extended_log)
        logging.info("Test: {}".format(accuracy_msg))
        logging.info("Time cost: {:.4f} sec".format(
            time.time() - tic))

    if calc_weight_count:
        weight_count = calc_net_weight_count(net)
        if not calc_flops:
            logging.info("Model: {} trainable parameters".format(weight_count))
    if calc_flops:
        num_flops, num_macs, num_params = measure_model(net, in_channels, input_image_size)
        assert (not calc_weight_count) or (weight_count == num_params)
        stat_msg = "Params: {params} ({params_m:.2f}M), FLOPs: {flops} ({flops_m:.2f}M)," \
                   " FLOPs/2: {flops2} ({flops2_m:.2f}M), MACs: {macs} ({macs_m:.2f}M)"
        logging.info(stat_msg.format(
            params=num_params, params_m=num_params / 1e6,
            flops=num_flops, flops_m=num_flops / 1e6,
            flops2=num_flops / 2, flops2_m=num_flops / 2 / 1e6,
            macs=num_macs, macs_m=num_macs / 1e6))

    if show_bad_samples:
        store_misses = StoreMisses()
        validate(
            metric=store_misses,
            net=net,
            val_data=test_data,
            use_cuda=use_cuda)
        _, misses_list = store_misses.get()
        if len(misses_list) > 0:
            dataset = test_data.iterable.dataset if isinstance(test_data, tqdm) else test_data.dataset
            for i, miss_ind in enumerate(misses_list):
                logging.info("Miss [{}]: {}".format(i, dataset.get_file_name(miss_ind)))
Example #4
def calc_model_accuracy(net,
                        test_data,
                        metric,
                        use_cuda,
                        input_image_size,
                        in_channels,
                        calc_weight_count=False,
                        calc_flops=False,
                        calc_flops_only=True,
                        extended_log=False,
                        ml_type="cls"):
    """
    Estimate the accuracy of a particular model.

    Parameters:
    ----------
    net : Module
        Model.
    test_data : DataLoader
        Data loader.
    metric : EvalMetric
        Metric object instance.
    use_cuda : bool
        Whether to use CUDA.
    input_image_size : tuple of 2 ints
        Spatial size of the expected input image.
    in_channels : int
        Number of input channels.
    calc_weight_count : bool, default False
        Whether to calculate the number of trainable weights.
    calc_flops : bool, default False
        Whether to calculate FLOPs.
    calc_flops_only : bool, default True
        Whether to only calculate FLOPs without testing.
    extended_log : bool, default False
        Whether to log more precise accuracy values.
    ml_type : str, default 'cls'
        Machine learning task type ('cls' or 'asr').

    Returns:
    -------
    list of floats
        Accuracy values.
    """
    if not calc_flops_only:
        tic = time.time()
        validate(
            metric=metric,
            net=net,
            val_data=test_data,
            use_cuda=use_cuda)
        accuracy_msg = report_accuracy(
            metric=metric,
            extended_log=extended_log)
        logging.info("Test: {}".format(accuracy_msg))
        logging.info("Time cost: {:.4f} sec".format(
            time.time() - tic))
        acc_values = metric.get()[1]
        acc_values = acc_values if type(acc_values) == list else [acc_values]
    else:
        acc_values = []

    if calc_weight_count:
        weight_count = calc_net_weight_count(net)
        if not calc_flops:
            logging.info("Model: {} trainable parameters".format(weight_count))
    if calc_flops:
        in_shapes = [(1, 640 * 25 * 5), (1,)] if ml_type == "asr" else\
            [(1, in_channels, input_image_size[0], input_image_size[1])]
        num_flops, num_macs, num_params = measure_model(
            model=net,
            in_shapes=in_shapes)
        assert (not calc_weight_count) or (weight_count == num_params)
        stat_msg = "Params: {params} ({params_m:.2f}M), FLOPs: {flops} ({flops_m:.2f}M)," \
                   " FLOPs/2: {flops2} ({flops2_m:.2f}M), MACs: {macs} ({macs_m:.2f}M)"
        logging.info(stat_msg.format(
            params=num_params, params_m=num_params / 1e6,
            flops=num_flops, flops_m=num_flops / 1e6,
            flops2=num_flops / 2, flops2_m=num_flops / 2 / 1e6,
            macs=num_macs, macs_m=num_macs / 1e6))

    return acc_values
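
calc_net_weight_count is another helper the examples call without showing. A common implementation simply sums the element counts of trainable parameters, as sketched below; treat it as an assumption rather than the original code. Note also that calling Example #4 with calc_flops_only=True, calc_weight_count=True and calc_flops=False reduces the routine to exactly this parameter-count report and never touches the data loader, which makes for a convenient dry run.

# Likely shape of the calc_net_weight_count helper (assumed, not original).
import torch.nn as nn


def calc_net_weight_count(net):
    """Return the number of trainable parameters in a PyTorch module."""
    return sum(param.numel() for param in net.parameters() if param.requires_grad)


# Quick check on a toy module: 100 * 10 weights + 10 biases = 1010.
print(calc_net_weight_count(nn.Linear(100, 10)))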
Example #5
def train_net(batch_size, num_epochs, start_epoch1, train_data, val_data, net,
              optimizer, lr_scheduler, lp_saver, log_interval, num_classes,
              val_metric, train_metric, use_cuda):
    """
    Main procedure for training a model.

    Parameters:
    ----------
    batch_size : int
        Training batch size.
    num_epochs : int
        Number of training epochs.
    start_epoch1 : int
        Starting epoch number (1-based).
    train_data : DataLoader
        Data loader (training subset).
    val_data : DataLoader
        Data loader (validation subset).
    net : Module
        Model.
    optimizer : Optimizer
        Optimizer.
    lr_scheduler : LRScheduler
        Learning rate scheduler.
    lp_saver : TrainLogParamSaver
        Model/trainer state saver.
    log_interval : int
        Batch count period for logging.
    num_classes : int
        Number of model classes.
    val_metric : EvalMetric
        Metric object instance (validation subset).
    train_metric : EvalMetric
        Metric object instance (training subset).
    use_cuda : bool
        Whether to use CUDA.
    """
    assert (num_classes > 0)

    L = nn.CrossEntropyLoss()
    if use_cuda:
        L = L.cuda()

    assert (type(start_epoch1) == int)
    assert (start_epoch1 >= 1)
    if start_epoch1 > 1:
        logging.info("Start training from [Epoch {}]".format(start_epoch1))
        validate(metric=val_metric,
                 net=net,
                 val_data=val_data,
                 use_cuda=use_cuda)
        val_accuracy_msg = report_accuracy(metric=val_metric)
        logging.info("[Epoch {}] validation: {}".format(
            start_epoch1 - 1, val_accuracy_msg))

    gtic = time.time()
    for epoch in range(start_epoch1 - 1, num_epochs):
        lr_scheduler.step()

        train_loss = train_epoch(
            epoch=epoch,
            net=net,
            train_metric=train_metric,
            train_data=train_data,
            use_cuda=use_cuda,
            L=L,
            optimizer=optimizer,
            # lr_scheduler,
            batch_size=batch_size,
            log_interval=log_interval)

        validate(metric=val_metric,
                 net=net,
                 val_data=val_data,
                 use_cuda=use_cuda)
        val_accuracy_msg = report_accuracy(metric=val_metric)
        logging.info("[Epoch {}] validation: {}".format(
            epoch + 1, val_accuracy_msg))

        if lp_saver is not None:
            state = {
                "epoch": epoch + 1,
                "state_dict": net.state_dict(),
                "optimizer": optimizer.state_dict(),
            }
            lp_saver_kwargs = {"state": state}
            val_acc_values = val_metric.get()[1]
            train_acc_values = train_metric.get()[1]
            val_acc_values = val_acc_values if type(
                val_acc_values) == list else [val_acc_values]
            train_acc_values = train_acc_values if type(
                train_acc_values) == list else [train_acc_values]
            lp_saver.epoch_test_end_callback(
                epoch1=(epoch + 1),
                params=(val_acc_values + train_acc_values +
                        [train_loss, optimizer.param_groups[0]["lr"]]),
                **lp_saver_kwargs)

    logging.info("Total time cost: {:.2f} sec".format(time.time() - gtic))
    if lp_saver is not None:
        opt_metric_name = get_metric_name(val_metric, lp_saver.acc_ind)
        logging.info("Best {}: {:.4f} at {} epoch".format(
            opt_metric_name, lp_saver.best_eval_metric_value,
            lp_saver.best_eval_metric_epoch))
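
To see how the pieces fit together, the sketch below wires up train_net from Example #5 with a toy model and random data. It assumes train_net, train_epoch, and the Accuracy / report_accuracy / validate stand-ins sketched earlier on this page are defined in the same module; lp_saver is left as None so no checkpoints are written. Recent PyTorch versions warn when lr_scheduler.step() is called before the first optimizer.step(), which is what the loop in train_net does on its first epoch; the sketch keeps the snippet's ordering unchanged.

# Illustrative wiring for train_net (Example #5); the model, data and
# hyperparameters here are toy values purely for demonstration.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

batch_size = 32
use_cuda = torch.cuda.is_available()

x = torch.randn(256, 3, 32, 32)
y = torch.randint(0, 10, (256,))
train_data = DataLoader(TensorDataset(x[:192], y[:192]), batch_size=batch_size, shuffle=True)
val_data = DataLoader(TensorDataset(x[192:], y[192:]), batch_size=batch_size)

net = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10))
if use_cuda:
    net = net.cuda()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

train_net(
    batch_size=batch_size,
    num_epochs=3,
    start_epoch1=1,
    train_data=train_data,
    val_data=val_data,
    net=net,
    optimizer=optimizer,
    lr_scheduler=lr_scheduler,
    lp_saver=None,          # skip checkpoint saving in this sketch
    log_interval=2,
    num_classes=10,
    val_metric=Accuracy(),
    train_metric=Accuracy(),
    use_cuda=use_cuda)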
Example #6
def train_epoch(
        epoch,
        net,
        train_metric,
        train_data,
        use_cuda,
        L,
        optimizer,
        # lr_scheduler,
        batch_size,
        log_interval):
    """
    Train the model for one epoch.

    Parameters:
    ----------
    epoch : int
        Epoch number.
    net : Module
        Model.
    train_metric : EvalMetric
        Metric object instance.
    train_data : DataLoader
        Data loader.
    use_cuda : bool
        Whether to use CUDA.
    L : Loss
        Loss function.
    optimizer : Optimizer
        Optimizer.
    batch_size : int
        Training batch size.
    log_interval : int
        Batch count period for logging.

    Returns
    -------
    float
        Average training loss over the epoch.
    """
    tic = time.time()
    net.train()
    train_metric.reset()
    train_loss = 0.0

    btic = time.time()
    for i, (data, target) in enumerate(train_data):
        if use_cuda:
            data = data.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
        output = net(data)
        loss = L(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        train_metric.update(labels=target, preds=output)

        if log_interval and not (i + 1) % log_interval:
            speed = batch_size * log_interval / (time.time() - btic)
            btic = time.time()
            train_accuracy_msg = report_accuracy(metric=train_metric)
            logging.info(
                "Epoch[{}] Batch [{}]\tSpeed: {:.2f} samples/sec\t{}\tlr={:.5f}"
                .format(epoch + 1, i, speed, train_accuracy_msg,
                        optimizer.param_groups[0]["lr"]))

    throughput = int(batch_size * (i + 1) / (time.time() - tic))
    logging.info(
        "[Epoch {}] speed: {:.2f} samples/sec\ttime cost: {:.2f} sec".format(
            epoch + 1, throughput,
            time.time() - tic))

    train_loss /= (i + 1)
    train_accuracy_msg = report_accuracy(metric=train_metric)
    logging.info("[Epoch {}] training: {}\tloss={:.4f}".format(
        epoch + 1, train_accuracy_msg, train_loss))

    return train_loss
Example #7
def train_net(batch_size,
              num_epochs,
              start_epoch1,
              train_data,
              val_data,
              net,
              optimizer,
              lr_scheduler,
              lp_saver,
              log_interval,
              num_classes,
              val_metric,
              train_metric,
              use_cuda):
    assert (num_classes > 0)

    L = nn.CrossEntropyLoss()
    if use_cuda:
        L = L.cuda()

    assert (type(start_epoch1) == int)
    assert (start_epoch1 >= 1)
    if start_epoch1 > 1:
        logging.info("Start training from [Epoch {}]".format(start_epoch1))
        validate(
            metric=val_metric,
            net=net,
            val_data=val_data,
            use_cuda=use_cuda)
        val_accuracy_msg = report_accuracy(metric=val_metric)
        logging.info("[Epoch {}] validation: {}".format(start_epoch1 - 1, val_accuracy_msg))

    gtic = time.time()
    for epoch in range(start_epoch1 - 1, num_epochs):
        lr_scheduler.step()

        train_loss = train_epoch(
            epoch=epoch,
            net=net,
            train_metric=train_metric,
            train_data=train_data,
            use_cuda=use_cuda,
            L=L,
            optimizer=optimizer,
            # lr_scheduler,
            batch_size=batch_size,
            log_interval=log_interval)

        validate(
            metric=val_metric,
            net=net,
            val_data=val_data,
            use_cuda=use_cuda)
        val_accuracy_msg = report_accuracy(metric=val_metric)
        logging.info("[Epoch {}] validation: {}".format(epoch + 1, val_accuracy_msg))

        if lp_saver is not None:
            state = {
                "epoch": epoch + 1,
                "state_dict": net.state_dict(),
                "optimizer": optimizer.state_dict(),
            }
            lp_saver_kwargs = {"state": state}
            val_acc_values = val_metric.get()[1]
            train_acc_values = train_metric.get()[1]
            val_acc_values = val_acc_values if type(val_acc_values) == list else [val_acc_values]
            train_acc_values = train_acc_values if type(train_acc_values) == list else [train_acc_values]
            lp_saver.epoch_test_end_callback(
                epoch1=(epoch + 1),
                params=(val_acc_values + train_acc_values + [train_loss, optimizer.param_groups[0]["lr"]]),
                **lp_saver_kwargs)

    logging.info("Total time cost: {:.2f} sec".format(time.time() - gtic))
    if lp_saver is not None:
        opt_metric_name = get_metric_name(val_metric, lp_saver.acc_ind)
        logging.info("Best {}: {:.4f} at {} epoch".format(
            opt_metric_name, lp_saver.best_eval_metric_value, lp_saver.best_eval_metric_epoch))