Example #1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--device_id',
                        type=int,
                        default=1,
                        help='device id on which the model is evaluated')
    args, model_settings = eval_config(parser)
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Davinci",
                        device_id=args.device_id)

    # Logger
    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    args.logger = get_logger(args.outputs_dir)
    # show args
    args.logger.save_args(args)
    # find model path
    if os.path.isdir(args.model_dir):
        models = list(glob.glob(os.path.join(args.model_dir, '*.ckpt')))
        print(models)
        # sort checkpoints by the epoch number parsed from the file name, newest first
        f = lambda x: -1 * int(
            os.path.splitext(os.path.split(x)[-1])[0].split('-')[0].split(
                'epoch')[-1])
        args.models = sorted(models, key=f)
    else:
        args.models = [args.model_dir]

    args.best_acc = 0
    args.index = 0
    args.best_index = 0
    for model_path in args.models:
        test_de = audio_dataset(args.feat_dir, 'testing',
                                model_settings['spectrogram_length'],
                                model_settings['dct_coefficient_count'],
                                args.per_batch_size)
        network = DSCNN(model_settings, args.model_size_info)

        load_ckpt(network, model_path, False)
        network.set_train(False)
        model = Model(network)
        args.logger.info('load model {} success'.format(model_path))
        val(args, model, test_de)
        args.index += 1

    args.logger.info('Best model:{} acc:{:.2f}%'.format(
        args.models[args.best_index], args.best_acc))
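
All of the MindSpore examples on this page call a project-local load_ckpt helper from src.models whose body is not shown. As a minimal sketch (an assumption, not the repositories' actual source), such a helper typically wraps load_checkpoint and load_param_into_net, optionally freezing the loaded parameters:

# Sketch only (assumption): what a load_ckpt(network, path, trainable) helper usually does in MindSpore.
from mindspore.train.serialization import load_checkpoint, load_param_into_net

def load_ckpt(network, pretrain_ckpt_path, trainable=True):
    """Load a .ckpt file into network; optionally freeze the loaded parameters."""
    param_dict = load_checkpoint(pretrain_ckpt_path)   # read parameters from disk
    load_param_into_net(network, param_dict)            # copy them into the network
    if not trainable:
        for param in network.get_parameters():
            param.requires_grad = False                  # e.g. a frozen pretrained backbone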
Example #2
    args_opt.dataset_path = os.path.abspath(args_opt.dataset_path)
    config = set_config(args_opt)
    start = time.time()

    print(f"train args: {args_opt}\ncfg: {config}")

    # set context and initialize the device
    context_device_init(config)

    # define network
    backbone_net, head_net, net = define_net(config, args_opt.is_training)
    dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True, config=config)
    step_size = dataset.get_dataset_size()
    if args_opt.pretrain_ckpt:
        if args_opt.freeze_layer == "backbone":
            load_ckpt(backbone_net, args_opt.pretrain_ckpt, trainable=False)
            step_size = extract_features(backbone_net, args_opt.dataset_path, config)
        else:
            load_ckpt(net, args_opt.pretrain_ckpt)
    if step_size == 0:
        raise ValueError("The step_size of dataset is zero. Check if the images' count of train dataset is more \
            than batch_size in config.py")

    # Currently, only Ascend supports switching precision.
    switch_precision(net, mstype.float16, config)

    # define loss
    if config.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(
            smooth_factor=config.label_smooth, num_classes=config.num_classes)
    else:
        # assumption: fall back to a plain softmax cross-entropy loss (the original snippet is truncated here)
        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
Example #3
from mindspore.common import dtype as mstype

from src.dataset import create_dataset
from src.config import set_config
from src.args import eval_parse_args
from src.models import define_net, load_ckpt
from src.utils import switch_precision, set_context

if __name__ == '__main__':
    args_opt = eval_parse_args()
    config = set_config(args_opt)
    backbone_net, head_net, net = define_net(config, args_opt.is_training)

    # load the trained checkpoint file into the net for evaluation
    if args_opt.head_ckpt:
        load_ckpt(backbone_net, args_opt.pretrain_ckpt)
        load_ckpt(head_net, args_opt.head_ckpt)
    else:
        load_ckpt(net, args_opt.pretrain_ckpt)

    set_context(config)
    switch_precision(net, mstype.float16, config)

    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             do_train=False,
                             config=config)
    step_size = dataset.get_dataset_size()
    if step_size == 0:
        raise ValueError(
            "The step_size of dataset is zero. Check if the images' count of the eval dataset "
            "is greater than batch_size in config.py")
Example #4
                    type=str,
                    choices=["AIR", "ONNX", "MINDIR"],
                    default="AIR",
                    help="file format")
parser.add_argument('--platform',
                    type=str,
                    default="Ascend",
                    choices=("Ascend", "GPU", "CPU"),
                    help='run platform; only Ascend, GPU and CPU are supported')
args = parser.parse_args()
args.is_training = False
args.run_distribute = False

context.set_context(mode=context.GRAPH_MODE, device_target=args.platform)
if args.platform == "Ascend":
    context.set_context(device_id=args.device_id)

if __name__ == '__main__':
    cfg = set_config(args)
    set_context(cfg)
    _, _, net = define_net(cfg, args.is_training)

    load_ckpt(net, args.ckpt_file)
    input_shp = [args.batch_size, 3, cfg.image_height, cfg.image_width]
    input_array = Tensor(
        np.random.uniform(-1.0, 1.0, size=input_shp).astype(np.float32))
    export(net,
           input_array,
           file_name=args.file_name,
           file_format=args.file_format)
Example #5
"""
mobilenetv2 export mindir.
"""
import numpy as np
from mindspore import Tensor, export
from src.config import set_config
from src.args import export_parse_args
from src.models import define_net, load_ckpt
from src.utils import set_context

if __name__ == '__main__':
    args_opt = export_parse_args()
    cfg = set_config(args_opt)
    set_context(cfg)
    _, _, net = define_net(cfg, args_opt.is_training)

    load_ckpt(net, args_opt.pretrain_ckpt)
    input_shp = [1, 3, cfg.image_height, cfg.image_width]
    input_array = Tensor(
        np.random.uniform(-1.0, 1.0, size=input_shp).astype(np.float32))
    export(net,
           input_array,
           file_name=cfg.export_file,
           file_format=cfg.export_format)
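
When cfg.export_format is "MINDIR", the exported file can be reloaded for a quick sanity check. A minimal sketch, assuming a hypothetical file name mobilenetv2.mindir and a 1x3x224x224 input:

# Sketch only (assumption): reload an exported MINDIR graph and run one dummy inference.
import numpy as np
import mindspore as ms
from mindspore import nn, Tensor

graph = ms.load("mobilenetv2.mindir")   # hypothetical file name
net = nn.GraphCell(graph)
dummy = Tensor(np.random.uniform(-1.0, 1.0, size=(1, 3, 224, 224)).astype(np.float32))
print(net(dummy).shape)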
Example #6
def test_on_model(args):
    device = args.device
    if device == 'cpu':
        raise NotImplementedError("CPU training is not implemented.")
    device = torch.device(args.device)
    torch.cuda.set_device(device)

    # build model
    model = build_model(args)
    model.to(device)

    # output dir
    p_out = Path(
        args.p_out).joinpath(f"{model.name}-{args.tensorboard_exp_name}")
    if not p_out.exists():
        p_out.mkdir(exist_ok=True, parents=True)

    # dataset & loader
    test_dataset = MTTDataset(path=args.p_data, split='test')
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=args.n_workers,
                             pin_memory=True,
                             drop_last=False)  # not dropping last in testing
    test_steps = test_dataset.calc_steps(
        args.batch_size, drop_last=False)  # not dropping last in testing
    LOG.info(f"Total testing steps: {test_steps}")
    LOG.info(f"Testing data size: {len(test_dataset)}")

    # create loss
    loss_fn = get_loss(args.loss)
    # create metric
    metric = AUCMetric()

    # load checkpoint OR init state_dict
    if args.checkpoint is not None:
        state_dict = load_ckpt(args.checkpoint,
                               reset_epoch=args.ckpt_epoch,
                               no_scheduler=args.ckpt_no_scheduler,
                               no_optimizer=args.ckpt_no_optimizer,
                               no_loss_fn=args.ckpt_no_loss_fn,
                               map_values=args.ckpt_map_values)
        model_dict = {'model': model} if 'model' in state_dict else None
        apply_state_dict(state_dict, model=model_dict)
        best_val_loss = state_dict['val_loss']
        epoch = state_dict['epoch']
        global_i = state_dict['global_i']
        LOG.info(
            f"Checkpoint loaded. Epoch trained {epoch}, global_i {global_i}, best val {best_val_loss:.6f}"
        )
    else:
        raise AssertionError("Pre-trained checkpoint must be provided.")

    # summary writer
    writer = SummaryWriter(log_dir=p_out.as_posix(), filename_suffix='-test')

    # start testing
    model.eval()
    sigmoid = Sigmoid().to(device)
    status_col = TextColumn("")
    running_loss = 0
    if args.data_normalization:
        fetcher = DataPrefetcher(test_loader,
                                 mean=MTT_MEAN,
                                 std=MTT_STD,
                                 device=device)
    else:
        fetcher = DataPrefetcher(test_loader,
                                 mean=None,
                                 std=None,
                                 device=device)
    samples, targets = fetcher.next()

    with Progress("[progress.description]{task.description}",
                  "[{task.completed}/{task.total}]",
                  BarColumn(),
                  "[progress.percentage]{task.percentage:>3.0f}%",
                  TimeRemainingColumn(),
                  TextColumn("/"),
                  TimeElapsedColumn(),
                  status_col,
                  expand=False,
                  console=CONSOLE,
                  refresh_per_second=5) as progress:
        task = progress.add_task(description='[Test]', total=test_steps)
        i = 0  # counter
        t_start = time.time()

        with torch.no_grad():
            while samples is not None:
                # forward model
                logits = model(samples)
                out = sigmoid(logits)
                test_loss = loss_fn(logits, targets)

                # collect running loss
                running_loss += test_loss.item()
                i += 1
                writer.add_scalar('Test/Loss', running_loss / i, i)

                # auc metric
                metric.step(targets.cpu().numpy(), out.cpu().numpy())

                # pre-fetch next samples
                samples, targets = fetcher.next()

                if not progress.finished:
                    status_col.text_format = f"Test loss: {running_loss/i:.06f}"
                    progress.update(task, advance=1)

    auc_tag, auc_sample, ap_tag, ap_sample = metric.auc_ap_score
    LOG.info(f"Testing speed: {(time.time() - t_start)/i:.4f}s/it, "
             f"auc_tag: {auc_tag:.04f}, "
             f"auc_sample: {auc_sample:.04f}, "
             f"ap_tag: {ap_tag:.04f}, "
             f"ap_sample: {ap_sample:.04f}")
    writer.close()
    return
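
Examples #6, #7 and #9 rely on project-local load_ckpt and apply_state_dict helpers that are not shown. As a minimal sketch (an assumption, not the project's actual code), they amount to torch.load plus selective load_state_dict calls on the objects passed in:

# Sketch only (assumption): rough equivalents of the load_ckpt / apply_state_dict helpers used above.
import torch

def load_ckpt(path, **kwargs):
    # returns the checkpoint dict written by torch.save (keys such as 'model', 'optim', 'epoch', ...);
    # the keyword flags seen above (reset_epoch, no_optimizer, ...) would prune or adjust this dict
    return torch.load(path, map_location='cpu')

def apply_state_dict(state_dict, model=None, optimizer=None, loss_fn=None, scheduler=None):
    # each argument is either None or a {key: object} mapping whose key indexes into state_dict
    for group in (model, optimizer, loss_fn, scheduler):
        if group:
            for key, obj in group.items():
                obj.load_state_dict(state_dict[key])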
Example #7
def eval_on_model(args):
    device = args.device
    if device == 'cpu':
        raise NotImplementedError("CPU training is not implemented.")
    device = torch.device(args.device)
    torch.cuda.set_device(device)

    # build model
    model = build_model(args)
    model.to(device)

    # output dir
    p_out = Path(
        args.p_out).joinpath(f"{model.name}-{args.tensorboard_exp_name}")
    if not p_out.exists():
        p_out.mkdir(exist_ok=True, parents=True)

    # dataset & loader
    annotation = pd.read_csv(args.annotation_file)
    query = annotation[annotation.mp3_path.str.match('/'.join(
        args.audio_file.split('/')[-2:]))]
    assert query.shape[0] != 0, f"Cannot find the audio file: {args.audio_file}"
    # split audio info and segment audio
    threshold = args.eval_threshold
    song_info = query[query.columns.values[50:]]
    tags = query.columns.values[:50]
    labels = query[tags].values[0]
    label_names = tags[labels.astype(bool)]
    segments = _segment_audio(_load_audio(args.audio_file, sample_rate=22050),
                              n_samples=59049)
    LOG.info(f"Song info: {song_info}")
    LOG.info(f"Number of segments: {len(segments)}")
    LOG.info(f"Ground truth tags: {label_names}")
    LOG.info(f"Positive tag threshold: {threshold}")

    # create loss
    loss_fn = get_loss(args.loss)

    # load checkpoint OR init state_dict
    if args.checkpoint is not None:
        state_dict = load_ckpt(args.checkpoint,
                               reset_epoch=args.ckpt_epoch,
                               no_scheduler=args.ckpt_no_scheduler,
                               no_optimizer=args.ckpt_no_optimizer,
                               no_loss_fn=args.ckpt_no_loss_fn,
                               map_values=args.ckpt_map_values)
        model_dict = {'model': model} if 'model' in state_dict else None
        apply_state_dict(state_dict, model=model_dict)
        best_val_loss = state_dict['val_loss']
        epoch = state_dict['epoch']
        global_i = state_dict['global_i']
        LOG.info(
            f"Checkpoint loaded. Epoch trained {epoch}, global_i {global_i}, best val {best_val_loss:.6f}"
        )
    else:
        raise AssertionError("Pre-trained checkpoint must be provided.")

    # start testing
    model.eval()
    sigmoid = Sigmoid().to(device)
    t_start = time.time()

    # concatenate segments
    segments = torch.from_numpy(
        np.concatenate([seg.reshape(1, 1, -1) for seg in segments
                        ])).to(torch.float32).cuda(device=device)
    # the label row is repeated 10 times to match the (typical) number of 59049-sample segments per clip
    targets = torch.from_numpy(np.concatenate(
        [labels.reshape(1, -1)] * 10)).to(torch.float32).cuda(device=device)

    # forward pass
    with torch.no_grad():
        logits = model(segments)
        out = sigmoid(logits)
        loss = loss_fn(logits, targets)

    out = out.cpu().numpy()
    out[out > threshold] = 1
    out[out <= threshold] = 0
    out = np.sum(out, axis=0)
    res = pd.DataFrame(data={'tags': tags, 'freq': out})
    res = res[res.freq != 0].sort_values(by='freq', ascending=False)
    CONSOLE.print(res)

    LOG.info(f"Testing speed: {time.time() - t_start:.4f}s, "
             f"loss: {loss.item()}, ")
    return
Example #8
"""DSCNN export."""
import argparse

import numpy as np
from mindspore import Tensor
from mindspore.train.serialization import export

from src.config import eval_config
from src.ds_cnn import DSCNN
from src.models import load_ckpt

parser = argparse.ArgumentParser()

args, model_settings = eval_config(parser)
network = DSCNN(model_settings, args.model_size_info)
load_ckpt(network, args.model_dir, False)
x = np.random.uniform(0.0,
                      1.0,
                      size=[
                          1, 1, model_settings['spectrogram_length'],
                          model_settings['dct_coefficient_count']
                      ]).astype(np.float32)
export(network,
       Tensor(x),
       file_name=args.model_dir.replace('.ckpt', '.air'),
       file_format='AIR')
Example #9
def train_on_model(args):
    if args.device == 'cpu':
        raise NotImplementedError("CPU training is not implemented.")
    device = torch.device(args.device)
    torch.cuda.set_device(device)

    # build model
    model = build_model(args)
    model.to(device)

    # output dir
    p_out = Path(args.p_out).joinpath(f"{model.name}-{args.tensorboard_exp_name}")
    if not p_out.exists():
        p_out.mkdir(exist_ok=True, parents=True)

    # dataset & loader
    train_dataset = MTTDataset(path=args.p_data, split='train')
    val_dataset = MTTDataset(path=args.p_data, split='val')
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.n_workers,
                              pin_memory=True,
                              drop_last=True)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            shuffle=True,
                            num_workers=args.n_workers,
                            pin_memory=True,
                            drop_last=True)
    train_steps = train_dataset.calc_steps(args.batch_size)
    val_steps = val_dataset.calc_steps(args.batch_size)
    if args.data_normalization:
        normalize = (MTT_MEAN, MTT_STD)
        LOG.info("Data normalization [bold cyan]on[/]")
    else:
        normalize = None
    LOG.info(f"Total training steps: {train_steps}")
    LOG.info(f"Total validation steps: {val_steps}")
    LOG.info(f"Training data size: {len(train_dataset)}")
    LOG.info(f"Validation data size: {len(val_dataset)}")

    # create optimizer
    optim = get_optimizer(model.parameters(), args=args)

    # create loss
    loss_fn = get_loss(args.loss)

    # creating scheduler
    scheduler_plateau = ReduceLROnPlateau(optim,
                                          factor=args.lr_decay_plateau,
                                          patience=args.plateau_patience,
                                          min_lr=args.min_lr,
                                          verbose=True,
                                          prefix="[Scheduler Plateau] ",
                                          logger=LOG)
    scheduler_es = EarlyStopping(patience=args.early_stop_patience,
                                 min_delta=args.early_stop_delta,
                                 verbose=True,
                                 prefix="[Scheduler Early Stop] ",
                                 logger=LOG)

    # load checkpoint OR init state_dict
    if args.checkpoint is not None:
        state_dict = load_ckpt(args.checkpoint,
                               reset_epoch=args.ckpt_epoch,
                               no_scheduler=args.ckpt_no_scheduler,
                               no_optimizer=args.ckpt_no_optimizer,
                               no_loss_fn=args.ckpt_no_loss_fn,
                               map_values=args.ckpt_map_values)
        model_dict = {'model': model} if 'model' in state_dict else None
        optim_dict = {'optim': optim} if 'optim' in state_dict else None
        loss_fn_dict = {'loss_fn': loss_fn} if 'loss_fn' in state_dict else None
        scheduler_dict = {'scheduler_plateau': scheduler_plateau} \
            if 'scheduler_plateau' in state_dict else None
        apply_state_dict(state_dict,
                         model=model_dict,
                         optimizer=optim_dict,
                         loss_fn=loss_fn_dict,
                         scheduler=scheduler_dict)
        best_val_loss = state_dict['val_loss']
        epoch = state_dict['epoch']
        global_i = state_dict['global_i']
        LOG.info(f"Checkpoint loaded. Epoch trained {epoch}, global_i {global_i}, best val {best_val_loss:.6f}")
    else:
        # fresh training
        best_val_loss = 9999  # effectively +inf; any real validation loss will improve on this
        epoch = 0
        global_i = 0

    # tensorboard
    purge_step = None if global_i == 0 else global_i
    writer = SummaryWriter(log_dir=VAR
                           .log
                           .joinpath('tensorboard')
                           .joinpath(f"{model.name}-{args.tensorboard_exp_name}")
                           .as_posix(),
                           purge_step=purge_step,
                           filename_suffix='-train')

    # train model for epochs
    assert epoch < args.max_epoch, "Initial epoch value must be smaller than max_epoch in order to train model"
    for i in range(epoch, args.max_epoch):

        # train
        init_lr = optim.param_groups[0]['lr']
        train_loss, global_i = train_one_epoch(model, optim, loss_fn, train_loader,
                                               epoch+1, train_steps, device, writer, global_i,
                                               writer_interval=args.tensorboard_interval,
                                               normalize=normalize)

        # validate
        val_loss = evaluate(model, loss_fn, val_loader, epoch+1, val_steps, device, normalize=normalize)
        writer.add_scalar('Loss/Val', val_loss, global_i)

        epoch += 1

        # update scheduler
        scheduler_plateau.step(val_loss)
        scheduler_es.step(val_loss)

        # save checkpoint
        if optim.param_groups[0]['lr'] != init_lr:
            LOG.info(f"Saving [red bold]checkpoint[/] at epoch {epoch}, model saved to {p_out.as_posix()}")
            torch.save({
                'model': model.state_dict(),
                'optim': optim.state_dict(),
                'loss_fn': loss_fn.state_dict(),
                'scheduler_plateau': scheduler_plateau.state_dict(),
                'scheduler_es': scheduler_es.state_dict(),
                'epoch': epoch,
                'loss': train_loss,
                'val_loss': val_loss,
                'p_out': p_out,
                'global_i': global_i
            },
                p_out.joinpath(f'ckpt@epoch-{epoch:03d}-loss-{val_loss:.6f}.tar').as_posix())

        # save the best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            LOG.info(f"New [red bold]best[/] validation loss {val_loss:.6f}, model saved to {p_out.as_posix()}")
            torch.save({
                'model': model.state_dict(),
                'optim': optim.state_dict(),
                'loss_fn': loss_fn.state_dict(),
                'scheduler_plateau': scheduler_plateau.state_dict(),
                'scheduler_es': scheduler_es.state_dict(),
                'epoch': epoch,
                'loss': train_loss,
                'val_loss': val_loss,
                'p_out': p_out,
                'global_i': global_i
            },
                p_out.joinpath(f'best@epoch-{epoch:03d}-loss-{val_loss:.6f}.tar').as_posix())

        # save latest model
        else:
            torch.save({
                'model': model.state_dict(),
                'optim': optim.state_dict(),
                'loss_fn': loss_fn.state_dict(),
                'scheduler_plateau': scheduler_plateau.state_dict(),
                'scheduler_es': scheduler_es.state_dict(),
                'epoch': epoch,
                'loss': train_loss,
                'val_loss': val_loss,
                'p_out': p_out,
                'global_i': global_i
            },
                p_out.joinpath('latest.tar').as_posix())

        # early stop, if enabled
        if scheduler_es.early_stop:
            break

        # optionally reload the best-so-far model when the lr was reduced on plateau
        if optim.param_groups[0]['lr'] != init_lr and args.load_optimal_on_plateau:
            # save lr before restoring
            cur_lr = [param_group['lr'] for param_group in optim.param_groups]
            # restore last best model
            state_dict = find_optimal_model(p_out)
            apply_state_dict(state_dict,
                             model={'model': model},
                             optimizer={'optim': optim},
                             loss_fn=None,
                             scheduler=None)
            apply_lr(optim, cur_lr)
            # reset global_i
            global_i = state_dict['global_i']
            epoch = state_dict['epoch']
            LOG.info(f"Best model (val loss {state_dict['val_loss']}) applied. Roll back to epoch {epoch}")
            # reset tensorboard writer
            writer.close()
            writer = SummaryWriter(log_dir=VAR
                                   .log
                                   .joinpath('tensorboard')
                                   .joinpath(f"{model.name}-{args.tensorboard_exp_name}")
                                   .as_posix(),
                                   purge_step=global_i,
                                   filename_suffix='-train')

    # close tensorboard
    writer.close()
    return
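
Each torch.save call in Example #9 writes a plain dict, so resuming is a matter of reading it back and restoring the pieces. A minimal usage sketch (the file name below is hypothetical, and model, optim and scheduler_plateau are assumed to be built as in train_on_model):

# Sketch only (assumption): resuming from a checkpoint written by train_on_model above.
import torch

ckpt = torch.load("best@epoch-010-loss-0.123456.tar", map_location="cuda:0")
model.load_state_dict(ckpt['model'])
optim.load_state_dict(ckpt['optim'])
scheduler_plateau.load_state_dict(ckpt['scheduler_plateau'])
epoch = ckpt['epoch']        # resume counting epochs from here
global_i = ckpt['global_i']  # tensorboard global step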