def main(args):
    set_seed(args.seed)
    
    loader = {}
    
    train_dataset = WaveTrainDataset(args.wav_root, args.train_json_path)
    valid_dataset = WaveTrainDataset(args.wav_root, args.valid_json_path)
    print("Training dataset includes {} samples.".format(len(train_dataset)))
    print("Valid dataset includes {} samples.".format(len(valid_dataset)))
    
    loader['train'] = TrainDataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    loader['valid'] = TrainDataLoader(valid_dataset, batch_size=args.batch_size, shuffle=False)
    
    model = ConvTasNet(args.n_basis,
                       args.kernel_size,
                       stride=args.stride,
                       enc_basis=args.enc_basis,
                       dec_basis=args.dec_basis,
                       enc_nonlinear=args.enc_nonlinear,
                       window_fn=args.window_fn,
                       sep_hidden_channels=args.sep_hidden_channels,
                       sep_bottleneck_channels=args.sep_bottleneck_channels,
                       sep_skip_channels=args.sep_skip_channels,
                       sep_kernel_size=args.sep_kernel_size,
                       sep_num_blocks=args.sep_num_blocks,
                       sep_num_layers=args.sep_num_layers,
                       dilated=args.dilated,
                       separable=args.separable,
                       causal=args.causal,
                       sep_nonlinear=args.sep_nonlinear,
                       sep_norm=args.sep_norm,
                       mask_nonlinear=args.mask_nonlinear,
                       n_sources=args.n_sources)
    print(model)
    print("# Parameters: {}".format(model.num_parameters))
    
    if args.use_cuda:
        if torch.cuda.is_available():
            model.cuda()
            model = nn.DataParallel(model)
            print("Using CUDA")
        else:
            raise ValueError("CUDA is requested but not available.")
    else:
        print("Not using CUDA")
        
    # Optimizer
    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    else:
        raise ValueError("Not support optimizer {}".format(args.optimizer))
    
    # Criterion
    if args.criterion == 'sisdr':
        criterion = NegSISDR()
    else:
        raise ValueError("Not support criterion {}".format(args.criterion))
    
    pit_criterion = PIT1d(criterion, n_sources=args.n_sources)
    
    trainer = Trainer(model, loader, pit_criterion, optimizer, args)
    trainer.run()
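# Note: PIT1d above wraps the per-source criterion in a permutation search so the
# loss is invariant to how the estimated sources are ordered. A minimal sketch of
# the idea (hypothetical helper, not this repository's PIT1d):
import itertools

import torch

def pit_loss(criterion, estimated, target):
    # estimated, target: (batch_size, n_sources, T)
    n_sources = estimated.size(1)
    best = None
    for perm in itertools.permutations(range(n_sources)):
        loss = torch.stack([
            criterion(estimated[:, idx], target[:, jdx]).mean()
            for idx, jdx in enumerate(perm)
        ]).mean()
        best = loss if best is None else torch.minimum(best, loss)
    # Simplified: picks one permutation for the whole batch; a real implementation
    # resolves the permutation per sample from a pairwise loss matrix.
    return best
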
def main(args):
    set_seed(args.seed)
    
    samples = int(args.sr * args.duration)
    overlap = 0
    max_samples = int(args.sr * args.valid_duration)

    train_dataset = IdealMaskSpectrogramTrainDataset(args.train_wav_root,
                                                     args.train_list_path,
                                                     fft_size=args.fft_size,
                                                     hop_size=args.hop_size,
                                                     window_fn=args.window_fn,
                                                     mask_type=args.ideal_mask,
                                                     threshold=args.threshold,
                                                     samples=samples,
                                                     overlap=overlap,
                                                     n_sources=args.n_sources)
    valid_dataset = IdealMaskSpectrogramEvalDataset(args.valid_wav_root,
                                                    args.valid_list_path,
                                                    fft_size=args.fft_size,
                                                    hop_size=args.hop_size,
                                                    window_fn=args.window_fn,
                                                    mask_type=args.ideal_mask,
                                                    threshold=args.threshold,
                                                    max_samples=max_samples,
                                                    n_sources=args.n_sources)
    print("Training dataset includes {} samples.".format(len(train_dataset)))
    print("Valid dataset includes {} samples.".format(len(valid_dataset)))
    
    loader = {}
    loader['train'] = TrainDataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    loader['valid'] = EvalDataLoader(valid_dataset, batch_size=1, shuffle=False)
    
    if args.max_norm is not None and args.max_norm == 0:
        args.max_norm = None
    
    args.n_bins = args.fft_size // 2 + 1
    model = DANet(args.n_bins, embed_dim=args.embed_dim, hidden_channels=args.hidden_channels, num_blocks=args.num_blocks, causal=args.causal, mask_nonlinear=args.mask_nonlinear, iter_clustering=args.iter_clustering)
    print(model)
    print("# Parameters: {}".format(model.num_parameters))
    
    if args.use_cuda:
        if torch.cuda.is_available():
            model.cuda()
            model = nn.DataParallel(model)
            print("Using CUDA")
        else:
            raise ValueError("CUDA is requested but not available.")
    else:
        print("Not using CUDA")
        
    # Optimizer
    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    else:
        raise ValueError("Not support optimizer {}".format(args.optimizer))
    
    # Criterion
    if args.criterion == 'l1loss':
        criterion = L1Loss(dim=(2,3), reduction='mean') # (batch_size, n_sources, n_bins, n_frames)
    elif args.criterion == 'l2loss':
        criterion = L2Loss(dim=(2,3), reduction='mean') # (batch_size, n_sources, n_bins, n_frames)
    else:
        raise ValueError("Not support criterion {}".format(args.criterion))
    
    trainer = AdhocTrainer(model, loader, criterion, optimizer, args)
    trainer.run()
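# Note: IdealMaskSpectrogramTrainDataset derives its targets from an "ideal"
# time-frequency mask chosen by args.ideal_mask, and args.threshold is typically
# a dB floor used to down-weight near-silent bins. A hedged sketch of an ideal
# binary mask (hypothetical helper, details may differ from the dataset class):
import torch

def ideal_binary_mask(amplitude, threshold=40):
    # amplitude: (n_sources, n_bins, n_frames) magnitude spectrograms
    n_sources = amplitude.size(0)
    # Assign each time-frequency bin to its loudest source.
    ibm = torch.eye(n_sources)[amplitude.argmax(dim=0)]  # (n_bins, n_frames, n_sources)
    ibm = ibm.permute(2, 0, 1).contiguous()              # (n_sources, n_bins, n_frames)
    # Weight that zeros out bins more than `threshold` dB below the mixture peak.
    log_mixture = 20 * torch.log10(amplitude.sum(dim=0) + 1e-12)
    weight = (log_mixture > log_mixture.max() - threshold).float()
    return ibm, weight
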
def main(args):
    set_seed(args.seed)

    loader = {}

    train_dataset = ThresholdWeightSpectrogramTrainDataset(
        args.wav_root,
        args.train_json_path,
        fft_size=args.fft_size,
        hop_size=args.hop_size,
        window_fn=args.window_fn,
        threshold=args.threshold)
    valid_dataset = ThresholdWeightSpectrogramTrainDataset(
        args.wav_root,
        args.valid_json_path,
        fft_size=args.fft_size,
        hop_size=args.hop_size,
        window_fn=args.window_fn,
        threshold=args.threshold)
    print("Training dataset includes {} samples.".format(len(train_dataset)))
    print("Valid dataset includes {} samples.".format(len(valid_dataset)))

    loader['train'] = TrainDataLoader(train_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True)
    loader['valid'] = TrainDataLoader(valid_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=False)

    args.F_bin = args.fft_size // 2 + 1
    model = ADANet(args.F_bin,
                   embed_dim=args.embed_dim,
                   hidden_channels=args.hidden_channels,
                   num_blocks=args.num_blocks,
                   n_anchors=args.n_anchors,
                   causal=args.causal,
                   mask_nonlinear=args.mask_nonlinear)
    print(model)
    print("# Parameters: {}".format(model.num_parameters))

    if args.use_cuda:
        if torch.cuda.is_available():
            model.cuda()
            model = nn.DataParallel(model)
            print("Using CUDA")
        else:
            raise ValueError("CUDA is requested but not available.")
    else:
        print("Not using CUDA")

    # Optimizer
    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
    elif args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(),
                                        lr=args.lr,
                                        weight_decay=args.weight_decay)
    else:
        raise ValueError("Not support optimizer {}".format(args.optimizer))

    # Criterion
    if args.criterion == 'l2loss':
        criterion = L2Loss(
            dim=(2, 3),
            reduction='mean')  # (batch_size, n_sources, F_bin, T_bin)
    else:
        raise ValueError("Not support criterion {}".format(args.criterion))

    trainer = AnchoredAttractorTrainer(model, loader, criterion, optimizer,
                                       args)
    trainer.run()
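# Note: F_bin = fft_size // 2 + 1 above is the number of one-sided STFT frequency
# bins. A quick check of that relationship:
import torch

fft_size, hop_size = 256, 64
x = torch.randn(1, 16000)
spec = torch.stft(x, n_fft=fft_size, hop_length=hop_size,
                  window=torch.hann_window(fft_size), return_complex=True)
print(spec.size(1))  # 129 == fft_size // 2 + 1
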
Example #4
def main(args):
    set_seed(args.seed)

    samples = int(args.sr * args.duration)
    overlap = samples // 2
    max_samples = int(args.sr * args.valid_duration)

    train_dataset = WaveTrainDataset(args.train_wav_root,
                                     args.train_list_path,
                                     samples=samples,
                                     overlap=overlap,
                                     n_sources=args.n_sources)
    valid_dataset = WaveEvalDataset(args.valid_wav_root,
                                    args.valid_list_path,
                                    max_samples=max_samples,
                                    n_sources=args.n_sources)
    print("Training dataset includes {} samples.".format(len(train_dataset)))
    print("Valid dataset includes {} samples.".format(len(valid_dataset)))

    loader = {}
    loader['train'] = TrainDataLoader(train_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True)
    loader['valid'] = EvalDataLoader(valid_dataset,
                                     batch_size=1,
                                     shuffle=False)

    if not args.enc_nonlinear:
        args.enc_nonlinear = None
    if args.max_norm is not None and args.max_norm == 0:
        args.max_norm = None
    model = DPRNNTasNet(args.n_bases,
                        args.kernel_size,
                        stride=args.stride,
                        enc_bases=args.enc_bases,
                        dec_bases=args.dec_bases,
                        enc_nonlinear=args.enc_nonlinear,
                        window_fn=args.window_fn,
                        sep_hidden_channels=args.sep_hidden_channels,
                        sep_bottleneck_channels=args.sep_bottleneck_channels,
                        sep_chunk_size=args.sep_chunk_size,
                        sep_hop_size=args.sep_hop_size,
                        sep_num_blocks=args.sep_num_blocks,
                        causal=args.causal,
                        sep_norm=args.sep_norm,
                        mask_nonlinear=args.mask_nonlinear,
                        n_sources=args.n_sources)
    print(model)
    print("# Parameters: {}".format(model.num_parameters))

    if args.use_cuda:
        if torch.cuda.is_available():
            model.cuda()
            model = nn.DataParallel(model)
            print("Using CUDA")
        else:
            raise ValueError("CUDA is requested but not available.")
    else:
        print("Not using CUDA")

    # Optimizer
    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
    elif args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(),
                                        lr=args.lr,
                                        weight_decay=args.weight_decay)
    else:
        raise ValueError("Not support optimizer {}".format(args.optimizer))

    # Criterion
    if args.criterion == 'sisdr':
        criterion = NegSISDR()
    else:
        raise ValueError("Not support criterion {}".format(args.criterion))

    pit_criterion = PIT1d(criterion, n_sources=args.n_sources)

    trainer = AdhocTrainer(model, loader, pit_criterion, optimizer, args)
    trainer.run()
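# Note: sep_chunk_size and sep_hop_size control DPRNN's segmentation, which folds
# the encoded sequence into overlapping chunks so the intra- and inter-chunk RNNs
# each operate over a short axis. A minimal sketch of that step (hypothetical shapes):
import torch

def segment(x, chunk_size, hop_size):
    # x: (batch_size, num_features, n_frames)
    # -> (batch_size, num_features, n_chunks, chunk_size)
    return x.unfold(-1, chunk_size, hop_size)

x = torch.randn(4, 64, 1000)
chunks = segment(x, chunk_size=100, hop_size=50)  # 50% overlap between chunks
print(chunks.shape)  # torch.Size([4, 64, 19, 100])
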
def main(args):
    set_seed(args.seed)

    sources = args.sources.replace('[', '').replace(']', '').split(',')
    args.n_sources = len(sources)

    samples = int(args.sr * args.duration)
    overlap = samples // 2
    max_samples = int(args.sr * args.valid_duration)

    train_dataset = WaveTrainDataset(args.dsd100_root,
                                     sources,
                                     args.sr,
                                     samples,
                                     overlap=overlap,
                                     n_train=40)
    valid_dataset = WaveEvalDataset(args.dsd100_root,
                                    sources,
                                    args.sr,
                                    max_samples,
                                    n_train=40)
    print("Training dataset includes {} samples.".format(len(train_dataset)))
    print("Valid dataset includes {} samples.".format(len(valid_dataset)))

    loader = {}
    loader['train'] = TrainDataLoader(train_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True)
    loader['valid'] = EvalDataLoader(valid_dataset,
                                     batch_size=1,
                                     shuffle=False)

    model = ConvTasNet(args.n_bases,
                       args.kernel_size,
                       stride=args.stride,
                       enc_bases=args.enc_bases,
                       dec_bases=args.dec_bases,
                       enc_nonlinear=args.enc_nonlinear,
                       window_fn=args.window_fn,
                       sep_hidden_channels=args.sep_hidden_channels,
                       sep_bottleneck_channels=args.sep_bottleneck_channels,
                       sep_skip_channels=args.sep_skip_channels,
                       sep_kernel_size=args.sep_kernel_size,
                       sep_num_blocks=args.sep_num_blocks,
                       sep_num_layers=args.sep_num_layers,
                       dilated=args.dilated,
                       separable=args.separable,
                       causal=args.causal,
                       sep_nonlinear=args.sep_nonlinear,
                       sep_norm=args.sep_norm,
                       mask_nonlinear=args.mask_nonlinear,
                       n_sources=args.n_sources)
    print(model)
    print("# Parameters: {}".format(model.num_parameters))

    if args.use_cuda:
        if torch.cuda.is_available():
            model.cuda()
            model = nn.DataParallel(model)
            print("Using CUDA")
        else:
            raise ValueError("CUDA is requested but not available.")
    else:
        print("Not using CUDA")

    # Optimizer
    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
    elif args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(),
                                        lr=args.lr,
                                        weight_decay=args.weight_decay)
    else:
        raise ValueError("Not support optimizer {}".format(args.optimizer))

    # Criterion
    if args.criterion == 'sisdr':
        criterion = NegSISDR()
    else:
        raise ValueError("Not support criterion {}".format(args.criterion))

    trainer = Trainer(model, loader, criterion, optimizer, args)
    trainer.run()
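# Note: the bracket-stripping above implies --sources is passed as a bracketed,
# comma-separated string. A quick illustration with a hypothetical value:
sources = "[bass,drums,other,vocals]".replace('[', '').replace(']', '').split(',')
print(sources)       # ['bass', 'drums', 'other', 'vocals']
print(len(sources))  # 4 -> args.n_sources
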
Example #6
def main(args):
    if args.checkpoint == '':
        args.checkpoint = "checkpoints/ic17_%s_bs_%d_ep_%d" % (
            args.arch, args.batch_size, args.n_epoch)
    if args.pretrain:
        if 'synth' in args.pretrain:
            args.checkpoint += "_pretrain_synth"
        else:
            args.checkpoint += "_pretrain_ic17"

    print('checkpoint path: %s' % args.checkpoint)
    print('init lr: %.8f' % args.lr)
    print('schedule:', args.schedule)
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    kernel_num = 7
    min_scale = 0.4
    start_epoch = 0

    data_loader = TrainDataLoader(input_dir=args.input_dir,
                                  is_transform=True,
                                  img_size=args.img_size,
                                  kernel_num=kernel_num,
                                  min_scale=min_scale)
    train_loader = torch.utils.data.DataLoader(data_loader,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=3,
                                               drop_last=True,
                                               pin_memory=True)

    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)

    model = torch.nn.DataParallel(model).cuda()

    if hasattr(model.module, 'optimizer'):
        optimizer = model.module.optimizer
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=0.99,
                                    weight_decay=5e-4)

    title = 'icdar2017'
    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(
            args.pretrain), 'Error: no pretrained weights file found!'
        checkpoint = torch.load(args.pretrain)
        model.load_state_dict(checkpoint['state_dict'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])
    elif args.resume:
        print('Resuming from checkpoint.')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint file found!'
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])

    for epoch in range(start_epoch, args.n_epoch):
        adjust_learning_rate(args, optimizer, epoch)
        print(('\nEpoch: [%d | %d] LR: %f' %
               (epoch + 1, args.n_epoch, optimizer.param_groups[0]['lr'])))

        train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(
            train_loader, model, dice_loss, optimizer, epoch)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'lr': args.lr,
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=args.checkpoint)

        logger.append([
            optimizer.param_groups[0]['lr'], train_loss, train_te_acc,
            train_te_iou
        ])
    logger.close()
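# Note: dice_loss is passed to train() but not defined in this snippet. A common
# soft-Dice formulation, as a hedged sketch (not necessarily this repository's version):
import torch

def dice_loss(pred, target, eps=1e-6):
    # Soft Dice: 1 - 2|P∩T| / (|P| + |T|), averaged over the batch.
    pred = torch.sigmoid(pred).reshape(pred.size(0), -1)
    target = target.reshape(target.size(0), -1)
    intersection = (pred * target).sum(dim=1)
    union = pred.sum(dim=1) + target.sum(dim=1)
    return (1 - (2 * intersection + eps) / (union + eps)).mean()
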
Example #7
def main(args):
    set_seed(111)

    train_dataset = TrainDataset(args.train_image_root, args.train_path, H=args.H, W=args.W, R=args.R, G=args.G)
    print("Training dataset includes {} images.".format(len(train_dataset)))
    valid_dataset = TrainDataset(args.valid_image_root, args.valid_path, H=args.H, W=args.W, R=args.R, G=args.G)
    print("Validation dataset includes {} images.".format(len(valid_dataset)))
    
    loader = {}
    loader['train'] = TrainDataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    loader['valid'] = TrainDataLoader(valid_dataset, batch_size=args.batch_size, shuffle=False)

    C = 3  # RGB input channels
    channels = args.channels.replace('[','').replace(']','').split(',')
    channels_backbone = [int(channel) for channel in channels]
    logR = int(math.log2(args.R))
    
    channels_down = [C]
    
    for r in range(logR//2):
        channel = channels_backbone[0]//(logR//2 - r)
        channels_down.append(channel)

    downsample_net = DownsampleNetBase(channels_down, kernel_size=args.K_down, stride=args.S_down, pool=args.pool_down)
    
    backbone = UNet2d(channels_backbone, kernel_size=args.K_backbone, stride=args.S_backbone, dilated=args.dilated, separable=args.separable, nonlinear_enc=args.nonlinear_backbone, nonlinear_dec=args.nonlinear_backbone)
    
    head_list = ['heatmap', 'local_offset', 'size']
    head_modules = {
        'heatmap': HeatmapNetBase(channels_backbone[0]),
        'local_offset': LocalOffsetNetBase(channels_backbone[0]),
        'size': SizeNetBase(channels_backbone[0])
    }
    
    head_net = ObjectDetectionNetBase(head_modules=head_modules)

    model = CenterNet(downsample_net, backbone, head_net)
    print(model, flush=True)
    print("# Parameters:", model.num_parameters)
    
    if torch.cuda.is_available():
        model.cuda()
        model = nn.DataParallel(model)
        print("Using CUDA")
    else:
        print("Not using CUDA")
        
    # Optimizer
    if args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    else:
        raise ValueError("Not support optimizer {}".format(args.optimizer))
        
    # Criterion
    importance = args.importance.replace('[','').replace(']','').split(',')
    importance = {
        head_key: float(importance[idx]) for idx, head_key in enumerate(head_list)
    }
    criterion = ObjectDetectionLoss(importance, args.heatmap_loss)
        
    trainer = Trainer(model, loader, criterion, optimizer, args)
    trainer.run()
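# Note: a worked example of the downsampling channel schedule above, with
# hypothetical values R = 16 and channels_backbone[0] = 64:
#   logR = log2(16) = 4, so logR // 2 = 2 downsampling stages
#   r = 0: 64 // (2 - 0) = 32
#   r = 1: 64 // (2 - 1) = 64
#   channels_down = [3, 32, 64]  (3 = RGB input channels)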