def main(args):
    """Train Conv-TasNet with a permutation-invariant negative SI-SDR loss.

    Builds train/valid wave datasets and loaders from ``args``, constructs the
    model, optimizer and PIT criterion, then delegates to ``Trainer``.
    """
    set_seed(args.seed)

    # Train and valid share one wav root; only the JSON manifest differs.
    train_dataset = WaveTrainDataset(args.wav_root, args.train_json_path)
    valid_dataset = WaveTrainDataset(args.wav_root, args.valid_json_path)
    print("Training dataset includes {} samples.".format(len(train_dataset)))
    print("Valid dataset includes {} samples.".format(len(valid_dataset)))

    loader = {
        'train': TrainDataLoader(train_dataset, batch_size=args.batch_size, shuffle=True),
        'valid': TrainDataLoader(valid_dataset, batch_size=args.batch_size, shuffle=False),
    }

    model = ConvTasNet(
        args.n_basis, args.kernel_size, stride=args.stride,
        enc_basis=args.enc_basis, dec_basis=args.dec_basis,
        enc_nonlinear=args.enc_nonlinear, window_fn=args.window_fn,
        sep_hidden_channels=args.sep_hidden_channels,
        sep_bottleneck_channels=args.sep_bottleneck_channels,
        sep_skip_channels=args.sep_skip_channels,
        sep_kernel_size=args.sep_kernel_size,
        sep_num_blocks=args.sep_num_blocks,
        sep_num_layers=args.sep_num_layers,
        dilated=args.dilated, separable=args.separable, causal=args.causal,
        sep_nonlinear=args.sep_nonlinear, sep_norm=args.sep_norm,
        mask_nonlinear=args.mask_nonlinear,
        n_sources=args.n_sources,
    )
    print(model)
    print("# Parameters: {}".format(model.num_parameters))

    if args.use_cuda:
        if not torch.cuda.is_available():
            raise ValueError("Cannot use CUDA.")
        model.cuda()
        model = nn.DataParallel(model)
        print("Use CUDA")
    else:
        print("Does NOT use CUDA")

    # Optimizer: dispatch table instead of an if/elif chain.
    optimizer_classes = {
        'sgd': torch.optim.SGD,
        'adam': torch.optim.Adam,
        'rmsprop': torch.optim.RMSprop,
    }
    if args.optimizer not in optimizer_classes:
        raise ValueError("Not support optimizer {}".format(args.optimizer))
    optimizer = optimizer_classes[args.optimizer](
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # Criterion: negative SI-SDR, wrapped in permutation-invariant training.
    if args.criterion == 'sisdr':
        criterion = NegSISDR()
    else:
        raise ValueError("Not support criterion {}".format(args.criterion))
    pit_criterion = PIT1d(criterion, n_sources=args.n_sources)

    trainer = Trainer(model, loader, pit_criterion, optimizer, args)
    trainer.run()
def main(args):
    """Train DANet on ideal-mask spectrogram targets.

    Builds chunked train / full-length eval spectrogram datasets, constructs
    the model, optimizer and mask-distance criterion, then runs ``AdhocTrainer``.
    """
    set_seed(args.seed)

    samples = int(args.sr * args.duration)
    overlap = 0  # training chunks do not overlap
    max_samples = int(args.sr * args.valid_duration)

    train_dataset = IdealMaskSpectrogramTrainDataset(
        args.train_wav_root, args.train_list_path,
        fft_size=args.fft_size, hop_size=args.hop_size, window_fn=args.window_fn,
        mask_type=args.ideal_mask, threshold=args.threshold,
        samples=samples, overlap=overlap, n_sources=args.n_sources)
    valid_dataset = IdealMaskSpectrogramEvalDataset(
        args.valid_wav_root, args.valid_list_path,
        fft_size=args.fft_size, hop_size=args.hop_size, window_fn=args.window_fn,
        mask_type=args.ideal_mask, threshold=args.threshold,
        max_samples=max_samples, n_sources=args.n_sources)
    print("Training dataset includes {} samples.".format(len(train_dataset)))
    print("Valid dataset includes {} samples.".format(len(valid_dataset)))

    loader = {
        'train': TrainDataLoader(train_dataset, batch_size=args.batch_size, shuffle=True),
        'valid': EvalDataLoader(valid_dataset, batch_size=1, shuffle=False),
    }

    # A max_norm of 0 is normalized to None (i.e. gradient clipping disabled).
    if args.max_norm is not None and args.max_norm == 0:
        args.max_norm = None

    args.n_bins = args.fft_size // 2 + 1  # one-sided spectrum size
    model = DANet(args.n_bins, embed_dim=args.embed_dim,
                  hidden_channels=args.hidden_channels,
                  num_blocks=args.num_blocks, causal=args.causal,
                  mask_nonlinear=args.mask_nonlinear,
                  iter_clustering=args.iter_clustering)
    print(model)
    print("# Parameters: {}".format(model.num_parameters))

    if args.use_cuda:
        if not torch.cuda.is_available():
            raise ValueError("Cannot use CUDA.")
        model.cuda()
        model = nn.DataParallel(model)
        print("Use CUDA")
    else:
        print("Does NOT use CUDA")

    # Optimizer: dispatch table instead of an if/elif chain.
    optimizer_classes = {
        'sgd': torch.optim.SGD,
        'adam': torch.optim.Adam,
        'rmsprop': torch.optim.RMSprop,
    }
    if args.optimizer not in optimizer_classes:
        raise ValueError("Not support optimizer {}".format(args.optimizer))
    optimizer = optimizer_classes[args.optimizer](
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # Criterion reduced over dims (2, 3) of (batch_size, n_sources, n_bins, n_frames).
    if args.criterion == 'l1loss':
        criterion = L1Loss(dim=(2,3), reduction='mean')
    elif args.criterion == 'l2loss':
        criterion = L2Loss(dim=(2,3), reduction='mean')
    else:
        raise ValueError("Not support criterion {}".format(args.criterion))

    trainer = AdhocTrainer(model, loader, criterion, optimizer, args)
    trainer.run()
def main(args):
    """Train ADANet on threshold-weighted spectrograms.

    Builds train/valid spectrogram datasets, the anchored-attractor model,
    optimizer and L2 criterion, then runs ``AnchoredAttractorTrainer``.
    """
    set_seed(args.seed)

    train_dataset = ThresholdWeightSpectrogramTrainDataset(
        args.wav_root, args.train_json_path,
        fft_size=args.fft_size, hop_size=args.hop_size,
        window_fn=args.window_fn, threshold=args.threshold)
    valid_dataset = ThresholdWeightSpectrogramTrainDataset(
        args.wav_root, args.valid_json_path,
        fft_size=args.fft_size, hop_size=args.hop_size,
        window_fn=args.window_fn, threshold=args.threshold)
    print("Training dataset includes {} samples.".format(len(train_dataset)))
    print("Valid dataset includes {} samples.".format(len(valid_dataset)))

    loader = {
        'train': TrainDataLoader(train_dataset, batch_size=args.batch_size, shuffle=True),
        'valid': TrainDataLoader(valid_dataset, batch_size=args.batch_size, shuffle=False),
    }

    args.F_bin = args.fft_size // 2 + 1  # one-sided frequency-bin count
    model = ADANet(args.F_bin, embed_dim=args.embed_dim,
                   hidden_channels=args.hidden_channels,
                   num_blocks=args.num_blocks, n_anchors=args.n_anchors,
                   causal=args.causal, mask_nonlinear=args.mask_nonlinear)
    print(model)
    print("# Parameters: {}".format(model.num_parameters))

    if args.use_cuda:
        if not torch.cuda.is_available():
            raise ValueError("Cannot use CUDA.")
        model.cuda()
        model = nn.DataParallel(model)
        print("Use CUDA")
    else:
        print("Does NOT use CUDA")

    # Optimizer: dispatch table instead of an if/elif chain.
    optimizer_classes = {
        'sgd': torch.optim.SGD,
        'adam': torch.optim.Adam,
        'rmsprop': torch.optim.RMSprop,
    }
    if args.optimizer not in optimizer_classes:
        raise ValueError("Not support optimizer {}".format(args.optimizer))
    optimizer = optimizer_classes[args.optimizer](
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # Criterion reduced over dims (2, 3) of (batch_size, n_sources, F_bin, T_bin).
    if args.criterion == 'l2loss':
        criterion = L2Loss(dim=(2, 3), reduction='mean')
    else:
        raise ValueError("Not support criterion {}".format(args.criterion))

    trainer = AnchoredAttractorTrainer(model, loader, criterion, optimizer, args)
    trainer.run()
def main(args):
    """Train DPRNN-TasNet with a permutation-invariant negative SI-SDR loss.

    Builds chunked train / full-length eval wave datasets, normalizes optional
    flags, constructs model, optimizer and PIT criterion, then runs
    ``AdhocTrainer``.
    """
    set_seed(args.seed)

    samples = int(args.sr * args.duration)
    overlap = samples // 2  # 50% overlap between training chunks
    max_samples = int(args.sr * args.valid_duration)

    train_dataset = WaveTrainDataset(args.train_wav_root, args.train_list_path,
                                     samples=samples, overlap=overlap,
                                     n_sources=args.n_sources)
    valid_dataset = WaveEvalDataset(args.valid_wav_root, args.valid_list_path,
                                    max_samples=max_samples,
                                    n_sources=args.n_sources)
    print("Training dataset includes {} samples.".format(len(train_dataset)))
    print("Valid dataset includes {} samples.".format(len(valid_dataset)))

    loader = {
        'train': TrainDataLoader(train_dataset, batch_size=args.batch_size, shuffle=True),
        'valid': EvalDataLoader(valid_dataset, batch_size=1, shuffle=False),
    }

    # Normalize CLI sentinels: a falsy enc_nonlinear means "none",
    # and max_norm == 0 disables gradient clipping.
    if not args.enc_nonlinear:
        args.enc_nonlinear = None
    if args.max_norm is not None and args.max_norm == 0:
        args.max_norm = None

    model = DPRNNTasNet(
        args.n_bases, args.kernel_size, stride=args.stride,
        enc_bases=args.enc_bases, dec_bases=args.dec_bases,
        enc_nonlinear=args.enc_nonlinear, window_fn=args.window_fn,
        sep_hidden_channels=args.sep_hidden_channels,
        sep_bottleneck_channels=args.sep_bottleneck_channels,
        sep_chunk_size=args.sep_chunk_size,
        sep_hop_size=args.sep_hop_size,
        sep_num_blocks=args.sep_num_blocks,
        causal=args.causal, sep_norm=args.sep_norm,
        mask_nonlinear=args.mask_nonlinear,
        n_sources=args.n_sources,
    )
    print(model)
    print("# Parameters: {}".format(model.num_parameters))

    if args.use_cuda:
        if not torch.cuda.is_available():
            raise ValueError("Cannot use CUDA.")
        model.cuda()
        model = nn.DataParallel(model)
        print("Use CUDA")
    else:
        print("Does NOT use CUDA")

    # Optimizer: dispatch table instead of an if/elif chain.
    optimizer_classes = {
        'sgd': torch.optim.SGD,
        'adam': torch.optim.Adam,
        'rmsprop': torch.optim.RMSprop,
    }
    if args.optimizer not in optimizer_classes:
        raise ValueError("Not support optimizer {}".format(args.optimizer))
    optimizer = optimizer_classes[args.optimizer](
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # Criterion: negative SI-SDR, wrapped in permutation-invariant training.
    if args.criterion == 'sisdr':
        criterion = NegSISDR()
    else:
        raise ValueError("Not support criterion {}".format(args.criterion))
    pit_criterion = PIT1d(criterion, n_sources=args.n_sources)

    trainer = AdhocTrainer(model, loader, pit_criterion, optimizer, args)
    trainer.run()
def main(args):
    """Train Conv-TasNet on the DSD100 corpus for music source separation.

    Parses the target-source list from ``args.sources``, builds chunked train /
    full-length eval datasets (first 40 tracks), constructs the model, optimizer
    and SI-SDR criterion, then runs ``Trainer``.
    """
    set_seed(args.seed)

    # e.g. "[bass,drums,vocals]" -> ['bass', 'drums', 'vocals']
    sources = args.sources.replace('[', '').replace(']', '').split(',')
    args.n_sources = len(sources)

    samples = int(args.sr * args.duration)
    overlap = samples // 2  # 50% overlap between training chunks
    max_samples = int(args.sr * args.valid_duration)

    train_dataset = WaveTrainDataset(args.dsd100_root, sources, args.sr, samples,
                                     overlap=overlap, n_train=40)
    valid_dataset = WaveEvalDataset(args.dsd100_root, sources, args.sr,
                                    max_samples, n_train=40)
    print("Training dataset includes {} samples.".format(len(train_dataset)))
    print("Valid dataset includes {} samples.".format(len(valid_dataset)))

    loader = {
        'train': TrainDataLoader(train_dataset, batch_size=args.batch_size, shuffle=True),
        'valid': EvalDataLoader(valid_dataset, batch_size=1, shuffle=False),
    }

    model = ConvTasNet(
        args.n_bases, args.kernel_size, stride=args.stride,
        enc_bases=args.enc_bases, dec_bases=args.dec_bases,
        enc_nonlinear=args.enc_nonlinear, window_fn=args.window_fn,
        sep_hidden_channels=args.sep_hidden_channels,
        sep_bottleneck_channels=args.sep_bottleneck_channels,
        sep_skip_channels=args.sep_skip_channels,
        sep_kernel_size=args.sep_kernel_size,
        sep_num_blocks=args.sep_num_blocks,
        sep_num_layers=args.sep_num_layers,
        dilated=args.dilated, separable=args.separable, causal=args.causal,
        sep_nonlinear=args.sep_nonlinear, sep_norm=args.sep_norm,
        mask_nonlinear=args.mask_nonlinear,
        n_sources=args.n_sources,
    )
    print(model)
    print("# Parameters: {}".format(model.num_parameters))

    if args.use_cuda:
        if not torch.cuda.is_available():
            raise ValueError("Cannot use CUDA.")
        model.cuda()
        model = nn.DataParallel(model)
        print("Use CUDA")
    else:
        print("Does NOT use CUDA")

    # Optimizer: dispatch table instead of an if/elif chain.
    optimizer_classes = {
        'sgd': torch.optim.SGD,
        'adam': torch.optim.Adam,
        'rmsprop': torch.optim.RMSprop,
    }
    if args.optimizer not in optimizer_classes:
        raise ValueError("Not support optimizer {}".format(args.optimizer))
    optimizer = optimizer_classes[args.optimizer](
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # Criterion (used directly here, without a PIT wrapper).
    if args.criterion == 'sisdr':
        criterion = NegSISDR()
    else:
        raise ValueError("Not support criterion {}".format(args.criterion))

    trainer = Trainer(model, loader, criterion, optimizer, args)
    trainer.run()
def main(args):
    """Train a text-detection ResNet (PSENet-style) on ICDAR2017.

    Handles checkpoint-directory naming, optional pretrained/resume loading,
    the epoch loop with LR scheduling, per-epoch checkpointing and logging.
    """
    if args.checkpoint == '':
        args.checkpoint = "checkpoints/ic17_%s_bs_%d_ep_%d" % (
            args.arch, args.batch_size, args.n_epoch)
        if args.pretrain:
            if 'synth' in args.pretrain:
                args.checkpoint += "_pretrain_synth"
            else:
                args.checkpoint += "_pretrain_ic17"

    print(('checkpoint path: %s' % args.checkpoint))
    print(('init lr: %.8f' % args.lr))
    # NOTE(review): this prints a tuple repr, e.g. "('schedule: ', [...])" —
    # looks like a py2->py3 artifact; left as-is to preserve log output.
    print(('schedule: ', args.schedule))
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    kernel_num = 7   # number of shrunken-kernel maps the model predicts
    min_scale = 0.4  # smallest kernel shrink ratio used by the dataset
    start_epoch = 0

    data_loader = TrainDataLoader(input_dir=args.input_dir,
                                  is_transform=True,
                                  img_size=args.img_size,
                                  kernel_num=kernel_num,
                                  min_scale=min_scale)
    train_loader = torch.utils.data.DataLoader(data_loader,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=3,
                                               drop_last=True,
                                               pin_memory=True)

    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)
    else:
        # FIX: an unknown arch previously fell through with `model` undefined,
        # crashing later with a NameError; fail fast with a clear message.
        raise ValueError("Not support arch {}".format(args.arch))
    model = torch.nn.DataParallel(model).cuda()

    # Some model variants ship their own optimizer; otherwise use SGD.
    if hasattr(model.module, 'optimizer'):
        optimizer = model.module.optimizer
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=0.99,
                                    weight_decay=5e-4)

    title = 'icdar2017'
    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(
            args.pretrain), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.pretrain)
        model.load_state_dict(checkpoint['state_dict'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])
    elif args.resume:
        print('Resuming from checkpoint.')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title, resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])

    for epoch in range(start_epoch, args.n_epoch):
        adjust_learning_rate(args, optimizer, epoch)
        print(('\nEpoch: [%d | %d] LR: %f' %
               (epoch + 1, args.n_epoch, optimizer.param_groups[0]['lr'])))

        # `train` also returns kernel-level (ke) metrics; only the text-level
        # (te) metrics are written to the log below.
        train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(
            train_loader, model, dice_loss, optimizer, epoch)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'lr': args.lr,
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=args.checkpoint)

        logger.append([
            optimizer.param_groups[0]['lr'], train_loss, train_te_acc,
            train_te_iou
        ])
    logger.close()
def main(args):
    """Train CenterNet: downsampling front-end, U-Net backbone, detection heads.

    Builds train/valid image datasets, assembles the network from its three
    stages, configures the Adam optimizer and weighted detection loss, then
    runs ``Trainer``.
    """
    set_seed(111)  # fixed seed; this script does not read args.seed

    train_dataset = TrainDataset(args.train_image_root, args.train_path,
                                 H=args.H, W=args.W, R=args.R, G=args.G)
    print("Training dataset includes {} images.".format(len(train_dataset)))
    valid_dataset = TrainDataset(args.valid_image_root, args.valid_path,
                                 H=args.H, W=args.W, R=args.R, G=args.G)
    print("Validation dataset includes {} images.".format(len(valid_dataset)))

    loader = {
        'train': TrainDataLoader(train_dataset, batch_size=args.batch_size, shuffle=True),
        'valid': TrainDataLoader(valid_dataset, batch_size=args.batch_size, shuffle=False),
    }

    C = 3  # input image channels
    # e.g. "[32,64,128]" -> [32, 64, 128]
    channels_backbone = [
        int(channel)
        for channel in args.channels.replace('[', '').replace(']', '').split(',')
    ]

    # Downsampling channel ladder: C, then log2(R)//2 stages climbing toward
    # the first backbone width (channels_backbone[0] // k for k descending).
    logR = int(math.log2(args.R))
    half_logR = logR // 2
    channels_down = [C] + [
        channels_backbone[0] // (half_logR - r) for r in range(half_logR)
    ]
    downsample_net = DownsampleNetBase(channels_down, kernel_size=args.K_down,
                                       stride=args.S_down, pool=args.pool_down)
    backbone = UNet2d(channels_backbone, kernel_size=args.K_backbone,
                      stride=args.S_backbone, dilated=args.dilated,
                      separable=args.separable,
                      nonlinear_enc=args.nonlinear_backbone,
                      nonlinear_dec=args.nonlinear_backbone)

    head_list = ['heatmap', 'local_offset', 'size']
    head_modules = {
        'heatmap': HeatmapNetBase(channels_backbone[0]),
        'local_offset': LocalOffsetNetBase(channels_backbone[0]),
        'size': SizeNetBase(channels_backbone[0]),
    }
    head_net = ObjectDetectionNetBase(head_modules=head_modules)
    model = CenterNet(downsample_net, backbone, head_net)
    print(model, flush=True)
    print("# Parameters:", model.num_parameters)

    if torch.cuda.is_available():
        model.cuda()
        model = nn.DataParallel(model)
        print("Use CUDA")
    else:
        print("Does NOT use CUDA")

    # Optimizer (only Adam is supported here, without weight decay).
    if args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    else:
        raise ValueError("Not support optimizer {}".format(args.optimizer))

    # Per-head loss weights, parsed in head_list order from e.g. "[1,0.1,0.1]".
    raw_importance = args.importance.replace('[', '').replace(']', '').split(',')
    importance = {
        head_key: float(raw_importance[idx])
        for idx, head_key in enumerate(head_list)
    }
    criterion = ObjectDetectionLoss(importance, args.heatmap_loss)

    trainer = Trainer(model, loader, criterion, optimizer, args)
    trainer.run()