def generate(parser): parser.add_argument("--val", dest="val", action='store_true') parser.add_argument("-c", dest="config", type=str) args = parser.parse_args() config = read_json(args.config) # We load a dummy data loader for post-processing transform_config = config['transform_config'] loader_config = config['loader_config'] processor = AudioProcessor(**transform_config) postprocess = processor.get_postprocessor() assert os.path.exists(args.outdir), "Output path does not exist" # Create output evaluation dir trval = 'val' if args.val else 'train' output_dir = mkdir_in_path(args.outdir, f"true_sample_{config['name']}") output_dir = mkdir_in_path( output_dir, f"{trval}_{args.n_gen}_{datetime.now().strftime('%Y-%m-%d_%H_%M')}") dbname = loader_config['dbname'] loader = get_data_loader(dbname)(name=dbname + '_' + transform_config['transform'], preprocessing=processor, **loader_config) if args.val: data, _ = loader.get_validation_set(args.n_gen) else: data = random.sample(loader.data, k=args.n_gen) audio_out = map(postprocess, data) saveAudioBatch(audio_out, path=output_dir, basename='true_sample', sr=config["transform_config"]["sample_rate"]) print("FINISHED!\n")
def generate(parser):
    args = parser.parse_args()
    device = get_device()

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.noiseVectorDim

    transform_config = config['transform_config']
    loader_config = config['loader_config']

    # We load a dummy data loader for post-processing
    processor = AudioProcessor(**transform_config)
    dbname = loader_config['dbname']
    loader_config["criteria"]["size"] = 1000
    loader = get_data_loader(dbname)(
        name=dbname + '_' + transform_config['transform'],
        preprocessing=processor,
        **loader_config)

    # Sample one conditioning label, build a single [noise | label] input,
    # repeat it n_gen times and overwrite the noise part with the radial
    # interpolation trajectory so only the noise varies across samples.
    label = torch.Tensor(random.sample(loader.metadata, k=1))
    labels, _ = model.buildNoiseData(1, inputLabels=label, skipAtts=True)
    z = labels.repeat(args.n_gen, 1)
    z_noise = radial_interpolation(latentDim, args.n_gen)
    z[:, :latentDim] = z_noise

    gnet = model.getOriginalG()
    gnet.eval()
    with torch.no_grad():
        out = gnet(z.to(device)).detach().cpu()
    audio_out = loader.postprocess(out)

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, "generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "radial_interpolation")
    output_dir = mkdir_in_path(output_dir,
                               datetime.now().strftime('%Y-%m-%d %H:%M'))

    saveAudioBatch(audio_out,
                   path=output_dir,
                   basename='test_radial_interpolation',
                   sr=config["transform_config"]["sample_rate"])
    print("FINISHED!\n")
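# `radial_interpolation` is referenced above but not defined in this excerpt.
# The helper below is only a sketch of one way such a function could be
# written (sweeping a single random latent direction radially from the origin
# out to the unit sphere); the repository's actual implementation may differ.
# It assumes `torch` is already imported at module level, as elsewhere here.
def radial_interpolation_sketch(latent_dim, n_steps):
    """Return `n_steps` latent vectors spread along one radial direction."""
    direction = torch.randn(1, latent_dim)
    direction = direction / direction.norm()            # unit-norm direction
    radii = torch.linspace(0.0, 1.0, n_steps).unsqueeze(1)
    return radii * direction                             # (n_steps, latent_dim)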
def test(parser):
    parser.add_argument('--size', dest='size', default=1000, type=int)
    parser.add_argument('--gen', dest='gen', action='store_true')
    args = parser.parse_args()
    kargs = vars(args)
    device = get_device()

    model, config, model_name = load_model_checkp(**kargs)
    transform_config = config['transform_config']
    loader_config = config['loader_config']

    d_net = model.getOriginalD().to(device)
    g_net = model.netG.to(device).eval()
    d_net.eval()

    # We load a dummy data loader for post-processing
    processor = AudioProcessor(**transform_config)
    dbname = loader_config['dbname']
    loader_config["criteria"]["size"] = args.size
    loader = get_data_loader(dbname)(
        name=dbname + '_' + transform_config['transform'],
        preprocessing=processor,
        **loader_config)

    att_dict = loader.header['attributes']
    criterion = ACGANCriterion(att_dict)

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, "tests_D")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir,
                               datetime.now().strftime('%Y-%m-%d %H:%M'))

    batch_size = min(args.batch_size, len(loader))
    data_loader = DataLoader(loader,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=2)
    data_iter = iter(data_loader)
    iter_bar = trange(len(data_iter), desc='epoch-loop')

    D_loss = []
    data = []
    for j in iter_bar:
        with torch.no_grad():
            input, target = next(data_iter)
            if args.gen:
                # Score generated samples instead of real ones
                z, _ = model.buildNoiseData(target.size(0),
                                            inputLabels=target,
                                            skipAtts=True)
                input = g_net(z)
            pred = d_net(input.float().to(device)).cpu()
            clf_loss = criterion.getCriterion(pred, target.cpu())

            # The last output column is the critic (W-distance) score
            D_loss.append(pred[:, -1])
            data.append(input.cpu())

        state_msg = (f'Iter: {j}; '
                     f'avg D_nloss: {sum(pred[:, -1])/len(pred[:, -1]):0.3f}, '
                     f'classif_loss: {clf_loss:0.3f}')
        iter_bar.set_description(state_msg)

    # Sort samples by the magnitude of their critic score and export the
    # 20 lowest- and 20 highest-scoring ones for listening.
    D_loss = torch.cat(D_loss)
    data = torch.cat(data)
    D_loss, idx = abs(D_loss).sort()

    audio_out = loader.postprocess(data[idx[:20]])
    saveAudioBatch(audio_out,
                   path=output_dir,
                   basename='low_W-distance',
                   sr=config["transform_config"]["sample_rate"])

    audio_out = loader.postprocess(data[idx[-20:]])
    saveAudioBatch(audio_out,
                   path=output_dir,
                   basename='high_W-distance',
                   sr=config["transform_config"]["sample_rate"])
    print("FINISHED!\n")
def generate(parser): parser.add_argument("--val", dest="val", action='store_true') parser.add_argument("--train", dest="train", action='store_true') parser.add_argument("--avg-net", dest="avg_net", action='store_true') parser.add_argument("--name", dest="name", default="") parser.add_argument("--dump-labels", dest="dump_labels", action="store_true") args = parser.parse_args() model, config, model_name = load_model_checkp(**vars(args)) latentDim = model.config.categoryVectorDim_G # We load a dummy data loader for post-processing transform_config = config['transform_config'] loader_config = config['loader_config'] processor = AudioProcessor(**transform_config) postprocess = processor.get_postprocessor() # Create output evaluation dir if args.val: name = args.name + '_val_labels' elif args.train: name = args.name + '_train_labels' else: name = args.name + '_rand_labels' if args.outdir == "": args.outdir = args.dir output_dir = mkdir_in_path(args.outdir, f"generation_samples") output_dir = mkdir_in_path(output_dir, model_name) output_dir = mkdir_in_path(output_dir, "random") output_dir = mkdir_in_path( output_dir, name + '_' + datetime.now().strftime('%Y-%m-%d_%H_%M')) dbname = loader_config['dbname'] loader = get_data_loader(dbname)(name=dbname + '_' + transform_config['transform'], preprocessing=processor, **loader_config) labels = None if model.config.ac_gan: if args.val: val_set = loader.get_validation_set()[1] perm = torch.randperm(val_set.size(0)) idx = perm[:args.n_gen] labels = val_set[idx] elif args.train: labels = torch.Tensor(random.sample(loader.metadata, k=args.n_gen)) else: labels = loader.get_random_labels(args.n_gen) z, _ = model.buildNoiseData(args.n_gen, inputLabels=labels, skipAtts=True) data_batch = [] with torch.no_grad(): for i in range(int(np.ceil(args.n_gen / args.batch_size))): data_batch.append( model.test(z[i * args.batch_size:args.batch_size * (i + 1)], toCPU=True, getAvG=args.avg_net).cpu()) data_batch = torch.cat(data_batch, dim=0) audio_out = map(postprocess, data_batch) saveAudioBatch(audio_out, path=output_dir, basename='sample', sr=config["transform_config"]["sample_rate"]) if args.dump_labels: with open(f"{output_dir}/params_in.txt", "a") as f: for i in tqdm(range(args.n_gen), desc='Creating Samples'): params = labels[i, :-1].tolist() f.writelines([f"{i}, {list(params)}\n"]) print("FINISHED!\n")
exp_name = config.get("name", "default")
checkpoint_dir = config["output_path"]
checkpoint_dir = mkdir_in_path(checkpoint_dir, exp_name)
# config["output_path"] = checkpoint_dir

# configure processor
print("Data manager configuration")
transform_config = config['transform_config']
audio_processor = AudioProcessor(**transform_config)

# configure loader
loader_config = config['loader_config']
dbname = loader_config.get('dbname', args.dataset)
loader_module = get_data_loader(dbname)
loader = loader_module(name=dbname + '_' + transform_config['transform'],
                       output_path=checkpoint_dir,
                       preprocessing=audio_processor,
                       **loader_config)
print(f"Loading data. Found {len(loader)} instances")

model_config['output_shape'] = audio_processor.get_output_shape()
config["model_config"] = model_config

# load checkpoint
print("Search and load last checkpoint")
checkpoint_state = getLastCheckPoint(checkpoint_dir,
                                     exp_name,
                                     iter=args.iter,
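# For reference, a hypothetical minimal config dict with the fields accessed
# above and in the loaders below.  The values are illustrative assumptions
# only, not the project's defaults:
EXAMPLE_CONFIG = {
    "name": "my_experiment",
    "output_path": "output",
    "transform_config": {"transform": "stft",
                         "sample_rate": 16000,
                         "fft_size": 1024},
    "loader_config": {"dbname": "my_dataset",
                      "criteria": {"size": 1000}},
    "model_config": {},
}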
def train_inception_model(name: str,
                          path: str,
                          labels: list,
                          config: str,
                          batch_size: int = 50,
                          n_epoch=100):
    output_path = mkdir_in_path(path, 'inception_models')
    output_file = join(output_path,
                       f"{name}_{datetime.now().strftime('%Y-%m-%d')}.pt")
    output_log = join(output_path,
                      f"{name}_{datetime.now().strftime('%Y-%m-%d')}.log")
    logging.basicConfig(filename=output_log, level=logging.INFO)

    assert os.path.exists(config), f"Path to config {config} does not exist"
    config = read_json(config)

    loader_config = config['loader_config']
    transform_config = config['transform_config']
    transform = transform_config['transform']
    dbname = loader_config.pop('dbname')
    loader_module = get_data_loader(dbname)
    processor = AudioProcessor(**transform_config)
    loader = loader_module(name=dbname + '_' + transform,
                           preprocessing=processor,
                           **loader_config)

    mel = MelScale(sample_rate=transform_config['sample_rate'],
                   fft_size=transform_config['fft_size'],
                   n_mel=transform_config.get('n_mel', 256),
                   rm_dc=True)

    val_data, val_labels = loader.get_validation_set()
    val_data = val_data[:, 0:1]

    att_dict = loader.header['attributes']
    att_classes = att_dict.keys()
    num_classes = sum(len(att_dict[k]['values']) for k in att_classes)

    data_loader = DataLoader(loader,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=2)

    device = "cuda" if GPU_is_available() else "cpu"
    inception_model = nn.DataParallel(
        SpectrogramInception3(num_classes, aux_logits=False))
    inception_model.to(device)

    optim = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                    inception_model.parameters()),
                             betas=[0, 0.99],
                             lr=0.001)
    criterion = ACGANCriterion(att_dict)

    epochs = trange(n_epoch, desc='train-loop')
    for i in epochs:
        data_iter = iter(data_loader)
        iter_bar = trange(len(data_iter), desc='epoch-loop')
        inception_model.train()
        for j in iter_bar:
            input, target = next(data_iter)
            input.requires_grad = True
            input = input.to(device)

            # take the magnitude channel and resize to the Inception input size
            input = mel(input.float())
            mag_input = F.interpolate(input[:, 0:1], (299, 299))

            optim.zero_grad()
            output = inception_model(mag_input.float())
            loss = criterion.getCriterion(output, target.to(device))
            loss.backward()

            state_msg = f'Iter: {j}; loss: {loss.item():0.2f} '
            iter_bar.set_description(state_msg)
            optim.step()

        # SAVE CHECK-POINT
        if i % 10 == 0:
            if isinstance(inception_model, torch.nn.DataParallel):
                torch.save(inception_model.module.state_dict(), output_file)
            else:
                torch.save(inception_model.state_dict(), output_file)

        # EVALUATION
        with torch.no_grad():
            inception_model.eval()
            val_i = int(np.ceil(len(val_data) / batch_size))
            vloss = 0
            y_pred = []
            y_true = []
            prec = {k: 0 for k in att_classes}
            for k in range(val_i):
                vlabels = val_labels[k * batch_size:batch_size * (k + 1)]
                vdata = val_data[k * batch_size:batch_size * (k + 1)]
                vdata = mel(vdata.float())
                vdata = F.interpolate(vdata, (299, 299))
                vpred = inception_model(vdata.to(device))
                vloss += criterion.getCriterion(vpred,
                                                vlabels.to(device)).item()
                vlabels_pred, _ = criterion.getPredictionLabels(vpred)
                y_pred.append(vlabels_pred)
                # y_true += list(vlabels)

            y_pred = torch.cat(y_pred)
            pred_labels = loader.index_to_labels(y_pred)
            true_labels = loader.index_to_labels(val_labels)

            # report per-attribute classification metrics, skipping attributes
            # trained with an MSE (regression) loss
            for c_idx, c in enumerate(att_classes):
                if att_dict[c]['loss'] == 'mse':
                    continue
                logging.info(c)

                pred = [l[c_idx] for l in pred_labels]
                true = [l[c_idx] for l in true_labels]

                cm = confusion_matrix(true, pred,
                                      labels=att_dict[c]['values'])
                print("")
                print("Confusion Matrix")
                print(cm)
                logging.info(cm)
                print("")

                target_names = [str(v) for v in att_dict[c]['values']]
                crep = classification_report(true, pred,
                                             labels=att_dict[c]['values'],
                                             target_names=target_names)
                logging.info(crep)
                print(crep)

            state_msg2 = f'epoch {i}; val_loss: {vloss / val_i: 0.2f}'
            logging.info(state_msg2)
            epochs.set_description(state_msg2)
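# Optional helper (not part of the original code): per-attribute accuracy
# computed from the same `pred_labels` / `true_labels` structures used for
# the confusion matrices above, i.e. lists of per-sample label tuples.
def per_attribute_accuracy(pred_labels, true_labels, att_index):
    """Fraction of samples whose predicted label matches the true label
    for the attribute at position `att_index`."""
    pred = [l[att_index] for l in pred_labels]
    true = [l[att_index] for l in true_labels]
    correct = sum(p == t for p, t in zip(pred, true))
    return correct / max(len(true), 1)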