# Assumed imports for this excerpt; Decoder and NewDataset are defined elsewhere in the repo.
import numpy as np
import torch
import librosa as lib
from scipy.io.wavfile import write


class ModelEvaluator:
    # This class creates an audio file from a trained model so that we can listen to the result.

    def __init__(self, model_path, dataset):
        self.model = Decoder()
        self.model.load_state_dict(torch.load(model_path))
        self.model.eval()
        self.data = NewDataset(dataset)
        # Take the suffix of the checkpoint path after the last underscore as the run name.
        found = False
        i = 1
        while not found:
            if model_path[-i] == '_':
                self.name = model_path[-i + 1:]
                found = True
            i += 1

    def play_output(self, output_path, key, speaker_id=None, iter=100, fs=16000, return_original=False):
        # Find the dataset index whose key contains the requested key.
        index = 0
        for k in self.data.keys:
            if key in k:
                break
            else:
                index += 1
        original_id, embedding, fft, n_frames_before_pad, mean, std = self.data.__getitem__(index)

        if speaker_id is None:
            speaker_id = int(original_id)
        speaker_id = torch.tensor([speaker_id]).long()
        embedding = torch.from_numpy(embedding[:n_frames_before_pad]).unsqueeze(0).long()

        output = self.model(embedding, speaker_id).squeeze(0)
        output = output.detach().numpy()

        # Undo the per-utterance normalization before inverting the spectrogram.
        mean = np.repeat(mean, n_frames_before_pad, axis=1)
        std = np.repeat(std, n_frames_before_pad, axis=1)
        output = np.multiply(output, std) + mean

        '''
        import matplotlib.pyplot as plt
        plt.figure(figsize=(output.shape[0]//20, 12))
        #plt.subplot(1, 2, 1)
        plt.matshow(output, fignum=False)
        #plt.subplot(1, 2, 2)
        #plt.matshow(embedding[0,:,:].T, fignum=False)
        plt.show()
        '''

        # Invert the log-magnitude spectrogram with Griffin-Lim and write the result as a wav file.
        grif_out = lib.core.griffinlim(np.exp(output), n_iter=iter, hop_length=160, win_length=512)
        write(output_path + key + '_' + self.name + '.wav', fs, grif_out)
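# Example usage (a minimal sketch; the checkpoint path, dataset name, output
# directory, and utterance key below are hypothetical placeholders, not paths
# taken from this repository):
#
#     evaluator = ModelEvaluator('./models/decoder_run1', 'english')
#     evaluator.play_output('./outputs/', 'some_utterance_key', iter=100, fs=16000)
#
# With a checkpoint named 'decoder_run1', this would write
# './outputs/some_utterance_key_run1.wav' reconstructed via Griffin-Lim.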
log_loc = './logs/'
now = datetime.datetime.now()
time = str(now.day) + '.' + str(now.month) + '.' + str(now.year) + '__' + str(now.hour) + ':' + str(now.minute)
logFileName = arch + '_' + embed_type + '_' + embed_crop + '_' + dataset + '_' + run_name + '_' + time + '.log'
log = get_logger('zerospeech', logFileName)

#server = 'gpu1'
server = 'gpu2'
if server == 'gpu1':
    prefix = '/mnt/gpu2'
else:
    prefix = ''
output_path = prefix + '/home/mansur/zerospeech/models/cnn_models/'

device = "cuda"
data = NewDataset(dataset)
print('Data is ready')
loader = DataLoader(data, batch_size=128, num_workers=8, shuffle=True)

model = CNN().to(device)
criterion = nn.MSELoss()  # first try to reconstruct the spectrum
optG = optim.Adam(model.parameters())

max_epoch = 300
print('Start Training')
for epoch in range(max_epoch):
    totalLoss = 0
    lens = 0.0
    counter = 0.0
    for speaker, embedding, fft, lengths, _, _ in loader:
        max_len = int(lengths.float().mean())
        embedding = embedding.to(device).long()[:, :max_len]
history = 0.0
new = 1.0
BS = {'64': 200, '128': 90, '256': 40}

for epoch in range(1, opt.epoches + 1):
    batch_size = BS['%d' % finesize]
    print('Batch Size: %d' % (batch_size))
    Loss_Dis_ = []
    Loss_Stylied_2_ = []
    Loss_D_ = []

    ############### DATASET ##################
    dataset = NewDataset(opt.loadSize, opt.fineSize, opt.flip, finesize)
    loader_ = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size,
                                          shuffle=True, num_workers=8)
    loader = iter(loader_)
    iter_per_epoch = int(len(dataset) / batch_size)

    for iteration in range(1, iter_per_epoch + 1):
        netG.zero_grad()
        if history > 0 and new < 1:
            history -= 0.001
            new += 0.001
opt.cuda = (opt.gpu != -1)
if opt.manualSeed is None:
    opt.manualSeed = random.randint(1, 10000)
print("Random Seed: ", opt.manualSeed)
random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)
if opt.cuda:
    torch.cuda.manual_seed_all(opt.manualSeed)
    cudnn.benchmark = True
device = torch.device("cuda:%d" % (opt.gpu) if opt.cuda else "cpu")

############### Dataset ##################
dataset = NewDataset(opt.loadSize, opt.fineSize, opt.flip)
loader_ = torch.utils.data.DataLoader(dataset=dataset, batch_size=opt.batchSize,
                                      shuffle=True, num_workers=8)
loader = iter(loader_)

if opt.domain_adaptation:
    wild_dataset = NewDataset(opt.loadSize, opt.fineSize, opt.flip)
    wild_dataset.unsupervied = True
    wild_loader_ = torch.utils.data.DataLoader(dataset=wild_dataset, batch_size=opt.batchSize,
                                               shuffle=True, num_workers=8)
    wild_loader = iter(wild_loader_)
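# A minimal sketch (assumed, not taken from the original script) of how a manual
# iterator like `loader` above is typically advanced inside the training loop,
# re-creating it once the underlying DataLoader is exhausted:
#
#     try:
#         batch = next(loader)
#     except StopIteration:
#         loader = iter(loader_)
#         batch = next(loader)
#
# The same pattern would apply to `wild_loader` when domain adaptation is enabled.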
    # Tail of the WGAN-GP gradient-penalty helper: score the interpolated samples,
    # differentiate the critic output w.r.t. them, and penalize gradient norms away from 1.
    disc_interpolates = netD(interpolates)
    gradients = grad(outputs=disc_interpolates, inputs=interpolates,
                     grad_outputs=torch.ones(disc_interpolates.size()).to(device),
                     create_graph=True, retain_graph=True, only_inputs=True)[0]
    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean()
    return gradient_penalty


############### Dataset ##################
dataset = NewDataset(opt.style_path, opt.glyph_path)
loader_ = torch.utils.data.DataLoader(dataset=dataset, batch_size=opt.batchSize,
                                      shuffle=True, num_workers=8)
loader = iter(loader_)

########### Training ###########
CRITIC_ITERS = 2
lambda_gp = 10
current_size = 256
Min_loss = 100000
for iteration in range(1, opt.niter + 1):
    ############################