Example #1
def test_model(model, data_loader, args):
    model.eval()
    predictions = list()
    targets = list()
    inds = list()
    tqdm_loader = tqdm(enumerate(data_loader))
    for step, (features, truth_data) in tqdm_loader:
        # Keep the sample indices if the dataset provides them.
        if 'inds' in features:
            inds.append(features['inds'])
        features = to_var(features, args.device)
        truth_data = to_var(truth_data, args.device)
        if args.lossinside:
            _, outputs = model(features, truth_data, args, loss_func=None)
        else:
            outputs = model(features, args)
        # outputs = model(features, truth_data=truth_data)

        # Collect targets and detached predictions on the CPU.
        targets.append(truth_data.cpu().numpy())
        predictions.append(outputs.cpu().detach().numpy())
    pre2 = np.concatenate(predictions).squeeze()
    tar2 = np.concatenate(targets)
    if len(inds) > 0:
        print(len(inds))
        print(inds[0])
        inds = np.concatenate(inds)
    else:
        inds = None
    metric = calculate_metrics(pre2, tar2, args, plot=True, inds=inds)
    print(metric)
    with open(f'data/result_{args.model}.txt', 'a') as f:
        f.write(time.strftime("%m/%d %H:%M:%S", time.localtime(time.time())))
        f.write(f"epoch:{args.epochs} lr:{args.lr}\ndataset:{args.dataset} identify:{args.identify}\n")
        f.write(f"{args.model_config}\n")
        f.write(f"{args.data_config}\n")
        f.write(f"{metric}\n\n")
Example #2
    def parse_batch(self, batch):
        text_padded, input_lengths, mel_padded, gate_padded, output_lengths = batch
        text_padded = to_var(text_padded).long()
        input_lengths = to_var(input_lengths).long()
        max_len = torch.max(input_lengths.data).item()
        mel_padded = to_var(mel_padded).float()
        gate_padded = to_var(gate_padded).float()
        output_lengths = to_var(output_lengths).long()

        return ((text_padded, input_lengths, mel_padded, max_len,
                 output_lengths), (mel_padded, gate_padded))
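A call site for parse_batch is not shown here; a hypothetical sketch, assuming train_loader yields the five-element tuples above and criterion is the matching loss, could look like this:

# Hypothetical usage of parse_batch inside a training step.
for batch in train_loader:
    x, y = model.parse_batch(batch)  # x: model inputs, y: (mel_padded, gate_padded) targets
    y_pred = model(x)
    loss = criterion(y_pred, y)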
Example #3
def infer(wav_path, text, model):
	sequence = text_to_sequence(text, hps.text_cleaners)
	sequence = to_var(torch.IntTensor(sequence)[None, :]).long()
	mel = melspectrogram(load_wav(wav_path))
	# Number of leftover frames that do not fill a whole decoder step.
	r = mel.shape[1] % hps.n_frames_per_step
	# Note: when r == 0 the slice below is empty and the guard returns None.
	mel_in = to_var(torch.Tensor([mel[:, :-r]]))
	if mel_in.shape[2] < 1:
		return None
	sequence = torch.cat([sequence, sequence], 0)
	mel_in = torch.cat([mel_in, mel_in], 0)
	_, mel_outputs_postnet, _, _ = model.teacher_infer(sequence, mel_in)
	# ret aliases mel, so the predicted frames overwrite the original array in place.
	ret = mel
	ret[:, :-r] = to_arr(mel_outputs_postnet[0])
	return ret
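Note that infer returns None whenever the trimmed mel ends up empty, notably when the mel length is an exact multiple of hps.n_frames_per_step (the :-0 slice removes everything), so callers should check the result. A short, hypothetical usage sketch:

# Hypothetical usage: skip utterances that infer() rejects.
mel = infer("clips/sample.wav", "hello world", model)
if mel is not None:
    np.save("teacher_mel.npy", mel)  # assumes numpy is imported as np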
Example #4
    def inference(self, inputs, mode='train'):
        if mode == 'train':
            vid_inputs, vid_lengths, mels, max_len, output_lengths = inputs
        else:
            vid_inputs = inputs
            vid_inputs = to_var(torch.from_numpy(vid_inputs)).float()
            # Move the channel axis to the front and add a batch dimension.
            vid_inputs = vid_inputs.permute(3, 0, 1, 2).unsqueeze(0).contiguous()

        # vid_lengths, output_lengths = vid_lengths.data, output_lengths.data
        # embedded_inputs = self.embedding(inputs).transpose(1, 2)

        # .type(torch.FloatTensor) yields a CPU copy; .cuda() below moves it back to the GPU.
        embedded_inputs = vid_inputs.type(torch.FloatTensor)

        encoder_outputs = self.encoder.inference(embedded_inputs.cuda())

        mel_outputs, gate_outputs, alignments = self.decoder.inference(
            encoder_outputs)
        mel_outputs_postnet = self.postnet(mel_outputs)

        mel_outputs_postnet = mel_outputs + mel_outputs_postnet
        outputs = self.parse_output(
            [mel_outputs, mel_outputs_postnet, gate_outputs, alignments])

        return outputs
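Outside of training the method expects a raw numpy video clip; a hypothetical call, assuming the frames are stored channel-last (the permute above moves that axis to the front), could be:

# Hypothetical usage of inference() in non-training mode.
video = np.load("speaker_clip.npy")  # assumed layout: (frames, height, width, channels)
mel_outputs, mel_outputs_postnet, gate_outputs, alignments = model.inference(video, mode='infer')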
Example #5
	def teacher_infer(self, inputs, mels):
		# Input lengths, sorted in descending order.
		il, _ = torch.sort(torch.LongTensor([len(x) for x in inputs]),
							dim=0, descending=True)
		text_lengths = to_var(il)

		embedded_inputs = self.embedding(inputs).transpose(1, 2)

		encoder_outputs = self.encoder(embedded_inputs, text_lengths)

		mel_outputs, gate_outputs, alignments = self.decoder(
			encoder_outputs, mels, memory_lengths=text_lengths)
		
		mel_outputs_postnet = self.postnet(mel_outputs)
		mel_outputs_postnet = mel_outputs + mel_outputs_postnet

		return self.parse_output(
			[mel_outputs, mel_outputs_postnet, gate_outputs, alignments])
Example #6
    def parse_batch_vid(self, batch):
        vid_padded, input_lengths, mel_padded, gate_padded, target_lengths, split_infos, embed_targets = batch
        vid_padded = to_var(vid_padded).float()
        input_lengths = to_var(input_lengths).float()
        mel_padded = to_var(mel_padded).float()
        gate_padded = to_var(gate_padded).float()
        target_lengths = to_var(target_lengths).float()

        max_len_vid = split_infos[0].data.item()
        max_len_target = split_infos[1].data.item()

        return ((vid_padded, input_lengths, mel_padded, max_len_vid,
                 target_lengths), (mel_padded, gate_padded))
Example #7
def infer(text, model):
    sequence = text_to_sequence(text, hps.text_cleaners)
    sequence = to_var(torch.IntTensor(sequence)[None, :]).long()
    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
    return (mel_outputs, mel_outputs_postnet, alignments)
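A minimal, hypothetical end-to-end call (the trained model and hps are assumed to be loaded elsewhere):

# Hypothetical usage: synthesize a mel spectrogram from text.
model.eval()
with torch.no_grad():
    mel_outputs, mel_outputs_postnet, alignments = infer("Hello, world.", model)
# mel_outputs_postnet is the refined spectrogram that would normally be passed to a vocoder.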
Example #8
def train_model(model: nn.Module, data_loaders: Dict[str, DataLoader],
                loss_func: callable, optimizer: optim.Optimizer,
                model_folder: str, tensorboard_folder: str,
                args, **kwargs):
    num_epochs = args.epochs
    phases = ['train', 'val', 'test']

    writer = SummaryWriter(tensorboard_folder)

    since = time.perf_counter()  # time.clock() was removed in Python 3.8

    # save_dict, best_rmse = {'model_state_dict': copy.deepcopy(model.state_dict()), 'epoch': 0}, 100000
    save_dict, best_pcc = {'model_state_dict': copy.deepcopy(model.state_dict()), 'epoch': 0}, 0

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=.2, patience=5, threshold=1e-3, min_lr=1e-6)

    try:
        for epoch in range(num_epochs):
            running_loss = {phase: 0.0 for phase in phases}
            for phase in phases:
                if phase == 'train':
                    model.train()
                else:
                    model.eval()

                steps, predictions, targets = 0, list(), list()
                tqdm_loader = tqdm(enumerate(data_loaders[phase]))
                for step, (features, truth_data) in tqdm_loader:
                    features = to_var(features, args.device)
                    truth_data = to_var(truth_data, args.device)
                    with torch.set_grad_enabled(phase == 'train'):
                        if args.lossinside:
                            loss, outputs = model(features, truth_data, args, loss_func=loss_func)
                        else:
                            outputs = model(features, args)
                            loss = loss_func(truth=truth_data, predict=outputs)
                        # loss = loss_func(outputs, truth_data)

                        if phase == 'train':
                            if torch.isnan(loss):
                                print("=============LOSS NAN============")
                                print(features)
                                print(truth_data)
                                print(outputs)
                            else:
                                optimizer.zero_grad()
                                loss.backward()
                                optimizer.step()

                    targets.append(truth_data.cpu().numpy())
                    with torch.no_grad():
                        predictions.append(outputs.cpu().detach().numpy())

                    # .item() keeps running_loss a plain float and frees the graph each step.
                    running_loss[phase] += loss.item() * truth_data.size(0)
                    steps += truth_data.size(0)

                    tqdm_loader.set_description(
                        f'{phase} epoch: {epoch}, {phase} loss: {running_loss[phase] / steps}')

                    # Workaround for CPU memory growing during training; cause unknown, but it helps.
                    torch.cuda.empty_cache()
                # Performance metrics for this phase.
                predictions = np.concatenate(predictions)
                targets = np.concatenate(targets)
                # print(2)
                # print(predictions[:3, :3])
                # print(targets[:3, :3])
                scores = calculate_metrics(predictions.reshape(predictions.shape[0], -1),
                                           targets.reshape(targets.shape[0], -1), args, plot=epoch % 5 == 0, **kwargs)
                # print(3)
                writer.add_scalars(f'score/{phase}', scores, global_step=epoch)
                with open(model_folder+"/output.txt", "a") as f:
                    f.write(f'{phase} epoch: {epoch}, {phase} loss: {running_loss[phase] / steps}\n')
                    f.write(str(scores))
                    f.write('\n')
                    f.write(str(time.time()))
                    f.write("\n\n")
                print(scores)
                # if phase == 'val' and scores['RMSE'] < best_rmse:
                if phase == 'val' and scores['pearr'] > best_pcc:
                    best_pcc = scores['pearr']
                    # best_rmse = scores['RMSE']
                    save_dict.update(model_state_dict=copy.deepcopy(model.state_dict()),
                                     epoch=epoch,
                                     optimizer_state_dict=copy.deepcopy(optimizer.state_dict()))

            scheduler.step(running_loss['train'])

            writer.add_scalars('Loss', {
                f'{phase} loss': running_loss[phase] / len(data_loaders[phase].dataset) for phase in phases},
                               global_step=epoch)
    finally:
        time_elapsed = time.perf_counter() - since
        print(f"cost {time_elapsed} seconds")

        save_model(f"{model_folder}/best_model.pkl", **save_dict)
        save_model(f"{model_folder}/final_model.pkl",
                   **{'model_state_dict': copy.deepcopy(model.state_dict()),
                      'epoch': num_epochs,
                      'optimizer_state_dict': copy.deepcopy(optimizer.state_dict())})
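The save_model helper called in the finally block is not part of this snippet; a minimal sketch, assuming it simply serializes its keyword arguments with torch.save, could be:

# Hypothetical sketch of the save_model helper referenced above.
import torch

def save_model(path, **kwargs):
    # kwargs typically carries model_state_dict, optimizer_state_dict and epoch.
    torch.save(kwargs, path)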