Example #1
def init():
    """ Sample script """

    import preprocess
    if args.use_stopwords == 1:
        with open("stopwords_en.txt", encoding='utf-8') as f:
            stopwords = set(line.strip() for line in f)
    else:
        stopwords = set()

    word_to_file = {}
    word_to_file, _, files = preprocess.get_dataset(dataset=args.data,
                                                    type="train")

    if args.use_full_vocab == 1:
        valid_vocab = -1
    else:
        valid_vocab = word_to_file.keys()
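    # A valid_vocab of -1 appears to signal "use the full vocabulary"; otherwise only words from the training files are kept.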

    ### this is what you care about
    encoder = BertWordFromTextEncoder(valid_vocab=valid_vocab)
    encoder.test_encoder()
    encoder.encode_docs(docs=files,
                        save_fn=args.save_fn,
                        agg_by=args.agg_by,
                        layer=args.nlayer)
Example #2
def experiment_fn(run_config, params):
    run_config = run_config.replace(
        save_checkpoints_steps=params.min_eval_frequency)
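    # Checkpoints are saved every min_eval_frequency steps, so each evaluation runs against a fresh checkpoint.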
    estimator = get_estimator(run_config, params)
    # Setup data loaders
    if params.run_preprocess:
        print('Running preprocess')
        datasets = preprocess.get_dataset(params.data_path)
    else:
        datasets = preprocess.preprocess_ego(params.data_path)

    train_input_fn, train_input_hook = get_train_inputs(
        batch_size=data.BATCH_SIZE, datasets=datasets)
    eval_input_fn, eval_input_hook = get_test_inputs(
        batch_size=data.BATCH_SIZE, datasets=datasets)
    # Define the experiment
    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,  # Estimator
        train_input_fn=train_input_fn,  # First-class function
        eval_input_fn=eval_input_fn,  # First-class function
        train_steps=params.train_steps,  # Mini-batch steps
        min_eval_frequency=params.min_eval_frequency,  # Eval frequency
        train_monitors=[train_input_hook],  # Hooks for training
        eval_hooks=[eval_input_hook],  # Hooks for evaluation
        eval_steps=None  # Use evaluation feeder until its empty
    )
    return experiment
Example #3
def main():
    dataset = get_dataset()

    modelo.train()
    writer = SummaryWriter("runs/transformer")

    estep = 0
    for epoch in range(NUM_EPOCHS):
        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                shuffle=True)
        pbar = tqdm(dataloader)
        losses = 0
        for i, data in enumerate(pbar):
            estep = estep + 1
            pbar.set_description("Processing at epoch %d" % epoch)
            character, mel_input, pos_text, pos_mel, _ = data
            character = character.to(DEVICE)
            mel_input = mel_input.to(DEVICE)
            pos_text = pos_text.to(DEVICE)
            pos_mel = pos_mel.to(DEVICE)

            output = modelo(character, mel_input, pos_text, pos_mel)
            # print(output)
            if estep == 1:
                writer.add_graph(
                    modelo,
                    input_to_model=[character, mel_input, pos_text, pos_mel])

            # print("output modelo...."+str(output.shape))
            # print("output trasformado..."+str(output.reshape(-1, output.shape[-1]).shape))
            # print("caracter ......"+str(character.reshape(-1).shape))
            optimizer.zero_grad()
            loss = loss_fn(output.reshape(-1, output.shape[-1]),
                           character.reshape(-1))
            output = output.transpose(0, 1)
            loss2 = loss.item()
            writer.add_scalar("loss :", loss2, estep)
            # print("/////////////////")
            # print(np.argmax(output[0].detach().numpy(),axis=1))
            print("loss..........." + str(loss2))
            # print("Epoch.........."+str(epoch))

            loss.backward()
            optimizer.step()
            losses += loss.item()
        writer.add_scalar("loss2 :", losses, epoch)
        if (epoch + 1) % hp.save_step == 0:
            t.save(
                {
                    'model': modelo.state_dict(),
                    'optimizer': optimizer.state_dict()
                },
                os.path.join(hp.checkpoint_path,
                             'checkpoint_transformer_%d.pth.tar' % epoch))
    writer.close()
Example #4
def main(argv):
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    dataset = preprocessor.get_dataset(
        preprocessor.load_dataset(FLAGS.dataset_path),
        preprocessor.load_json(FLAGS.split_path))
    preprocessor.write_dataset(dataset, FLAGS.save_path)
    token_vocab = preprocessor.get_token_vocab(FLAGS.save_path)
    preprocessor.write_token_vocab(token_vocab, FLAGS.save_path)
Example #5
def main():
    # hyperparameters
    num_layers = 4
    d_model = 128
    dff = 512
    num_heads = 8
    dropout_rate = 0.1
    epochs = 20
    pe_input, pe_target = 500, 500

    # prepare dataset
    train_dataset, val_dataset, enc_vocab_size, dec_vocab_size = get_dataset(
        trainfile='data/retrosynthesis-train.smi',
        validfile='data/retrosynthesis-valid.smi',
        n_read_threads=5, BUFFER_SIZE=20000, BATCH_SIZE=64)


    input_vocab_size = enc_vocab_size + 2
    target_vocab_size = dec_vocab_size + 2

    # build transformer model
    transformer = Transformer(num_layers, d_model, num_heads, dff,
                              input_vocab_size, target_vocab_size,
                              pe_input=pe_input,
                              pe_target=pe_target,
                              rate=dropout_rate)

    # Create optimizer
    learning_rate = CustomSchedule(d_model)
    optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

    # create model checkpoint
    ckpt_manager = get_ckpt_manager(transformer, optimizer)

    # training
    # train(train_dataset, transformer, epochs, ckpt_manager, optimizer)

    # evaluating


    # predicting
    inp_sequence = "Ic1ccc2n(CC(=O)N3CCCCC3)c3CCN(C)Cc3c2c1"
    reactant = predict(transformer, inp_sequence, max_length=160)
    print('Input Product:       {}'.format(inp_sequence))
    print('Predicted Reactants: {}'.format(reactant))
Example #6
def run_test(model,
             fit_kwargs=None,
             predict_kwargs=None,
             seed=None,
             save_json=False,
             save_tex=False,
             df=False,
             verbose=False,
             position=0):
    from preprocess import get_dataset, preprocess_all, dataset_to_X_y, RUN_FEATURES
    from metrics import score_regression
    if not callable(model):
        modelfn = lambda: model
    else:
        modelfn = model
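    # Wrap plain model instances in a factory; callables are assumed to build a fresh model on every call.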
    _model = modelfn()
    assert hasattr(_model, "fit") and hasattr(_model, "predict")
    fit_kwargs = fit_kwargs or dict()
    predict_kwargs = predict_kwargs or dict()
    dataset = get_dataset(seed=seed)
    d = dict()
    dic_feat = RUN_FEATURES.items()
    if verbose:
        dic_feat = tqdm(dic_feat, desc="run", position=position)
    for k, v in dic_feat:
        _model = modelfn()
        run_data = preprocess_all(dataset, subset=v)
        X_train, y_train, X_val, y_val = dataset_to_X_y(
            run_data, keys=["train", "validation"], datatype="numpy")
        _model.fit(X_train, y_train, **fit_kwargs)
        y_train_hat = _model.predict(X_train, **predict_kwargs)
        train_loss = score_regression(y_train, y_train_hat)
        y_val_hat = _model.predict(X_val, **predict_kwargs)
        val_loss = score_regression(y_val, y_val_hat)
        d[k] = dict(train_loss=train_loss, val_loss=val_loss)
    if save_json:
        jsonsave(d, _model.__class__.__name__ + ".json")
    if save_tex:
        dumptex(d, _model.__class__.__name__ + ".tex")
    if not df:
        return d
    return pd.DataFrame(d)
Example #7
def make_history_month_features_all():
    pw_df_list = []
    dataset = get_dataset()
    dataset.power_consumption = dataset.power_consumption
    for user_id in get_user_id_list():
        print(user_id)
        if not check_empty(user_id):
            user_df = filter_user_id(dataset,
                                     user_id).resample('1D').mean().fillna(1)
            #add to list
            pw_df_list.append((user_id, user_df))
            #make_features(user_id,user_df)

    p = m_Pool(64)
    for arg in pw_df_list:
        p.apply_async(make_history_month_features, args=arg)

    print('Waiting for all subprocesses done...')
    p.close()
    p.join()
Example #8
def main():
    print('starting here...')
    dataset = get_dataset()
    global_step = 0

    m = nn.DataParallel(Model().cuda())

    # if LOADCHECKPOINT:
    #     m.load_state_dict(t.load(hp.checkpoint_file_transformer))
    #     print('loaded checkpoint...')
    #     m.eval()

    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    pos_weight = t.FloatTensor([5.]).cuda()
    writer = SummaryWriter()

    for epoch in range(hp.epochs):
        print('at epoch', epoch)
        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=1)
        pbar = tqdm(dataloader)
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            eeg_array, mel, mel_input, pos_eeg_signal, pos_mel, _ = data

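            # pos_mel is 0 at padded frames; flipping the 0/1 mask marks padding positions as stop targets.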
            stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)

            eeg_array = eeg_array.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_eeg_signal = pos_eeg_signal.cuda()
            pos_mel = pos_mel.cuda()

            print('before m.forward()...')

            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(
                eeg_array, mel_input, pos_eeg_signal, pos_mel)

            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)

            loss = mel_loss + post_mel_loss

            writer.add_scalars('training_loss', {
                'mel_loss': mel_loss,
                'post_mel_loss': post_mel_loss,
            }, global_step)

            writer.add_scalars(
                'alphas', {
                    'encoder_alpha': m.module.encoder.alpha.data,
                    'decoder_alpha': m.module.decoder.alpha.data,
                }, global_step)

            if global_step % hp.image_step == 1:

                # summarywriter add_image params
                num_images_per_loop = 4
                writer_start_val = int(hp.batch_size / 2)
                writer_end_val = int(hp.batch_size * num_images_per_loop)
                writer_step_val = int(hp.batch_size)

                for i, prob in enumerate(attn_probs):
                    num_h = prob.size(0)
                    for j in range(writer_start_val, writer_end_val,
                                   writer_step_val):
                        x = vutils.make_grid([prob[j] * 255])
                        # x  = prob[j] * 255
                        writer.add_image('Attention_%d_0' % global_step, x,
                                         i * num_images_per_loop + j)

                for i, prob in enumerate(attns_enc):
                    num_h = prob.size(0)
                    for j in range(writer_start_val, writer_end_val,
                                   writer_step_val):
                        x = vutils.make_grid([prob[j] * 255])
                        # x  = prob[j] * 255
                        writer.add_image('Attention_enc_%d_0' % global_step, x,
                                         i * num_images_per_loop + j)

                for i, prob in enumerate(attns_dec):
                    num_h = prob.size(0)
                    for j in range(writer_start_val, writer_end_val,
                                   writer_step_val):
                        x = vutils.make_grid([prob[j] * 255])
                        # x  = prob[j] * 255
                        writer.add_image('Attention_dec_%d_0' % global_step, x,
                                         i * num_images_per_loop + j)

            optimizer.zero_grad()
            # Calculate gradients
            loss.backward()

            nn.utils.clip_grad_norm_(m.parameters(), 1.)

            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))
Example #9
def synthesis(args):
    m = Model()
    m_post = ModelPostNet()
    m_stop = ModelStopToken()
    m.load_state_dict(load_checkpoint(args.restore_step1, "transformer"))
    m_stop.load_state_dict(load_checkpoint(args.restore_step3, "stop_token"))
    m_post.load_state_dict(load_checkpoint(args.restore_step2, "postnet"))

    m = m.cuda()
    m_post = m_post.cuda()
    m_stop = m_stop.cuda()
    m.train(False)
    m_post.train(False)
    m_stop.train(False)
    test_dataset = get_dataset(hp.test_data_csv)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=1,
                                 shuffle=False,
                                 collate_fn=collate_fn_transformer,
                                 drop_last=True,
                                 num_workers=1)
    ref_dataset = get_dataset(hp.test_data_csv)
    ref_dataloader = DataLoader(ref_dataset,
                                batch_size=1,
                                shuffle=True,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=1)

    writer = get_writer(hp.checkpoint_path, hp.log_directory)

    ref_dataloader_iter = iter(ref_dataloader)
    for i, data in enumerate(test_dataloader):
        character, mel, mel_input, pos_text, pos_mel, text_length, mel_length, fname = data
        ref_character, ref_mel, ref_mel_input, ref_pos_text, ref_pos_mel, ref_text_length, ref_mel_length, ref_fname = next(ref_dataloader_iter)
        stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)
        mel_input = t.zeros([1,1,80]).cuda()
        stop = []
        character = character.cuda()
        mel = mel.cuda()
        mel_input = mel_input.cuda()
        pos_text = pos_text.cuda()
        pos_mel = pos_mel.cuda()
        ref_character = ref_character.cuda()
        ref_mel = ref_mel.cuda()
        ref_mel_input = ref_mel_input.cuda()
        ref_pos_text = ref_pos_text.cuda()
        ref_pos_mel = ref_pos_mel.cuda()

        with t.no_grad():
            start=time.time()
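            # Autoregressive decoding: feed the growing mel sequence back in until the stop token exceeds 0.5 or max_len is reached.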
            for i in range(args.max_len):
                pos_mel = t.arange(1,mel_input.size(1)+1).unsqueeze(0).cuda()
                mel_pred, postnet_pred, attn_probs, decoder_output, attns_enc, attns_dec, attns_style = m.forward(character, mel_input, pos_text, pos_mel, ref_mel, ref_pos_mel)
                stop_token = m_stop.forward(decoder_output)
                mel_input = t.cat([mel_input, postnet_pred[:,-1:,:]], dim=1)
                stop.append(t.sigmoid(stop_token).squeeze(-1)[0,-1])
                if stop[-1] > 0.5:
                    print("stop token at " + str(i) + " is :", stop[-1])
                    print("model inference time: ", time.time() - start)
                    break
            if stop[-1] == 0:
                continue
            mag_pred = m_post.forward(postnet_pred)
            inf_time = time.time() - start
            print("inference time: ", inf_time)

        wav = spectrogram2wav(mag_pred.squeeze(0).cpu().numpy())
        print("rtx : ", (len(wav)/hp.sr) / inf_time)
        wav_path = os.path.join(hp.sample_path, 'wav')
        if not os.path.exists(wav_path):
            os.makedirs(wav_path)
        write(os.path.join(wav_path, "text_{}_ref_{}_synth.wav".format(fname, ref_fname)), hp.sr, wav)
        print("written as text{}_ref_{}_synth.wav".format(fname, ref_fname))
        attns_enc_new = []
        attns_dec_new = []
        attn_probs_new = []
        attns_style_new = []
        for i in range(len(attns_enc)):
            attns_enc_new.append(attns_enc[i].unsqueeze(0))
            attns_dec_new.append(attns_dec[i].unsqueeze(0))
            attn_probs_new.append(attn_probs[i].unsqueeze(0))
            attns_style_new.append(attns_style[i].unsqueeze(0))
        attns_enc = t.cat(attns_enc_new, 0)
        attns_dec = t.cat(attns_dec_new, 0)
        attn_probs = t.cat(attn_probs_new, 0)
        attns_style = t.cat(attns_style_new, 0)

        attns_enc = attns_enc.contiguous().view(attns_enc.size(0), 1,
                                                hp.n_heads, attns_enc.size(2),
                                                attns_enc.size(3))
        attns_enc = attns_enc.permute(1, 0, 2, 3, 4)
        attns_dec = attns_dec.contiguous().view(attns_dec.size(0), 1,
                                                hp.n_heads, attns_dec.size(2),
                                                attns_dec.size(3))
        attns_dec = attns_dec.permute(1, 0, 2, 3, 4)
        attn_probs = attn_probs.contiguous().view(attn_probs.size(0), 1,
                                                  hp.n_heads,
                                                  attn_probs.size(2),
                                                  attn_probs.size(3))
        attn_probs = attn_probs.permute(1, 0, 2, 3, 4)
        attns_style = attns_style.contiguous().view(attns_style.size(0), 1,
                                                    hp.n_heads,
                                                    attns_style.size(2),
                                                    attns_style.size(3))
        attns_style = attns_style.permute(1, 0, 2, 3, 4)

        save_dir = os.path.join(hp.sample_path, 'figure', "text_{}_ref_{}_synth.wav".format(fname, ref_fname))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        writer.add_alignments(attns_enc.detach().cpu(), attns_dec.detach().cpu(), attn_probs.detach().cpu(), attns_style.detach().cpu(), mel_length, text_length, args.restore_step1, 'Validation', save_dir)
Example #10
def main():

    train_dataset = get_dataset(hp.train_data_csv)
    val_dataset = get_dataset(hp.val_data_csv)
    restore_step = hp.restore_step
    global_step = restore_step
    if restore_step != 0:
        restore_flag = True
    else:
        restore_flag = False

    m = Model()
    if os.path.exists('./checkpoints/checkpoint_%s_%d.pth.tar' %
                      ('transformer', global_step)):
        state_dict = t.load('./checkpoints/checkpoint_%s_%d.pth.tar' %
                            ('transformer', global_step))
        new_state_dict = OrderedDict()
        for k, value in state_dict['model'].items():
            key = k[7:]
            new_state_dict[key] = value

        m.load_state_dict(new_state_dict)

    m = nn.DataParallel(m.cuda())
    m.train()

    vocoder = SmartVocoder(Hyperparameters(parse_args()))
    vocoder.load_state_dict(
        t.load('./mel2audio/checkpoint_step000588458.pth')["state_dict"])
    vocoder = vocoder.cuda()
    vocoder.eval()

    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    writer = get_writer(hp.checkpoint_path, hp.log_directory)
    cur_epoch = 0

    for epochs in range(hp.epochs):
        train_dataloader = DataLoader(train_dataset,
                                      batch_size=hp.batch_size,
                                      shuffle=True,
                                      collate_fn=collate_fn_transformer,
                                      drop_last=True,
                                      num_workers=1)
        val_dataloader = DataLoader(val_dataset,
                                    batch_size=hp.batch_size,
                                    shuffle=True,
                                    collate_fn=collate_fn_transformer,
                                    drop_last=True)
        if restore_flag:
            cur_epoch = int(restore_step / len(train_dataloader))
            restore_flag = not restore_flag
        for i, data in enumerate(train_dataloader):
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mag, mel_input, pos_text, pos_mel, text_length, mel_length, fname = data

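            # Fill a LongTensor with the batch-max mel length (presumably so the model pads every item to a common length).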
            mel_max_length_array = t.zeros(mel_length.size(0)).long()
            mel_max_length_array[:] = t.max(mel_length)
            mel_max_length_array = mel_max_length_array.cuda()

            character = character.cuda()
            mel = mel.cuda()
            mag = mag.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()
            text_length = text_length.cuda()
            mel_length = mel_length.cuda()
            loading_time = time.time()
            mask = get_mask_from_lengths(mel_length).cuda()

            mel_pred, postnet_pred, attn_probs, decoder_outputs, attns_enc, attns_dec, attns_style, post_linear, duration_predictor_output, duration, weights = m.forward(
                character,
                mel_input,
                pos_text,
                pos_mel,
                mel,
                pos_mel,
                mel_max_length_array=mel_max_length_array)

            mel_loss = t.mean(
                t.abs(mel_pred - mel).masked_select(mask.unsqueeze(-1)))
            post_mel_loss = t.mean(
                t.abs(postnet_pred - mel).masked_select(mask.unsqueeze(-1)))
            n_priority_freq = int(2000 / (hp.sr * 0.5) * (hp.n_fft / 2 + 1))
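            # n_priority_freq counts the linear-frequency bins below 2 kHz; the linear loss below gives those bins extra weight.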
            post_linear_loss = 0.5 * t.mean(
                t.abs(post_linear - mag).masked_select(mask.unsqueeze(-1))
            ) + 0.5 * t.mean(
                t.abs(post_linear - mag)[:, :, :n_priority_freq].masked_select(
                    mask.unsqueeze(-1)))
            duration_loss = nn.L1Loss()(t.sum(
                duration_predictor_output, -1, keepdim=True),
                                        mel_length) / t.sum(text_length)

            loss = (mel_loss + post_mel_loss + 0.3 * post_linear_loss +
                    duration_loss) / hp.accum
            writer.add_losses(mel_loss.item(), post_mel_loss.item(),
                              0.3 * post_linear_loss, duration_loss,
                              global_step, 'Train')

            # Calculate gradients
            loss.backward()
            msg = "| Epoch: {}, {}/{}th loss : {:.4f} + {:.4f} + {:.4f} + {:.4f} = {:.4f}".format(
                cur_epoch, i, len(train_dataloader), mel_loss, post_mel_loss,
                0.3 * post_linear_loss, duration_loss, loss)
            stream(msg)

            if global_step % hp.accum == 0:
                nn.utils.clip_grad_norm_(m.parameters(), 1.)
                # Update weights
                optimizer.step()
                optimizer.zero_grad()

            if global_step % hp.val_step == 0 or global_step == 1:
                validate(m, vocoder, val_dataloader, global_step, writer)

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))
        if cur_epoch == hp.stop_epoch:
            break
        cur_epoch += 1
        print(' ')
Example #11
##############
# Cuda Flags #
##############
if config["cuda"]:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
else:
    device = torch.device("cpu")

###############################
# Creating the dataset object #
###############################
# Create training data object
bidirectional = config.getboolean("bidirectional")
trainset, source_vocab, target_vocab = get_dataset(
    types="train",
    batch_size=int(config["batch_size"]),
    shuffle=True,
    num_workers=int(config["num_workers"]),
    pin_memory=False,
    drop_last=True)
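# The "+ 2" below reserves two extra vocabulary ids (likely for SOS/EOS tokens; an assumption, not stated here).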
encoder1 = EncoderRNN(int(config["hidden_size_encoder"]),
                      len(source_vocab) + 2,
                      int(config["batch_size"]),
                      num_layers=int(config["num_layer_encoder"]),
                      bidirectional=bidirectional).to(device)
bridge = Linear(bidirectional, int(config["hidden_size_encoder"]),
                int(config["hidden_size_decoder"])).to(device)
decoder1 = DecoderRNN(int(config["hidden_size_decoder"]),
                      len(target_vocab) + 2,
                      int(config["batch_size"]),
                      num_layers=int(config["num_layer_decoder"])).to(device)
trainIters(trainset,
Example #12
def synthesis(args):
    m = Model()
    m.load_state_dict(load_checkpoint(args.restore_step1, "transformer"))
    m = m.cuda()
    m.train(False)
    vocoder = SmartVocoder(Hyperparameters(parse_args()))
    vocoder.load_state_dict(
        t.load('./mel2audio/merged_STFT_checkpoint.pth')["state_dict"])
    vocoder = vocoder.cuda()
    vocoder.eval()
    with open('./hifi_gan/config.json') as f:
        data = f.read()
    json_config = json.loads(data)
    h = AttrDict(json_config)
    hifi_gan = Generator(h).cuda()
    state_dict_g = t.load('./hifi_gan/g_00334000', map_location='cuda')
    hifi_gan.load_state_dict(state_dict_g['generator'])
    hifi_gan.eval()
    hifi_gan.remove_weight_norm()

    test_dataset = get_dataset(hp.test_data_csv)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=1,
                                 shuffle=False,
                                 collate_fn=collate_fn_transformer,
                                 drop_last=True,
                                 num_workers=1)
    ref_dataset = get_dataset(hp.test_data_csv_shuf)
    ref_dataloader = DataLoader(ref_dataset,
                                batch_size=1,
                                shuffle=False,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=1)

    writer = get_writer(hp.checkpoint_path, hp.log_directory)

    mel_basis = t.from_numpy(
        librosa.filters.mel(hp.sr, hp.n_fft, hp.n_mels, 50,
                            11000)).unsqueeze(0)  # (n_mels, 1+n_fft//2)

    ref_dataloader_iter = iter(ref_dataloader)
    _, ref_mel, _, _, _, ref_pos_mel, _, _, ref_fname = next(
        ref_dataloader_iter)

    for i, data in enumerate(test_dataloader):
        character, _, _, _, pos_text, _, text_length, _, fname = data
        mel_input = t.zeros([1, 1, 80]).cuda()
        character = character.cuda()
        ref_mel = ref_mel.cuda()
        mel_input = mel_input.cuda()
        pos_text = pos_text.cuda()
        with t.no_grad():
            start = time.time()
            memory, c_mask, attns_enc, duration_mask = m.encoder(character,
                                                                 pos=pos_text)
            style, coarse_emb = m.ref_encoder(ref_mel)
            memory = t.cat((memory, coarse_emb.expand(-1, memory.size(1), -1)),
                           -1)
            memory = m.memory_coarse_layer(memory)
            duration_predictor_output = m.duration_predictor(
                memory, duration_mask)
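            # Round predicted durations up to whole frames, then zero them at padded positions.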
            duration = t.ceil(duration_predictor_output)
            duration = duration * duration_mask
            #            max_length = t.sum(duration).type(t.LongTensor)
            #            print("length : ", max_length)

            monotonic_interpolation, pos_mel_, weights = m.length_regulator(
                memory, duration, duration_mask)
            kv_mask = t.zeros([1, mel_input.size(1),
                               character.size(1)]).cuda()  # B, t', N
            kv_mask[:, :, :3] = 1
            kv_mask = kv_mask.eq(0)
            stop_flag = False
            ctr = 0
            for j in range(1200):
                pos_mel = t.arange(1,
                                   mel_input.size(1) + 1).unsqueeze(0).cuda()
                mel_pred, postnet_pred, attn_probs, decoder_output, attns_dec, attns_style = m.decoder(
                    memory,
                    style,
                    mel_input,
                    c_mask,
                    pos=pos_mel,
                    ref_pos=ref_pos_mel,
                    mono_inter=monotonic_interpolation[:, :mel_input.shape[1]],
                    kv_mask=kv_mask)
                mel_input = t.cat([mel_input, postnet_pred[:, -1:, :]], dim=1)
                #                print("j", j, "mel_input", mel_input.shape)
                if stop_flag and ctr == 10:
                    break
                elif stop_flag:
                    ctr += 1
                kv_mask, stop_flag = update_kv_mask(
                    kv_mask, attn_probs)  # B, t', N --> B, t'+1, N
            postnet_pred = t.cat((postnet_pred,
                                  t.zeros(postnet_pred.size(0), 5,
                                          postnet_pred.size(-1)).cuda()), 1)
            gen_length = mel_input.size(1)
            #            print("gen_length", gen_length)
            post_linear = m.postnet(postnet_pred)
            post_linear = resample(post_linear,
                                   seq_len=mel_input.size(1),
                                   scale=args.rhythm_scale)
            postnet_pred = resample(mel_input,
                                    seq_len=mel_input.size(1),
                                    scale=args.rhythm_scale)
            inf_time = time.time() - start
            print("inference time: ", inf_time)
            #            print("speech_rate: ", len(postnet_pred[0])/len(character[0]))

            postnet_pred_v = postnet_pred.transpose(2, 1)
            postnet_pred_v = (postnet_pred_v * 100 + 20 - 100) / 20
            B, C, T = postnet_pred_v.shape
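            # Latent noise for the vocoder, one hop-length of samples per mel frame; scaling by 0.6 lowers the sampling temperature.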
            z = t.randn(1, 1, T * hp.hop_length).cuda()
            z = z * 0.6  # Temp
            t.cuda.synchronize()
            timestemp = time.time()
            with t.no_grad():
                y_gen = vocoder.reverse(z, postnet_pred_v).squeeze()
            t.cuda.synchronize()
            print('{} seconds'.format(time.time() - timestemp))
            wav = y_gen.to(t.device("cpu")).data.numpy()
            wav = np.pad(
                wav, [0, 4800], mode='constant',
                constant_values=0)  #pad 0 for 0.21 sec silence at the end

            post_linear_v = post_linear.transpose(1, 2)
            post_linear_v = 10**((post_linear_v * 100 + 20 - 100) / 20)
            mel_basis = mel_basis.repeat(post_linear_v.shape[0], 1, 1)
            post_linear_mel_v = t.log10(t.bmm(mel_basis.cuda(), post_linear_v))
            B, C, T = post_linear_mel_v.shape
            z = t.randn(1, 1, T * hp.hop_length).cuda()
            z = z * 0.6  # Temp
            t.cuda.synchronize()
            timestemp = time.time()
            with t.no_grad():
                y_gen_linear = vocoder.reverse(z, post_linear_mel_v).squeeze()
            t.cuda.synchronize()
            wav_linear = y_gen_linear.to(t.device("cpu")).data.numpy()
            wav_linear = np.pad(
                wav_linear, [0, 4800], mode='constant',
                constant_values=0)  #pad 0 for 0.21 sec silence at the end

            wav_hifi = hifi_gan(post_linear_mel_v).squeeze().clamp(
                -1, 1).detach().cpu().numpy()
            wav_hifi = np.pad(
                wav_hifi, [0, 4800], mode='constant',
                constant_values=0)  #pad 0 for 0.21 sec silence at the end

        mel_path = os.path.join(hp.sample_path + '_' + str(args.rhythm_scale),
                                'mel')
        if not os.path.exists(mel_path):
            os.makedirs(mel_path)
        np.save(
            os.path.join(
                mel_path,
                'text_{}_ref_{}_synth_{}.mel'.format(i, ref_fname,
                                                     str(args.rhythm_scale))),
            postnet_pred.cpu())

        linear_path = os.path.join(
            hp.sample_path + '_' + str(args.rhythm_scale), 'linear')
        if not os.path.exists(linear_path):
            os.makedirs(linear_path)
        np.save(
            os.path.join(
                linear_path, 'text_{}_ref_{}_synth_{}.linear'.format(
                    i, ref_fname, str(args.rhythm_scale))), post_linear.cpu())

        wav_path = os.path.join(hp.sample_path + '_' + str(args.rhythm_scale),
                                'wav')
        if not os.path.exists(wav_path):
            os.makedirs(wav_path)
        write(
            os.path.join(
                wav_path,
                "text_{}_ref_{}_synth_{}.wav".format(i, ref_fname,
                                                     str(args.rhythm_scale))),
            hp.sr, wav)
        print("rtx : ", (len(wav) / hp.sr) / inf_time)

        wav_linear_path = os.path.join(
            hp.sample_path + '_' + str(args.rhythm_scale), 'wav_linear')
        if not os.path.exists(wav_linear_path):
            os.makedirs(wav_linear_path)
        write(
            os.path.join(
                wav_linear_path,
                "text_{}_ref_{}_synth_{}.wav".format(i, ref_fname,
                                                     str(args.rhythm_scale))),
            hp.sr, wav_linear)

        wav_hifi_path = os.path.join(
            hp.sample_path + '_' + str(args.rhythm_scale), 'wav_hifi')
        if not os.path.exists(wav_hifi_path):
            os.makedirs(wav_hifi_path)
        write(
            os.path.join(
                wav_hifi_path,
                "text_{}_ref_{}_synth_{}.wav".format(i, ref_fname,
                                                     str(args.rhythm_scale))),
            hp.sr, wav_hifi)

        show_weights = weights.contiguous().view(weights.size(0), 1, 1,
                                                 weights.size(1),
                                                 weights.size(2))
        attns_enc_new = []
        attns_dec_new = []
        attn_probs_new = []
        attns_style_new = []
        for i in range(len(attns_enc)):
            attns_enc_new.append(attns_enc[i].unsqueeze(0))
            attns_dec_new.append(attns_dec[i].unsqueeze(0))
            attn_probs_new.append(attn_probs[i].unsqueeze(0))
            attns_style_new.append(attns_style[i].unsqueeze(0))
        attns_enc = t.cat(attns_enc_new, 0)
        attns_dec = t.cat(attns_dec_new, 0)
        attn_probs = t.cat(attn_probs_new, 0)
        attns_style = t.cat(attns_style_new, 0)

        attns_enc = attns_enc.contiguous().view(attns_enc.size(0), 1,
                                                hp.n_heads, attns_enc.size(2),
                                                attns_enc.size(3))
        attns_enc = attns_enc.permute(1, 0, 2, 3, 4)
        attns_dec = attns_dec.contiguous().view(attns_dec.size(0), 1,
                                                hp.n_heads, attns_dec.size(2),
                                                attns_dec.size(3))
        attns_dec = attns_dec.permute(1, 0, 2, 3, 4)
        attn_probs = attn_probs.contiguous().view(attn_probs.size(0),
                                                  1, hp.n_heads,
                                                  attn_probs.size(2),
                                                  attn_probs.size(3))
        attn_probs = attn_probs.permute(1, 0, 2, 3, 4)
        attns_style = attns_style.contiguous().view(attns_style.size(0), 1,
                                                    hp.n_heads,
                                                    attns_style.size(2),
                                                    attns_style.size(3))
        attns_style = attns_style.permute(1, 0, 2, 3, 4)

        save_dir = os.path.join(
            hp.sample_path + '_' + str(args.rhythm_scale), 'figure',
            "text_{}_ref_{}_synth_{}.wav".format(fname, ref_fname,
                                                 str(args.rhythm_scale)))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        writer.add_alignments(attns_enc.detach().cpu(),
                              attns_dec.detach().cpu(),
                              attn_probs.detach().cpu(),
                              attns_style.detach().cpu(),
                              show_weights.detach().cpu(),
                              [t.tensor(gen_length).type(t.LongTensor)],
                              text_length, args.restore_step1, 'Inference',
                              save_dir)
Example #13
decay_rate = 0.99  # learning-rate decay rate
moving_average_decay_rate = 0.99  # moving-average decay rate
bottleneck_layer_size = 512  # output dimension of the final layer
keep_probability = 0.8  # dropout keep probability
weight_decay = 5e-5  # L2 weight-regularization strength
center_loss_alfa = 0.95  # center update rate for the center loss
center_loss_factor = 0.5  # center-loss weight
train_step = tf.Variable(0, trainable=False)  # current training step
pretrained_model_path = "/home/dc2-user/biyesheji/models_lfw/"  # path to a previously trained model
pretrained_model = False  # whether a previously trained model exists

if len(os.listdir(pretrained_model_path)) > 0:
    pretrained_model = True
    print("Using pretrained model")

dataset = preprocess.get_dataset(image_path, dataset_type)
image_path_list, label_list = preprocess.create_image_path_list_and_label_list(
    dataset=dataset)

labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
size = array_ops.shape(labels)[0]
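# Queue of shuffled dataset indices; each dequeue_many pulls one epoch's worth (batch_size * epoch_size) of indices.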
index_queue = tf.train.range_input_producer(limit=size,
                                            num_epochs=None,
                                            shuffle=True,
                                            capacity=32)
index_dequeue_op = index_queue.dequeue_many(batch_size * epoch_size)

image_paths_placeholder = tf.placeholder(shape=(None, 1),
                                         dtype=tf.string,
                                         name="image_paths")
labels_placeholder = tf.placeholder(dtype=tf.int32, name="labels")
Example #14
def main():

    train_dataset = get_dataset(hp.train_data_csv)
    val_dataset = get_dataset(hp.val_data_csv)
    restore_step = hp.restore_step
    global_step = restore_step
    if restore_step != 0:
        restore_flag = True
    else:
        restore_flag = False

    m = Model()
    if os.path.exists('./checkpoints/checkpoint_%s_%d.pth.tar' %
                      ('transformer', global_step)):
        state_dict = t.load('./checkpoints/checkpoint_%s_%d.pth.tar' %
                            ('transformer', global_step))
        new_state_dict = OrderedDict()
        for k, value in state_dict['model'].items():
            key = k[7:]
            new_state_dict[key] = value

        m.load_state_dict(new_state_dict)

    m = nn.DataParallel(m.cuda())
    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    writer = get_writer(hp.checkpoint_path, hp.log_directory)
    cur_epoch = 0

    for epochs in range(hp.epochs):
        train_dataloader = DataLoader(train_dataset,
                                      batch_size=hp.batch_size,
                                      shuffle=True,
                                      collate_fn=collate_fn_transformer,
                                      drop_last=True,
                                      num_workers=1)
        val_dataloader = DataLoader(val_dataset,
                                    batch_size=hp.batch_size,
                                    shuffle=True,
                                    collate_fn=collate_fn_transformer,
                                    drop_last=True)
        if restore_flag:
            cur_epoch = int(restore_step / len(train_dataloader))
            restore_flag = not restore_flag
        for i, data in enumerate(train_dataloader):
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, text_length, mel_length, fname = data
            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()
            text_length = text_length.cuda()
            mel_length = mel_length.cuda()
            loading_time = time.time()
            mel_pred, postnet_pred, attn_probs, decoder_output, attns_enc, attns_dec, attns_style = m.forward(
                character, mel_input, pos_text, pos_mel, mel, pos_mel)
            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)

            loss = (mel_loss + post_mel_loss) / hp.accum
            writer.add_losses(mel_loss.item(), post_mel_loss.item(),
                              global_step, 'Train')

            # Calculate gradients
            loss.backward()
            msg = "| Epoch: {}, {}/{}th loss : {:.4f} + {:.4f} = {:.4f}".format(
                cur_epoch, i, len(train_dataloader), mel_loss, post_mel_loss,
                loss)
            stream(msg)

            if global_step % hp.accum == 0:
                nn.utils.clip_grad_norm_(m.parameters(), 1.)
                # Update weights
                optimizer.step()
                optimizer.zero_grad()

            if global_step % hp.val_step == 0 or global_step == 1:
                validate(m, val_dataloader, global_step, writer)

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))
        if cur_epoch == hp.stop_epoch:
            break
        cur_epoch += 1
        print(' ')
Example #15
def main():
    if not os.path.exists("logger"):
        os.mkdir("logger")

    dataset = get_dataset()
    global_step = 0

    m = nn.DataParallel(Model().cuda())
    num_param = sum(param.numel() for param in m.parameters())
    print('Number of Transformer-TTS Parameters:', num_param)

    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    pos_weight = t.FloatTensor([5.]).cuda()
    # writer = SummaryWriter()

    for epoch in range(hp.epochs):

        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=16)
        # pbar = tqdm(dataloader)
        for i, data in enumerate(dataloader):
            # pbar.set_description("Processing at epoch %d"%epoch)
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, _ = data

            stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)

            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()
            # print(mel)

            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(
                character, mel_input, pos_text, pos_mel)

            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)

            loss = mel_loss + post_mel_loss

            t_l = loss.item()
            m_l = mel_loss.item()
            m_p_l = post_mel_loss.item()
            # s_l = stop_pred_loss.item()

            with open(os.path.join("logger", "total_loss.txt"),
                      "a") as f_total_loss:
                f_total_loss.write(str(t_l) + "\n")

            with open(os.path.join("logger", "mel_loss.txt"),
                      "a") as f_mel_loss:
                f_mel_loss.write(str(m_l) + "\n")

            with open(os.path.join("logger", "mel_postnet_loss.txt"),
                      "a") as f_mel_postnet_loss:
                f_mel_postnet_loss.write(str(m_p_l) + "\n")

            # with open(os.path.join("logger", "stop_pred_loss.txt"), "a") as f_s_loss:
            #     f_s_loss.write(str(s_l)+"\n")

            # Print
            if global_step % hp.log_step == 0:
                # Now = time.clock()

                str1 = "Epoch [{}/{}], Step [{}], Mel Loss: {:.4f}, Mel PostNet Loss: {:.4f};".format(
                    epoch + 1, hp.epochs, global_step, mel_loss.item(),
                    post_mel_loss.item())
                str2 = "Total Loss: {:.4f}.".format(loss.item())
                current_learning_rate = 0
                for param_group in optimizer.param_groups:
                    current_learning_rate = param_group['lr']
                str3 = "Current Learning Rate is {:.6f}.".format(
                    current_learning_rate)
                # str4 = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s.".format(
                #     (Now-Start), (total_step-current_step)*np.mean(Time))

                print("\n" + str1)
                print(str2)
                print(str3)
                # print(str4)

                with open(os.path.join("logger", "logger.txt"),
                          "a") as f_logger:
                    f_logger.write(str1 + "\n")
                    f_logger.write(str2 + "\n")
                    f_logger.write(str3 + "\n")
                    # f_logger.write(str4 + "\n")
                    f_logger.write("\n")

            # writer.add_scalars('training_loss',{
            #         'mel_loss':mel_loss,
            #         'post_mel_loss':post_mel_loss,

            #     }, global_step)

            # writer.add_scalars('alphas',{
            #         'encoder_alpha':m.module.encoder.alpha.data,
            #         'decoder_alpha':m.module.decoder.alpha.data,
            #     }, global_step)

            # if global_step % hp.image_step == 1:

            #     for i, prob in enumerate(attn_probs):

            #         num_h = prob.size(0)
            #         for j in range(4):

            #             x = vutils.make_grid(prob[j*16] * 255)
            #             writer.add_image('Attention_%d_0'%global_step, x, i*4+j)

            #     for i, prob in enumerate(attns_enc):
            #         num_h = prob.size(0)

            #         for j in range(4):

            #             x = vutils.make_grid(prob[j*16] * 255)
            #             writer.add_image('Attention_enc_%d_0'%global_step, x, i*4+j)

            #     for i, prob in enumerate(attns_dec):

            #         num_h = prob.size(0)
            #         for j in range(4):

            #             x = vutils.make_grid(prob[j*16] * 255)
            #             writer.add_image('Attention_dec_%d_0'%global_step, x, i*4+j)

            optimizer.zero_grad()
            # Calculate gradients
            loss.backward()

            nn.utils.clip_grad_norm_(m.parameters(), 1.)

            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))
Example #16
print(G)
D = Discriminator_noSigmoid(in_dim=3).cuda() #channel=3
#D.load_state_dict(torch.load('model/wgangp_d.pth30'))
print(D)
G.train()
D.train()

# loss criterion
criterion = nn.BCELoss()

# optimizer
opt_D = torch.optim.Adam(D.parameters(), lr=lr, betas=(0.5, 0.999))
opt_G = torch.optim.Adam(G.parameters(), lr=lr, betas=(0.5, 0.999))

# dataloader (You might need to edit the dataset path if you use extra dataset.)
dataset = get_dataset(os.path.join(workspace_dir))
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)

# show one image
#plt.imshow(dataset[10].numpy().transpose(1,2,0))
#plt.show()

# for logging
z_sample = Variable(torch.randn(100, z_dim)).cuda()

# main training loop
for e, epoch in enumerate(range(n_epoch)):
    for i, data in enumerate(dataloader):
        imgs = data
        imgs = imgs.cuda()
        bs = imgs.size(0)
Example #17
                                input_tensor,
                                device,
                                idx2word_hin,
                                max_length=20,
                                bidirectional=False)  #CODE_BlANK_6

        #Joining the predicted output to form the predicted sentence
        output_sentence = ' '.join(output_words)
        print('Predicted Output: ', output_sentence)
        print('')
        if (j == n):
            break


from preprocess import get_dataset
device = torch.device("cpu")
testset, idx2word_en, idx2word_hin = get_dataset(batch_size=1,
                                                 types="val",
                                                 shuffle=False,
                                                 num_workers=1,
                                                 pin_memory=False,
                                                 drop_last=False)
encoder = torch.load("encoder.pt")
encoder = encoder.to(device)
decoder = torch.load("decoder.pt")
decoder = decoder.to(device)
bridge = torch.load("bridge.pt")
bridge = bridge.to(device)
evaluateRandomly(encoder, decoder, bridge, device, testset, idx2word_en,
                 idx2word_hin)
Example #18
def main():

    dataset = get_dataset()
    global_step = 0
    sum_loss = 0
    
    m = nn.DataParallel(Model().cuda())  # TODO: DataParallel
    # m = Model().cuda()

    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    pos_weight = t.FloatTensor([5.]).cuda()
    writer = SummaryWriter()
    
    for epoch in range(hp.epochs):

        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=16)
        pbar = tqdm(dataloader)
        sum_loss = 0
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d"%epoch)
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)
                
            character, mel, mel_input, pos_text, pos_mel, _ = data
            
            stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)
            
            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()
            
            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(character, mel_input, pos_text, pos_mel)

            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)
            
            loss = mel_loss + post_mel_loss
            
            writer.add_scalars('training_loss', {
                'mel_loss': mel_loss,
                'post_mel_loss': post_mel_loss,
            }, global_step)

            writer.add_scalars(
                'alphas', {
                    'encoder_alpha': m.module.encoder.alpha.data,
                    'decoder_alpha': m.module.decoder.alpha.data,
                }, global_step)

            if global_step % hp.image_step == 1:
                
                for i, prob in enumerate(attn_probs):
                    
                    num_h = prob.size(0)
                    for j in range(4):
                
                        x = vutils.make_grid(prob[j*16] * 255)
                        writer.add_image('Attention_%d_0'%global_step, x, i*4+j)
                
                for i, prob in enumerate(attns_enc):
                    num_h = prob.size(0)
                    
                    for j in range(4):
                
                        x = vutils.make_grid(prob[j*16] * 255)
                        writer.add_image('Attention_enc_%d_0'%global_step, x, i*4+j)
            
                for i, prob in enumerate(attns_dec):

                    num_h = prob.size(0)
                    for j in range(4):
                
                        x = vutils.make_grid(prob[j*16] * 255)
                        writer.add_image('Attention_dec_%d_0'%global_step, x, i*4+j)
                
            optimizer.zero_grad()
            # Calculate gradients
            loss.backward()
            
            nn.utils.clip_grad_norm_(m.parameters(), 1.)
            
            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))
            sum_loss += loss.item()

        print(f'epoch:{epoch}, sum_loss: {sum_loss / (i + 1)}')
Example #19
def main():

    dataset = get_dataset(hp.train_data_csv)
    global_step = 0

    m = nn.DataParallel(ModelStopToken().cuda())
    trans_model = Model()
    trans_model.load_state_dict(load_checkpoint(100000, "transformer"))
    for name, param in trans_model.named_parameters():
        param.requires_grad = False
        print(name, " : weight frozen")
    trans_model = nn.DataParallel(trans_model.cuda())

    m.train()
    trans_model.train(False)

    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    writer = SummaryWriter()

    for epoch in range(hp.epochs):

        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=8)
        for i, data in enumerate(dataloader):
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, text_length, mel_length, fname = data
            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()
            mel_length = mel_length.cuda()

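            # Stop targets: 1 at padded frames, plus an explicit 1 at the last valid frame of each utterance.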
            stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1).cuda()
            for j, length in enumerate(mel_length):
                stop_tokens[j, length - 1] += 1

            mel_pred, postnet_pred, attn, decoder_output, _, attn_dec, attn_style = trans_model.forward(
                character, mel_input, pos_text, pos_mel, mel, pos_mel)
            stop_preds = m.forward(decoder_output)

            if global_step % 100 == 0:
                print("pos_mel", pos_mel[0])
                print("stop_pred", t.sigmoid(stop_preds.squeeze()[0]))
                print("stop_tokens", stop_tokens[0])

            mask = get_mask_from_lengths(mel_length)
            stop_preds = stop_preds.squeeze().masked_select(mask)
            stop_tokens = stop_tokens.masked_select(mask)

            loss = nn.BCEWithLogitsLoss(
                pos_weight=t.tensor(hp.bce_pos_weight))(stop_preds,
                                                        stop_tokens)

            print("| Epoch: {}, {}/{}th loss : {:.4f}".format(
                epoch, i, len(dataloader), loss))

            writer.add_scalars('training_loss', {
                'loss': loss,
            }, global_step)

            optimizer.zero_grad()
            # Calculate gradients
            loss.backward()

            nn.utils.clip_grad_norm_(m.parameters(), 1.)

            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_stop_token_%d.pth.tar' % global_step))

        if epoch == hp.stop_epoch:
            break
Example #20
        D[max_index] = D[max_index] + 1

    return D


if not os.path.exists('BN_alignments'):
    os.mkdir('BN_alignments')
check_point = './BZ_checkpoint/checkpoint_transformer_820000.pth.tar'
para_file = t.load(check_point, map_location={'cuda:5': 'cuda:0'})

model = nn.DataParallel(Model().cuda())
model.load_state_dict(para_file['model'])
model.eval()
for epoch in range(1):

    dataset = get_dataset()
    dataloader = DataLoader(dataset,
                            batch_size=1,
                            shuffle=False,
                            collate_fn=collate_fn_transformer,
                            drop_last=False,
                            num_workers=1)
    k = 0
    # pbar = tqdm(dataloader)
    # for i, data in enumerate(pbar):
    for character, mel, mel_input, pos_text, pos_mel, _ in dataloader:
        # pbar.set_description("Processing at epoch %d"%epoch)

        # character, mel, mel_input, pos_text, pos_mel, _ = data

        stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)
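
        # Inference note: running the frozen model under t.no_grad() avoids
        # building the autograd graph; a sketch (the forward signature is
        # assumed from the training snippets above):
        # with t.no_grad():
        #     mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, \
        #         attns_dec = model(character, mel_input, pos_text, pos_mel)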
Example #21
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--step',
                        type=int,
                        help='Global step to restore checkpoint',
                        default=0)
    args = parser.parse_args()

    dataset = get_dataset()

    global_step = args.step

    m = Model().cuda()
    m = nn.DataParallel(m, device_ids=list(range(8)))

    if not os.path.exists(hp.checkpoint_path):
        os.makedirs(hp.checkpoint_path)

    if args.step > 0:
        ckpt_path = os.path.join(
            hp.checkpoint_path,
            'checkpoint_transformer_%d.pth.tar' % global_step)
        ckpt = torch.load(ckpt_path)
        m.load_state_dict(ckpt['model'])

    m.train()
    optimizer = torch.optim.Adam(m.parameters(), lr=hp.lr)

    if args.step > 0:
        optimizer.load_state_dict(ckpt['optimizer'])

    pos_weight = torch.FloatTensor([5.]).cuda()
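    # pos_weight was presumably intended for a BCEWithLogitsLoss stop-token
    # term; it is left unused because the stop loss is omitted below.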
    writer = SummaryWriter()

    for epoch in range(hp.epochs):

        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=16)
        pbar = tqdm(dataloader)
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, _ = data

            stop_tokens = torch.abs(pos_mel.ne(0).type(torch.float) - 1)
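            # stop_tokens is computed here but never used: the stop-token loss
            # is left out of the total loss below.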

            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()

            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(
                character, mel_input, pos_text, pos_mel)

            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)

            loss = mel_loss + post_mel_loss

            writer.add_scalars('training_loss', {
                'mel_loss': mel_loss,
                'post_mel_loss': post_mel_loss,
            }, global_step)

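            # encoder.alpha / decoder.alpha are the learned scales applied to
            # the sinusoidal positional encodings.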
            writer.add_scalars(
                'alphas', {
                    'encoder_alpha': m.module.encoder.alpha.data,
                    'decoder_alpha': m.module.decoder.alpha.data,
                }, global_step)

            if global_step % hp.image_step == 1:

                # Log every 16th attention head from each layer as an image;
                # the loop index is named n so it does not clobber the batch
                # counter i from the dataloader loop above.
                for n, prob in enumerate(attn_probs):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_%d_0' % global_step, x,
                                         n * 4 + j)

                for n, prob in enumerate(attns_enc):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_enc_%d_0' % global_step, x,
                                         n * 4 + j)

                for n, prob in enumerate(attns_dec):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_dec_%d_0' % global_step, x,
                                         n * 4 + j)

            optimizer.zero_grad()

            loss.backward()

            nn.utils.clip_grad_norm_(m.parameters(), 1.)

            optimizer.step()

            if global_step % hp.save_step == 0:
                torch.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))
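
# collate_fn_transformer is imported from the preprocessing module. A
# simplified sketch of the batch it is assumed to produce (padding to the
# longest item plus a zero "go" frame for the decoder input; the real
# implementation may differ):
import numpy as np

def collate_fn_sketch(batch):
    texts, mels = zip(*batch)          # text id arrays, (T, n_mels) mels
    bsz = len(batch)
    max_text = max(len(x) for x in texts)
    max_mel = max(m.shape[0] for m in mels)
    n_mels = mels[0].shape[1]

    char = np.zeros((bsz, max_text), dtype=np.int64)
    mel = np.zeros((bsz, max_mel, n_mels), dtype=np.float32)
    mel_input = np.zeros_like(mel)     # mel shifted right by one zero frame
    pos_text = np.zeros((bsz, max_text), dtype=np.int64)
    pos_mel = np.zeros((bsz, max_mel), dtype=np.int64)

    for k, (x, m_) in enumerate(zip(texts, mels)):
        char[k, :len(x)] = x
        mel[k, :m_.shape[0]] = m_
        mel_input[k, 1:m_.shape[0]] = m_[:-1]
        pos_text[k, :len(x)] = np.arange(1, len(x) + 1)  # 1-based; 0 = padding
        pos_mel[k, :m_.shape[0]] = np.arange(1, m_.shape[0] + 1)

    text_length = torch.LongTensor([len(x) for x in texts])
    return (torch.from_numpy(char), torch.from_numpy(mel),
            torch.from_numpy(mel_input), torch.from_numpy(pos_text),
            torch.from_numpy(pos_mel), text_length)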
Example #22
import numpy as np
import torch
from tqdm import tqdm
import json
from model.Bilstm import BiLSTM
from torch import nn
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from preprocess import get_iterator, get_dataset
from configs import *

train,val = get_dataset()


num_epochs = 50
batch_size = 32

vocab_size = len(train.fields['src'].vocab.stoi)
pos_vocab_size = len(train.fields['pos'].vocab.stoi)
output_dim = len(train.fields['tgt'].vocab.stoi)
'''
vocab_set = {
    'word2idx' : {},
    'pos2idx' : {},
    'tag2idx' : {},
    'idx2tag' : {}
    }

vocab_set['word2idx'] = dict(train.fields['src'].vocab.stoi)
vocab_set['pos2idx'] = dict(train.fields['pos'].vocab.stoi)
vocab_set['tag2idx'] = dict(train.fields['tgt'].vocab.stoi)
'''
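
# json is imported above but unused in this snippet; the commented-out
# vocab_set block suggests the vocabularies were meant to be serialized,
# e.g. (hypothetical, not from the original):
# with open('vocab.json', 'w', encoding='utf-8') as f:
#     json.dump(vocab_set, f)
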
def main():

    dataset = get_dataset()
    global_step = 0
    # reference: https://blog.csdn.net/weixin_40087578/article/details/87186613
    # DataParallel spreads each batch across the visible GPUs, training on
    # GPU 0 by default; for multi-GPU runs, set the device ids and the
    # environment variable beforehand.
    m = nn.DataParallel(Model().cuda())

    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)  # Adam

    pos_weight = t.FloatTensor([5.]).cuda()
    writer = SummaryWriter()

    for epoch in range(hp.epochs):

        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=16)
        pbar = tqdm(dataloader)
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            if global_step < 400000:
                # Adjust the learning rate; for Adam this is arguably
                # unnecessary, but the original schedule is kept.
                adjust_learning_rate(optimizer, global_step)
            # pos_text and pos_mel are global (1-based) position indices.
            character, mel, mel_input, pos_text, pos_mel, _ = data  # unpack the batch

            stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)

            character = character.cuda()  # move the batch tensors to the GPU
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()

            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(
                character, mel_input, pos_text, pos_mel)
            # stop_token was meant to mark the end of the audio, but the code
            # author found that adding its loss (as in the paper) keeps the
            # model from converging, so the generation length is later picked
            # by an empirical value instead.
            mel_loss = nn.L1Loss()(mel_pred, mel)  # L1 loss
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)

            loss = mel_loss + post_mel_loss

            writer.add_scalars('training_loss', {
                'mel_loss': mel_loss,
                'post_mel_loss': post_mel_loss,
            }, global_step)

            writer.add_scalars(
                'alphas', {
                    'encoder_alpha': m.module.encoder.alpha.data,
                    'decoder_alpha': m.module.decoder.alpha.data,
                }, global_step)

            if global_step % hp.image_step == 1:

                # Head-by-head attention logging; the loop index is named n so
                # it does not clobber the batch counter i from the loop above.
                for n, prob in enumerate(attn_probs):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_%d_0' % global_step, x,
                                         n * 4 + j)

                for n, prob in enumerate(attns_enc):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_enc_%d_0' % global_step, x,
                                         n * 4 + j)

                for n, prob in enumerate(attns_dec):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_dec_%d_0' % global_step, x,
                                         n * 4 + j)

            optimizer.zero_grad()  # clear the gradient buffers for this step
            # Calculate gradients
            loss.backward()  # backpropagation

            nn.utils.clip_grad_norm_(m.parameters(), 1.)  # gradient clipping

            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))
Example #24
def main(args):

    dataset = get_dataset()
    global_step = args.restore_step

    m = nn.DataParallel(Model().cuda())
    # # print(type(m.module))
    # for block in m.module:
    #     for each in block.parameters():
    #         print(each.requires_grad)
    # for paras in m.parameters():
    #     print(paras.size(), paras.requires_grad)

    m.train()
    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    # print(os.path.join(
    #         hp.checkpoint_path, 'checkpoint_transformer_%d.pth.tar' % args.restore_step))
    try:
        print(
            os.path.join(
                hp.checkpoint_path,
                'checkpoint_transformer_%d.pth.tar' % args.restore_step))
        checkpoint = t.load(
            os.path.join(
                hp.checkpoint_path,
                'checkpoint_transformer_%d.pth.tar' % args.restore_step))
        m.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n---Model Restored at Step %d---\n" % args.restore_step)
    except Exception:
        print("\n---Start New Training---\n")
        if not os.path.exists(hp.checkpoint_path):
            os.mkdir(hp.checkpoint_path)

    pos_weight = t.FloatTensor([5.]).cuda()
    writer = SummaryWriter()

    for epoch in range(args.start_epoch, hp.epochs):

        dataloader = DataLoader(dataset,
                                batch_size=hp.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn_transformer,
                                drop_last=True,
                                num_workers=0)
        pbar = tqdm(dataloader)
        for i, data in enumerate(pbar):
            pbar.set_description("Processing at epoch %d" % epoch)
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            character, mel, mel_input, pos_text, pos_mel, _ = data

            stop_tokens = t.abs(pos_mel.ne(0).type(t.float) - 1)

            character = character.cuda()
            mel = mel.cuda()
            mel_input = mel_input.cuda()
            pos_text = pos_text.cuda()
            pos_mel = pos_mel.cuda()

            mel_pred, postnet_pred, attn_probs, stop_preds, attns_enc, attns_dec = m.forward(
                character, mel_input, pos_text, pos_mel)

            mel_loss = nn.L1Loss()(mel_pred, mel)
            post_mel_loss = nn.L1Loss()(postnet_pred, mel)

            loss = mel_loss + post_mel_loss

            writer.add_scalars('training_loss', {
                'mel_loss': mel_loss,
                'post_mel_loss': post_mel_loss,
            }, global_step)

            writer.add_scalars(
                'alphas', {
                    'encoder_alpha': m.module.encoder.alpha.data,
                    'decoder_alpha': m.module.decoder.alpha.data,
                }, global_step)

            if global_step % hp.image_step == 1:

                # Head-by-head attention logging; the loop index is named n so
                # it does not clobber the batch counter i from the loop above.
                for n, prob in enumerate(attn_probs):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_%d_0' % global_step, x,
                                         n * 4 + j)

                for n, prob in enumerate(attns_enc):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_enc_%d_0' % global_step, x,
                                         n * 4 + j)

                for n, prob in enumerate(attns_dec):
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_dec_%d_0' % global_step, x,
                                         n * 4 + j)

            optimizer.zero_grad()
            # Calculate gradients
            loss.backward()

            nn.utils.clip_grad_norm_(m.parameters(), 1.)

            # Update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))
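
# A minimal driver for main(args), assuming the two attributes this snippet
# reads (restore_step, start_epoch); the original argument parsing is not shown:
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--restore_step', type=int, default=0)
    parser.add_argument('--start_epoch', type=int, default=0)
    main(parser.parse_args())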