Example #1
    def __init__(self, in_channels=13, len_max_seq=100,
            d_word_vec=512, d_model=512, d_inner=2048,
            n_layers=6, n_head=8, d_k=64, d_v=64,
            dropout=0.2, nclasses=6):

        super(TransformerEncoder, self).__init__()

        self.d_model = d_model

        self.inlayernorm = nn.LayerNorm(in_channels)
        self.convlayernorm = nn.LayerNorm(d_model)
        self.outlayernorm = nn.LayerNorm(d_model)

        self.inconv = torch.nn.Conv1d(in_channels, d_model, 1)

        self.encoder = Encoder(
            n_src_vocab=None, len_max_seq=len_max_seq,
            d_word_vec=d_word_vec, d_model=d_model, d_inner=d_inner,
            n_layers=n_layers, n_head=n_head, d_k=d_k, d_v=d_v,
            dropout=dropout)

        self.outlinear = nn.Linear(d_model, nclasses, bias=False)

        self.tempmaxpool = nn.MaxPool1d(len_max_seq)

        self.logsoftmax = nn.LogSoftmax(dim=-1)
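For reference, the inconv above is a kernel-size-1 convolution, i.e. a per-timestep linear projection from the raw channel count up to d_model. A minimal shape sketch (the (batch, time, channels) layout and the transposes are assumptions taken from the forward pass shown in Example #18, not from this fragment):

import torch
import torch.nn as nn

in_channels, d_model, seq_len, batch = 13, 512, 100, 2
inconv = nn.Conv1d(in_channels, d_model, 1)      # kernel size 1: per-timestep projection

x = torch.randn(batch, seq_len, in_channels)     # (b, t, d) input sequence
x = inconv(x.transpose(1, 2)).transpose(1, 2)    # Conv1d wants (b, d, t); back to (b, t, d_model)
print(x.shape)                                   # torch.Size([2, 100, 512])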
Example #2
    def __init__(self,
                 n_src_vocab,
                 n_max_seq,
                 n_layers=2,
                 n_head=2,
                 d_word_vec=100,
                 d_model=100,
                 d_inner_hid=100,
                 d_k=100,
                 d_v=100,
                 dropout=0.1,
                 proj_share_weight=True):

        super(Decepticon, self).__init__()
        self.encoder = Encoder(n_src_vocab,
                               n_max_seq,
                               n_layers=n_layers,
                               n_head=n_head,
                               d_word_vec=d_word_vec,
                               d_model=d_model,
                               d_inner_hid=d_inner_hid,
                               dropout=dropout)

        assert d_model == d_word_vec, 'To facilitate the residual connections, ' \
            'the dimensions of all module outputs shall be the same.'
Example #3
    def __init__(self,
                 d_word_vec=77,
                 n_layers=3,
                 n_head=1,
                 d_k=16,
                 d_v=16,
                 d_model=77,
                 d_inner=16,
                 dropout=0.1,
                 n_position=200,
                 seq_len=15,
                 con_size=3,
                 days=1,
                 kernel='linear',
                 kernel_size_tcn=3,
                 kernel_size_scn=2):

        super().__init__()
        self.encoder = Encoder(d_word_vec,
                               n_layers,
                               n_head,
                               d_k,
                               d_v,
                               d_model,
                               d_inner,
                               dropout,
                               n_position,
                               kernel=kernel,
                               kernel_size_tcn=kernel_size_tcn,
                               kernel_size_scn=kernel_size_scn)
        self.con1 = nn.Conv1d(d_model, days, con_size)
        self.ff1 = nn.Linear(seq_len - con_size + 1, d_word_vec)
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
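A quick shape check of why ff1 takes seq_len - con_size + 1 input features: that is the output length of an unpadded, stride-1 Conv1d. The sketch below reuses the constructor defaults; the batch size of 4 is only illustrative:

import torch
import torch.nn as nn

seq_len, d_model, con_size, days = 15, 77, 3, 1
con1 = nn.Conv1d(d_model, days, con_size)

x = torch.randn(4, d_model, seq_len)   # (batch, channels, time)
print(con1(x).shape)                   # torch.Size([4, 1, 13]); 13 == seq_len - con_size + 1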
Example #4
 def __init__(self, use_postnet=True, n_spkers=1, n_emotes=1):
     super(FastSpeech2, self).__init__()
     
     ### Speaker Embedding Table ###
     self.use_spk_embed = hp.use_spk_embed
     if self.use_spk_embed:
         self.n_spkers = n_spkers
         self.spk_embed_dim = hp.spk_embed_dim
         self.spk_embed_weight_std = hp.spk_embed_weight_std
         self.embed_speakers = Embedding(n_spkers, self.spk_embed_dim, padding_idx=None, std=self.spk_embed_weight_std)
         
     self.use_emo_embed = hp.use_emo_embed
     if self.use_emo_embed:
         self.n_emotes = n_emotes
         self.emo_embed_dim = hp.emo_embed_dim
         self.emo_embed_weight_std = hp.emo_embed_weight_std
         self.embed_emotions = Embedding(n_emotes, self.emo_embed_dim, padding_idx=None, std=self.emo_embed_weight_std)
     
     ### Encoder, Speaker Integrator, Variance Adaptor, Decoder, Postnet ###
     self.encoder = Encoder()
     if self.use_spk_embed:
         self.speaker_integrator = SpeakerIntegrator()
     self.variance_adaptor = VarianceAdaptor()
     self.decoder = Decoder()
     self.mel_linear = nn.Linear(hp.decoder_hidden, hp.n_mel_channels)
     self.use_postnet = use_postnet
     if self.use_postnet:
         self.postnet = PostNet()
Example #5
    def __init__(self):
        super(FastSpeech, self).__init__()

        self.encoder = Encoder()
        self.length_regulator = LengthRegulator()
        self.decoder = Decoder()

        self.mel_linear = Linear(hp.decoder_output_size, hp.num_mels)
        self.postnet = PostNet()
Example #6
    def __init__(self):
        super(FastSpeech2, self).__init__()

        self.encoder = Encoder()
        self.variance_adaptor = VarianceAdaptor()
        self.decoder = Decoder()

        self.mel_linear = Linear(hp.decoder_hidden, hp.n_mel_channels)
        self.postnet = PostNet()
Example #7
    def __init__(self):
        super(FastSpeech, self).__init__()

        self.encoder = Encoder()
        self.length_regulator = LengthRegulator()
        self.decoder = Decoder()

        self.mel_linear = Linear(hp.decoder_dim, hp.num_mels)
        self.postnet = CBHG(hp.num_mels, K=8, projections=[256, hp.num_mels])
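        # CBHG's bidirectional GRU doubles the channel count, hence num_mels * 2 in last_linear below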
        self.last_linear = Linear(hp.num_mels * 2, hp.num_mels)
Example #8
 def __init__(self):
     super(StyleEncoder, self).__init__()
     self.text_encoder = Encoder()
     self.audio_encoder = AudioEncoder()
     self.text_linear_down = nn.Sequential(
         nn.Linear(hp.encoder_hidden, hp.va_neck_hidden_t), nn.ReLU())
     self.speaker_linear_p = nn.Sequential(
         nn.Linear(hp.speaker_embed_dim, hp.va_neck_hidden_p * 2),
         nn.ReLU())
     self.speaker_linear = nn.Sequential(
         nn.Linear(hp.speaker_embed_dim, hp.encoder_hidden), nn.ReLU())
Example #9
    def __init__(self, py_vocab_size, hz_vocab_size=None, use_postnet=True):
        super(FastSpeech2, self).__init__()

        self.encoder = Encoder(py_vocab_size, hz_vocab_size=hz_vocab_size)
        self.variance_adaptor = VarianceAdaptor()

        self.decoder = Decoder()
        self.mel_linear = nn.Linear(hp.decoder_hidden, hp.n_mel_channels)

        self.use_postnet = use_postnet
        if self.use_postnet:
            self.postnet = UNet(scale=8)
Example #10
    def __init__(self, use_postnet=True):
        super(FastSpeech2, self).__init__()

        self.encoder = Encoder()
        self.variance_adaptor = TacotronDuration()

        self.decoder = Decoder()
        self.mel_linear = nn.Linear(hp.decoder_hidden, hp.n_mel_channels)

        self.use_postnet = use_postnet
        if self.use_postnet:
            self.postnet = PostNet()
Example #11
    def __init__(self, use_postnet=True):
        super(FastSpeech2, self).__init__()
        
#         self.gst = GST()
        self.encoder = Encoder()
        self.variance_adaptor = VarianceAdaptor()

        self.decoder = Decoder()
        
        if hp.vocoder == 'WORLD':
#             self.f0_decoder= Decoder()
            self.ap_linear = nn.Linear(hp.decoder_hidden, hp.n_ap_channels)
            self.sp_linear = nn.Linear(hp.decoder_hidden, hp.n_sp_channels)
        else:
            self.mel_linear = nn.Linear(hp.decoder_hidden, hp.n_mel_channels)
        
        self.use_postnet = use_postnet
        if self.use_postnet:
            self.postnet = PostNet()
Example #12
    def __init__(self, hparams):
        super().__init__()
        self.hparams = hparams
        hparams = self.hparams  # if the assignment converted a dict, keys are now attributes: a['key'] -> a.key

        self.enc = Encoder(
            n_src_vocab=hparams.vocab_size, len_max_seq=hparams.max_len,
            d_word_vec=hparams.d_model, d_model=hparams.d_model,
            d_inner=hparams.d_inner_hid, d_k=hparams.d_k, d_v=hparams.d_v,
            n_layers=hparams.n_layers, n_head=hparams.n_head,
            dropout=hparams.dropout)

        self.word = nn.Linear(hparams.d_model, hparams.vocab_size, bias=False)
        nn.init.xavier_normal_(self.word.weight)
        self.x_logit_scale = 1.
        if hparams.share_emb_prj_weight:
            self.word.weight = self.enc.src_word_emb.weight
            self.x_logit_scale = (hparams.d_model ** -0.5)

        self.loc = nn.Linear(hparams.d_model, 1)
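The share_emb_prj_weight branch above ties the output projection to the encoder's input embedding and rescales the logits by d_model ** -0.5. A minimal standalone sketch of the same pattern (the dimensions below are illustrative, not taken from this snippet):

import torch
import torch.nn as nn

d_model, vocab_size = 512, 1000
emb = nn.Embedding(vocab_size, d_model)
proj = nn.Linear(d_model, vocab_size, bias=False)
proj.weight = emb.weight                  # tie: both modules share one parameter tensor
x_logit_scale = d_model ** -0.5           # keeps tied logits at a reasonable magnitude

h = torch.randn(2, 7, d_model)            # stand-in for encoder output
logits = proj(h) * x_logit_scale          # (2, 7, vocab_size)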
Example #13
    def __init__(self,
                 src_vocab_size,
                 tgt_vocab_size,
                 n_layer=6,
                 d_model=512,
                 d_ff=2048,
                 n_head=8,
                 dropout=0.1):
        super(Transformer, self).__init__()
        self.src_embed = nn.Sequential(Embeddings(d_model, src_vocab_size),
                                       PositionalEncoding(d_model, dropout))
        self.tgt_embed = nn.Sequential(Embeddings(d_model, tgt_vocab_size),
                                       PositionalEncoding(d_model, dropout))
        self.encoder = Encoder(n_head, d_model, d_ff, dropout, n_layer)
        self.decoder = Decoder(n_head, d_model, d_ff, dropout, n_layer)
        self.generator = Generator(d_model, tgt_vocab_size)

        # Initialize parameters with Glorot / fan_avg.
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
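The p.dim() > 1 guard restricts Glorot initialization to weight matrices; one-dimensional parameters such as biases and LayerNorm gains keep their defaults. A small standalone illustration:

import torch.nn as nn

block = nn.Sequential(nn.Linear(8, 8), nn.LayerNorm(8))
for p in block.parameters():
    if p.dim() > 1:               # only the Linear weight matrix
        nn.init.xavier_uniform_(p)
    # biases and the LayerNorm weight/bias are 1-D and stay at their defaults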
Example #14
    def __init__(self, d_src_vec, len_seq, d_emb_vec, n_layers, n_head, d_k,
                 d_v, d_inner, dropout):

        super(model, self).__init__()
        self.d_src_vec = d_src_vec
        self.d_emb_vec = d_emb_vec
        self.len_seq = len_seq
        self.n_layers = n_layers
        self.n_head = n_head
        self.dropout = dropout
        self.d_inner = d_inner

        self.ffn = ffn_compressed(d_in=self.d_src_vec,
                                  d_hid=self.d_inner,
                                  d_out=self.d_emb_vec)

        self.encoder = Encoder(len_seq=self.len_seq,
                               d_word_vec=self.d_emb_vec,
                               n_layers=self.n_layers,
                               n_head=self.n_head,
                               d_k=self.d_emb_vec // self.n_head,
                               d_v=self.d_emb_vec // self.n_head,
                               d_inner=self.d_inner,
                               dropout=self.dropout)

        #Fully connected. Seems to have a lot of params
        #        self.FC1 = nn.Linear(self.d_emb_vec * self.len_seq , 64)
        #        self.FC2 = nn.Linear(64, 8)
        #        self.FC3 = nn.Linear(8, 2)

        #        #Average pooling over features
        #        self.avg_pooling = nn.AvgPool1d(d_emb_vec-1, stride=1)  #d_emb_vec-1: To have 2 classes
        #        self.FC = nn.Linear(len_seq * 2, 2)  #2: binary classification
        #        self.softmax = nn.Softmax(dim=-1)

        #Average pooling over sequence
        self.avg_pooling = nn.AvgPool1d(
            len_seq, stride=1)  #self.len_seq: To have 1 averaged token
        self.FC = nn.Linear(d_emb_vec, 2)  #2: binary classification
        self.softmax = nn.Softmax(dim=-1)
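The classification head above averages over the sequence dimension and then applies a binary linear layer. A minimal shape sketch, assuming the forward pass (not shown here) transposes the encoder output to (batch, features, time) before pooling:

import torch
import torch.nn as nn

batch, len_seq, d_emb_vec = 4, 15, 64
avg_pooling = nn.AvgPool1d(len_seq, stride=1)
fc = nn.Linear(d_emb_vec, 2)

enc_out = torch.randn(batch, len_seq, d_emb_vec)
pooled = avg_pooling(enc_out.transpose(1, 2)).squeeze(-1)   # (batch, d_emb_vec)
logits = fc(pooled)                                         # (batch, 2)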
Example #15
import torch
import torch.nn.functional as F
import torch.optim as optim
from torchtext.data import Field, Dataset, BucketIterator
from torchtext.datasets import TranslationDataset

import transformer.Constants as Constants
from transformer.Layers import EncoderLayer
from transformer.Models import Transformer, Encoder
from transformer.Optim import ScheduledOptim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

encoder_stacks = Encoder(d_model=32,
                         d_inner=64,
                         n_layers=2,
                         n_head=4,
                         d_k=16,
                         d_v=16,
                         dropout=0.1)

criterion = torch.nn.MSELoss().to(device)
optimizer = torch.optim.SGD(encoder_stacks.parameters(), lr=1)

src = torch.rand(1, 2, 32, requires_grad=True)
tgt = torch.rand(1, 2, 32)

print(src)

encoder_stacks.train()

for i in range(100):
Example #16
    def __init__(self,
                 source_dataset,
                 batch_size,
                 epochs,
                 window_size,
                 device,
                 plot_file,
                 train_data,
                 test_data,
                 valid_data,
                 target_column,
                 target_min,
                 target_max,
                 d_inner,
                 n_layers,
                 n_head_,
                 d_k,
                 d_v,
                 n_warmup_steps,
                 criterion,
                 target_name,
                 d_model,
                 model_file=None,
                 load_data=False,
                 load_model=False):
        self.data_frame = self.read_dataset(source_dataset)
        self.batch_size = batch_size
        self.epochs = epochs
        self.device = device
        self.target_column = target_column
        self.window = window_size
        self.plot_file = plot_file
        self.n_layers = n_layers
        self.n_head = n_head_
        self.d_inner = d_inner
        self.warmup_step = n_warmup_steps
        self.d_k = d_k
        self.d_v = d_v
        self.d_model = d_model
        self.target_name = target_name
        self.input_mask = torch.ones([self.batch_size, 1, self.window],
                                     dtype=torch.int,
                                     device=device)
        self.target_max = target_max
        self.target_min = target_min
        self.model_file = model_file
        self.prev_epoch = 0
        if load_data:
            self.train_df = pd.read_csv(train_data)
            self.test_df = pd.read_csv(test_data)
            self.valid_df = pd.read_csv(valid_data)
        else:
            self.train_df, self.valid_df, self.test_df = self.organize_dataset(
                train_data, test_data, valid_data)

        pad_col = [
            'col' + str(i) for i in range(self.train_df.shape[1], self.d_model)
        ]
        for col in pad_col:
            self.train_df[col] = 0
            self.test_df[col] = 0
            self.valid_df[col] = 0
        self.columns = self.train_df.shape[1]
        self.model = Encoder(n_position=200,
                             d_word_vec=self.columns,
                             d_model=self.columns,
                             d_inner=d_inner,
                             n_layers=n_layers,
                             n_head=n_head_,
                             d_k=d_k,
                             d_v=d_v,
                             dropout=0).to(device)

        if load_model:
            self.model = torch.load(self.model_file)['model']
            self.model.eval()
            self.model = self.model.to(device)
            self.prev_epoch = torch.load(self.model_file)['epoch']

        self.criterion = criterion
        self.optimizer = ScheduledOptim(
            optim.Adam(self.model.parameters(), betas=(0.9, 0.98), eps=1e-09),
            2.0,
            self.columns,
            n_warmup_steps,
            n_step=self.prev_epoch * math.floor(
                self.train_df.shape[0] / (self.window * self.batch_size)))
        self.loss_list = []
        self.lr_list = []
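For context, ScheduledOptim here receives a multiplier (2.0), a model width (self.columns) and a warmup step count, which matches the inverse-square-root warmup schedule from the original Transformer paper. A standalone sketch of that schedule (an assumption about ScheduledOptim's internals, not taken from this code):

def transformer_lr(step, lr_mul=2.0, d_model=64, n_warmup_steps=4000):
    # lr = lr_mul * d_model^-0.5 * min(step^-0.5, step * n_warmup_steps^-1.5)
    return lr_mul * d_model ** -0.5 * min(step ** -0.5, step * n_warmup_steps ** -1.5)

# the rate rises linearly during warmup, then decays as 1/sqrt(step)
print(transformer_lr(1), transformer_lr(4000), transformer_lr(16000))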
Example #17
class Dataset:
    def __init__(self,
                 source_dataset,
                 batch_size,
                 epochs,
                 window_size,
                 device,
                 plot_file,
                 train_data,
                 test_data,
                 valid_data,
                 target_column,
                 target_min,
                 target_max,
                 d_inner,
                 n_layers,
                 n_head_,
                 d_k,
                 d_v,
                 n_warmup_steps,
                 criterion,
                 target_name,
                 d_model,
                 model_file=None,
                 load_data=False,
                 load_model=False):
        self.data_frame = self.read_dataset(source_dataset)
        self.batch_size = batch_size
        self.epochs = epochs
        self.device = device
        self.target_column = target_column
        self.window = window_size
        self.plot_file = plot_file
        self.n_layers = n_layers
        self.n_head = n_head_
        self.d_inner = d_inner
        self.warmup_step = n_warmup_steps
        self.d_k = d_k
        self.d_v = d_v
        self.d_model = d_model
        self.target_name = target_name
        self.input_mask = torch.ones([self.batch_size, 1, self.window],
                                     dtype=torch.int,
                                     device=device)
        self.target_max = target_max
        self.target_min = target_min
        self.model_file = model_file
        self.prev_epoch = 0
        if load_data:
            self.train_df = pd.read_csv(train_data)
            self.test_df = pd.read_csv(test_data)
            self.valid_df = pd.read_csv(valid_data)
        else:
            self.train_df, self.valid_df, self.test_df = self.organize_dataset(
                train_data, test_data, valid_data)

        pad_col = [
            'col' + str(i) for i in range(self.train_df.shape[1], self.d_model)
        ]
        for col in pad_col:
            self.train_df[col] = 0
            self.test_df[col] = 0
            self.valid_df[col] = 0
        self.columns = self.train_df.shape[1]
        self.model = Encoder(n_position=200,
                             d_word_vec=self.columns,
                             d_model=self.columns,
                             d_inner=d_inner,
                             n_layers=n_layers,
                             n_head=n_head_,
                             d_k=d_k,
                             d_v=d_v,
                             dropout=0).to(device)

        if load_model:
            self.model = torch.load(self.model_file)['model']
            self.model.eval()
            self.model = self.model.to(device)
            self.prev_epoch = torch.load(self.model_file)['epoch']

        self.criterion = criterion
        self.optimizer = ScheduledOptim(
            optim.Adam(self.model.parameters(), betas=(0.9, 0.98), eps=1e-09),
            2.0,
            self.columns,
            n_warmup_steps,
            n_step=self.prev_epoch * math.floor(
                self.train_df.shape[0] / (self.window * self.batch_size)))
        self.loss_list = []
        self.lr_list = []

    def read_dataset(self, source_dataset):
        return pd.read_csv(source_dataset)

    def organize_dataset(self, train_data, test_data, valid_data):
        train_df = self.data_frame
        valid_df = self.data_frame
        test_df = self.data_frame
        return train_df, valid_df, test_df

    def train(self):
        train_tensor = torch.tensor(self.train_df.values,
                                    dtype=torch.float,
                                    device=self.device)
        train_rows = self.train_df.shape[0]
        section_size = self.window * self.batch_size
        avg_loss = 0
        for i in range(self.epochs):
            chosen_idx = np.random.choice(train_rows,
                                          replace=True,
                                          size=math.floor(train_rows / 10))
            imputing_df = self.train_df.copy()
            imputing_df.iloc[[j in chosen_idx for j in range(train_rows)],
                             self.target_column] = 0
            imputing_tensor = torch.tensor(imputing_df.values,
                                           dtype=torch.float,
                                           device=self.device)

            avg_loss = 0
            lr = 0

            for j in range(math.floor(train_rows / section_size)):
                batch_imputing_tensor = imputing_tensor[
                    j * section_size:(j + 1) * section_size, :]
                batch_train_tensor = train_tensor[
                    j * section_size:(j + 1) * section_size, :]

                input_tensor = self.unsqueeze(batch_imputing_tensor)

                self.optimizer.zero_grad()

                imputed_tensor = self.squeeze(
                    self.model(input_tensor, self.input_mask)[0])

                imputing_idx = [
                    k in chosen_idx
                    for k in range(j * section_size, (j + 1) * section_size)
                ]
                imputing_idx_tensor = torch.tensor(imputing_idx)

                imputed_label_tensor = imputed_tensor[imputing_idx_tensor,
                                                      self.target_column]
                true_label_tensor = batch_train_tensor[imputing_idx_tensor,
                                                       self.target_column]

                loss = torch.sqrt(
                    self.criterion(imputed_label_tensor, true_label_tensor))
                # loss = self.criterion(imputed_label_tensor, true_label_tensor)

                if imputed_label_tensor.shape[0] > 0:

                    loss.backward()  # gradients are computed here
                    lr = self.optimizer.step_and_update_lr()

                    avg_loss = (j * avg_loss + loss) / (j + 1)

            self.loss_list.append(avg_loss *
                                  (self.target_max - self.target_min))
            self.lr_list.append(10000 * lr)

            self.save_model(i)

            print(avg_loss * (self.target_max - self.target_min))

        self.draw_plots(avg_loss * (self.target_max - self.target_min))

    def validate(self):
        valid_tensor = torch.tensor(self.valid_df.values,
                                    dtype=torch.float,
                                    device=self.device)
        valid_rows = self.valid_df.shape[0]
        section_size = self.window * self.batch_size

        chosen_idx = np.random.choice(valid_rows,
                                      replace=True,
                                      size=math.floor(valid_rows / 10))
        imputing_df = self.valid_df.copy()
        imputing_df.iloc[[j in chosen_idx for j in range(valid_rows)],
                         self.target_column] = 0
        imputing_tensor = torch.tensor(imputing_df.values,
                                       dtype=torch.float,
                                       device=self.device)
        avg_loss = 0

        imputed_list = []

        for j in range(math.floor(valid_rows / section_size)):
            batch_imputing_tensor = imputing_tensor[
                j * section_size:(j + 1) * section_size, :]
            batch_valid_tensor = valid_tensor[
                j * section_size:(j + 1) * section_size, :]

            input_tensor = self.unsqueeze(batch_imputing_tensor)

            imputed_tensor = self.squeeze(
                self.model(input_tensor, self.input_mask)[0])

            imputing_idx = [
                k in chosen_idx
                for k in range(j * section_size, (j + 1) * section_size)
            ]
            imputing_idx_tensor = torch.tensor(imputing_idx)

            imputed_label_tensor = imputed_tensor[imputing_idx_tensor,
                                                  self.target_column]
            true_label_tensor = batch_valid_tensor[imputing_idx_tensor,
                                                   self.target_column]

            imputed_list = imputed_list + imputed_tensor[:, self.target_column].tolist()

            # loss = torch.sqrt(self.criterion(imputed_label_tensor, true_label_tensor))
            loss = self.criterion(imputed_label_tensor, true_label_tensor)

            if imputed_label_tensor.shape[0] > 0:
                avg_loss = (j * avg_loss + loss) / (j + 1)

        print(avg_loss * (self.target_max - self.target_min))

        valid_list = valid_tensor[:, self.target_column].tolist()
        imputed_list = [(imputed_list[i] * (i in chosen_idx) + valid_list[i] *
                         (i not in chosen_idx))
                        for i in range(len(imputed_list))]

        plt.plot(imputed_list, 'r', label="Imputed")
        plt.plot(valid_list, 'b', label="True")
        plt.legend(loc="upper right")
        plt.show()

    def unsqueeze(self, batch_tensor):
        temp_tensor = torch.zeros((self.batch_size, self.window, self.columns),
                                  dtype=torch.float,
                                  device=self.device)
        for i in range(self.batch_size):
            temp_tensor[i, :, :] = batch_tensor[i * self.window:(i + 1) *
                                                self.window, :]
        return temp_tensor

    def squeeze(self, predict_tensor):
        temp_tensor = torch.zeros(
            (self.batch_size * self.window, self.columns),
            dtype=torch.float,
            device=self.device)
        for i in range(self.batch_size):
            temp_tensor[i * self.window:(i + 1) *
                        self.window, :] = predict_tensor[i, :, :]
        return temp_tensor

    def draw_plots(self, avg_loss):
        plt.plot(self.loss_list, 'r', label="Loss")
        plt.plot(self.lr_list, 'b', label="10000 * Learning Rate")
        title = (f'n_layers: {self.n_layers}\n'
                 f'n_heads: {self.n_head}\n'
                 f'd_inner: {self.d_inner}\n'
                 f'warmup_step: {self.warmup_step}\n'
                 f'd_v: {self.d_v}\n'
                 f'd_k: {self.d_k}\n'
                 f'd_model: {self.d_model}\n'
                 f'window: {self.window}\n'
                 f'target_column: {self.target_name}\n'
                 f'Loss_function: {self.criterion}\n'
                 f'avg_loss: {float(avg_loss.data)}')
        plt.legend(loc="upper right", title=title)
        timestr = time.strftime("%Y%m%d-%H%M%S")
        plt.savefig(self.plot_file + timestr, quality=90)

    def save_model(self, epoch):
        checkpoint = {
            'epoch': epoch,
            'lr_list': self.lr_list,
            'loss_list': self.loss_list,
            'model': self.model
        }
        if self.model_file:
            torch.save(checkpoint, self.model_file)
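Because unsqueeze() and squeeze() above copy contiguous, row-major blocks of rows, they are equivalent to plain tensor reshapes. A small standalone check (not part of the class; shapes are illustrative):

import torch

batch_size, window, columns = 4, 8, 16
flat = torch.randn(batch_size * window, columns)

batched = flat.view(batch_size, window, columns)            # same result as unsqueeze()
assert torch.equal(batched[1], flat[window:2 * window])

assert torch.equal(batched.reshape(-1, columns), flat)      # same result as squeeze()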
Example #18
class TransformerEncoder(torch.nn.Module):
    def __init__(self, in_channels=13, len_max_seq=100,
            d_word_vec=512, d_model=512, d_inner=2048,
            n_layers=6, n_head=8, d_k=64, d_v=64,
            dropout=0.2, nclasses=6):

        super(TransformerEncoder, self).__init__()

        self.d_model = d_model

        self.inlayernorm = nn.LayerNorm(in_channels)
        self.convlayernorm = nn.LayerNorm(d_model)
        self.outlayernorm = nn.LayerNorm(d_model)

        self.inconv = torch.nn.Conv1d(in_channels, d_model, 1)

        self.encoder = Encoder(
            n_src_vocab=None, len_max_seq=len_max_seq,
            d_word_vec=d_word_vec, d_model=d_model, d_inner=d_inner,
            n_layers=n_layers, n_head=n_head, d_k=d_k, d_v=d_v,
            dropout=dropout)

        self.outlinear = nn.Linear(d_model, nclasses, bias=False)

        self.tempmaxpool = nn.MaxPool1d(len_max_seq)

        self.logsoftmax = nn.LogSoftmax(dim=-1)

    def _logits(self, x):
        # b,d,t -> b,t,d
        x = x.transpose(1,2)

        x = self.inlayernorm(x)

        # b,t,d -> b,d,t for the 1x1 convolution, then back to b,t,d
        x = self.inconv(x.transpose(1,2)).transpose(1,2)

        x = self.convlayernorm(x)

        batchsize, seq, d = x.shape
        src_pos = torch.arange(1, seq + 1, dtype=torch.long).expand(batchsize, seq)

        if torch.cuda.is_available():
            src_pos = src_pos.cuda()

        enc_output, enc_slf_attn_list = self.encoder(src_seq=x, src_pos=src_pos, return_attns=True)

        enc_output = self.outlayernorm(enc_output)

        enc_output = self.tempmaxpool(enc_output.transpose(1, 2)).squeeze(-1)

        logits = self.outlinear(enc_output)

        return logits

    def forward(self, x):

        logits = self._logits(x)

        logprobabilities = self.logsoftmax(logits)

        return logprobabilities

    def save(self, path="model.pth", **kwargs):
        print("\nsaving model to "+path)
        model_state = self.state_dict()
        os.makedirs(os.path.dirname(path), exist_ok=True)
        torch.save(dict(model_state=model_state,**kwargs),path)

    def load(self, path):
        print("loading model from "+path)
        snapshot = torch.load(path, map_location="cpu")
        model_state = snapshot.pop('model_state', snapshot)
        self.load_state_dict(model_state)
        return snapshot
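Note how _logits above builds 1-based position indices, presumably because index 0 is reserved for padding in the encoder's positional table. A tiny standalone illustration of the expansion:

import torch

batchsize, seq = 2, 4
src_pos = torch.arange(1, seq + 1, dtype=torch.long).expand(batchsize, seq)
print(src_pos)
# tensor([[1, 2, 3, 4],
#         [1, 2, 3, 4]])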
Example #19
            return mel_output, mel_output_postnet, duration_predictor_output
        else:
            length_regulator_output, decoder_pos = self.length_regulator(
                encoder_output, encoder_mask, alpha=alpha)

            decoder_output = self.decoder(length_regulator_output, decoder_pos)

            mel_output = self.mel_linear(decoder_output)
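            # the postnet predicts a residual that is added back to refine the coarse mel output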
            mel_output_postnet = self.postnet(mel_output) + mel_output

            return mel_output, mel_output_postnet


if __name__ == "__main__":
    # Test
    test_encoder = Encoder()
    test_decoder = Decoder()
    # print(test_encoder)
    # print(test_decoder)

    test_src = torch.stack([
        torch.Tensor([1, 2, 4, 3, 2, 5, 0, 0]),
        torch.Tensor([3, 4, 2, 6, 7, 1, 2, 3])
    ]).long()
    test_pos = torch.stack([
        torch.Tensor([1, 2, 3, 4, 5, 6, 0, 0]),
        torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8])
    ]).long()
    test_target = torch.stack([
        torch.Tensor([0, 2, 3, 0, 3, 2, 1, 0]),
        torch.Tensor([1, 2, 3, 2, 2, 0, 3, 6])