def __init__(self, n_src_vocab, n_max_seq, n_layers=2, n_head=2,
             d_word_vec=100, d_model=100, d_inner_hid=100, d_k=100, d_v=100,
             dropout=0.1, proj_share_weight=True):
    super(Decepticon, self).__init__()
    self.encoder = Encoder(n_src_vocab, n_max_seq, n_layers=n_layers,
                           n_head=n_head, d_word_vec=d_word_vec,
                           d_model=d_model, d_inner_hid=d_inner_hid,
                           dropout=dropout)
    assert d_model == d_word_vec, \
        'To facilitate the residual connections, ' \
        'the dimensions of all module outputs shall be the same.'
def __init__(self, d_word_vec=77, n_layers=3, n_head=1, d_k=16, d_v=16,
             d_model=77, d_inner=16, dropout=0.1, n_position=200, seq_len=15,
             con_size=3, days=1, kernel='linear', kernel_size_tcn=3,
             kernel_size_scn=2):
    super().__init__()
    self.encoder = Encoder(d_word_vec, n_layers, n_head, d_k, d_v, d_model,
                           d_inner, dropout, n_position, kernel=kernel,
                           kernel_size_tcn=kernel_size_tcn,
                           kernel_size_scn=kernel_size_scn)
    self.con1 = nn.Conv1d(d_model, days, con_size)
    self.ff1 = nn.Linear(seq_len - con_size + 1, d_word_vec)
    for p in self.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
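# A minimal standalone sketch (not from the snippet above) checking the shape
# arithmetic behind con1 and ff1: a Conv1d with kernel size con_size and no
# padding shrinks a length-seq_len sequence to seq_len - con_size + 1, which
# is exactly the in_features that ff1 expects.
import torch
import torch.nn as nn

d_model, seq_len, con_size, days, d_word_vec = 77, 15, 3, 1, 77
con1 = nn.Conv1d(d_model, days, con_size)
ff1 = nn.Linear(seq_len - con_size + 1, d_word_vec)

x = torch.randn(4, d_model, seq_len)  # (batch, channels, time)
y = con1(x)                           # -> (4, days, seq_len - con_size + 1)
z = ff1(y)                            # -> (4, days, d_word_vec)
assert y.shape == (4, days, seq_len - con_size + 1)
assert z.shape == (4, days, d_word_vec)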
def __init__(self, use_postnet=True, n_spkers=1, n_emotes=1):
    # n_emotes was used below but missing from the signature; added with a
    # default mirroring n_spkers.
    super(FastSpeech2, self).__init__()

    ### Speaker Embedding Table ###
    self.use_spk_embed = hp.use_spk_embed
    if self.use_spk_embed:
        self.n_spkers = n_spkers
        self.spk_embed_dim = hp.spk_embed_dim
        self.spk_embed_weight_std = hp.spk_embed_weight_std
        self.embed_speakers = Embedding(n_spkers, self.spk_embed_dim,
                                        padding_idx=None,
                                        std=self.spk_embed_weight_std)

    ### Emotion Embedding Table ###
    self.use_emo_embed = hp.use_emo_embed
    if self.use_emo_embed:
        self.n_emotes = n_emotes
        self.emo_embed_dim = hp.emo_embed_dim
        self.emo_embed_weight_std = hp.emo_embed_weight_std
        self.embed_emotions = Embedding(n_emotes, self.emo_embed_dim,
                                        padding_idx=None,
                                        std=self.emo_embed_weight_std)

    ### Encoder, Speaker Integrator, Variance Adaptor, Decoder, Postnet ###
    self.encoder = Encoder()
    if self.use_spk_embed:
        self.speaker_integrator = SpeakerIntegrator()
    self.variance_adaptor = VarianceAdaptor()
    self.decoder = Decoder()
    self.mel_linear = nn.Linear(hp.decoder_hidden, hp.n_mel_channels)
    self.use_postnet = use_postnet
    if self.use_postnet:
        self.postnet = PostNet()
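# The Embedding(...) helper used above (with a `std` keyword) is not defined
# in this excerpt. A minimal hypothetical sketch consistent with the call
# sites, assuming it is an nn.Embedding with normally-initialized weights:
import torch.nn as nn

def Embedding(num_embeddings, embedding_dim, padding_idx=None, std=0.01):
    # Hypothetical helper: an embedding table whose weights are drawn
    # from N(0, std**2).
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    nn.init.normal_(m.weight, mean=0.0, std=std)
    return m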
def __init__(self):
    super(FastSpeech, self).__init__()
    self.encoder = Encoder()
    self.length_regulator = LengthRegulator()
    self.decoder = Decoder()
    self.mel_linear = Linear(hp.decoder_output_size, hp.num_mels)
    self.postnet = PostNet()
def __init__(self):
    super(FastSpeech2, self).__init__()
    self.encoder = Encoder()
    self.variance_adaptor = VarianceAdaptor()
    self.decoder = Decoder()
    self.mel_linear = Linear(hp.decoder_hidden, hp.n_mel_channels)
    self.postnet = PostNet()
def __init__(self):
    super(FastSpeech, self).__init__()
    self.encoder = Encoder()
    self.length_regulator = LengthRegulator()
    self.decoder = Decoder()
    self.mel_linear = Linear(hp.decoder_dim, hp.num_mels)
    self.postnet = CBHG(hp.num_mels, K=8, projections=[256, hp.num_mels])
    self.last_linear = Linear(hp.num_mels * 2, hp.num_mels)
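# Design note: CBHG (from Tacotron) ends in a bidirectional GRU, so its
# output is 2 * hp.num_mels wide; last_linear projects that back down to
# hp.num_mels. That is why this variant needs the extra Linear while the
# PostNet-based variants above do not.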
def __init__(self):
    super(StyleEncoder, self).__init__()
    self.text_encoder = Encoder()
    self.audio_encoder = AudioEncoder()
    self.text_linear_down = nn.Sequential(
        nn.Linear(hp.encoder_hidden, hp.va_neck_hidden_t),
        nn.ReLU())
    self.speaker_linear_p = nn.Sequential(
        nn.Linear(hp.speaker_embed_dim, hp.va_neck_hidden_p * 2),
        nn.ReLU())
    self.speaker_linear = nn.Sequential(
        nn.Linear(hp.speaker_embed_dim, hp.encoder_hidden),
        nn.ReLU())
def __init__(self, py_vocab_size, hz_vocab_size=None, use_postnet=True):
    super(FastSpeech2, self).__init__()
    self.encoder = Encoder(py_vocab_size, hz_vocab_size=hz_vocab_size)
    self.variance_adaptor = VarianceAdaptor()
    self.decoder = Decoder()
    self.mel_linear = nn.Linear(hp.decoder_hidden, hp.n_mel_channels)
    self.use_postnet = use_postnet
    if self.use_postnet:
        self.postnet = UNet(scale=8)
def __init__(self, use_postnet=True):
    super(FastSpeech2, self).__init__()
    self.encoder = Encoder()
    self.variance_adaptor = TacotronDuration()
    self.decoder = Decoder()
    self.mel_linear = nn.Linear(hp.decoder_hidden, hp.n_mel_channels)
    self.use_postnet = use_postnet
    if self.use_postnet:
        self.postnet = PostNet()
def __init__(self, use_postnet=True):
    super(FastSpeech2, self).__init__()
    # self.gst = GST()
    self.encoder = Encoder()
    self.variance_adaptor = VarianceAdaptor()
    self.decoder = Decoder()
    if hp.vocoder == 'WORLD':
        # self.f0_decoder = Decoder()
        self.ap_linear = nn.Linear(hp.decoder_hidden, hp.n_ap_channels)
        self.sp_linear = nn.Linear(hp.decoder_hidden, hp.n_sp_channels)
    else:
        self.mel_linear = nn.Linear(hp.decoder_hidden, hp.n_mel_channels)
    self.use_postnet = use_postnet
    if self.use_postnet:
        self.postnet = PostNet()
def __init__(self, hparams):
    super().__init__()
    self.hparams = hparams
    hparams = self.hparams  # a['key'] (if so) -> a.key
    self.enc = Encoder(
        n_src_vocab=hparams.vocab_size, len_max_seq=hparams.max_len,
        d_word_vec=hparams.d_model, d_model=hparams.d_model,
        d_inner=hparams.d_inner_hid, d_k=hparams.d_k, d_v=hparams.d_v,
        n_layers=hparams.n_layers, n_head=hparams.n_head,
        dropout=hparams.dropout)
    self.word = nn.Linear(hparams.d_model, hparams.vocab_size, bias=False)
    nn.init.xavier_normal_(self.word.weight)
    self.x_logit_scale = 1.
    if hparams.share_emb_prj_weight:
        self.word.weight = self.enc.src_word_emb.weight
        self.x_logit_scale = (hparams.d_model ** -0.5)
    self.loc = nn.Linear(hparams.d_model, 1)
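# Standalone sketch of the weight tying used above (names here are
# illustrative): the output projection reuses the embedding matrix, and
# logits are scaled by d_model ** -0.5 to compensate for the shared weights'
# scale, as in "Attention Is All You Need".
import torch
import torch.nn as nn

vocab_size, d_model = 1000, 64
emb = nn.Embedding(vocab_size, d_model)
proj = nn.Linear(d_model, vocab_size, bias=False)
proj.weight = emb.weight            # tie: one (vocab_size, d_model) matrix, two roles
x_logit_scale = d_model ** -0.5

h = torch.randn(2, 5, d_model)      # fake hidden states
logits = proj(h) * x_logit_scale    # (2, 5, vocab_size)
assert proj.weight.data_ptr() == emb.weight.data_ptr()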
def __init__(self, src_vocab_size, tgt_vocab_size, n_layer=6, d_model=512,
             d_ff=2048, n_head=8, dropout=0.1):
    super(Transformer, self).__init__()
    self.src_embed = nn.Sequential(Embeddings(d_model, src_vocab_size),
                                   PositionalEncoding(d_model, dropout))
    self.tgt_embed = nn.Sequential(Embeddings(d_model, tgt_vocab_size),
                                   PositionalEncoding(d_model, dropout))
    self.encoder = Encoder(n_head, d_model, d_ff, dropout, n_layer)
    self.decoder = Decoder(n_head, d_model, d_ff, dropout, n_layer)
    self.generator = Generator(d_model, tgt_vocab_size)

    # Initialize parameters with Glorot / fan_avg.
    for p in self.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
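# Why the p.dim() > 1 guard in the init loop: Glorot/Xavier initialization is
# defined for weight matrices (it uses fan-in and fan-out), so biases and
# LayerNorm gains (1-D tensors) are skipped and keep their defaults. A
# runnable illustration on a toy module:
import torch.nn as nn

toy = nn.Sequential(nn.Linear(8, 8), nn.LayerNorm(8))
for p in toy.parameters():
    if p.dim() > 1:                 # only the (8, 8) Linear weight qualifies
        nn.init.xavier_uniform_(p)  # biases and LayerNorm params untouched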
def __init__(self, d_src_vec, len_seq, d_emb_vec, n_layers, n_head, d_k, d_v,
             d_inner, dropout):
    super(model, self).__init__()
    self.d_src_vec = d_src_vec
    self.d_emb_vec = d_emb_vec
    self.len_seq = len_seq
    self.n_layers = n_layers
    self.n_head = n_head
    self.dropout = dropout
    self.d_inner = d_inner
    self.ffn = ffn_compressed(d_in=self.d_src_vec, d_hid=self.d_inner,
                              d_out=self.d_emb_vec)
    self.encoder = Encoder(len_seq=self.len_seq,
                           d_word_vec=self.d_emb_vec,
                           n_layers=self.n_layers,
                           n_head=self.n_head,
                           d_k=self.d_emb_vec // self.n_head,
                           d_v=self.d_emb_vec // self.n_head,
                           d_inner=self.d_inner,
                           dropout=self.dropout)

    # Fully connected head. Seems to have a lot of params:
    # self.FC1 = nn.Linear(self.d_emb_vec * self.len_seq, 64)
    # self.FC2 = nn.Linear(64, 8)
    # self.FC3 = nn.Linear(8, 2)

    # Average pooling over features:
    # self.avg_pooling = nn.AvgPool1d(d_emb_vec - 1, stride=1)  # d_emb_vec-1: to have 2 classes
    # self.FC = nn.Linear(len_seq * 2, 2)  # 2: binary classification
    # self.softmax = nn.Softmax(dim=-1)

    # Average pooling over the sequence:
    self.avg_pooling = nn.AvgPool1d(len_seq, stride=1)  # len_seq: to have 1 averaged token
    self.FC = nn.Linear(d_emb_vec, 2)  # 2: binary classification
    self.softmax = nn.Softmax(dim=-1)
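# Standalone sketch of the head that was kept above: average-pool a
# (batch, seq, features) encoding over the sequence dimension, then classify
# into two classes. Shapes and names here are illustrative, not from the
# snippet.
import torch
import torch.nn as nn

len_seq, d_emb_vec = 10, 32
avg_pooling = nn.AvgPool1d(len_seq, stride=1)
fc = nn.Linear(d_emb_vec, 2)

enc = torch.randn(4, len_seq, d_emb_vec)   # stand-in encoder output
pooled = avg_pooling(enc.transpose(1, 2))  # (4, d_emb_vec, 1)
logits = fc(pooled.squeeze(-1))            # (4, 2)
assert logits.shape == (4, 2)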
import torch  # was missing, but torch.device / torch.nn / torch.rand are used below
import torch.nn.functional as F
import torch.optim as optim
from torchtext.data import Field, Dataset, BucketIterator
from torchtext.datasets import TranslationDataset

import transformer.Constants as Constants
from transformer.Layers import EncoderLayer
from transformer.Models import Transformer, Encoder
from transformer.Optim import ScheduledOptim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

encoder_stacks = Encoder(d_model=32, d_inner=64, n_layers=2, n_head=4,
                         d_k=16, d_v=16, dropout=0.1)
criterion = torch.nn.MSELoss().to(device)
optimizer = torch.optim.SGD(encoder_stacks.parameters(), lr=1)

src = torch.rand(1, 2, 32, requires_grad=True)  # (batch, seq, d_model)
tgt = torch.rand(1, 2, 32)
print(src)

encoder_stacks.train()
for i in range(100):
    # NOTE: the loop body was missing in the source; this is a hedged
    # reconstruction. It assumes an Encoder variant that accepts pre-embedded
    # inputs plus a mask and returns a tuple whose first element is the
    # encoder output.
    optimizer.zero_grad()
    enc_output, *_ = encoder_stacks(src, None)
    loss = criterion(enc_output, tgt)
    loss.backward()
    optimizer.step()
import math
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.optim as optim

from transformer.Models import Encoder
from transformer.Optim import ScheduledOptim


class Dataset:
    def __init__(self, source_dataset, batch_size, epochs, window_size,
                 device, plot_file, train_data, test_data, valid_data,
                 target_column, target_min, target_max, d_inner, n_layers,
                 n_head_, d_k, d_v, n_warmup_steps, criterion, target_name,
                 d_model, model_file=None, load_data=False, load_model=False):
        self.data_frame = self.read_dataset(source_dataset)
        self.batch_size = batch_size
        self.epochs = epochs
        self.device = device
        self.target_column = target_column
        self.window = window_size
        self.plot_file = plot_file
        self.n_layers = n_layers
        self.n_head = n_head_
        self.d_inner = d_inner
        self.warmup_step = n_warmup_steps
        self.d_k = d_k
        self.d_v = d_v
        self.d_model = d_model
        self.target_name = target_name
        self.input_mask = torch.ones([self.batch_size, 1, self.window],
                                     dtype=torch.int, device=device)
        self.target_max = target_max
        self.target_min = target_min
        self.model_file = model_file
        self.prev_epoch = 0
        if load_data:
            self.train_df = pd.read_csv(train_data)
            self.test_df = pd.read_csv(test_data)
            self.valid_df = pd.read_csv(valid_data)
        else:
            self.train_df, self.valid_df, self.test_df = self.organize_dataset(
                train_data, test_data, valid_data)
        # Zero-pad extra feature columns up to d_model.
        pad_col = ['col' + str(i)
                   for i in range(self.train_df.shape[1], self.d_model)]
        for col in pad_col:
            self.train_df[col] = 0
            self.test_df[col] = 0
            self.valid_df[col] = 0
        self.columns = self.train_df.shape[1]
        self.model = Encoder(n_position=200,
                             d_word_vec=self.columns,
                             d_model=self.columns,
                             d_inner=d_inner,
                             n_layers=n_layers,
                             n_head=n_head_,
                             d_k=d_k,
                             d_v=d_v,
                             dropout=0).to(device)
        if load_model:
            self.model = torch.load(self.model_file)['model']
            self.model.eval()
            self.model = self.model.to(device)
            self.prev_epoch = torch.load(self.model_file)['epoch']
        self.criterion = criterion
        # NOTE: the denominator below may have been intended as
        # (self.window * self.batch_size); kept as written in the source.
        self.optimizer = ScheduledOptim(
            optim.Adam(self.model.parameters(), betas=(0.9, 0.98), eps=1e-09),
            2.0, self.columns, n_warmup_steps,
            n_step=self.prev_epoch * (math.floor(
                self.train_df.shape[0] / self.window * self.batch_size)))
        self.loss_list = []
        self.lr_list = []

    def read_dataset(self, source_dataset):
        return pd.read_csv(source_dataset)

    def organize_dataset(self, train_data, test_data, valid_data):
        train_df = self.data_frame
        valid_df = self.data_frame
        test_df = self.data_frame
        return train_df, valid_df, test_df

    def train(self):
        train_tensor = torch.tensor(self.train_df.values, dtype=torch.float,
                                    device=self.device)
        train_rows = self.train_df.shape[0]
        section_size = self.window * self.batch_size
        avg_loss = 0
        for i in range(self.epochs):
            # Randomly pick ~10% of the rows and zero out their target value;
            # the model is trained to impute them.
            chosen_idx = np.random.choice(train_rows, replace=True,
                                          size=math.floor(train_rows / 10))
            imputing_df = self.train_df.copy()
            imputing_df.iloc[[j in chosen_idx for j in range(train_rows)],
                             self.target_column] = 0
            imputing_tensor = torch.tensor(imputing_df.values,
                                           dtype=torch.float,
                                           device=self.device)
            avg_loss = 0
            lr = 0
            for j in range(math.floor(train_rows / section_size)):
                batch_imputing_tensor = imputing_tensor[
                    j * section_size:(j + 1) * section_size, :]
                batch_train_tensor = train_tensor[
                    j * section_size:(j + 1) * section_size, :]
                input_tensor = self.unsqueeze(batch_imputing_tensor)
                self.optimizer.zero_grad()
                imputed_tensor = self.squeeze(
                    self.model(input_tensor, self.input_mask)[0])
                imputing_idx = [
                    k in chosen_idx
                    for k in range(j * section_size, (j + 1) * section_size)
                ]
                imputing_idx_tensor = torch.tensor(imputing_idx)
                imputed_label_tensor = imputed_tensor[imputing_idx_tensor,
                                                      self.target_column]
                true_label_tensor = batch_train_tensor[imputing_idx_tensor,
                                                       self.target_column]
                loss = torch.sqrt(
                    self.criterion(imputed_label_tensor, true_label_tensor))
                # loss = self.criterion(imputed_label_tensor, true_label_tensor)
                if imputed_label_tensor.shape[0] > 0:
                    loss.backward()  # here compute engine
                    lr = self.optimizer.step_and_update_lr()
                    avg_loss = (j * avg_loss + loss) / (j + 1)
            self.loss_list.append(avg_loss * (self.target_max - self.target_min))
            self.lr_list.append(10000 * lr)
            self.save_model(i)
            print(avg_loss * (self.target_max - self.target_min))
            self.draw_plots(avg_loss * (self.target_max - self.target_min))

    def validate(self):
        valid_tensor = torch.tensor(self.valid_df.values, dtype=torch.float,
                                    device=self.device)
        valid_rows = self.valid_df.shape[0]
        section_size = self.window * self.batch_size
        chosen_idx = np.random.choice(valid_rows, replace=True,
                                      size=math.floor(valid_rows / 10))
        imputing_df = self.valid_df.copy()
        imputing_df.iloc[[j in chosen_idx for j in range(valid_rows)],
                         self.target_column] = 0
        imputing_tensor = torch.tensor(imputing_df.values, dtype=torch.float,
                                       device=self.device)
        avg_loss = 0
        imputed_list = []
        for j in range(math.floor(valid_rows / section_size)):
            batch_imputing_tensor = imputing_tensor[
                j * section_size:(j + 1) * section_size, :]
            batch_valid_tensor = valid_tensor[
                j * section_size:(j + 1) * section_size, :]
            input_tensor = self.unsqueeze(batch_imputing_tensor)
            imputed_tensor = self.squeeze(
                self.model(input_tensor, self.input_mask)[0])
            imputing_idx = [
                k in chosen_idx
                for k in range(j * section_size, (j + 1) * section_size)
            ]
            imputing_idx_tensor = torch.tensor(imputing_idx)
            imputed_label_tensor = imputed_tensor[imputing_idx_tensor,
                                                  self.target_column]
            true_label_tensor = batch_valid_tensor[imputing_idx_tensor,
                                                   self.target_column]
            imputed_list = imputed_list + \
                imputed_tensor[:, self.target_column].tolist()
            # loss = torch.sqrt(self.criterion(imputed_label_tensor, true_label_tensor))
            loss = self.criterion(imputed_label_tensor, true_label_tensor)
            if imputed_label_tensor.shape[0] > 0:
                avg_loss = (j * avg_loss + loss) / (j + 1)
        print(avg_loss * (self.target_max - self.target_min))
        valid_list = valid_tensor[:, self.target_column].tolist()
        # Splice the imputed values back into the true series for plotting.
        imputed_list = [(imputed_list[i] * (i in chosen_idx) +
                         valid_list[i] * (i not in chosen_idx))
                        for i in range(len(imputed_list))]
        plt.plot(imputed_list, 'r', label="Imputed")
        plt.plot(valid_list, 'b', label="True")
        plt.legend(loc="upper right")
        plt.show()

    def unsqueeze(self, batch_tensor):
        # (batch_size * window, columns) -> (batch_size, window, columns).
        temp_tensor = torch.zeros((self.batch_size, self.window, self.columns),
                                  dtype=torch.float, device=self.device)
        for i in range(self.batch_size):
            temp_tensor[i, :, :] = batch_tensor[
                i * self.window:(i + 1) * self.window, :]
        return temp_tensor

    def squeeze(self, predict_tensor):
        # Inverse of unsqueeze: (batch_size, window, columns) -> (batch_size * window, columns).
        temp_tensor = torch.zeros(
            (self.batch_size * self.window, self.columns),
            dtype=torch.float, device=self.device)
        for i in range(self.batch_size):
            temp_tensor[i * self.window:(i + 1) * self.window, :] = \
                predict_tensor[i, :, :]
        return temp_tensor

    def draw_plots(self, avg_loss):
        plt.plot(self.loss_list, 'r', label="Loss")
        plt.plot(self.lr_list, 'b', label="10000 * Learning Rate")
        title = ('n_layers: ' + str(self.n_layers) + '\n'
                 + 'n_heads: ' + str(self.n_head) + '\n'
                 + 'd_inner: ' + str(self.d_inner) + '\n'
                 + 'warmup_step: ' + str(self.warmup_step) + '\n'
                 + 'd_v: ' + str(self.d_v) + '\n'
                 + 'd_k: ' + str(self.d_k) + '\n'
                 + 'd_model: ' + str(self.d_model) + '\n'
                 + 'window: ' + str(self.window) + '\n'
                 + 'target_column: ' + self.target_name + '\n'
                 + 'Loss_function: ' + str(self.criterion) + '\n'
                 + 'avg_loss: ' + str(float(avg_loss.data)))
        plt.legend(loc="upper right", title=title)
        timestr = time.strftime("%Y%m%d-%H%M%S")
        # NOTE: the quality kwarg is deprecated and removed in newer Matplotlib.
        plt.savefig(self.plot_file + timestr, quality=90)

    def save_model(self, epoch):
        checkpoint = {
            'epoch': epoch,
            'lr_list': self.lr_list,
            'loss_list': self.loss_list,
            'model': self.model
        }
        if self.model_file:
            torch.save(checkpoint, self.model_file)
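# Design note (standalone sketch, not part of the class above): the unsqueeze
# and squeeze helpers copy row-blocks in a Python loop, which is equivalent
# to a single contiguous reshape and its inverse:
import torch

batch_size, window, columns = 4, 5, 3
flat = torch.arange(batch_size * window * columns,
                    dtype=torch.float).reshape(batch_size * window, columns)
stacked = flat.reshape(batch_size, window, columns)       # ~ unsqueeze()
restored = stacked.reshape(batch_size * window, columns)  # ~ squeeze()
assert torch.equal(flat, restored)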
import os

import torch
import torch.nn as nn
from transformer.Models import Encoder  # assumed import path, as elsewhere in this collection


class TransformerEncoder(torch.nn.Module):
    def __init__(self, in_channels=13, len_max_seq=100, d_word_vec=512,
                 d_model=512, d_inner=2048, n_layers=6, n_head=8, d_k=64,
                 d_v=64, dropout=0.2, nclasses=6):
        super(TransformerEncoder, self).__init__()
        self.d_model = d_model  # was hard-coded to 512, ignoring the argument

        self.inlayernorm = nn.LayerNorm(in_channels)
        self.convlayernorm = nn.LayerNorm(d_model)
        self.outlayernorm = nn.LayerNorm(d_model)

        self.inconv = torch.nn.Conv1d(in_channels, d_model, 1)

        self.encoder = Encoder(
            n_src_vocab=None, len_max_seq=len_max_seq,
            d_word_vec=d_word_vec, d_model=d_model, d_inner=d_inner,
            n_layers=n_layers, n_head=n_head, d_k=d_k, d_v=d_v,
            dropout=dropout)

        self.outlinear = nn.Linear(d_model, nclasses, bias=False)
        self.tempmaxpool = nn.MaxPool1d(len_max_seq)
        self.logsoftmax = nn.LogSoftmax(dim=-1)

    def _logits(self, x):
        # b,d,t -> b,t,d
        x = x.transpose(1, 2)
        x = self.inlayernorm(x)
        x = self.inconv(x.transpose(1, 2)).transpose(1, 2)
        x = self.convlayernorm(x)

        batchsize, seq, d = x.shape
        src_pos = torch.arange(1, seq + 1,
                               dtype=torch.long).expand(batchsize, seq)
        if torch.cuda.is_available():
            src_pos = src_pos.cuda()

        enc_output, enc_slf_attn_list = self.encoder.forward(
            src_seq=x, src_pos=src_pos, return_attns=True)
        enc_output = self.outlayernorm(enc_output)

        # Max-pool over the temporal dimension, then classify.
        enc_output = self.tempmaxpool(enc_output.transpose(1, 2)).squeeze(-1)
        logits = self.outlinear(enc_output)
        return logits

    def forward(self, x):
        logits = self._logits(x)
        logprobabilities = self.logsoftmax(logits)
        return logprobabilities

    def save(self, path="model.pth", **kwargs):
        print("\nsaving model to " + path)
        model_state = self.state_dict()
        os.makedirs(os.path.dirname(path), exist_ok=True)
        torch.save(dict(model_state=model_state, **kwargs), path)

    def load(self, path):
        print("loading model from " + path)
        snapshot = torch.load(path, map_location="cpu")
        model_state = snapshot.pop('model_state', snapshot)
        self.load_state_dict(model_state)
        return snapshot
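# A minimal usage sketch, assuming the Encoder import above resolves and
# accepts the arguments shown: classify a batch of 13-band time series.
model = TransformerEncoder(in_channels=13, len_max_seq=100, nclasses=6)
x = torch.randn(8, 13, 100)      # (batch, channels, time)
logprobs = model(x)              # (8, 6) log-probabilities
pred = logprobs.argmax(dim=-1)   # predicted class index per sample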
            return mel_output, mel_output_postnet, duration_predictor_output
        else:
            length_regulator_output, decoder_pos = self.length_regulator(
                encoder_output, encoder_mask, alpha=alpha)
            decoder_output = self.decoder(length_regulator_output, decoder_pos)

            mel_output = self.mel_linear(decoder_output)
            mel_output_postnet = self.postnet(mel_output) + mel_output

            return mel_output, mel_output_postnet


if __name__ == "__main__":
    # Test
    test_encoder = Encoder()
    test_decoder = Decoder()
    # print(test_encoder)
    # print(test_decoder)

    test_src = torch.stack([
        torch.Tensor([1, 2, 4, 3, 2, 5, 0, 0]),
        torch.Tensor([3, 4, 2, 6, 7, 1, 2, 3])
    ]).long()
    test_pos = torch.stack([
        torch.Tensor([1, 2, 3, 4, 5, 6, 0, 0]),
        torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8])
    ]).long()
    test_target = torch.stack([
        torch.Tensor([0, 2, 3, 0, 3, 2, 1, 0]),
        torch.Tensor([1, 2, 3, 2, 2, 0, 3, 6])