def __init__(self, config):
    """Pointer-augmented seq2seq model: two encoders, Luong attention,
    a pointer module, and a decoder with a vocabulary projection.

    Arguments
    ---------
    config: configuration object providing src/tgt vocab sizes,
        model_size, s_len and bos.
    """
    super().__init__()
    # Separate encoders: one over the source vocabulary, one over the
    # target vocabulary (named word/char — presumably word-level input and
    # char-level output; confirm against the forward pass).
    self.encoder_word = Encoder(config, config.src_vocab_size)
    self.encoder_char = Encoder(config, config.tgt_vocab_size)
    self.pointer = Pointer(config)
    self.attention = Luong_Attention(config)
    self.decoder = Decoder(config)
    # Projects decoder states to target-vocabulary logits.
    self.linear_out = nn.Linear(config.model_size, config.tgt_vocab_size)
    self.softmax = nn.Softmax(dim=-1)
    self.s_len = config.s_len  # target sequence length (from config)
    self.bos = config.bos      # begin-of-sequence token id (from config)
def __init__(self, args):
    """Basic initialization of Transformer.

    Arguments
    ---------
    args: <argparse.Namespace>
        Arguments used for overall process.
    """
    super().__init__()
    self.args = args
    self.num_stacks = self.args.num_stacks
    self.d_model = self.args.d_model
    self.vocab_size = self.args.vocab_size
    # Shared token-embedding layer; its weight matrix is tied to the output
    # projection below.
    self.emb = EmbeddingLayer(self.args)
    # Encoders run purely sequentially; decoders are kept in a ModuleList
    # (presumably each decoder layer also consumes the encoder output, so a
    # plain Sequential would not fit — confirm against forward()).
    encoders = [Encoder(self.args) for _ in range(self.num_stacks)]
    self.encoder_stack = nn.Sequential(*encoders)
    decoders = [Decoder(self.args) for _ in range(self.num_stacks)]
    self.decoder_stack = nn.ModuleList(decoders)
    # Output projection to vocabulary logits, weight-tied to the embedding.
    self.output_linear = nn.Linear(in_features=self.d_model,
                                   out_features=self.vocab_size,
                                   bias=False)
    self.output_linear.weight = self.emb.embedding_layer.weight
    self.softmax = nn.LogSoftmax(dim=-1)
    self.dropout = nn.Dropout(p=0.1)
def __init__(self, config):
    """Plain seq2seq model: one encoder, one decoder, and an output
    projection to target-vocabulary logits.

    Arguments
    ---------
    config: configuration object providing vocab sizes, model_size,
        s_len and bos.
    """
    super().__init__()
    self.encoder = Encoder(config, config.src_vocab_size)
    self.decoder = Decoder(config)
    self.bos = config.bos      # begin-of-sequence token id
    self.s_len = config.s_len  # target sequence length (from config)
    # Projects decoder states to target-vocabulary logits.
    self.linear_out = nn.Linear(config.model_size, config.tgt_vocab_size)
def __init__(self, vocabulary_size, sos_token, eos_token, pad_token,
             max_string_length=default_eda['string_max_length'],
             attention_size=default_attention['size'],
             embedding_size=default_embedding['size'],
             hidden_size=default_gru['hidden_size'],
             num_layers=default_gru['num_layers'],
             dropout=default_gru['dropout'],
             fixed_encoder=None):
    """Seq2seq model with a pointer-augmented decoder.

    Arguments
    ---------
    vocabulary_size: size of the shared input/output vocabulary
    sos_token / eos_token / pad_token: special token ids
    max_string_length: cap on decoded string length
    attention_size / embedding_size / hidden_size / num_layers / dropout:
        network hyper-parameters (defaults from the module-level configs)
    fixed_encoder: optional pre-trained encoder; when given, its weights
        are frozen and it is used instead of a freshly built Encoder.
    """
    super().__init__()
    self.max_string_length = max_string_length
    self.attention_size = attention_size
    self.vocabulary_size = vocabulary_size
    # FIX: compare against None instead of relying on truthiness — an
    # nn.Module subclass can evaluate falsy (e.g. containers define
    # __len__), which would silently discard a caller-supplied encoder.
    if fixed_encoder is not None:
        # Freeze the supplied encoder's weights.
        for p in fixed_encoder.parameters():
            p.requires_grad_(False)
        self.encoder = fixed_encoder
    else:
        self.encoder = Encoder(vocabulary_size, embedding_size, hidden_size,
                               num_layers, dropout)
    self.decoder = DecoderAndPointer(vocabulary_size, embedding_size,
                                     hidden_size, num_layers, dropout,
                                     attention_size, pad_token,
                                     shift_focus=True)
    self.sos_token = sos_token
    self.eos_token = eos_token
def __init__(self, args, text_data):
    """Gumbel rationale model: an embedding layer (pretrained, ELMo, or
    random), a rationale generator, and an encoder.

    Arguments
    ---------
    args: run configuration (flags and hyper-parameters)
    text_data: dataset object providing vocabulary and optional
        pre-trained embeddings
    """
    super(ModelGumbel, self).__init__()
    self.args = args
    self.text_data = text_data
    # embedding layer
    if self.args.pre_embedding and not self.args.elmo:
        # pre_trained embeddings are 300 dimensional, trainable
        self.embedding_layer = nn.Embedding.from_pretrained(torch.Tensor(
            self.text_data.pre_trained_embedding), freeze=False)
    elif self.args.elmo:
        # NOTE(review): dropout is passed as 1.0 - drop_out, which suggests
        # args.drop_out is a keep-probability — confirm against usage.
        self.embedding_layer = Elmo(options_file, weight_file, 1,
                                    dropout=1.0 - self.args.drop_out,
                                    requires_grad=self.args.train_elmo)
    else:
        # Randomly initialized embeddings sized to the vocabulary.
        self.embedding_layer = nn.Embedding(
            num_embeddings=self.text_data.getVocabularySize(),
            embedding_dim=self.args.embedding_size)
    # first generator
    self.generator = Generator(args=self.args)
    # then encoder
    self.encoder = Encoder(args=self.args)
def __init__(self, h_dim, res_h_dim, n_res_layers, n_embeddings,
             embedding_dim, beta, restart=True, in_dim=256):
    """1-D VQ-VAE: encoder -> pre-quantization conv -> vector quantizer
    (optionally with random restarts) -> decoder.

    Parameters
    ----------
    h_dim: hidden channel width of the encoder/decoder
    res_h_dim: hidden width of the residual layers
    n_res_layers: number of residual layers
    n_embeddings: codebook size
    embedding_dim: dimension of each codebook vector
    beta: commitment-loss weight passed to the quantizer
    restart: use VectorQuantizerRandomRestart instead of the plain
        VectorQuantizer (default True, as before)
    in_dim: input feature dimension. GENERALIZED: previously hard-coded
        to 256; the default preserves the old behavior.
    """
    super(VQVAE, self).__init__()
    # encode input into continuous latent space
    self.encoder = Encoder(in_dim=in_dim, h_dim=h_dim,
                           n_res_layers=n_res_layers, res_h_dim=res_h_dim)
    self.pre_quantization_conv = nn.Conv1d(h_dim, embedding_dim,
                                           kernel_size=3, stride=1,
                                           padding=1)
    # Define discretization bottleneck
    if not restart:
        self.vector_quantization = VectorQuantizer(n_embeddings,
                                                   embedding_dim, beta)
    else:
        self.vector_quantization = VectorQuantizerRandomRestart(
            n_embeddings, embedding_dim, beta)
    # decode the discrete latent representation
    self.decoder = Decoder(embedding_dim, h_dim, n_res_layers, res_h_dim)
    # e_indices: last quantizer indices, kept so sampling can reproduce the
    # most recent latent state.
    self.e_indices = None
def __init__(self, channels, h_dim, res_h_dim, n_res_layers, n_embeddings, embedding_dim, beta, save_img_embedding_map=False): super(VQVAE, self).__init__() # encode image into continuous latent space self.encoder = Encoder(channels, h_dim, n_res_layers, res_h_dim) self.pre_quantization_conv = nn.Conv2d(h_dim, embedding_dim, kernel_size=1, stride=1) # pass continuous latent vector through discretization bottleneck self.vector_quantization = VectorQuantizer(n_embeddings, embedding_dim, beta) # decode the discrete latent representation self.decoder = Decoder(channels, embedding_dim, h_dim, n_res_layers, res_h_dim) if save_img_embedding_map: self.img_to_embedding_map = {i: [] for i in range(n_embeddings)} else: self.img_to_embedding_map = None
def __init__(self, enc_in, dec_in, c_out, seq_len, label_len, out_len,
             factor=5, d_model=512, n_heads=8, e_layers=3, d_layers=2,
             d_ff=512, dropout=0.0, attn='prob', embed='fixed', data='ETTh',
             activation='gelu', device=torch.device('cuda:0')):
    """Informer encoder-decoder for sequence forecasting.

    Parameters
    ----------
    enc_in / dec_in: input feature dimensions for encoder / decoder
    c_out: output feature dimension
    seq_len / label_len / out_len: input, label, and prediction lengths
        (seq_len and label_len are accepted but not stored here)
    factor: sampling factor for the Prob attention
    attn: 'prob' selects ProbAttention for encoder self-attention,
        anything else selects FullAttention
    embed / data: embedding style and dataset tag for DataEmbedding
    device: accepted but unused in this constructor — confirm callers.
    """
    super(Informer, self).__init__()
    self.pred_len = out_len  # number of future steps to predict
    self.attn = attn
    # Encoding: value+temporal embeddings for encoder and decoder inputs
    self.enc_embedding = DataEmbedding(enc_in, d_model, embed, data, dropout)
    self.dec_embedding = DataEmbedding(dec_in, d_model, embed, data, dropout)
    # Attention flavour used by the encoder self-attention.
    Attn = ProbAttention if attn == 'prob' else FullAttention
    # Encoder: e_layers attention layers interleaved with e_layers - 1
    # conv layers.
    self.encoder = Encoder([
        EncoderLayer(AttentionLayer(
            Attn(False, factor, attention_dropout=dropout), d_model, n_heads),
                     d_model,
                     d_ff,
                     dropout=dropout,
                     activation=activation) for l in range(e_layers)
    ], [ConvLayer(d_model) for l in range(e_layers - 1)],
                           norm_layer=torch.nn.LayerNorm(d_model))
    # Decoder: each layer has masked (mask_flag=True) self-attention and
    # unmasked cross-attention, both FullAttention.
    self.decoder = Decoder([
        DecoderLayer(
            AttentionLayer(
                FullAttention(True, factor, attention_dropout=dropout),
                d_model, n_heads),
            AttentionLayer(
                FullAttention(False, factor, attention_dropout=dropout),
                d_model, n_heads),
            d_model,
            d_ff,
            dropout=dropout,
            activation=activation,
        ) for l in range(d_layers)
    ], norm_layer=torch.nn.LayerNorm(d_model))
    # self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True)
    # self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True)
    # Final linear projection from model dimension to output features.
    self.projection = nn.Linear(d_model, c_out, bias=True)
def _build(self, images):
    """Build the meta-learner graph: embed gradients/weights, pass them
    through an encoder-decoder to predict weight updates, apply the
    updates, and run the updated model forward on `images`.

    Returns the forward-pass outputs of the updated model.
    """
    embedder = Embedder()
    embedded_grads_weights = embedder.embed_all_grads_weights(
        self._placeholders)
    # Fake batching
    embedded_grads_weights = tf.expand_dims(embedded_grads_weights, 0)
    encoder = Encoder(self._source_num_way, self._target_num_way)
    encoded = encoder.encode(embedded_grads_weights)
    decoded = encoder.decode(encoded)
    # Fake batching
    decoded = tf.squeeze(decoded, [0])
    weight_updates = embedder.unembed_all_weights(decoded)
    # Summaries: mean / variance statistics of the predicted updates.
    the_list = [tf.nn.moments(w, [0]) for w in weight_updates]
    mean_means = tf.reduce_mean([tf.reduce_mean(v[0]) for v in the_list])
    mean_vars = tf.reduce_mean([tf.reduce_mean(v[1]) for v in the_list])
    tf.summary.scalar('weight_updates_mean', mean_means,
                      [META_TRAIN_COMBINED_SUMMARIES])
    tf.summary.scalar('weight_updates_var', mean_vars,
                      [META_TRAIN_COMBINED_SUMMARIES])
    # Get the updated model: add each predicted update to the corresponding
    # placeholder weight (placeholder[i][1] holds the weight tensor; the
    # five entries match the five update tensors).
    new_weights = [
        self._placeholders[0][1] + weight_updates[0],
        self._placeholders[1][1] + weight_updates[1],
        self._placeholders[2][1] + weight_updates[2],
        self._placeholders[3][1] + weight_updates[3],
        self._placeholders[4][1] + weight_updates[4]
    ]
    self.outputs = self.new_model_forward(new_weights, images)
    return self.outputs
def _build_encoder(self):
    """Assemble the Encoder from word/pos/char embeddings plus at most one
    contextualized embedding (ELMo, BERT-base, or BERT-large), selected by
    the configured `inputs`.

    Raises
    ------
    ValueError: if more than one contextualized embedding is requested.
    """
    loader = self.loader
    inputs = self.inputs
    contextualized_embeddings = None
    # At most one contextualized embedding source may be enabled.
    if sum(('elmo' in inputs, 'bert-base' in inputs,
            'bert-large' in inputs)) > 1:
        # NOTE(review): 'emebeddings' typo is in the original runtime
        # string and is preserved as-is.
        raise ValueError(
            'at most 1 contextualized emebeddings can be chosen')
    elif 'elmo' in inputs:
        contextualized_embeddings = ElmoEmbedding(usage='weighted_sum')
    elif 'bert-base' in inputs:
        contextualized_embeddings \
            = BertBaseEmbedding(usage='second_to_last')
    elif 'bert-large' in inputs:
        contextualized_embeddings \
            = BertLargeEmbedding(usage='second_to_last')
    encoder = Encoder(
        # Word embeddings are rescaled by their std when pretrained
        # (guarding against std == 0).
        loader.get_embeddings(
            'word',
            normalize=lambda W: W / np.std(W)
            if loader.use_pretrained_embed and np.std(W) > 0. else W),
        loader.get_embeddings('pos') if 'postag' in inputs else None,
        loader.get_embeddings('char') if 'char' in inputs else None,
        contextualized_embeddings, self.char_feature_size, self.char_pad_id,
        self.char_window_size, self.char_dropout, self.n_lstm_layers,
        self.lstm_hidden_size, self.embeddings_dropout, self.lstm_dropout,
        self.recurrent_dropout, self.bert_model, self.bert_dir)
    return encoder
def phase_3_train_encoder(params):
    """Phase-3 training: build a phoneme vocabulary from the training labs,
    persist it, construct the Encoder, and start training.

    Arguments
    ---------
    params: run configuration; uses params.resume to continue from a
        previous checkpoint.
    """
    from io_modules.dataset import Dataset
    from io_modules.dataset import DatasetIO
    from models.encoder import Encoder
    from trainers.encoder import Trainer
    trainset = Dataset("data/processed/train")
    devset = Dataset("data/processed/dev")
    sys.stdout.write('Found ' + str(len(trainset.files)) +
                     ' training files and ' + str(len(devset.files)) +
                     ' development files\n')
    # Build the phoneme -> id vocabulary from the training labels.
    character2int = {}
    # Hoisted out of the loop: the original re-imported and re-constructed
    # DatasetIO for every training file.
    dio = DatasetIO()
    for train_file in trainset.files:
        lab_list = dio.read_lab(train_file + ".txt")
        for entry in lab_list:
            if entry.phoneme not in character2int:
                character2int[entry.phoneme] = len(character2int)
    sys.stdout.write('Found ' + str(len(character2int)) +
                     ' unique phonemes\n')
    # FIX: the original wrote char.encode('utf-8') + '\t' — bytes + str
    # raises TypeError on Python 3, and the file was opened in text mode
    # anyway. Open with an explicit encoding and write plain str; the
    # context manager guarantees the file is closed.
    with open('data/models/encoder.chars', 'w', encoding='utf-8') as f:
        for char in character2int:
            f.write(char + '\t' + str(character2int[char]) + '\n')
    encoder = Encoder(params, len(character2int), character2int)
    if params.resume:
        sys.stdout.write('Resuming from previous checkpoint\n')
        encoder.load('data/models/rnn_encoder')
    trainer = Trainer(encoder, trainset, devset)
    trainer.start_training(10, 1000)
def synthesize(speaker, input_file, output_file, params):
    """Synthesize speech: encode a lab file into a spectrogram with the
    trained RNN encoder, then vocode it into a wav at `output_file`.

    Arguments
    ---------
    speaker: speaker identifier used to build the lab input
    input_file: path to the input lab file
    output_file: output wav path (a .png spectrogram is saved alongside)
    params: run configuration (temperature, target_sample_rate, ...)
    """
    print("[Encoding]")
    from io_modules.dataset import Dataset
    from io_modules.dataset import Encodings
    from models.encoder import Encoder
    from trainers.encoder import Trainer
    encodings = Encodings()
    encodings.load('data/models/encoder.encodings')
    encoder = Encoder(params, encodings, runtime=True)
    encoder.load('data/models/rnn_encoder')
    seq = create_lab_input(input_file, speaker)
    mgc, att = encoder.generate(seq)
    # Save the generated spectrogram next to the audio for inspection.
    _render_spectrogram(mgc, output_file + '.png')
    print("[Vocoding]")
    from models.vocoder import Vocoder
    # NOTE(review): this re-import shadows the earlier Trainer import and
    # neither appears used here — confirm and clean up.
    from trainers.vocoder import Trainer
    vocoder = Vocoder(params, runtime=True)
    vocoder.load('data/models/rnn_vocoder')
    import time
    start = time.time()
    signal = vocoder.synthesize(mgc, batch_size=1000,
                                temperature=params.temperature)
    stop = time.time()
    sys.stdout.write(" execution time=" + str(stop - start))
    sys.stdout.write('\n')
    sys.stdout.flush()
    from io_modules.dataset import DatasetIO
    dio = DatasetIO()
    # Decode the 16-bit representation, then write at the target rate.
    enc = dio.b16_dec(signal, discreete=True)
    dio.write_wave(output_file, enc, params.target_sample_rate)
def extract_imgs_feat():
    """Extract fc and attention features for every image in opt.imgs_dir
    with the ResNet-101 encoder, writing them into two HDF5 files
    (one dataset per image, keyed by file name).
    """
    encoder = Encoder(opt.resnet101_file)
    encoder.to(opt.device)
    encoder.eval()
    imgs = os.listdir(opt.imgs_dir)
    imgs.sort()
    if not os.path.exists(opt.out_feats_dir):
        os.makedirs(opt.out_feats_dir)
    # FIX: pass an explicit mode — recent h5py versions reject a missing
    # mode; 'a' matches the historical default this code relied on.
    with h5py.File(os.path.join(opt.out_feats_dir, '%s_fc.h5' % opt.dataset_name), 'a') as file_fc, \
            h5py.File(os.path.join(opt.out_feats_dir, '%s_att.h5' % opt.dataset_name), 'a') as file_att:
        try:
            for img_nm in tqdm.tqdm(imgs, ncols=100):
                img = skimage.io.imread(os.path.join(opt.imgs_dir, img_nm))
                with torch.no_grad():
                    img = encoder.preprocess(img)
                    img = img.to(opt.device)
                    img_fc, img_att = encoder(img)
                file_fc.create_dataset(img_nm,
                                       data=img_fc.cpu().float().numpy())
                file_att.create_dataset(img_nm,
                                        data=img_att.cpu().float().numpy())
        except BaseException:
            # FIX: the explicit close() calls were redundant (the `with`
            # block closes both files while unwinding) and `raise e`
            # replaced the traceback; a bare `raise` preserves it.
            print(
                '--------------------------------------------------------------------'
            )
            raise
def main(model_filename, pitch_model_filename, output_dir, batch_size):
    """Batch-synthesize wavs for a test set using a trained encoder +
    generator pair, optionally loading a separate pitch model.

    Arguments
    ---------
    model_filename: checkpoint path for the encoder/generator model
    pitch_model_filename: optional pitch-model checkpoint; enabled only
        when the file exists
    output_dir: directory receiving one wav per test item
    batch_size: number of test items processed per chunk
    """
    model = torch.nn.Module()
    model.add_module('encoder', Encoder(**encoder_config))
    model.add_module('generator',
                     Generator(sum(encoder_config['n_out_channels'])))
    model = load_checkpoint(model_filename, model).cuda()
    model.eval()
    # The pitch model is optional; when present it is stored in
    # module-level globals (used elsewhere in this module).
    if os.path.isfile(pitch_model_filename):
        global pitch_model, use_predicted_pitch
        use_predicted_pitch = True
        pitch_model = PitchModel(**pitch_config)
        pitch_model = load_checkpoint(pitch_model_filename,
                                      pitch_model).cuda()
        pitch_model.eval()
    testset = TestSet(**(data_config))
    # NOTE(review): this first sample appears unused — confirm and remove.
    cond, name = testset[0]
    for files in chunker(testset, batch_size):
        # Transpose [(cond..., path), ...] into parallel per-field lists;
        # the last field is the file path.
        files = list(zip(*files))
        cond_input, file_paths = files[:-1], files[-1]
        cond_input = [
            utils.to_gpu(torch.from_numpy(np.stack(x))).float()
            for x in cond_input
        ]
        #cond_input = model.encoder(cond_input.transpose(1, 2)).transpose(1, 2)
        cond_input = model.encoder(cond_input[0])
        audio = model.generator(cond_input)
        for i, file_path in enumerate(file_paths):
            print("writing {}".format(file_path))
            # Scale float audio into the 16-bit PCM range before writing.
            wav = audio[i].cpu().squeeze().detach().numpy() * 32768.0
            write("{}/{}.wav".format(output_dir, file_path),
                  data_config['sampling_rate'], wav.astype(np.int16))
def test_train_method(self):
    """End-to-end smoke test: load a tiny parallel corpus and run the
    attention trainer over it with a small encoder/decoder pair.
    """
    file_name = 'test/test_data/attention_test.txt'
    fine_tune_model_name = '../models/glove_model_40.pth'
    self.test_data_loader_attention = DataLoaderAttention(
        file_name=file_name)
    # FIX: load_data() was called twice and the first result discarded;
    # one call is sufficient.
    source2index, index2source, target2index, index2target, train_data = \
        self.test_data_loader_attention.load_data()
    EMBEDDING_SIZE = 50
    HIDDEN_SIZE = 32
    # Positional args 3 / True presumably mean num_layers / bidirectional
    # (the decoder hidden size is doubled accordingly) — confirm against
    # the Encoder signature.
    encoder = Encoder(len(source2index), EMBEDDING_SIZE, HIDDEN_SIZE, 3, True)
    decoder = Decoder(len(target2index), EMBEDDING_SIZE, HIDDEN_SIZE * 2)
    self.trainer = Trainer(fine_tune_model=fine_tune_model_name)
    self.trainer.train_attention(
        train_data=train_data,
        source2index=source2index,
        target2index=target2index,
        index2source=index2source,
        index2target=index2target,
        encoder_model=encoder,
        decoder_model=decoder,
    )
def detect(path, encoder=None, decoder=None):
    """Run voxel detection over the images found at `path`.

    When `encoder`/`decoder` are not supplied, they are built and loaded
    from the configured checkpoint. Each sample is pushed through the
    encoder/decoder pair and the thresholded volume is plotted.
    """
    torch.backends.cudnn.benchmark = True
    dataset = LoadImages(path,
                         img_size=config.IMAGE_SIZE,
                         used_layers=config.USED_LAYERS)
    # Build and load the models only when the caller did not provide them.
    if not encoder or not decoder:
        in_channels = num_channels(config.USED_LAYERS)
        encoder = Encoder(in_channels=in_channels).to(config.DEVICE)
        decoder = Decoder(num_classes=config.NUM_CLASSES+1).to(config.DEVICE)
        _, encoder, decoder = load_checkpoint(encoder, decoder,
                                              config.CHECKPOINT_FILE,
                                              config.DEVICE)
    encoder.eval()
    decoder.eval()
    # Loop variable renamed so it no longer shadows the `path` parameter.
    for _, layers, sample_path in dataset:
        with torch.no_grad():
            batch = torch.from_numpy(layers).to(config.DEVICE,
                                                non_blocking=True)
            # Add a batch dimension for single samples.
            if batch.ndimension() == 3:
                batch = batch.unsqueeze(0)
            predictions = decoder(encoder(batch))
            out = predictions.sigmoid()
            plot_volumes(to_volume(out, config.VOXEL_THRESH).cpu(),
                         [sample_path], config.NAMES)
def __init__(self, d_model, d_ff, d_K, d_V, n_heads, n_layers,
             sourceVocabSize, sourceLength, targetVocabSize, targetLength):
    """Vanilla Transformer: encoder + decoder + output projection.

    Parameters
    ----------
    d_model / d_ff: model and feed-forward dimensions
    d_K / d_V: per-head key and value dimensions
    n_heads / n_layers: attention heads and layer count (shared by both
        encoder and decoder)
    sourceVocabSize / sourceLength: source vocabulary size and max length
    targetVocabSize / targetLength: target vocabulary size and max length
    """
    super(Transformer, self).__init__()
    self.encoder = Encoder(sourceVocabSize, sourceLength, d_model, d_ff,
                           d_K, d_V, n_heads, n_layers)
    self.decoder = Decoder(targetVocabSize, targetLength, d_model, d_ff,
                           d_K, d_V, n_heads, n_layers)
    # Projects decoder output to target-vocabulary logits (no bias).
    self.projection = nn.Linear(d_model, targetVocabSize, bias=False)
def __init__(self, num_classes, fixed_height=48, net='efficientnet'):
    """CRNN-style OCR model: CNN encoder followed by a sequence decoder,
    producing per-timestep log-probabilities.

    Parameters
    ----------
    num_classes: number of output classes
    fixed_height: input image height the encoder expects
    net: backbone name passed to the Encoder
    """
    super(Model, self).__init__()
    self.encoder = Encoder(net=net)
    # Decoder input dim follows from the encoder output: height is
    # divided by 8 and multiplied by 288 — presumably the backbone's
    # downsampling factor and channel count; confirm against Encoder.
    self.decoder = Decoder(input_dim=int(fixed_height * 288 / 8),
                           num_class=num_classes)
    self.crnn = nn.Sequential(self.encoder, self.decoder)
    # Log-probabilities over classes along dim=2 (presumably for a CTC
    # loss — confirm against the training code).
    self.log_softmax = nn.LogSoftmax(dim=2)
def __init__(self, que_dim: int, que_input_embs: list, que_output_embs: list,
             pro_dim: int, pro_input_embs: list, pro_output_embs: list,
             inter_dim: int, output_dim: int):
    """Two-tower Keras model scoring question-professional pairs.

    Two Encoder sub-models produce feature vectors that are concatenated
    and passed through dense layers to a single sigmoid score.
    """
    super().__init__()
    self.que_model = Encoder(que_dim, inter_dim, output_dim, que_input_embs,
                             que_output_embs)
    self.pro_model = Encoder(pro_dim, inter_dim, output_dim, pro_input_embs,
                             pro_output_embs)
    self.merged = Concatenate()(
        [self.que_model.outputs[0], self.pro_model.outputs[0]])
    self.inter = Dense(16, activation='tanh')(self.merged)
    self.outputs = Dense(1, activation='sigmoid')(self.inter)
    # Second super().__init__ call re-initializes the functional Model
    # with explicit inputs/outputs once the graph has been wired up.
    super().__init__([self.que_model.inputs[0], self.pro_model.inputs[0]],
                     self.outputs)
def test_forward(self):
    """Encoder forward pass should produce output shaped (N, T, hidden)."""
    encoder = Encoder(self.input_size, self.hidden_size, self.num_layers,
                      bidirectional=self.bidirectional,
                      rnn_type=self.rnn_type)
    output, hidden = encoder(self.padded_input, self.input_lengths)
    # FIX: assertTrue(a, b) treats b as the failure MESSAGE and passes for
    # any truthy a, so the original never compared the sizes at all.
    # assertEqual actually performs the comparison.
    self.assertEqual(output.size(),
                     torch.Size([self.N, self.T, self.hidden_size]))
def evaluate_hand_draw_net(cfg):
    """Evaluate reconstruction + view estimation on a folder of hand-drawn
    images: build the networks, load both checkpoints, then run
    evaluate_hand_draw_img on every file in the input folder.

    Arguments
    ---------
    cfg: configuration namespace (CONST, DATASET, TEST,
        EVALUATE_HAND_DRAW sections).
    """
    # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use
    torch.backends.cudnn.benchmark = True
    IMG_SIZE = cfg.CONST.IMG_H, cfg.CONST.IMG_W
    CROP_SIZE = cfg.CONST.CROP_IMG_H, cfg.CONST.CROP_IMG_W
    eval_transforms = utils.data_transforms.Compose([
        utils.data_transforms.CenterCrop(IMG_SIZE, CROP_SIZE),
        utils.data_transforms.RandomBackground(cfg.TEST.RANDOM_BG_COLOR_RANGE),
        utils.data_transforms.Normalize(mean=cfg.DATASET.MEAN,
                                        std=cfg.DATASET.STD),
        utils.data_transforms.ToTensor(),
    ])
    # Set up networks
    encoder = Encoder(cfg)
    decoder = Decoder(cfg)
    # Number of azimuth / elevation bins for view classification.
    azi_classes, ele_classes = int(360 / cfg.CONST.BIN_SIZE), int(
        180 / cfg.CONST.BIN_SIZE)
    view_estimater = ViewEstimater(cfg,
                                   azi_classes=azi_classes,
                                   ele_classes=ele_classes)
    if torch.cuda.is_available():
        encoder = torch.nn.DataParallel(encoder).cuda()
        decoder = torch.nn.DataParallel(decoder).cuda()
        view_estimater = torch.nn.DataParallel(view_estimater).cuda()
    # Load weight for encoder, decoder
    print('[INFO] %s Loading reconstruction weights from %s ...' %
          (dt.now(), cfg.EVALUATE_HAND_DRAW.RECONSTRUCTION_WEIGHTS))
    rec_checkpoint = torch.load(cfg.EVALUATE_HAND_DRAW.RECONSTRUCTION_WEIGHTS)
    encoder.load_state_dict(rec_checkpoint['encoder_state_dict'])
    decoder.load_state_dict(rec_checkpoint['decoder_state_dict'])
    print('[INFO] Best reconstruction result at epoch %d ...' %
          rec_checkpoint['epoch_idx'])
    # Load weight for view estimater
    print('[INFO] %s Loading view estimation weights from %s ...' %
          (dt.now(), cfg.EVALUATE_HAND_DRAW.VIEW_ESTIMATION_WEIGHTS))
    view_checkpoint = torch.load(
        cfg.EVALUATE_HAND_DRAW.VIEW_ESTIMATION_WEIGHTS)
    view_estimater.load_state_dict(
        view_checkpoint['view_estimator_state_dict'])
    print('[INFO] Best view estimation result at epoch %d ...' %
          view_checkpoint['epoch_idx'])
    # Evaluate every image; the file name minus its 3-char extension is
    # parsed as the integer evaluation id.
    for img_path in os.listdir(cfg.EVALUATE_HAND_DRAW.INPUT_IMAGE_FOLDER):
        eval_id = int(img_path[:-4])
        input_img_path = os.path.join(
            cfg.EVALUATE_HAND_DRAW.INPUT_IMAGE_FOLDER, img_path)
        print(input_img_path)
        evaluate_hand_draw_img(cfg, encoder, decoder, view_estimater,
                               input_img_path, eval_transforms, eval_id)
def __init__(self, h_dim, res_h_dim, n_res_layers, embedding_dim, n_dimension_changes): super(E2EEncoder, self).__init__() # encode image into continuous latent space self.encoder = Encoder(3, h_dim, n_res_layers, res_h_dim, n_dimension_changes) self.pre_quantization_conv = nn.Conv2d(h_dim, embedding_dim, kernel_size=1, stride=1)
def main():
    """CLI entry point: parse hyper-parameters, load the parallel corpus,
    build the encoder/decoder pair, and run attention training.
    """
    parser = argparse.ArgumentParser(description="Training attention model")
    parser.add_argument(
        "-t", "--train_data",
        metavar="train_data", type=str,
        default='../data/processed/source_replay_twitter_data.txt',
        dest="train_data", help="set the training data ")
    parser.add_argument("-e", "--embedding_size",
                        metavar="embedding_size", type=int, default=50,
                        dest="embedding_size", help="set the embedding size ")
    parser.add_argument("-H", "--hidden_size",
                        metavar="hidden_size", type=int, default=512,
                        dest="hidden_size", help="set the hidden size ")
    parser.add_argument("-f", "--fine_tune_model_name",
                        metavar="fine_tune_model_name", type=str,
                        default='../models/glove_wiki/glove_model_40.pth',
                        dest="fine_tune_model_name",
                        help="set the fine tune model name ")
    args = parser.parse_args()
    data_loader_attention = DataLoaderAttention(file_name=args.train_data)
    # FIX: load_data() was called twice with the first result discarded;
    # a single call suffices.
    source2index, index2source, target2index, index2target, train_data = \
        data_loader_attention.load_data()
    EMBEDDING_SIZE = args.embedding_size
    HIDDEN_SIZE = args.hidden_size
    # Positional args 3 / True presumably mean num_layers / bidirectional
    # (decoder hidden size is doubled to match) — confirm against Encoder.
    encoder = Encoder(len(source2index), EMBEDDING_SIZE, HIDDEN_SIZE, 3, True)
    decoder = Decoder(len(target2index), EMBEDDING_SIZE, HIDDEN_SIZE * 2)
    trainer = Trainer(epoch=600, batch_size=64,
                      fine_tune_model=args.fine_tune_model_name)
    trainer.train_attention(train_data=train_data,
                            source2index=source2index,
                            target2index=target2index,
                            index2source=index2source,
                            index2target=index2target,
                            encoder_model=encoder,
                            decoder_model=decoder)
def __init__(self, faqdataset, embedding_layer, args):
    '''
    INFObot/qbot
    Uses an encoder network for input sequences (questions, answers and
    history) and a decoder network for generating a response (question).
    '''
    super(INFOBOT, self).__init__()
    self.args = args
    # NOTE: this mutates the shared faqdataset vocabulary in place by
    # appending a 'STOP' action.
    self.tagw2i = faqdataset.tagw2i
    self.tagi2w = faqdataset.tagi2w
    self.tagw2i['STOP'] = len(self.tagw2i)
    self.tagi2w.append('STOP')
    self.faqpool = faqdataset.faqlist
    self.faqnum = len(faqdataset)
    self.actiondim = len(
        self.tagw2i)  #The last action is 'STOP guessing'/'provide faq'
    self.statedim = 300  #args.embedding_dim
    self.hidden_size = self.statedim
    self.state_encoder = Encoder(embedding_layer, args)
    # Policy and target networks (DQN-style pair sharing the encoder).
    self.policynet = DQN(self.state_encoder, self.statedim, self.actiondim)
    self.targetnet = DQN(self.state_encoder, self.statedim, self.actiondim)
    if args.sharing_encoder:
        self.faq_encoder = self.state_encoder
        # print('The faq embedding and state encoding are shared')
    else:
        self.faq_encoder = Encoder(embedding_layer, args)
    self.faqguessed = 0  #make a change here
    self.steps = 0
    print('action size: {}'.format(self.actiondim))
    print('Infobot initialized: {} {}'.format(self.faqnum,
                                              self.faqpool[1]))
    '''NOTE(review): the original source left this triple-quoted literal
    open (likely the start of a commented-out block continuing beyond this
    chunk); it is terminated here so the block parses.'''
def load_encoder(params, base_path='data/models'):
    """Load a trained RNN encoder and its encodings from `base_path`.

    Arguments
    ---------
    params: run configuration forwarded to the Encoder constructor
    base_path: directory holding 'encoder.encodings' and 'rnn_encoder'

    Returns the ready-to-use Encoder instance (runtime mode).
    """
    from io_modules.dataset import Encodings
    from models.encoder import Encoder
    # Restore the symbol/feature encodings saved at training time.
    enc_table = Encodings()
    enc_table.load('%s/encoder.encodings' % base_path)
    # Build the encoder in runtime mode and restore its weights.
    model = Encoder(params, enc_table, runtime=True)
    model.load('%s/rnn_encoder' % base_path)
    return model
def train():
    """Full training loop: build the train dataloader, encoder/decoder,
    per-network Adam optimizers and MultiStepLR schedulers, optionally
    resume from a checkpoint, then train for config.NUM_EPOCHS with
    per-epoch test/checkpoint hooks.
    """
    torch.backends.cudnn.benchmark = True
    _, dataloader = create_dataloader(config.IMG_DIR + "/train",
                                      config.MESH_DIR + "/train",
                                      batch_size=config.BATCH_SIZE,
                                      used_layers=config.USED_LAYERS,
                                      img_size=config.IMAGE_SIZE,
                                      map_size=config.MAP_SIZE,
                                      augment=config.AUGMENT,
                                      workers=config.NUM_WORKERS,
                                      pin_memory=config.PIN_MEMORY,
                                      shuffle=True)
    in_channels = num_channels(config.USED_LAYERS)
    encoder = Encoder(in_channels=in_channels)
    # +1 output class (presumably background — confirm against the loss).
    decoder = Decoder(num_classes=config.NUM_CLASSES+1)
    encoder.apply(init_weights)
    decoder.apply(init_weights)
    # Separate optimizer + LR schedule per sub-network; the encoder one
    # only sees parameters that require gradients.
    encoder_solver = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                             encoder.parameters()),
                                      lr=config.ENCODER_LEARNING_RATE,
                                      betas=config.BETAS)
    decoder_solver = torch.optim.Adam(decoder.parameters(),
                                      lr=config.DECODER_LEARNING_RATE,
                                      betas=config.BETAS)
    encoder_lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        encoder_solver,
        milestones=config.ENCODER_LR_MILESTONES,
        gamma=config.GAMMA)
    decoder_lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        decoder_solver,
        milestones=config.DECODER_LR_MILESTONES,
        gamma=config.GAMMA)
    encoder = encoder.to(config.DEVICE)
    decoder = decoder.to(config.DEVICE)
    loss_fn = LossFunction()
    init_epoch = 0
    if config.CHECKPOINT_FILE and config.LOAD_MODEL:
        init_epoch, encoder, decoder = load_checkpoint(
            encoder, decoder, config.CHECKPOINT_FILE, config.DEVICE)
    # Timestamped (sanitized ISO time) output directory for checkpoints.
    output_dir = os.path.join(
        config.OUT_PATH, re.sub("[^0-9a-zA-Z]+", "-", dt.now().isoformat()))
    for epoch_idx in range(init_epoch, config.NUM_EPOCHS):
        encoder.train()
        decoder.train()
        train_one_epoch(encoder, decoder, dataloader, loss_fn,
                        encoder_solver, decoder_solver, epoch_idx)
        encoder_lr_scheduler.step()
        decoder_lr_scheduler.step()
        if config.TEST:
            test(encoder, decoder)
        if config.SAVE_MODEL:
            save_checkpoint(epoch_idx, encoder, decoder, output_dir)
    # Run a final test/checkpoint only when not already done every epoch.
    if not config.TEST:
        test(encoder, decoder)
    if not config.SAVE_MODEL:
        save_checkpoint(config.NUM_EPOCHS - 1, encoder, decoder, output_dir)
def __init__(self, que_dim: int, que_input_embs: list, que_output_embs: list,
             pro_dim: int, pro_input_embs: list, pro_output_embs: list,
             inter_dim: int, output_dim: int):
    """
    :param que_dim: dimension of question's raw feature vector
    :param que_input_embs: number of unique classes in question's categorical features
    :param que_output_embs: embedding dimensions of question's categorical features
    :param pro_dim: dimension of professional's raw feature vector
    :param pro_input_embs: number of unique classes in professional's categorical features
    :param pro_output_embs: embedding dimensions of professional's categorical features
    :param inter_dim: dimension of Encoder's intermediate layer
    :param output_dim: dimension of high-level feature vectors
    """
    super().__init__()
    # build an Encoder model for questions
    self.que_model = Encoder(que_dim, inter_dim, output_dim, que_input_embs,
                             que_output_embs, reg=2.0)
    # same for professionals
    self.pro_model = Encoder(pro_dim, inter_dim, output_dim, pro_input_embs,
                             pro_output_embs, reg=0.2)
    # calculate distance between high-level feature vectors
    self.merged = Lambda(
        lambda x: tf.reduce_sum(tf.square(x[0] - x[1]), axis=-1))(
            [self.que_model.outputs[0], self.pro_model.outputs[0]])
    # and apply activation - e^-x here, actually
    # NOTE(review): this lambda ignores its argument x and closes over
    # self.merged instead; the result is the same tensor since it is
    # applied to self.merged, but confirm this is intentional.
    self.outputs = Lambda(lambda x: tf.reshape(tf.exp(-self.merged),
                                               (-1, 1)))(self.merged)
    # Re-initialize the functional Model with explicit inputs/outputs.
    super().__init__([self.que_model.inputs[0], self.pro_model.inputs[0]],
                     self.outputs)
def phase_3_train_encoder(params):
    """Phase-3 training: build (or, on resume, load) the symbol/feature/
    speaker encodings from the training labs, construct the Encoder, and
    start training.

    Arguments
    ---------
    params: run configuration; uses resume, no_guided_attention and
        no_bounds flags.
    """
    from io_modules.dataset import Dataset
    from io_modules.dataset import Encodings
    from models.encoder import Encoder
    from trainers.encoder import Trainer
    trainset = Dataset("data/processed/train")
    devset = Dataset("data/processed/dev")
    sys.stdout.write('Found ' + str(len(trainset.files)) +
                     ' training files and ' + str(len(devset.files)) +
                     ' development files\n')
    encodings = Encodings()
    count = 0
    if not params.resume:
        # Fresh run: scan every training lab file to build the encodings,
        # reporting progress every 100 files.
        for train_file in trainset.files:
            count += 1
            if count % 100 == 0:
                sys.stdout.write('\r' + str(count) + '/' +
                                 str(len(trainset.files)) +
                                 ' processed files')
                sys.stdout.flush()
            from io_modules.dataset import DatasetIO
            dio = DatasetIO()
            lab_list = dio.read_lab(train_file + ".lab")
            for entry in lab_list:
                encodings.update(entry)
        sys.stdout.write('\r' + str(count) + '/' + str(len(trainset.files)) +
                         ' processed files\n')
        sys.stdout.write('Found ' + str(len(encodings.char2int)) +
                         ' unique symbols, ' +
                         str(len(encodings.context2int)) +
                         ' unique features and ' +
                         str(len(encodings.speaker2int)) +
                         ' unique speakers\n')
        encodings.store('data/models/encoder.encodings')
    else:
        # Resume: reuse the encodings stored by the earlier run.
        encodings.load('data/models/encoder.encodings')
    if params.resume:
        runtime = True  # avoid ortonormal initialization
    else:
        runtime = False
    encoder = Encoder(params, encodings, runtime=runtime)
    if params.resume:
        sys.stdout.write('Resuming from previous checkpoint\n')
        encoder.load('data/models/rnn_encoder')
    if params.no_guided_attention:
        sys.stdout.write('Disabling guided attention\n')
    if params.no_bounds:
        sys.stdout.write(
            'Using internal stopping condition for synthesis\n')
    trainer = Trainer(encoder, trainset, devset)
    trainer.start_training(10, 1000, params)
def __init__(self, vocabulary_size, sos_token, eos_token, pad_token,
             max_string_length=default_eda['string_max_length'],
             attention_size=default_attention['size'],
             embedding_size=default_embedding['size'],
             hidden_size=default_gru['hidden_size'],
             num_layers=default_gru['num_layers'],
             dropout=default_gru['dropout']):
    """Attention seq2seq: an Encoder plus an attention Decoder, both
    configured from the module-level default_* settings.

    Arguments
    ---------
    vocabulary_size: size of the shared input/output vocabulary
    sos_token / eos_token / pad_token: special token ids
    max_string_length: cap on decoded string length
    attention_size / embedding_size / hidden_size / num_layers / dropout:
        network hyper-parameters
    """
    super().__init__()
    self.max_string_length = max_string_length
    self.attention_size = attention_size
    self.vocabulary_size = vocabulary_size
    self.encoder = Encoder(vocabulary_size, embedding_size, hidden_size,
                           num_layers, dropout)
    self.decoder = Decoder(vocabulary_size, embedding_size, hidden_size,
                           num_layers, dropout, attention_size, pad_token)
    self.sos_token = sos_token  # start-of-sequence token id
    self.eos_token = eos_token  # end-of-sequence token id
def __init__(self, config, device):
    """Few-shot medical segmentation model: encoder + decoder sized from
    the configured input resolution and pooling depth.

    Arguments
    ---------
    config: dict with 'input_size', 'n_pool', 'q_slice' and
        'path.init_path'
    device: torch device the decoder is moved to
    """
    super(MedicalFSS, self).__init__()
    self.config = config
    resize_dim = self.config['input_size']
    # Spatial size of the encoded feature map after n_pool halvings.
    self.encoded_h = int(resize_dim[0] / 2**self.config['n_pool'])
    self.encoded_w = int(resize_dim[1] / 2**self.config['n_pool'])
    # NOTE(review): the decoder is moved to `device` but the encoder's
    # .to(device) was left commented out — confirm this is intentional.
    self.encoder = Encoder(self.config['path']['init_path'],
                           device)  # .to(device)
    self.decoder = Decoder(input_res=(self.encoded_h, self.encoded_w),
                           output_res=resize_dim).to(device)
    self.q_slice_n = self.config['q_slice']
    self.ch = 256  # number of channels of embedding vector