def create_model(self, hid_dim, num_encoder_layer, num_decoder_layer, num_head, pf_dim, dropout):
    self.encoder = Encoder(self.vocab_size, hid_dim, num_encoder_layer, num_head, pf_dim,
                           EncoderLayer, SelfAttention, PositionwiseFeedforward, dropout, self.device)
    self.decoder = Decoder(self.vocab_size, hid_dim, num_decoder_layer, num_head, pf_dim,
                           DecoderLayer, SelfAttention, PositionwiseFeedforward, dropout, self.device)
    self.model = Seq2Seq(self.encoder, self.decoder, self.pad_idx, self.device).to(self.device)
    return self.model
def __init__(self, args, embeddings, device="cuda"):  # "cuda" is the device string PyTorch expects
    super().__init__()
    self.dropout = args.dropout
    self.device = device
    self.embedding = nn.Embedding(embeddings.shape[0], embeddings.shape[1])
    self.embedding.weight = nn.Parameter(torch.from_numpy(embeddings))
    self.embedding.float()
    self.embedding.weight.requires_grad = True
    self.embedding.to(device)
    self.blocks = ModuleList([
        ModuleDict({
            'encoder': Encoder(
                args,
                args.embedding_dim if i == 0 else args.embedding_dim + args.hidden_size),
            'alignment': alignment[args.alignment](
                args,
                args.embedding_dim + args.hidden_size if i == 0
                else args.embedding_dim + args.hidden_size * 2),
            'fusion': fusion[args.fusion](
                args,
                args.embedding_dim + args.hidden_size if i == 0
                else args.embedding_dim + args.hidden_size * 2),
        }) for i in range(args.blocks)
    ])
    self.connection = connection[args.connection]()
    self.pooling = Pooling()
    self.prediction = prediction[args.prediction](args)
def make_model(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    attn = MultiHeadedAttention(h, d_model, dropout)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    pe = PositionalEncoding(d_model, dropout)
    encoder_layer = EncoderLayer(d_model, copy(attn), copy(ff), dropout)
    encoder = Encoder(encoder_layer, N)
    decoder_layer = DecoderLayer(d_model, copy(attn), copy(attn), copy(ff), dropout)
    decoder = Decoder(decoder_layer, N)
    src_embed = nn.Sequential(Embedding(src_vocab, d_model), copy(pe))
    tgt_embed = nn.Sequential(Embedding(tgt_vocab, d_model), copy(pe))
    generator = Generator(d_model, tgt_vocab)
    model = EncoderDecoder(encoder, decoder, src_embed, tgt_embed, generator)
    # Initialize parameters with Glorot / Xavier uniform (xavier_uniform is deprecated).
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
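# Hedged usage sketch (not part of the original source): construct a small model from
# the make_model factory above with illustrative vocabulary sizes, then count its
# trainable parameters. Only the factory defined above is assumed.
tmp_model = make_model(src_vocab=11, tgt_vocab=11, N=2)
print(sum(p.numel() for p in tmp_model.parameters() if p.requires_grad))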
def create_model(vocab_src, vocab_tgt, config):
    inference_model = InferenceModel(config)
    encoder = Encoder(config)
    attention = create_attention(config)
    decoder = Decoder(attention, vocab_tgt.size(), config)
    language_model = LanguageModel(vocab_src.size(), config)
    model = AEVNMT(vocab_src, vocab_tgt, inference_model, encoder, decoder,
                   language_model, config)
    return model
def main():
    parser = argparse.ArgumentParser(
        description="[Crop] Crop out a landscape video and make it a vertical video.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-f", "-file",
                        type=argparse.FileType("r"),
                        help="[required] Input target video file.",
                        required=True)
    parser.add_argument("-w", "-workdir",
                        type=str,
                        help="[required] Directory path where the script saves tmp files.",
                        required=True)
    parser.add_argument("-a", "-average",
                        type=int,
                        default=Const.AVERAGE_FLAMES,
                        help="The number of frames to be averaged over in order to make the video smooth.")
    args = parser.parse_args()

    video: Video = Video(args.f.name)
    detector: ActorDetector = ActorDetector(video)
    convolve: Convolve = Convolve(args.a)
    original_centers = []
    center_x = video.width // 2

    print("[Step. 1/4] Create Video Resources.")
    vr = VideoResource(video=video, baseDir=args.w).create()

    print("[Step. 2/4] Detect Actor.")
    for image_path in tqdm(vr.get_image_paths()):
        actor: Person = detector.get_actor(image_path)
        if actor is not None:
            original_centers.append(actor.center_x)
            center_x = actor.center_x
        else:
            original_centers.append(center_x)
    convolved_centers: list = convolve.calculate(np.array(original_centers))
    zzz = list(zip(vr.get_image_paths(), original_centers, convolved_centers))
    # TODO: write the coordinates out to a file

    print("[Step. 3/4] Crop Actor.")
    cropper = Cropper(args.w, video)
    for image_path, _, center_position in tqdm(zzz):
        cropper.crop(image_path, center_position)

    print("[Step. 4/4] Create Cropped Video.")
    Encoder(args.w, cropper.get_images_path(), vr.get_sound_path(), video.fps).encode()
def create_model(src_vocab_size, trg_vocab_size, hid_dim, num_encoder_layer,
                 num_decoder_layer, num_head, pf_dim, dropout, pad_idx, device):
    encoder = Encoder(src_vocab_size, hid_dim, num_encoder_layer, num_head, pf_dim,
                      EncoderLayer, SelfAttention, PositionwiseFeedforward, dropout, device)
    decoder = Decoder(trg_vocab_size, hid_dim, num_decoder_layer, num_head, pf_dim,
                      DecoderLayer, SelfAttention, PositionwiseFeedforward, dropout, device)
    model = Seq2Seq(encoder, decoder, pad_idx, device).to(device)
    return model
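# Hedged usage sketch (not part of the original source): the hyperparameter values,
# vocabulary sizes, and pad index below are illustrative assumptions.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
seq2seq = create_model(src_vocab_size=8000, trg_vocab_size=8000, hid_dim=256,
                       num_encoder_layer=3, num_decoder_layer=3, num_head=8,
                       pf_dim=512, dropout=0.1, pad_idx=1, device=device)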
class ET_Net(nn.Module):
    """ET-Net: A Generic Edge-aTtention Guidance Network for Medical Image Segmentation."""

    def __init__(self):
        super().__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()
        self.egm = EdgeGuidanceModule()
        self.wam = WeightedAggregationModule()

    def forward(self, x):
        enc_1, enc_2, enc_3, enc_4 = self.encoder(x)
        dec_1, dec_2, dec_3 = self.decoder(enc_1, enc_2, enc_3, enc_4)
        edge_pred, egm = self.egm(enc_1, enc_2)
        pred = self.wam(dec_1, dec_2, dec_3, egm)
        return edge_pred, pred

    def load_encoder_weight(self):
        # Pretrained encoder weights can be obtained from the official PyTorch model zoo.
        self.encoder.load_state_dict(torch.load(ARGS['encoder_weight']))
def __init__(self, embedding_dim, num_word_embeddings, num_char_embeddings, kernels,
             num_input_channels, num_output_channels, rnn_hidden_dim, hidden_dim,
             output_dim, num_layers, bidirectional, dropout_p, word_padding_idx,
             char_padding_idx):
    super(NewsModel, self).__init__()
    self.encoder = Encoder(embedding_dim, num_word_embeddings, num_char_embeddings,
                           kernels, num_input_channels, num_output_channels,
                           rnn_hidden_dim, num_layers, bidirectional,
                           word_padding_idx, char_padding_idx)
    self.decoder = Decoder(rnn_hidden_dim, hidden_dim, output_dim, dropout_p)
def __init__(self, vocabulary, training):
    super(Model, self).__init__()
    self._training = training
    self._embedding = Embedding(vocabulary=vocabulary)
    self._encoder = Encoder(embedding=self._embedding, training=training)
    self._decoder = Decoder(embedding=self._embedding, training=training)
    if training:
        self._teacher_forcing = TeacherForcing()
    # TODO: Look at other possible loss functions
    self._loss = masked_nll_loss
def __init__(self, num_layers=4, d_model=512, num_heads=8, dff=2048, pe_max_len=8000,
             target_vocab_size=8000, rate=0.1, config=None, logger=None):
    super(Transformer, self).__init__()
    if config is not None:
        num_enc_layers = config.model.N_encoder
        num_dec_layers = config.model.N_decoder
        d_model = config.model.d_model
        num_heads = config.model.n_heads
        dff = config.model.d_ff
        pe_max_len = config.model.pe_max_len
        target_vocab_size = config.model.vocab_size
        rate = config.model.dropout
        if logger is not None:
            logger.info('config.model.N_encoder: ' + str(num_enc_layers))
            logger.info('config.model.N_decoder: ' + str(num_dec_layers))
            logger.info('config.model.d_model: ' + str(d_model))
            logger.info('config.model.n_heads: ' + str(num_heads))
            logger.info('config.model.d_ff: ' + str(dff))
            logger.info('config.model.pe_max_len: ' + str(pe_max_len))
            logger.info('config.model.vocab_size: ' + str(target_vocab_size))
            logger.info('config.model.dropout: ' + str(rate))
    else:
        print('use default params')
        num_enc_layers = num_layers
        num_dec_layers = num_layers
    self.encoder = Encoder(num_enc_layers, d_model, num_heads, dff, pe_max_len,
                           'encoder', rate)
    self.decoder = Decoder(num_dec_layers, d_model, num_heads, dff, target_vocab_size,
                           'decoder', pe_max_len, rate)
def __init__(self, vocab_size, label_size, feature_dim, model_dim, filter_dim):
    super(DeepAttn, self).__init__()
    self.feature_dim = feature_dim
    self.model_dim = model_dim
    self.word_embed = nn.Embedding(vocab_size, feature_dim)
    self.pred_embed = nn.Embedding(2, feature_dim)
    self.position_embed = PositionEmbedding(model_dim, residual_dropout)
    self.encoder = Encoder(model_dim, filter_dim, layer_num)
    self.bias = torch.nn.Parameter(torch.zeros([model_dim]), requires_grad=True)
    self.project = Affine(model_dim, label_size)
    self.criterion = SmoothedCrossEntropyLoss(label_smoothing)
def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
             target_vocab_size, pe_input, pe_target, rate=config.dropout_rate):
    super(Transformer, self).__init__()
    self.encoder = Encoder(num_layers, d_model, num_heads, dff, input_vocab_size,
                           pe_input, rate)
    self.decoder = Decoder(num_layers, d_model, num_heads, dff, target_vocab_size,
                           pe_target, rate)
    self.final_layer = tf.keras.layers.Dense(target_vocab_size)
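# Hedged usage sketch (not part of the original source): instantiate the tf.keras
# Transformer defined above; the hyperparameters and vocabulary sizes are illustrative.
transformer = Transformer(num_layers=4, d_model=128, num_heads=8, dff=512,
                          input_vocab_size=8500, target_vocab_size=8000,
                          pe_input=10000, pe_target=6000, rate=0.1)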
def __init__(self,
             input_dim_encoder: int,
             hidden_dim_encoder: int,
             output_dim_encoder: int,
             dropout_p_encoder: float,
             output_dim_h_decoder: int,
             nb_classes: int,
             dropout_p_decoder: float,
             max_out_t_steps: int) -> None:
    """Baseline method for audio captioning with the Clotho dataset.

    :param input_dim_encoder: Input dimensionality of the encoder.
    :type input_dim_encoder: int
    :param hidden_dim_encoder: Hidden dimensionality of the encoder.
    :type hidden_dim_encoder: int
    :param output_dim_encoder: Output dimensionality of the encoder.
    :type output_dim_encoder: int
    :param dropout_p_encoder: Encoder RNN dropout.
    :type dropout_p_encoder: float
    :param output_dim_h_decoder: Hidden output dimensionality of the decoder.
    :type output_dim_h_decoder: int
    :param nb_classes: Number of output classes.
    :type nb_classes: int
    :param dropout_p_decoder: Decoder RNN dropout.
    :type dropout_p_decoder: float
    :param max_out_t_steps: Maximum output time-steps of the decoder.
    :type max_out_t_steps: int
    """
    super().__init__()
    self.encoder: Module = Encoder(
        input_dim=input_dim_encoder,
        hidden_dim=hidden_dim_encoder,
        output_dim=output_dim_encoder,
        dropout_p=dropout_p_encoder)
    self.decoder: Module = AttentionDecoder(
        input_dim=output_dim_encoder * 2,
        output_dim=output_dim_h_decoder,
        nb_classes=nb_classes,
        dropout_p=dropout_p_decoder,
        max_out_t_steps=max_out_t_steps)
def _get_encoder(self, name):
    args = (hp.embedding_dimension, hp.encoder_dimension, hp.encoder_blocks,
            hp.encoder_kernel_size, hp.dropout)
    ln = 1 if not hp.multi_language else hp.language_number
    if name == "simple":
        return Encoder(*args)
    elif name == "separate":
        return MultiEncoder(hp.language_number, args)
    elif name == "shared":
        return ConditionalEncoder(hp.language_number, hp.input_language_embedding, args)
    elif name == "convolutional":
        return ConvolutionalEncoder(hp.embedding_dimension, hp.encoder_dimension, 0.05, ln)
    elif name == "generated":
        return GeneratedConvolutionalEncoder(hp.embedding_dimension, hp.encoder_dimension,
                                             0.05, hp.generator_dim,
                                             hp.generator_bottleneck_dim, groups=ln)
img_size = config["img_size"]
means = config["means"]
std = config["stds"]
channels = config["channel"]
alpha = config["alpha"]
csv_path = config["csv_path"]
img_dir = config["image_dir"]
output_dir = config["output_dir"]
if not os.path.exists(output_dir):
    os.mkdir(output_dir)

data = pd.read_csv(csv_path)
paths = data["ImageId"].values
paths = [os.path.join(img_dir, p) for p in paths]
labels = data["TrueLabel"].values

encoder = Encoder(channels, out_ch=2048)
decoder = Decoder(2048, channels)
encoder.load_state_dict(torch.load(config["encoder"], map_location="cpu"))
decoder.load_state_dict(torch.load(config["decoder"], map_location="cpu"))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder.to(device)
decoder.to(device)
encoder.eval()
decoder.eval()

x_adv = []
with torch.no_grad():
    bar = tqdm.tqdm(paths)
    for path in bar:
def train(args):
    # initialize dataset
    with Timed('Loading dataset'):
        ds = tiny_words(max_text_length=hp.max_text_length,
                        max_audio_length=hp.max_audio_length,
                        max_dataset_size=args.data_size)

    # initialize model
    with Timed('Initializing model.'):
        encoder = Encoder(ds.lang.num_chars, hp.embedding_dim, hp.encoder_bank_k,
                          hp.encoder_bank_ck, hp.encoder_proj_dims,
                          hp.encoder_highway_layers, hp.encoder_highway_units,
                          hp.encoder_gru_units, dropout=hp.dropout,
                          use_cuda=hp.use_cuda)
        decoder = AttnDecoder(hp.max_text_length, hp.attn_gru_hidden_size, hp.n_mels,
                              hp.rf, hp.decoder_gru_hidden_size, hp.decoder_gru_layers,
                              dropout=hp.dropout, use_cuda=hp.use_cuda)
        postnet = PostNet(hp.n_mels, 1 + hp.n_fft // 2, hp.post_bank_k, hp.post_bank_ck,
                          hp.post_proj_dims, hp.post_highway_layers,
                          hp.post_highway_units, hp.post_gru_units,
                          use_cuda=hp.use_cuda)

    if args.multi_gpus:
        all_devices = list(range(torch.cuda.device_count()))
        encoder = nn.DataParallel(encoder, device_ids=all_devices)
        decoder = nn.DataParallel(decoder, device_ids=all_devices)
        postnet = nn.DataParallel(postnet, device_ids=all_devices)

    if hp.use_cuda:
        encoder.cuda()
        decoder.cuda()
        postnet.cuda()

    # initialize optimizer and criterion
    all_parameters = (list(encoder.parameters()) + list(decoder.parameters()) +
                      list(postnet.parameters()))
    optimizer = optim.Adam(all_parameters, lr=hp.lr)
    criterion = nn.L1Loss()

    # configure training
    print_every = 100
    save_every = 1000

    # keep track of time elapsed and running averages
    start = time.time()
    print_loss_total = 0  # reset every print_every

    for epoch in range(1, hp.n_epochs + 1):
        # get training data for this cycle
        mels, mags, indexed_texts = ds.next_batch(hp.batch_size)
        mels_v = Variable(torch.from_numpy(mels).float())
        mags_v = Variable(torch.from_numpy(mags).float())
        texts_v = Variable(torch.from_numpy(indexed_texts))
        if hp.use_cuda:
            mels_v = mels_v.cuda()
            mags_v = mags_v.cuda()
            texts_v = texts_v.cuda()

        loss = train_batch(mels_v, mags_v, texts_v, encoder, decoder, postnet,
                           optimizer, criterion, multi_gpus=args.multi_gpus)

        # keep track of loss
        print_loss_total += loss
        if epoch == 0:
            continue

        if epoch % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print_summary = '%s (%d %d%%) %.4f' % (
                time_since(start, epoch / hp.n_epochs), epoch,
                epoch / hp.n_epochs * 100, print_loss_avg)
            print(print_summary)

        if epoch % save_every == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'encoder': encoder.state_dict(),
                'decoder': decoder.state_dict(),
                'postnet': postnet.state_dict(),
                'optimizer': optimizer.state_dict(),
            })
def __init__(self, input_channels, h_size, z_size):
    super(VAE, self).__init__()
    self.encoder = Encoder(input_channels, h_size, z_size)
    self.decoder = Decoder(input_channels, z_size)
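# Hedged usage sketch (not part of the original source): the channel count and
# latent sizes below are illustrative assumptions.
vae = VAE(input_channels=3, h_size=256, z_size=64)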
def __init__(self, encoder_dict, encoder_padding_idx, encoder_emb_size,
             encoder_hid_size, encoder_bidirectional, encoder_rnn_cell_type,
             encoder_is_packed, encoder_batch_first, encoder_num_layers,
             encoder_dropout, decoder_dict, decoder_padding_idx, decoder_emb_size,
             decoder_hid_size, decoder_rnn_cell_type, decoder_num_layers,
             decoder_dropout, global_attention_type, generator_dim_lst,
             generator_num_layers):
    super(GlobalAttentionSeq2Seq, self).__init__()
    self.name = 'GlobalAttentionSeq2Seq'
    self.encoder = Encoder(encoder_dict, encoder_padding_idx, encoder_emb_size,
                           encoder_hid_size, encoder_bidirectional,
                           encoder_rnn_cell_type, encoder_is_packed,
                           encoder_batch_first, encoder_num_layers, encoder_dropout)
    self.bridge = Bridge(encoder_bidirectional, encoder_num_layers, encoder_hid_size,
                         encoder_rnn_cell_type, decoder_num_layers, decoder_hid_size,
                         decoder_rnn_cell_type)
    self.decoder = GlobalAttentiveDecoder(decoder_dict, decoder_padding_idx,
                                          decoder_emb_size, decoder_hid_size,
                                          decoder_rnn_cell_type, decoder_num_layers,
                                          decoder_dropout, encoder_hid_size,
                                          global_attention_type, encoder_bidirectional)
    self.generator = Generator(decoder_dict.size(), decoder_hid_size,
                               generator_dim_lst, generator_num_layers)
# paths
P_TOKENS = Path('tokens').absolute()

# paths that need to be checked inside clipper
P_PACKAGES = Path('packages').absolute()
P_CLIPS = Path('clips').absolute()
P_OUTPUTS = Path('outputs').absolute()
P_VIDEOS_MEDIA = Path('videos').absolute()
dirs_paths = list([P_PACKAGES, P_CLIPS, P_OUTPUTS, P_VIDEOS_MEDIA])
check_paths()

modules = list()
modules.append(Packager(ERROR_MESSAGES, P_PACKAGES, P_CLIPS, P_OUTPUTS, P_VIDEOS_MEDIA))
modules.append(Downloader(ERROR_MESSAGES, modules[0], P_TOKENS_FILE))
modules.append(Encoder(ERROR_MESSAGES, modules[0]))
modules.append(Editor(ERROR_MESSAGES, modules[0], P_VIDEOS_MEDIA))
modules.append(Uploader(ERROR_MESSAGES, modules[0], P_TOKENS, P_VIDEOS_MEDIA))
modules.append(Tweeter(ERROR_MESSAGES, P_TOKENS_FILE, modules[0], P_TWEETS))
modules.append(Wrapper(ERROR_MESSAGES, modules, P_SCHEDULE))
modules.append(Helper())

user_input('clear', [])
welcome()

while True:
    user_inp = input('>> ')
    if user_inp:
        inp = user_inp.split(' ')
        command = inp[0]
class ModelGraph(object): def __init__(self, word_vocab=None, POS_vocab=None, NER_vocab=None, flags=None, mode='ce_train'): # mode can have the following values: # 'ce_train', # 'rl_train', # 'evaluate', # 'evaluate_bleu', # 'decode'. # it is different from mode in decoder which can be # 'ce_train', 'loss', 'greedy' or 'sample' self.mode = mode # is_training controls whether to use dropout is_training = True if mode in ('ce_train') else False self.flags = flags self.word_vocab = word_vocab # create placeholders self.create_placeholders() # create encoder self.encoder = Encoder(flags, self.passage_words, self.passage_POSs, self.passage_NERs, self.passage_lengths, self.answer_span, word_vocab=word_vocab, POS_vocab=POS_vocab, NER_vocab=NER_vocab) # encode the input instance self.encoder_dim, self.encoder_hidden_states, self.encoder_features, self.decoder_init_state = self.encoder.encode( is_training=is_training) max_passage_length = tf.shape(self.passage_words)[1] self.passage_mask = tf.sequence_mask(self.passage_lengths, max_passage_length, dtype=tf.float32) loss_weights = tf.sequence_mask( self.question_lengths, flags.max_question_len, dtype=tf.float32) # [batch_size, gen_steps] loss_weights_rl = tf.sequence_mask( self.question_lengths_rl, flags.max_question_len, dtype=tf.float32) # [batch_size, gen_steps] with tf.variable_scope("generator"): # create decoder self.decoder = Decoder(flags, word_vocab, self.rewards, is_training) if mode == 'decode': self.context_t_1 = tf.placeholder( tf.float32, [None, self.encoder_dim], name='context_t_1') # [batch_size, encoder_dim] self.coverage_t_1 = tf.placeholder( tf.float32, [None, None], name='coverage_t_1') # [batch_size, encoder_dim] self.word_t = tf.placeholder(tf.int32, [None], name='word_t') # [batch_size] (self.state_t, self.context_t, self.coverage_t, self.attn_dist_t, self.p_gen_t, self.ouput_t, self.topk_log_probs, self.topk_ids, self.greedy_prediction, self.multinomial_prediction) = self.decoder.decode( self.decoder_init_state, self.context_t_1, self.coverage_t_1, self.word_t, self.encoder_hidden_states, self.encoder_features, self.passage_words, self.passage_mask) # not buiding training op for this mode return elif mode == 'evaluate_bleu': _, _, self.greedy_words = self.decoder.train( self.encoder_dim, self.encoder_hidden_states, self.encoder_features, self.passage_words, self.passage_mask, self.decoder_init_state, self.decoder_inputs, self.question_words, loss_weights, mode='greedy') # not buiding training op for this mode return elif mode in ('ce_train', 'evaluate'): self.accu, self.loss, _ = self.decoder.train( self.encoder_dim, self.encoder_hidden_states, self.encoder_features, self.passage_words, self.passage_mask, self.decoder_init_state, self.decoder_inputs, self.question_words, loss_weights, mode='ce_train') if mode == 'evaluate': # not buiding training op for evaluation return elif mode == 'rl_train': _, self.loss, _ = self.decoder.train( self.encoder_dim, self.encoder_hidden_states, self.encoder_features, self.passage_words, self.passage_mask, self.decoder_init_state, self.decoder_inputs, self.question_words, loss_weights, mode='loss') tf.get_variable_scope().reuse_variables() _, _, self.greedy_words = self.decoder.train( self.encoder_dim, self.encoder_hidden_states, self.encoder_features, self.passage_words, self.passage_mask, self.decoder_init_state, self.decoder_inputs, self.question_words, None, mode='greedy') elif mode == 'rl_ce_train': self.accu, self.ce_loss, _ = self.decoder.train( self.encoder_dim, 
self.encoder_hidden_states, self.encoder_features, self.passage_words, self.passage_mask, self.decoder_init_state, self.decoder_inputs, self.question_words, loss_weights, mode='ce_train') tf.get_variable_scope().reuse_variables() _, self.rl_loss, _ = self.decoder.train( self.encoder_dim, self.encoder_hidden_states, self.encoder_features, self.passage_words, self.passage_mask, self.decoder_init_state, self.decoder_inputs_rl, self.question_words_rl, loss_weights_rl, mode='loss') self.loss = BETA * self.ce_loss + self.rl_loss _, _, self.greedy_words = self.decoder.train( self.encoder_dim, self.encoder_hidden_states, self.encoder_features, self.passage_words, self.passage_mask, self.decoder_init_state, self.decoder_inputs, self.question_words, None, mode='greedy') # defining optimizer and train op optimizer = tf.train.AdagradOptimizer( learning_rate=flags.learning_rate) tvars = tf.trainable_variables() total_parameters = 0 for variable in tvars: shape = variable.get_shape() variable_parameters = 1 for dim in shape: variable_parameters *= dim.value total_parameters += variable_parameters print("Total number of parameters is equal: %s" % total_parameters) if flags.lambda_l2 > 0.0: l2_loss = tf.add_n( [tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1]) self.loss = self.loss + flags.lambda_l2 * l2_loss grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), flags.clip_value) self.train_op = optimizer.apply_gradients(zip(grads, tvars)) ema = tf.train.ExponentialMovingAverage(decay=0.9999) with tf.control_dependencies([self.train_op]): self.train_op = ema.apply(tvars) with tf.variable_scope('backup_variables'): backup_vars = [ tf.get_variable(var.op.name, dtype=var.value().dtype, trainable=False, initializer=var.initialized_value()) for var in tvars ] save_backup_vars_op = tf.group( *(tf.assign(bck, var.read_value()) for var, bck in zip(tvars, backup_vars))) with tf.control_dependencies([save_backup_vars_op]): self.ema_to_vars_op = tf.group( *(tf.assign(var, ema.average(var).read_value()) for var in tvars)) self.restore_backup_vars_op = tf.group( *(tf.assign(var, bck.read_value()) for var, bck in zip(tvars, backup_vars))) def create_placeholders(self): # build placeholder for input passage/article self.passage_lengths = tf.placeholder(tf.int32, [None], name='passage_lengths') self.passage_words = tf.placeholder( tf.int32, [None, None], name="passage_words") # [batch_size, passage_len] self.passage_POSs = tf.placeholder( tf.int32, [None, None], name="passage_POSs") # [batch_size, passage_len] self.passage_NERs = tf.placeholder( tf.int32, [None, None], name="passage_NERs") # [batch_size, passage_len] # build placeholder for answer self.answer_span = tf.placeholder(tf.float32, [None, None], name="answer_span") # [batch_size] # build placeholder for question self.decoder_inputs = tf.placeholder( tf.int32, [None, self.flags.max_question_len], name="decoder_inputs") # [batch_size, gen_steps] self.question_words = tf.placeholder( tf.int32, [None, self.flags.max_question_len], name="question_words") # [batch_size, gen_steps] self.question_lengths = tf.placeholder( tf.int32, [None], name="question_lengths") # [batch_size] self.decoder_inputs_rl = tf.placeholder( tf.int32, [None, self.flags.max_question_len], name="decoder_inputs_rl") # [batch_size, gen_steps] self.question_words_rl = tf.placeholder( tf.int32, [None, self.flags.max_question_len], name="question_words_rl") # [batch_size, gen_steps] self.question_lengths_rl = tf.placeholder( tf.int32, [None], name="question_lengths_rl") # 
[batch_size] # build placeholder for reinforcement learning self.rewards = tf.placeholder(tf.float32, [None], name="rewards") def run_greedy(self, sess, batch): feed_dict = self.run_encoder(sess, batch, only_feed_dict=True) feed_dict[self.decoder_inputs] = batch.decoder_inputs return sess.run(self.greedy_words, feed_dict) def ce_train(self, sess, batch, only_eval=False): feed_dict = self.run_encoder(sess, batch, only_feed_dict=True) feed_dict[self.decoder_inputs] = batch.decoder_inputs feed_dict[self.question_words] = batch.question_words feed_dict[self.question_lengths] = batch.question_lengths if only_eval: return sess.run([self.accu, self.loss], feed_dict) else: return sess.run([self.train_op, self.loss], feed_dict)[1] def rl_train(self, sess, batch, with_ce): feed_dict = self.run_encoder(sess, batch, only_feed_dict=True) feed_dict[self.decoder_inputs] = batch.decoder_inputs greedy_outputs = sess.run(self.greedy_words, feed_dict) greedy_outputs = greedy_outputs.tolist() gold_output = batch.question_words.tolist() # baseline outputs by flipping coin flipp = 0.1 baseline_outputs = np.copy(batch.question_words) for i in range(batch.question_words.shape[0]): seq_len = min(self.flags.max_question_len, batch.question_lengths[i] - 1) # don't change stop token '</s>' for j in range(seq_len): if greedy_outputs[i][j] != 0 and random.random() < flipp: baseline_outputs[i, j] = greedy_outputs[i][j] baseline_outputs = baseline_outputs.tolist() rl_inputs = [] rl_outputs = [] rl_input_lengths = [] rewards = [] for i, (baseline_output, greedy_output) in enumerate( zip(baseline_outputs, greedy_outputs)): _, baseline_output_words = self.word_vocab.getLexical( baseline_output) greedy_output, greedy_output_words = self.word_vocab.getLexical( greedy_output) _, gold_output_words = self.word_vocab.getLexical(gold_output[i]) rl_inputs.append([int(batch.decoder_inputs[i, 0])] + greedy_output[:-1]) rl_outputs.append(greedy_output) rl_input_lengths.append(len(greedy_output)) baseline_output_words_list = baseline_output_words.split() greedy_output_words_list = greedy_output_words.split() gold_output_words_list = gold_output_words.split() if self.flags.reward_type == 'bleu': cc = SmoothingFunction() reward = sentence_bleu([gold_output_words_list], greedy_output_words_list, smoothing_function=cc.method3) baseline = sentence_bleu([gold_output_words_list], baseline_output_words_list, smoothing_function=cc.method3) rewards.append(reward - baseline) elif self.flags.reward_type == 'rouge': reward = rouge.rouge([gold_output_words], [greedy_output_words])["rouge_l/f_score"] baseline = rouge.rouge( [gold_output_words], [baseline_output_words])["rouge_l/f_score"] rewards.append(reward - baseline) else: raise ValueError("Reward type is not bleu or rouge!") rl_inputs = padding_utils.pad_2d_vals(rl_inputs, len(rl_inputs), self.flags.max_question_len) rl_outputs = padding_utils.pad_2d_vals(rl_outputs, len(rl_outputs), self.flags.max_question_len) rl_input_lengths = np.array(rl_input_lengths, dtype=np.int32) rewards = np.array(rewards, dtype=np.float32) #reward = rescale(reward) assert rl_inputs.shape == rl_outputs.shape feed_dict = self.run_encoder(sess, batch, only_feed_dict=True) feed_dict[self.rewards] = rewards if with_ce: feed_dict[self.decoder_inputs_rl] = rl_inputs feed_dict[self.question_words_rl] = rl_outputs feed_dict[self.question_lengths_rl] = rl_input_lengths feed_dict[self.decoder_inputs] = batch.decoder_inputs feed_dict[self.question_words] = batch.question_words feed_dict[self.question_lengths] = 
batch.question_lengths else: feed_dict[self.decoder_inputs] = rl_inputs feed_dict[self.question_words] = rl_outputs feed_dict[self.question_lengths] = rl_input_lengths _, loss = sess.run([self.train_op, self.loss], feed_dict) return loss def run_encoder(self, sess, batch, only_feed_dict=False): feed_dict = {} feed_dict[self.passage_lengths] = batch.sent1_length if self.flags.with_word: feed_dict[self.passage_words] = batch.sent1_word if self.flags.with_POS: feed_dict[self.passage_POSs] = batch.sent1_POS if self.flags.with_NER: feed_dict[self.passage_NERs] = batch.sent1_NER if self.flags.with_answer_span: feed_dict[self.answer_span] = batch.answer_span if only_feed_dict: return feed_dict return sess.run([ self.encoder_hidden_states, self.decoder_init_state, self.encoder_features, self.passage_words, self.passage_mask ], feed_dict)
def create_model(vocab_src, vocab_tgt, config):
    encoder = Encoder(config)
    attention = create_attention(config)
    decoder = Decoder(attention, vocab_tgt.size(), config)
    model = CondNMT(vocab_src, vocab_tgt, encoder, decoder, config)
    return model
# tsf.Resize((img_size, img_size)),
# tsf.ToTensor(),
# tsf.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
# ])
# train_dataset = CIFAR10("./data", train=True, transform=train_transform, download=True)
# test_dataset = CIFAR10("./data", train=False, transform=val_transform, download=False)
# train_paths, val_paths, train_labels, val_labels = train_test_split(paths, labels, random_state=0, stratify=labels)

train_paths, train_labels = get_paths("./cat_dog/train", img_suffix=".jpg")
val_paths, val_labels = get_paths("./cat_dog/val", img_suffix='.jpg')
train_dataset = PALMClassifyDataset(train_paths, train_labels, augmentation=True, img_size=img_size)
test_dataset = PALMClassifyDataset(val_paths, val_labels, augmentation=False, img_size=img_size)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

encoder = Encoder(in_ch=channels, out_ch=2048)
decoder = Decoder(in_ch=2048, out_ch=channels)
classifier = ResNet(channels, n_layers=50, num_classes=num_classes)
# classifier = resnet18(pretrained=False, num_classes=num_classes, zero_init_residual=False)
# classifier.load_state_dict(torch.load("./best_classifier.pth", map_location="cpu"))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder.to(device)
decoder.to(device)
classifier.to(device)

encoder_opt = opt.Adam(encoder.parameters(), lr=init_lr, weight_decay=5e-4)
decoder_opt = opt.Adam(decoder.parameters(), lr=init_lr, weight_decay=5e-4)
classifier_opt = opt.Adam(classifier.parameters(), lr=init_lr, weight_decay=5e-4)
def test_hoge(self):
    baseDir = "/tmp/test"
    video = Video("10.mp4")
    vr = VideoResource(video, baseDir)
    Encoder(baseDir, vr.get_image_path(), vr.get_sound_path(), video.fps).encode()
# arguments (model parameters + data parameters)
args = arg_conf()
print('GPU available:', torch.cuda.is_available())
print('CuDNN available:', torch.backends.cudnn.enabled)
print('GPU number: ', torch.cuda.device_count())
if torch.cuda.is_available() and args.cuda >= 0:
    args.device = torch.device('cuda', args.cuda)
else:
    args.device = torch.device('cpu')

# vocabulary
wdvocab = create_vocab(opts['data']['train_path'])
embedding_weights = wdvocab.get_embedding_weights(opts['data']['embedding_weights'])
# wdvocab.save(opts['vocab']['save_vocab'])

# model
args.label_size = wdvocab.label_size
args.pad = wdvocab.PAD
# Transformer-encoder text classification model
trans_encoder = Encoder(args, embedding_weights).to(args.device)
classifier = Classifier(trans_encoder, args, wdvocab)
classifier.summary()

# training
classifier.train(train_data, dev_data)

# evaluation
classifier.evaluate(test_data)
def run_model(mode, path, in_file, o_file): global feature, encoder, indp, crf, mldecoder, rltrain, f_opt, e_opt, i_opt, c_opt, m_opt, r_opt cfg = Configuration() #General mode has two values: 'train' or 'test' cfg.mode = mode #Set Random Seeds random.seed(cfg.seed) np.random.seed(cfg.seed) torch.manual_seed(cfg.seed) if hasCuda: torch.cuda.manual_seed_all(cfg.seed) #Load Embeddings load_embeddings(cfg) #Only for testing if mode == 'test': cfg.test_raw = in_file #Construct models feature = Feature(cfg) if cfg.model_type == 'AC-RNN': f_opt = optim.SGD(ifilter(lambda p: p.requires_grad, feature.parameters()), lr=cfg.actor_step_size) else: f_opt = optim.Adam(ifilter(lambda p: p.requires_grad, feature.parameters()), lr=cfg.learning_rate) if hasCuda: feature.cuda() encoder = Encoder(cfg) if cfg.model_type == 'AC-RNN': e_opt = optim.SGD(ifilter(lambda p: p.requires_grad, encoder.parameters()), lr=cfg.actor_step_size) else: e_opt = optim.Adam(ifilter(lambda p: p.requires_grad, encoder.parameters()), lr=cfg.learning_rate) if hasCuda: encoder.cuda() if cfg.model_type == 'INDP': indp = INDP(cfg) i_opt = optim.Adam(ifilter(lambda p: p.requires_grad, indp.parameters()), lr=cfg.learning_rate) if hasCuda: indp.cuda() elif cfg.model_type == 'CRF': crf = CRF(cfg) c_opt = optim.Adam(ifilter(lambda p: p.requires_grad, crf.parameters()), lr=cfg.learning_rate) if hasCuda: crf.cuda() elif cfg.model_type == 'TF-RNN': mldecoder = MLDecoder(cfg) m_opt = optim.Adam(ifilter(lambda p: p.requires_grad, mldecoder.parameters()), lr=cfg.learning_rate) if hasCuda: mldecoder.cuda() cfg.mldecoder_type = 'TF' elif cfg.model_type == 'SS-RNN': mldecoder = MLDecoder(cfg) m_opt = optim.Adam(ifilter(lambda p: p.requires_grad, mldecoder.parameters()), lr=cfg.learning_rate) if hasCuda: mldecoder.cuda() cfg.mldecoder_type = 'SS' elif cfg.model_type == 'AC-RNN': mldecoder = MLDecoder(cfg) m_opt = optim.SGD(ifilter(lambda p: p.requires_grad, mldecoder.parameters()), lr=cfg.actor_step_size) if hasCuda: mldecoder.cuda() cfg.mldecoder_type = 'TF' rltrain = RLTrain(cfg) r_opt = optim.Adam(ifilter(lambda p: p.requires_grad, rltrain.parameters()), lr=cfg.learning_rate, weight_decay=0.001) if hasCuda: rltrain.cuda() cfg.rltrain_type = 'AC' #For RL, the network should be pre-trained with teacher forced ML decoder. feature.load_state_dict(torch.load(path + 'TF-RNN' + '_feature')) encoder.load_state_dict(torch.load(path + 'TF-RNN' + '_encoder')) mldecoder.load_state_dict(torch.load(path + 'TF-RNN' + '_predictor')) if mode == 'train': o_file = './temp.predicted_' + cfg.model_type best_val_cost = float('inf') best_val_epoch = 0 first_start = time.time() epoch = 0 while (epoch < cfg.max_epochs): print print 'Model:{} | Epoch:{}'.format(cfg.model_type, epoch) if cfg.model_type == 'SS-RNN': #Specify the decaying schedule for sampling probability. 
#inverse sigmoid schedule: cfg.sampling_p = float( cfg.k) / float(cfg.k + np.exp(float(epoch) / cfg.k)) start = time.time() run_epoch(cfg) print '\nValidation:' predict(cfg, o_file) val_cost = 100 - evaluate(cfg, cfg.dev_ref, o_file) print 'Validation score:{}'.format(100 - val_cost) if val_cost < best_val_cost: best_val_cost = val_cost best_val_epoch = epoch torch.save(feature.state_dict(), path + cfg.model_type + '_feature') torch.save(encoder.state_dict(), path + cfg.model_type + '_encoder') if cfg.model_type == 'INDP': torch.save(indp.state_dict(), path + cfg.model_type + '_predictor') elif cfg.model_type == 'CRF': torch.save(crf.state_dict(), path + cfg.model_type + '_predictor') elif cfg.model_type == 'TF-RNN' or cfg.model_type == 'SS-RNN': torch.save(mldecoder.state_dict(), path + cfg.model_type + '_predictor') elif cfg.model_type == 'AC-RNN': torch.save(mldecoder.state_dict(), path + cfg.model_type + '_predictor') torch.save(rltrain.state_dict(), path + cfg.model_type + '_critic') #For early stopping if epoch - best_val_epoch > cfg.early_stopping: break ### print 'Epoch training time:{} seconds'.format(time.time() - start) epoch += 1 print 'Total training time:{} seconds'.format(time.time() - first_start) elif mode == 'test': cfg.batch_size = 256 feature.load_state_dict(torch.load(path + cfg.model_type + '_feature')) encoder.load_state_dict(torch.load(path + cfg.model_type + '_encoder')) if cfg.model_type == 'INDP': indp.load_state_dict( torch.load(path + cfg.model_type + '_predictor')) elif cfg.model_type == 'CRF': crf.load_state_dict( torch.load(path + cfg.model_type + '_predictor')) elif cfg.model_type == 'TF-RNN' or cfg.model_type == 'SS-RNN': mldecoder.load_state_dict( torch.load(path + cfg.model_type + '_predictor')) elif cfg.model_type == 'AC-RNN': mldecoder.load_state_dict( torch.load(path + cfg.model_type + '_predictor')) rltrain.load_state_dict( torch.load(path + cfg.model_type + '_critic')) print print 'Model:{} Predicting'.format(cfg.model_type) start = time.time() predict(cfg, o_file) print 'Total prediction time:{} seconds'.format(time.time() - start) return
def inference(checkpoint_file, text):
    ds = tiny_words(max_text_length=hp.max_text_length,
                    max_audio_length=hp.max_audio_length,
                    max_dataset_size=args.data_size)
    print(ds.texts)

    # prepare input
    indexes = indexes_from_text(ds.lang, text)
    indexes.append(EOT_token)
    padded_indexes = pad_indexes(indexes, hp.max_text_length, PAD_token)
    texts_v = Variable(torch.from_numpy(padded_indexes))
    texts_v = texts_v.unsqueeze(0)
    if hp.use_cuda:
        texts_v = texts_v.cuda()

    encoder = Encoder(ds.lang.num_chars, hp.embedding_dim, hp.encoder_bank_k,
                      hp.encoder_bank_ck, hp.encoder_proj_dims,
                      hp.encoder_highway_layers, hp.encoder_highway_units,
                      hp.encoder_gru_units, dropout=hp.dropout, use_cuda=hp.use_cuda)
    decoder = AttnDecoder(hp.max_text_length, hp.attn_gru_hidden_size, hp.n_mels, hp.rf,
                          hp.decoder_gru_hidden_size, hp.decoder_gru_layers,
                          dropout=hp.dropout, use_cuda=hp.use_cuda)
    postnet = PostNet(hp.n_mels, 1 + hp.n_fft // 2, hp.post_bank_k, hp.post_bank_ck,
                      hp.post_proj_dims, hp.post_highway_layers, hp.post_highway_units,
                      hp.post_gru_units, use_cuda=hp.use_cuda)

    encoder.eval()
    decoder.eval()
    postnet.eval()

    if hp.use_cuda:
        encoder.cuda()
        decoder.cuda()
        postnet.cuda()

    # load model
    checkpoint = torch.load(checkpoint_file)
    encoder.load_state_dict(checkpoint['encoder'])
    decoder.load_state_dict(checkpoint['decoder'])
    postnet.load_state_dict(checkpoint['postnet'])

    encoder_out = encoder(texts_v)

    # prepare input and output variables
    GO_frame = np.zeros((1, hp.n_mels))
    decoder_in = Variable(torch.from_numpy(GO_frame).float())
    if hp.use_cuda:
        decoder_in = decoder_in.cuda()
    h, hs = decoder.init_hiddens(1)

    decoder_outs = []
    for t in range(int(hp.max_audio_length / hp.rf)):
        decoder_out, h, hs, _ = decoder(decoder_in, h, hs, encoder_out)
        decoder_outs.append(decoder_out)
        # use the prediction as the next decoder input
        decoder_in = decoder_out[:, -1, :].contiguous()

    # (batch_size, T, n_mels)
    decoder_outs = torch.cat(decoder_outs, 1)

    # postnet
    post_out = postnet(decoder_outs)
    s = post_out[0].cpu().data.numpy()

    print("Reconstructing wav...")
    s = np.where(s < 0, 0, s)
    wav = spectrogram2wav(s**hp.power)
    # wav = griffinlim(s**hp.power)
    write("demo.wav", hp.sr, wav)
def __init__(self):
    super().__init__()
    self.encoder = Encoder()
    self.decoder = Decoder()
    self.egm = EdgeGuidanceModule()
    self.wam = WeightedAggregationModule()