Example No. 1
 def create_model(self, hid_dim, num_encoder_layer, num_decoder_layer, num_head, pf_dim, dropout):
     self.encoder = Encoder(self.vocab_size, hid_dim, num_encoder_layer, num_head, pf_dim,
                            EncoderLayer, SelfAttention, PositionwiseFeedforward, dropout, self.device)
     self.decoder = Decoder(self.vocab_size, hid_dim, num_decoder_layer, num_head, pf_dim,
                            DecoderLayer, SelfAttention, PositionwiseFeedforward, dropout, self.device)
     self.model = Seq2Seq(self.encoder, self.decoder, self.pad_idx, self.device).to(self.device)
     return self.model
Example No. 2
 def __init__(self, args, embeddings, device="cuda"):
     super().__init__()
     self.dropout = args.dropout
     self.device = device
     self.embedding = nn.Embedding(embeddings.shape[0], embeddings.shape[1])
     self.embedding.weight = nn.Parameter(torch.from_numpy(embeddings))
     self.embedding.float()
     self.embedding.weight.requires_grad = True
     self.embedding.to(device)
     self.blocks = ModuleList([
         ModuleDict({
             'encoder': Encoder(
                 args,
                 args.embedding_dim if i == 0
                 else args.embedding_dim + args.hidden_size),
             'alignment': alignment[args.alignment](
                 args,
                 args.embedding_dim + args.hidden_size if i == 0
                 else args.embedding_dim + args.hidden_size * 2),
             'fusion': fusion[args.fusion](
                 args,
                 args.embedding_dim + args.hidden_size if i == 0
                 else args.embedding_dim + args.hidden_size * 2),
         }) for i in range(args.blocks)
     ])
     self.connection = connection[args.connection]()
     self.pooling = Pooling()
     self.prediction = prediction[args.prediction](args)
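
A side note on the embedding setup above: newer PyTorch versions provide nn.Embedding.from_pretrained, which collapses the construct/assign/cast steps into a single call. A minimal sketch, assuming embeddings is a float64 NumPy array as in the example:

import numpy as np
import torch
import torch.nn as nn

embeddings = np.random.rand(1000, 300)  # stand-in for a pretrained matrix

# Equivalent to the manual construct-and-assign above; freeze=False keeps
# requires_grad=True so the embeddings are fine-tuned during training.
embedding = nn.Embedding.from_pretrained(
    torch.from_numpy(embeddings).float(), freeze=False)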
Example No. 3
def make_model(src_vocab, 
               tgt_vocab, 
               N=6, 
               d_model=512, 
               d_ff=2048, 
               h=8, 
               dropout=0.1):
    attn = MultiHeadedAttention(h, d_model, dropout)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    pe = PositionalEncoding(d_model, dropout)

    # deepcopy (from the copy module) gives every layer its own parameters;
    # a shallow copy would silently share weights across layers.
    encoder_layer = EncoderLayer(d_model, deepcopy(attn), deepcopy(ff), dropout)
    encoder = Encoder(encoder_layer, N)

    decoder_layer = DecoderLayer(d_model, deepcopy(attn), deepcopy(attn), deepcopy(ff), dropout)
    decoder = Decoder(decoder_layer, N)

    src_embed = nn.Sequential(Embedding(src_vocab, d_model), deepcopy(pe))
    tgt_embed = nn.Sequential(Embedding(tgt_vocab, d_model), deepcopy(pe))

    generator = Generator(d_model, tgt_vocab)

    model = EncoderDecoder(encoder, decoder, src_embed, tgt_embed, generator)

    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)  # in-place variant; xavier_uniform is deprecated
    return model
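
make_model wires up a Transformer in the style of the Annotated Transformer. A quick smoke test, assuming the helper classes it references are importable; the vocabulary sizes and layer count here are placeholders:

# Toy copy-task sizing: 11-symbol vocabularies, 2 layers instead of 6.
model = make_model(src_vocab=11, tgt_vocab=11, N=2)
print(sum(p.numel() for p in model.parameters()))  # rough size check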
Example No. 4
def create_model(vocab_src, vocab_tgt, config):
    inference_model = InferenceModel(config)
    encoder = Encoder(config)
    attention = create_attention(config)
    decoder = Decoder(attention, vocab_tgt.size(), config)
    language_model = LanguageModel(vocab_src.size(), config)
    model = AEVNMT(vocab_src, vocab_tgt, inference_model, encoder, decoder,
                   language_model, config)
    return model
Example No. 5
def main():
    parser = argparse.ArgumentParser(
        description="[Crop] Crop out a landscape video and make it a vertical video.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-f",
                        "--file",
                        type=argparse.FileType("r"),
                        help="[required] Input target video file.",
                        required=True)
    parser.add_argument("-w",
                        "--workdir",
                        type=str,
                        help="[required] Directory path where the script saves temporary files.",
                        required=True)
    parser.add_argument("-a",
                        "--average",
                        type=int,
                        default=Const.AVERAGE_FLAMES,
                        help="The number of frames to average over to make the video smooth.")
    args = parser.parse_args()

    video: Video = Video(args.file.name)
    detector: ActorDetector = ActorDetector(video)
    convolve: Convolve = Convolve(args.average)
    original_centers = []
    center_x = video.width // 2

    print("[Step. 1/4] Create Video Resources.")
    vr = VideoResource(video=video, baseDir=args.workdir).create()

    print("[Step. 2/4] Detect Actor.")
    for image_path in tqdm(vr.get_image_paths()):
        actor: Person = detector.get_actor(image_path)
        if actor is not None:
            original_centers.append(actor.center_x)
            center_x = actor.center_x
        else:
            original_centers.append(center_x)

    convolved_centers: list = convolve.calculate(np.array(original_centers))
    frames = list(zip(vr.get_image_paths(), original_centers, convolved_centers))
    # TODO: write the coordinates out to a file

    print("[Step. 3/4] Crop Actor.")
    cropper = Cropper(args.w, video)
    for image_path, _, center_position in tqdm(zzz):
        cropper.crop(image_path, center_position)

    print("[Step. 4/4] Create Croped Video.")
    Encoder(args.w, cropper.get_images_path(), vr.get_sound_path(),
            video.fps).encode()
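
The Convolve step above smooths the per-frame actor centers so the crop window does not jitter. The class internals are not shown in this example; a minimal sketch of that kind of moving-average smoothing in plain NumPy, assuming a window of n frames:

import numpy as np

def smooth_centers(centers: np.ndarray, n: int) -> np.ndarray:
    # mode="same" keeps the output aligned with the input frames; each
    # value becomes the mean of its n-frame neighborhood.
    kernel = np.ones(n) / n
    return np.convolve(centers, kernel, mode="same")

smoothed = smooth_centers(np.array([320.0, 340.0, 500.0, 330.0]), n=3)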
Example No. 6
def create_model(src_vocab_size, trg_vocab_size, hid_dim, num_encoder_layer,
                 num_decoder_layer, num_head, pf_dim, dropout, pad_idx,
                 device):
    encoder = Encoder(src_vocab_size, hid_dim, num_encoder_layer, num_head,
                      pf_dim, EncoderLayer, SelfAttention,
                      PositionwiseFeedforward, dropout, device)
    decoder = Decoder(trg_vocab_size, hid_dim, num_decoder_layer, num_head,
                      pf_dim, DecoderLayer, SelfAttention,
                      PositionwiseFeedforward, dropout, device)
    model = Seq2Seq(encoder, decoder, pad_idx, device).to(device)
    return model
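
For reference, a call to the functional variant above; every hyperparameter value here is a placeholder, not taken from the source project:

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = create_model(src_vocab_size=8000, trg_vocab_size=8000, hid_dim=256,
                     num_encoder_layer=3, num_decoder_layer=3, num_head=8,
                     pf_dim=512, dropout=0.1, pad_idx=1, device=device)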
Example No. 7
class ET_Net(nn.Module):
    """ET-Net: A Generic Edge-aTtention Guidance Network for Medical Image Segmentation
    """
    def __init__(self):
        super().__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()
        self.egm = EdgeGuidanceModule()
        self.wam = WeightedAggregationModule()

    def forward(self, x):
        enc_1, enc_2, enc_3, enc_4 = self.encoder(x)
        dec_1, dec_2, dec_3 = self.decoder(enc_1, enc_2, enc_3, enc_4)
        edge_pred, egm = self.egm(enc_1, enc_2)
        pred = self.wam(dec_1, dec_2, dec_3, egm)
        return edge_pred, pred

    def load_encoder_weight(self):
        # The pretrained encoder weights can be obtained from the official
        # PyTorch model zoo.
        self.encoder.load_state_dict(torch.load(ARGS['encoder_weight']))
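
A smoke-test forward pass for ET_Net; the channel count and resolution below are assumptions (the example does not state the expected input shape):

import torch

net = ET_Net()
x = torch.randn(1, 3, 224, 224)  # hypothetical batch of one RGB image
edge_pred, pred = net(x)
print(edge_pred.shape, pred.shape)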
Example No. 8
 def __init__(self, embedding_dim, num_word_embeddings, num_char_embeddings,
              kernels, num_input_channels, num_output_channels,
              rnn_hidden_dim, hidden_dim, output_dim, num_layers,
              bidirectional, dropout_p, word_padding_idx, char_padding_idx):
     super(NewsModel, self).__init__()
     self.encoder = Encoder(embedding_dim, num_word_embeddings,
                            num_char_embeddings, kernels,
                            num_input_channels, num_output_channels,
                            rnn_hidden_dim, num_layers, bidirectional,
                            word_padding_idx, char_padding_idx)
     self.decoder = Decoder(rnn_hidden_dim, hidden_dim, output_dim,
                            dropout_p)
Example No. 9
    def __init__(self, vocabulary, training):
        super(Model, self).__init__()

        self._training = training
        self._embedding = Embedding(vocabulary=vocabulary)
        self._encoder = Encoder(embedding=self._embedding, training=training)
        self._decoder = Decoder(embedding=self._embedding, training=training)

        if training:
            self._teacher_forcing = TeacherForcing()
            # TODO: Look at other possible loss functions
            self._loss = masked_nll_loss
Example No. 10
    def __init__(self,
                 num_layers=4,
                 d_model=512,
                 num_heads=8,
                 dff=2048,
                 pe_max_len=8000,
                 target_vocab_size=8000,
                 rate=0.1,
                 config=None,
                 logger=None):
        super(Transformer, self).__init__()

        if config is not None:
            # config values override the constructor defaults
            num_enc_layers = config.model.N_encoder
            num_dec_layers = config.model.N_decoder
            d_model = config.model.d_model
            num_heads = config.model.n_heads
            dff = config.model.d_ff
            pe_max_len = config.model.pe_max_len
            target_vocab_size = config.model.vocab_size
            rate = config.model.dropout
            if logger is not None:
                logger.info('config.model.N_encoder: ' + str(num_enc_layers))
                logger.info('config.model.N_decoder: ' + str(num_dec_layers))
                logger.info('config.model.d_model:   ' + str(d_model))
                logger.info('config.model.n_heads:   ' + str(num_heads))
                logger.info('config.model.d_ff:      ' + str(dff))
                logger.info('config.model.pe_max_len:' + str(pe_max_len))
                logger.info('config.model.vocab_size:' + str(target_vocab_size))
                logger.info('config.model.dropout:   ' + str(rate))
        else:
            print('Using default parameters.')
            num_enc_layers = num_layers
            num_dec_layers = num_layers

        self.encoder = Encoder(num_enc_layers, d_model, num_heads, dff,
                               pe_max_len, 'encoder', rate)

        self.decoder = Decoder(num_dec_layers, d_model, num_heads, dff,
                               target_vocab_size, 'decoder', pe_max_len, rate)
Example No. 11
    def __init__(self, vocab_size, label_size, feature_dim, model_dim,
                 filter_dim):
        super(DeepAttn, self).__init__()
        self.feature_dim = feature_dim
        self.model_dim = model_dim

        # residual_dropout, layer_num and label_smoothing are module-level
        # hyperparameters, not shown in this snippet.
        self.word_embed = nn.Embedding(vocab_size, feature_dim)
        self.pred_embed = nn.Embedding(2, feature_dim)
        self.position_embed = PositionEmbedding(model_dim, residual_dropout)

        self.encoder = Encoder(model_dim, filter_dim, layer_num)

        self.bias = torch.nn.Parameter(torch.zeros([model_dim]),
                                       requires_grad=True)
        self.project = Affine(model_dim, label_size)

        self.criterion = SmoothedCrossEntropyLoss(label_smoothing)
Example No. 12
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 input_vocab_size,
                 target_vocab_size,
                 pe_input,
                 pe_target,
                 rate=config.dropout_rate):
        super(Transformer, self).__init__()

        self.encoder = Encoder(num_layers, d_model, num_heads, dff,
                               input_vocab_size, pe_input, rate)
        self.decoder = Decoder(num_layers, d_model, num_heads, dff,
                               target_vocab_size, pe_target, rate)
        self.final_layer = tf.keras.layers.Dense(target_vocab_size)
Example No. 13
    def __init__(self,
                 input_dim_encoder: int,
                 hidden_dim_encoder: int,
                 output_dim_encoder: int,
                 dropout_p_encoder: float,
                 output_dim_h_decoder: int,
                 nb_classes: int,
                 dropout_p_decoder: float,
                 max_out_t_steps: int) \
            -> None:
        """Baseline method for audio captioning with Clotho dataset.

        :param input_dim_encoder: Input dimensionality of the encoder.
        :type input_dim_encoder: int
        :param hidden_dim_encoder: Hidden dimensionality of the encoder.
        :type hidden_dim_encoder: int
        :param output_dim_encoder: Output dimensionality of the encoder.
        :type output_dim_encoder: int
        :param dropout_p_encoder: Encoder RNN dropout.
        :type dropout_p_encoder: float
        :param output_dim_h_decoder: Hidden output dimensionality of the decoder.
        :type output_dim_h_decoder: int
        :param nb_classes: Number of output classes.
        :type nb_classes: int
        :param dropout_p_decoder: Decoder RNN dropout.
        :type dropout_p_decoder: float
        :param max_out_t_steps: Maximum output time-steps of the decoder.
        :type max_out_t_steps: int
        """
        super().__init__()

        self.encoder: Module = Encoder(
            input_dim=input_dim_encoder,
            hidden_dim=hidden_dim_encoder,
            output_dim=output_dim_encoder,
            dropout_p=dropout_p_encoder)

        self.decoder: Module = AttentionDecoder(
            input_dim=output_dim_encoder * 2,
            output_dim=output_dim_h_decoder,
            nb_classes=nb_classes,
            dropout_p=dropout_p_decoder,
            max_out_t_steps=max_out_t_steps)
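
For reference, a hedged instantiation of the constructor above. The class name and every dimensionality are placeholders (the snippet shows only __init__); the output_dim_encoder * 2 fed to the decoder presumably concatenates the two directions of a bidirectional encoder RNN:

# BaselineCaptioner is a stand-in name; the real class is not shown here.
model = BaselineCaptioner(
    input_dim_encoder=64,
    hidden_dim_encoder=256,
    output_dim_encoder=256,
    dropout_p_encoder=0.25,
    output_dim_h_decoder=256,
    nb_classes=4367,        # hypothetical vocabulary size
    dropout_p_decoder=0.25,
    max_out_t_steps=22)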
Example No. 14
 def _get_encoder(self, name):
     args = (hp.embedding_dimension, hp.encoder_dimension,
             hp.encoder_blocks, hp.encoder_kernel_size, hp.dropout)
     ln = 1 if not hp.multi_language else hp.language_number
     if name == "simple":
         return Encoder(*args)
     elif name == "separate":
         return MultiEncoder(hp.language_number, args)
     elif name == "shared":
         return ConditionalEncoder(hp.language_number,
                                   hp.input_language_embedding, args)
     elif name == "convolutional":
         return ConvolutionalEncoder(hp.embedding_dimension,
                                     hp.encoder_dimension, 0.05, ln)
     elif name == "generated":
         return GeneratedConvolutionalEncoder(hp.embedding_dimension,
                                              hp.encoder_dimension,
                                              0.05,
                                              hp.generator_dim,
                                              hp.generator_bottleneck_dim,
                                              groups=ln)
Example No. 15
img_size = config["img_size"]
means = config["means"]
std = config["stds"]
channels = config["channel"]
alpha = config["alpha"]
csv_path = config["csv_path"]
img_dir = config["image_dir"]
output_dir = config["output_dir"]
if not os.path.exists(output_dir):
    os.mkdir(output_dir)
data = pd.read_csv(csv_path)
paths = data["ImageId"].values
paths = [os.path.join(img_dir, p) for p in paths]
labels = data["TrueLabel"].values

encoder = Encoder(channels, out_ch=2048)
decoder = Decoder(2048, channels)

encoder.load_state_dict(torch.load(config["encoder"], map_location="cpu"))
decoder.load_state_dict(torch.load(config["decoder"], map_location="cpu"))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder.to(device)
decoder.to(device)

encoder.eval()
decoder.eval()
x_adv = []
with torch.no_grad():
    bar = tqdm.tqdm(paths)
    for path in bar:
Example No. 16
def train(args):
    # initialize dataset
    with Timed('Loading dataset'):
        ds = tiny_words(max_text_length=hp.max_text_length,
                        max_audio_length=hp.max_audio_length,
                        max_dataset_size=args.data_size)

    # initialize model
    with Timed('Initializing model.'):
        encoder = Encoder(ds.lang.num_chars,
                          hp.embedding_dim,
                          hp.encoder_bank_k,
                          hp.encoder_bank_ck,
                          hp.encoder_proj_dims,
                          hp.encoder_highway_layers,
                          hp.encoder_highway_units,
                          hp.encoder_gru_units,
                          dropout=hp.dropout,
                          use_cuda=hp.use_cuda)

        decoder = AttnDecoder(hp.max_text_length,
                              hp.attn_gru_hidden_size,
                              hp.n_mels,
                              hp.rf,
                              hp.decoder_gru_hidden_size,
                              hp.decoder_gru_layers,
                              dropout=hp.dropout,
                              use_cuda=hp.use_cuda)

        postnet = PostNet(hp.n_mels,
                          1 + hp.n_fft // 2,
                          hp.post_bank_k,
                          hp.post_bank_ck,
                          hp.post_proj_dims,
                          hp.post_highway_layers,
                          hp.post_highway_units,
                          hp.post_gru_units,
                          use_cuda=hp.use_cuda)

        if args.multi_gpus:
            all_devices = list(range(torch.cuda.device_count()))
            encoder = nn.DataParallel(encoder, device_ids=all_devices)
            decoder = nn.DataParallel(decoder, device_ids=all_devices)
            postnet = nn.DataParallel(postnet, device_ids=all_devices)

        if hp.use_cuda:
            encoder.cuda()
            decoder.cuda()
            postnet.cuda()

        # initialize optimizer and criterion
        all_parameters = (list(encoder.parameters()) +
                          list(decoder.parameters()) +
                          list(postnet.parameters()))
        optimizer = optim.Adam(all_parameters, lr=hp.lr)
        criterion = nn.L1Loss()

        # configure training
        print_every = 100
        save_every = 1000

        # Keep track of time elapsed and running averages
        start = time.time()
        print_loss_total = 0  # Reset every print_every

    for epoch in range(1, hp.n_epochs + 1):

        # get training data for this cycle
        mels, mags, indexed_texts = ds.next_batch(hp.batch_size)

        mels_v = Variable(torch.from_numpy(mels).float())
        mags_v = Variable(torch.from_numpy(mags).float())
        texts_v = Variable(torch.from_numpy(indexed_texts))

        if hp.use_cuda:
            mels_v = mels_v.cuda()
            mags_v = mags_v.cuda()
            texts_v = texts_v.cuda()

        loss = train_batch(mels_v,
                           mags_v,
                           texts_v,
                           encoder,
                           decoder,
                           postnet,
                           optimizer,
                           criterion,
                           multi_gpus=args.multi_gpus)

        # Keep track of loss
        print_loss_total += loss

        if epoch == 0:
            continue

        if epoch % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print_summary = '%s (%d %d%%) %.4f' % \
                (time_since(start, epoch / hp.n_epochs),
                 epoch, epoch / hp.n_epochs * 100, print_loss_avg)
            print(print_summary)

        if epoch % save_every == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'encoder': encoder.state_dict(),
                'decoder': decoder.state_dict(),
                'postnet': postnet.state_dict(),
                'optimizer': optimizer.state_dict(),
            })
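
Variable has been a no-op wrapper since PyTorch 0.4, so the batch preparation inside the loop can operate on tensors directly. A sketch of the modern equivalent of the cuda-guarded block above:

device = torch.device("cuda" if hp.use_cuda else "cpu")

# from_numpy shares memory with the NumPy array; .float() and .to(device)
# produce the copies the training step needs.
mels_v = torch.from_numpy(mels).float().to(device)
mags_v = torch.from_numpy(mags).float().to(device)
texts_v = torch.from_numpy(indexed_texts).to(device)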
Example No. 17
    def __init__(self, input_channels, h_size, z_size):
        super(VAE, self).__init__()

        self.encoder = Encoder(input_channels, h_size, z_size)

        self.decoder = Decoder(input_channels, z_size)
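
The example shows only the constructor. A typical forward pass for a VAE of this shape, assuming (this is not shown in the source) that the encoder returns a (mu, logvar) pair and the decoder maps a latent sample back to input space:

import torch

def vae_forward(vae, x):
    mu, logvar = vae.encoder(x)           # assumed encoder output
    std = torch.exp(0.5 * logvar)
    z = mu + std * torch.randn_like(std)  # reparameterization trick
    return vae.decoder(z), mu, logvar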
Example No. 18
	def __init__(
		self,
		encoder_dict,
		encoder_padding_idx,
		encoder_emb_size,
		encoder_hid_size,
		encoder_bidirectional,
		encoder_rnn_cell_type,
		encoder_is_packed,
		encoder_batch_first,
		encoder_num_layers,
		encoder_dropout,
		decoder_dict,
		decoder_padding_idx,
		decoder_emb_size,
		decoder_hid_size,
		decoder_rnn_cell_type,
		decoder_num_layers,
		decoder_dropout,
		global_attention_type,
		generator_dim_lst,
		generator_num_layers
	):
		super(GlobalAttentionSeq2Seq, self).__init__()

		self.name = 'GlobalAttentionSeq2Seq'

		self.encoder = Encoder(
			encoder_dict,
			encoder_padding_idx,
			encoder_emb_size,
			encoder_hid_size,
			encoder_bidirectional,
			encoder_rnn_cell_type,
			encoder_is_packed,
			encoder_batch_first,
			encoder_num_layers,
			encoder_dropout
		)

		self.bridge = Bridge(
			encoder_bidirectional,
			encoder_num_layers,
			encoder_hid_size,
			encoder_rnn_cell_type,
			decoder_num_layers,
			decoder_hid_size,
			decoder_rnn_cell_type
		)

		self.decoder = GlobalAttentiveDecoder(
			decoder_dict,
			decoder_padding_idx,
			decoder_emb_size,
			decoder_hid_size,
			decoder_rnn_cell_type,
			decoder_num_layers,
			decoder_dropout,
			encoder_hid_size,
			global_attention_type,
			encoder_bidirectional
		)

		self.generator = Generator(
			decoder_dict.size(),
			decoder_hid_size,
			generator_dim_lst,
			generator_num_layers
		)
Example No. 19
# paths
P_TOKENS = Path('tokens').absolute()

# paths that need to be checked inside clipper
P_PACKAGES = Path('packages').absolute()
P_CLIPS = Path('clips').absolute()
P_OUTPUTS = Path('outputs').absolute()
P_VIDEOS_MEDIA = Path('videos').absolute()
dirs_paths = [P_PACKAGES, P_CLIPS, P_OUTPUTS, P_VIDEOS_MEDIA]
check_paths()

modules = list()
modules.append(
    Packager(ERROR_MESSAGES, P_PACKAGES, P_CLIPS, P_OUTPUTS, P_VIDEOS_MEDIA))
modules.append(Downloader(ERROR_MESSAGES, modules[0], P_TOKENS_FILE))
modules.append(Encoder(ERROR_MESSAGES, modules[0]))
modules.append(Editor(ERROR_MESSAGES, modules[0], P_VIDEOS_MEDIA))
modules.append(Uploader(ERROR_MESSAGES, modules[0], P_TOKENS, P_VIDEOS_MEDIA))
modules.append(Tweeter(ERROR_MESSAGES, P_TOKENS_FILE, modules[0], P_TWEETS))
modules.append(Wrapper(ERROR_MESSAGES, modules, P_SCHEDULE))
modules.append(Helper())

user_input('clear', [])
welcome()
while True:
    user_inp = input('>> ')

    if user_inp:
        inp = user_inp.split(' ')

        command = inp[0]
Example No. 20
class ModelGraph(object):
    def __init__(self,
                 word_vocab=None,
                 POS_vocab=None,
                 NER_vocab=None,
                 flags=None,
                 mode='ce_train'):
        # mode can have the following values:
        #  'ce_train',
        #  'rl_train',
        #  'evaluate',
        #  'evaluate_bleu',
        #  'decode'.
        # it is different from mode in decoder which can be
        # 'ce_train', 'loss', 'greedy' or 'sample'
        self.mode = mode

        # is_training controls whether to use dropout
        is_training = mode == 'ce_train'

        self.flags = flags
        self.word_vocab = word_vocab

        # create placeholders
        self.create_placeholders()

        # create encoder
        self.encoder = Encoder(flags,
                               self.passage_words,
                               self.passage_POSs,
                               self.passage_NERs,
                               self.passage_lengths,
                               self.answer_span,
                               word_vocab=word_vocab,
                               POS_vocab=POS_vocab,
                               NER_vocab=NER_vocab)

        # encode the input instance
        self.encoder_dim, self.encoder_hidden_states, self.encoder_features, self.decoder_init_state = self.encoder.encode(
            is_training=is_training)

        max_passage_length = tf.shape(self.passage_words)[1]
        self.passage_mask = tf.sequence_mask(self.passage_lengths,
                                             max_passage_length,
                                             dtype=tf.float32)

        loss_weights = tf.sequence_mask(
            self.question_lengths, flags.max_question_len,
            dtype=tf.float32)  # [batch_size, gen_steps]
        loss_weights_rl = tf.sequence_mask(
            self.question_lengths_rl, flags.max_question_len,
            dtype=tf.float32)  # [batch_size, gen_steps]

        with tf.variable_scope("generator"):
            # create decoder
            self.decoder = Decoder(flags, word_vocab, self.rewards,
                                   is_training)

            if mode == 'decode':
                self.context_t_1 = tf.placeholder(
                    tf.float32, [None, self.encoder_dim],
                    name='context_t_1')  # [batch_size, encoder_dim]
                self.coverage_t_1 = tf.placeholder(
                    tf.float32, [None, None],
                    name='coverage_t_1')  # [batch_size, passage_len]
                self.word_t = tf.placeholder(tf.int32, [None],
                                             name='word_t')  # [batch_size]

                (self.state_t, self.context_t, self.coverage_t,
                 self.attn_dist_t, self.p_gen_t, self.output_t,
                 self.topk_log_probs, self.topk_ids, self.greedy_prediction,
                 self.multinomial_prediction) = self.decoder.decode(
                     self.decoder_init_state, self.context_t_1,
                     self.coverage_t_1, self.word_t,
                     self.encoder_hidden_states, self.encoder_features,
                     self.passage_words, self.passage_mask)
                # not building the training op for this mode
                return

            elif mode == 'evaluate_bleu':
                _, _, self.greedy_words = self.decoder.train(
                    self.encoder_dim,
                    self.encoder_hidden_states,
                    self.encoder_features,
                    self.passage_words,
                    self.passage_mask,
                    self.decoder_init_state,
                    self.decoder_inputs,
                    self.question_words,
                    loss_weights,
                    mode='greedy')
                # not building the training op for this mode
                return

            elif mode in ('ce_train', 'evaluate'):
                self.accu, self.loss, _ = self.decoder.train(
                    self.encoder_dim,
                    self.encoder_hidden_states,
                    self.encoder_features,
                    self.passage_words,
                    self.passage_mask,
                    self.decoder_init_state,
                    self.decoder_inputs,
                    self.question_words,
                    loss_weights,
                    mode='ce_train')
                if mode == 'evaluate':
                    # not building the training op for evaluation
                    return

            elif mode == 'rl_train':
                _, self.loss, _ = self.decoder.train(
                    self.encoder_dim,
                    self.encoder_hidden_states,
                    self.encoder_features,
                    self.passage_words,
                    self.passage_mask,
                    self.decoder_init_state,
                    self.decoder_inputs,
                    self.question_words,
                    loss_weights,
                    mode='loss')

                tf.get_variable_scope().reuse_variables()

                _, _, self.greedy_words = self.decoder.train(
                    self.encoder_dim,
                    self.encoder_hidden_states,
                    self.encoder_features,
                    self.passage_words,
                    self.passage_mask,
                    self.decoder_init_state,
                    self.decoder_inputs,
                    self.question_words,
                    None,
                    mode='greedy')

            elif mode == 'rl_ce_train':

                self.accu, self.ce_loss, _ = self.decoder.train(
                    self.encoder_dim,
                    self.encoder_hidden_states,
                    self.encoder_features,
                    self.passage_words,
                    self.passage_mask,
                    self.decoder_init_state,
                    self.decoder_inputs,
                    self.question_words,
                    loss_weights,
                    mode='ce_train')

                tf.get_variable_scope().reuse_variables()

                _, self.rl_loss, _ = self.decoder.train(
                    self.encoder_dim,
                    self.encoder_hidden_states,
                    self.encoder_features,
                    self.passage_words,
                    self.passage_mask,
                    self.decoder_init_state,
                    self.decoder_inputs_rl,
                    self.question_words_rl,
                    loss_weights_rl,
                    mode='loss')

                self.loss = BETA * self.ce_loss + self.rl_loss

                _, _, self.greedy_words = self.decoder.train(
                    self.encoder_dim,
                    self.encoder_hidden_states,
                    self.encoder_features,
                    self.passage_words,
                    self.passage_mask,
                    self.decoder_init_state,
                    self.decoder_inputs,
                    self.question_words,
                    None,
                    mode='greedy')

        # defining optimizer and train op
        optimizer = tf.train.AdagradOptimizer(
            learning_rate=flags.learning_rate)

        tvars = tf.trainable_variables()
        total_parameters = 0
        for variable in tvars:
            shape = variable.get_shape()
            variable_parameters = 1
            for dim in shape:
                variable_parameters *= dim.value
            total_parameters += variable_parameters
        print("Total number of parameters is equal: %s" % total_parameters)

        if flags.lambda_l2 > 0.0:
            l2_loss = tf.add_n(
                [tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1])
            self.loss = self.loss + flags.lambda_l2 * l2_loss

        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          flags.clip_value)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        ema = tf.train.ExponentialMovingAverage(decay=0.9999)
        with tf.control_dependencies([self.train_op]):
            self.train_op = ema.apply(tvars)
        with tf.variable_scope('backup_variables'):
            backup_vars = [
                tf.get_variable(var.op.name,
                                dtype=var.value().dtype,
                                trainable=False,
                                initializer=var.initialized_value())
                for var in tvars
            ]
        save_backup_vars_op = tf.group(
            *(tf.assign(bck, var.read_value())
              for var, bck in zip(tvars, backup_vars)))
        with tf.control_dependencies([save_backup_vars_op]):
            self.ema_to_vars_op = tf.group(
                *(tf.assign(var,
                            ema.average(var).read_value()) for var in tvars))
        self.restore_backup_vars_op = tf.group(
            *(tf.assign(var, bck.read_value())
              for var, bck in zip(tvars, backup_vars)))

    def create_placeholders(self):
        # build placeholder for input passage/article
        self.passage_lengths = tf.placeholder(tf.int32, [None],
                                              name='passage_lengths')
        self.passage_words = tf.placeholder(
            tf.int32, [None, None],
            name="passage_words")  # [batch_size, passage_len]
        self.passage_POSs = tf.placeholder(
            tf.int32, [None, None],
            name="passage_POSs")  # [batch_size, passage_len]
        self.passage_NERs = tf.placeholder(
            tf.int32, [None, None],
            name="passage_NERs")  # [batch_size, passage_len]

        # build placeholder for answer
        self.answer_span = tf.placeholder(tf.float32, [None, None],
                                          name="answer_span")  # [batch_size]

        # build placeholder for question
        self.decoder_inputs = tf.placeholder(
            tf.int32, [None, self.flags.max_question_len],
            name="decoder_inputs")  # [batch_size, gen_steps]
        self.question_words = tf.placeholder(
            tf.int32, [None, self.flags.max_question_len],
            name="question_words")  # [batch_size, gen_steps]
        self.question_lengths = tf.placeholder(
            tf.int32, [None], name="question_lengths")  # [batch_size]

        self.decoder_inputs_rl = tf.placeholder(
            tf.int32, [None, self.flags.max_question_len],
            name="decoder_inputs_rl")  # [batch_size, gen_steps]
        self.question_words_rl = tf.placeholder(
            tf.int32, [None, self.flags.max_question_len],
            name="question_words_rl")  # [batch_size, gen_steps]
        self.question_lengths_rl = tf.placeholder(
            tf.int32, [None], name="question_lengths_rl")  # [batch_size]

        # build placeholder for reinforcement learning
        self.rewards = tf.placeholder(tf.float32, [None], name="rewards")

    def run_greedy(self, sess, batch):
        feed_dict = self.run_encoder(sess, batch, only_feed_dict=True)
        feed_dict[self.decoder_inputs] = batch.decoder_inputs
        return sess.run(self.greedy_words, feed_dict)

    def ce_train(self, sess, batch, only_eval=False):
        feed_dict = self.run_encoder(sess, batch, only_feed_dict=True)
        feed_dict[self.decoder_inputs] = batch.decoder_inputs
        feed_dict[self.question_words] = batch.question_words
        feed_dict[self.question_lengths] = batch.question_lengths

        if only_eval:
            return sess.run([self.accu, self.loss], feed_dict)
        else:
            return sess.run([self.train_op, self.loss], feed_dict)[1]

    def rl_train(self, sess, batch, with_ce):
        feed_dict = self.run_encoder(sess, batch, only_feed_dict=True)
        feed_dict[self.decoder_inputs] = batch.decoder_inputs

        greedy_outputs = sess.run(self.greedy_words, feed_dict)
        greedy_outputs = greedy_outputs.tolist()
        gold_output = batch.question_words.tolist()

        # baseline outputs: randomly flip tokens toward the greedy output
        flip_prob = 0.1
        baseline_outputs = np.copy(batch.question_words)
        for i in range(batch.question_words.shape[0]):
            seq_len = min(self.flags.max_question_len,
                          batch.question_lengths[i] -
                          1)  # don't change the stop token '</s>'
            for j in range(seq_len):
                if greedy_outputs[i][j] != 0 and random.random() < flip_prob:
                    baseline_outputs[i, j] = greedy_outputs[i][j]
        baseline_outputs = baseline_outputs.tolist()

        rl_inputs = []
        rl_outputs = []
        rl_input_lengths = []
        rewards = []
        for i, (baseline_output, greedy_output) in enumerate(
                zip(baseline_outputs, greedy_outputs)):
            _, baseline_output_words = self.word_vocab.getLexical(
                baseline_output)
            greedy_output, greedy_output_words = self.word_vocab.getLexical(
                greedy_output)
            _, gold_output_words = self.word_vocab.getLexical(gold_output[i])

            rl_inputs.append([int(batch.decoder_inputs[i, 0])] +
                             greedy_output[:-1])
            rl_outputs.append(greedy_output)
            rl_input_lengths.append(len(greedy_output))

            baseline_output_words_list = baseline_output_words.split()
            greedy_output_words_list = greedy_output_words.split()
            gold_output_words_list = gold_output_words.split()

            if self.flags.reward_type == 'bleu':
                cc = SmoothingFunction()
                reward = sentence_bleu([gold_output_words_list],
                                       greedy_output_words_list,
                                       smoothing_function=cc.method3)
                baseline = sentence_bleu([gold_output_words_list],
                                         baseline_output_words_list,
                                         smoothing_function=cc.method3)
                rewards.append(reward - baseline)

            elif self.flags.reward_type == 'rouge':
                reward = rouge.rouge([gold_output_words],
                                     [greedy_output_words])["rouge_l/f_score"]
                baseline = rouge.rouge(
                    [gold_output_words],
                    [baseline_output_words])["rouge_l/f_score"]
                rewards.append(reward - baseline)

            else:
                raise ValueError("reward_type must be 'bleu' or 'rouge'")

        rl_inputs = padding_utils.pad_2d_vals(rl_inputs, len(rl_inputs),
                                              self.flags.max_question_len)
        rl_outputs = padding_utils.pad_2d_vals(rl_outputs, len(rl_outputs),
                                               self.flags.max_question_len)
        rl_input_lengths = np.array(rl_input_lengths, dtype=np.int32)
        rewards = np.array(rewards, dtype=np.float32)
        #reward = rescale(reward)
        assert rl_inputs.shape == rl_outputs.shape

        feed_dict = self.run_encoder(sess, batch, only_feed_dict=True)
        feed_dict[self.rewards] = rewards

        if with_ce:
            feed_dict[self.decoder_inputs_rl] = rl_inputs
            feed_dict[self.question_words_rl] = rl_outputs
            feed_dict[self.question_lengths_rl] = rl_input_lengths
            feed_dict[self.decoder_inputs] = batch.decoder_inputs
            feed_dict[self.question_words] = batch.question_words
            feed_dict[self.question_lengths] = batch.question_lengths

        else:
            feed_dict[self.decoder_inputs] = rl_inputs
            feed_dict[self.question_words] = rl_outputs
            feed_dict[self.question_lengths] = rl_input_lengths

        _, loss = sess.run([self.train_op, self.loss], feed_dict)
        return loss

    def run_encoder(self, sess, batch, only_feed_dict=False):
        feed_dict = {}
        feed_dict[self.passage_lengths] = batch.sent1_length
        if self.flags.with_word:
            feed_dict[self.passage_words] = batch.sent1_word
        if self.flags.with_POS:
            feed_dict[self.passage_POSs] = batch.sent1_POS
        if self.flags.with_NER:
            feed_dict[self.passage_NERs] = batch.sent1_NER
        if self.flags.with_answer_span:
            feed_dict[self.answer_span] = batch.answer_span

        if only_feed_dict:
            return feed_dict

        return sess.run([
            self.encoder_hidden_states, self.decoder_init_state,
            self.encoder_features, self.passage_words, self.passage_mask
        ], feed_dict)
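
The self-critical reward in rl_train above is the BLEU (or ROUGE) gap between the greedy decode and a randomly perturbed baseline. A standalone sketch of the BLEU side using NLTK, which is what the example calls:

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

cc = SmoothingFunction()
reference = "what is the capital of france".split()
hypothesis = "what is capital of france".split()
# method3 smoothing avoids zero scores on short hypotheses.
reward = sentence_bleu([reference], hypothesis, smoothing_function=cc.method3)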
Example No. 21
def create_model(vocab_src, vocab_tgt, config):
    encoder = Encoder(config)
    attention = create_attention(config)
    decoder = Decoder(attention, vocab_tgt.size(), config)
    model = CondNMT(vocab_src, vocab_tgt, encoder, decoder, config)
    return model
Example No. 22
#     tsf.Resize((img_size, img_size)),
#     tsf.ToTensor(),
#     tsf.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
# ])

# train_dataset = CIFAR10("./data", train=True, transform=train_transform, download=True)
# test_dataset = CIFAR10("./data", train=False, transform=val_transform, download=False)
#train_paths, val_paths, train_labels, val_labels = train_test_split(paths, labels, random_state=0, stratify=labels)
train_paths, train_labels = get_paths("./cat_dog/train", img_suffix=".jpg")
val_paths, val_labels = get_paths("./cat_dog/val", img_suffix='.jpg')
train_dataset = PALMClassifyDataset(train_paths, train_labels, augmentation=True, img_size=img_size)
test_dataset = PALMClassifyDataset(val_paths, val_labels, augmentation=False, img_size=img_size)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

encoder = Encoder(in_ch=channels, out_ch=2048)
decoder = Decoder(in_ch=2048, out_ch=channels)

classifier = ResNet(channels, n_layers=50, num_classes=num_classes)
#classifier = resnet18(pretrained=False, num_classes=num_classes, zero_init_residual=False)
# classifier.load_state_dict(torch.load("./best_classifier.pth", map_location="cpu"))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder.to(device)
decoder.to(device)
classifier.to(device)

encoder_opt = opt.Adam(encoder.parameters(), lr=init_lr, weight_decay=5e-4)
decoder_opt = opt.Adam(decoder.parameters(), lr=init_lr, weight_decay=5e-4)
classifier_opt = opt.Adam(classifier.parameters(), lr=init_lr, weight_decay=5e-4)
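
Adam's weight_decay argument applies classic L2 regularization inside the adaptive update; if decoupled weight decay is preferred, torch.optim.AdamW is a drop-in alternative. A sketch keeping the names from the example:

encoder_opt = opt.AdamW(encoder.parameters(), lr=init_lr, weight_decay=5e-4)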
Example No. 24
    def test_hoge(self):
        baseDir = "/tmp/test"
        video = Video("10.mp4")
        vr = VideoResource(video, baseDir)

        Encoder(baseDir, vr.get_image_path(), vr.get_sound_path(), video.fps).encode()
Example No. 25
    # arguments (model parameters + data parameters)
    args = arg_conf()
    print('GPU available:', torch.cuda.is_available())
    print('CuDNN available:', torch.backends.cudnn.enabled)
    print('GPU number: ', torch.cuda.device_count())

    if torch.cuda.is_available() and args.cuda >= 0:
        args.device = torch.device('cuda', args.cuda)
    else:
        args.device = torch.device('cpu')

    # vocabulary
    wdvocab = create_vocab(opts['data']['train_path'])
    embedding_weights = wdvocab.get_embedding_weights(
        opts['data']['embedding_weights'])
    # wdvocab.save(opts['vocab']['save_vocab'])

    # model
    args.label_size = wdvocab.label_size
    args.pad = wdvocab.PAD
    # Transformer Encoder text classification model
    trans_encoder = Encoder(args, embedding_weights).to(args.device)
    classifier = Classifier(trans_encoder, args, wdvocab)
    classifier.summary()

    # training
    classifier.train(train_data, dev_data)

    # evaluation
    classifier.evaluate(test_data)
Example No. 26
def run_model(mode, path, in_file, o_file):
    global feature, encoder, indp, crf, mldecoder, rltrain, f_opt, e_opt, i_opt, c_opt, m_opt, r_opt

    cfg = Configuration()

    #General mode has two values: 'train' or 'test'
    cfg.mode = mode

    #Set Random Seeds
    random.seed(cfg.seed)
    np.random.seed(cfg.seed)
    torch.manual_seed(cfg.seed)
    if hasCuda:
        torch.cuda.manual_seed_all(cfg.seed)

    #Load Embeddings
    load_embeddings(cfg)

    #Only for testing
    if mode == 'test': cfg.test_raw = in_file

    #Construct models
    feature = Feature(cfg)
    if cfg.model_type == 'AC-RNN':
        f_opt = optim.SGD(filter(lambda p: p.requires_grad,
                                 feature.parameters()),
                          lr=cfg.actor_step_size)
    else:
        f_opt = optim.Adam(filter(lambda p: p.requires_grad,
                                  feature.parameters()),
                           lr=cfg.learning_rate)

    if hasCuda: feature.cuda()

    encoder = Encoder(cfg)
    if cfg.model_type == 'AC-RNN':
        e_opt = optim.SGD(filter(lambda p: p.requires_grad,
                                 encoder.parameters()),
                          lr=cfg.actor_step_size)
    else:
        e_opt = optim.Adam(filter(lambda p: p.requires_grad,
                                  encoder.parameters()),
                           lr=cfg.learning_rate)
    if hasCuda: encoder.cuda()

    if cfg.model_type == 'INDP':
        indp = INDP(cfg)
        i_opt = optim.Adam(filter(lambda p: p.requires_grad,
                                  indp.parameters()),
                           lr=cfg.learning_rate)
        if hasCuda: indp.cuda()

    elif cfg.model_type == 'CRF':
        crf = CRF(cfg)
        c_opt = optim.Adam(filter(lambda p: p.requires_grad,
                                  crf.parameters()),
                           lr=cfg.learning_rate)
        if hasCuda: crf.cuda()

    elif cfg.model_type == 'TF-RNN':
        mldecoder = MLDecoder(cfg)
        m_opt = optim.Adam(filter(lambda p: p.requires_grad,
                                  mldecoder.parameters()),
                           lr=cfg.learning_rate)
        if hasCuda: mldecoder.cuda()
        cfg.mldecoder_type = 'TF'

    elif cfg.model_type == 'SS-RNN':
        mldecoder = MLDecoder(cfg)
        m_opt = optim.Adam(filter(lambda p: p.requires_grad,
                                  mldecoder.parameters()),
                           lr=cfg.learning_rate)
        if hasCuda: mldecoder.cuda()
        cfg.mldecoder_type = 'SS'

    elif cfg.model_type == 'AC-RNN':
        mldecoder = MLDecoder(cfg)
        m_opt = optim.SGD(filter(lambda p: p.requires_grad,
                                 mldecoder.parameters()),
                          lr=cfg.actor_step_size)
        if hasCuda: mldecoder.cuda()
        cfg.mldecoder_type = 'TF'
        rltrain = RLTrain(cfg)
        r_opt = optim.Adam(filter(lambda p: p.requires_grad,
                                  rltrain.parameters()),
                           lr=cfg.learning_rate,
                           weight_decay=0.001)
        if hasCuda: rltrain.cuda()
        cfg.rltrain_type = 'AC'
        #For RL, the network should be pre-trained with teacher forced ML decoder.
        feature.load_state_dict(torch.load(path + 'TF-RNN' + '_feature'))
        encoder.load_state_dict(torch.load(path + 'TF-RNN' + '_encoder'))
        mldecoder.load_state_dict(torch.load(path + 'TF-RNN' + '_predictor'))

    if mode == 'train':
        o_file = './temp.predicted_' + cfg.model_type
        best_val_cost = float('inf')
        best_val_epoch = 0
        first_start = time.time()
        epoch = 0
        while epoch < cfg.max_epochs:
            print()
            print('Model:{} | Epoch:{}'.format(cfg.model_type, epoch))

            if cfg.model_type == 'SS-RNN':
                #Specify the decaying schedule for sampling probability.
                #inverse sigmoid schedule:
                cfg.sampling_p = float(
                    cfg.k) / float(cfg.k + np.exp(float(epoch) / cfg.k))

            start = time.time()
            run_epoch(cfg)
            print('\nValidation:')
            predict(cfg, o_file)
            val_cost = 100 - evaluate(cfg, cfg.dev_ref, o_file)
            print('Validation score:{}'.format(100 - val_cost))
            if val_cost < best_val_cost:
                best_val_cost = val_cost
                best_val_epoch = epoch
                torch.save(feature.state_dict(),
                           path + cfg.model_type + '_feature')
                torch.save(encoder.state_dict(),
                           path + cfg.model_type + '_encoder')
                if cfg.model_type == 'INDP':
                    torch.save(indp.state_dict(),
                               path + cfg.model_type + '_predictor')
                elif cfg.model_type == 'CRF':
                    torch.save(crf.state_dict(),
                               path + cfg.model_type + '_predictor')
                elif cfg.model_type == 'TF-RNN' or cfg.model_type == 'SS-RNN':
                    torch.save(mldecoder.state_dict(),
                               path + cfg.model_type + '_predictor')
                elif cfg.model_type == 'AC-RNN':
                    torch.save(mldecoder.state_dict(),
                               path + cfg.model_type + '_predictor')
                    torch.save(rltrain.state_dict(),
                               path + cfg.model_type + '_critic')

            # for early stopping
            if epoch - best_val_epoch > cfg.early_stopping:
                break

            print('Epoch training time:{} seconds'.format(time.time() - start))
            epoch += 1

        print('Total training time:{} seconds'.format(time.time() -
                                                      first_start))

    elif mode == 'test':
        cfg.batch_size = 256
        feature.load_state_dict(torch.load(path + cfg.model_type + '_feature'))
        encoder.load_state_dict(torch.load(path + cfg.model_type + '_encoder'))
        if cfg.model_type == 'INDP':
            indp.load_state_dict(
                torch.load(path + cfg.model_type + '_predictor'))
        elif cfg.model_type == 'CRF':
            crf.load_state_dict(
                torch.load(path + cfg.model_type + '_predictor'))
        elif cfg.model_type == 'TF-RNN' or cfg.model_type == 'SS-RNN':
            mldecoder.load_state_dict(
                torch.load(path + cfg.model_type + '_predictor'))
        elif cfg.model_type == 'AC-RNN':
            mldecoder.load_state_dict(
                torch.load(path + cfg.model_type + '_predictor'))
            rltrain.load_state_dict(
                torch.load(path + cfg.model_type + '_critic'))

        print()
        print('Model:{} Predicting'.format(cfg.model_type))
        start = time.time()
        predict(cfg, o_file)
        print('Total prediction time:{} seconds'.format(time.time() - start))
    return
Example No. 27
def inference(checkpoint_file, text):
    # `args` is a module-level namespace here, not a parameter of this function
    ds = tiny_words(max_text_length=hp.max_text_length,
                    max_audio_length=hp.max_audio_length,
                    max_dataset_size=args.data_size)

    print(ds.texts)

    # prepare input
    indexes = indexes_from_text(ds.lang, text)
    indexes.append(EOT_token)
    padded_indexes = pad_indexes(indexes, hp.max_text_length, PAD_token)
    texts_v = Variable(torch.from_numpy(padded_indexes))
    texts_v = texts_v.unsqueeze(0)

    if hp.use_cuda:
        texts_v = texts_v.cuda()

    encoder = Encoder(ds.lang.num_chars,
                      hp.embedding_dim,
                      hp.encoder_bank_k,
                      hp.encoder_bank_ck,
                      hp.encoder_proj_dims,
                      hp.encoder_highway_layers,
                      hp.encoder_highway_units,
                      hp.encoder_gru_units,
                      dropout=hp.dropout,
                      use_cuda=hp.use_cuda)

    decoder = AttnDecoder(hp.max_text_length,
                          hp.attn_gru_hidden_size,
                          hp.n_mels,
                          hp.rf,
                          hp.decoder_gru_hidden_size,
                          hp.decoder_gru_layers,
                          dropout=hp.dropout,
                          use_cuda=hp.use_cuda)

    postnet = PostNet(hp.n_mels,
                      1 + hp.n_fft // 2,
                      hp.post_bank_k,
                      hp.post_bank_ck,
                      hp.post_proj_dims,
                      hp.post_highway_layers,
                      hp.post_highway_units,
                      hp.post_gru_units,
                      use_cuda=hp.use_cuda)

    encoder.eval()
    decoder.eval()
    postnet.eval()

    if hp.use_cuda:
        encoder.cuda()
        decoder.cuda()
        postnet.cuda()

    # load model
    checkpoint = torch.load(checkpoint_file)
    encoder.load_state_dict(checkpoint['encoder'])
    decoder.load_state_dict(checkpoint['decoder'])
    postnet.load_state_dict(checkpoint['postnet'])

    encoder_out = encoder(texts_v)

    # Prepare input and output variables
    GO_frame = np.zeros((1, hp.n_mels))
    decoder_in = Variable(torch.from_numpy(GO_frame).float())
    if hp.use_cuda:
        decoder_in = decoder_in.cuda()
    h, hs = decoder.init_hiddens(1)

    decoder_outs = []
    for t in range(int(hp.max_audio_length / hp.rf)):
        decoder_out, h, hs, _ = decoder(decoder_in, h, hs, encoder_out)
        decoder_outs.append(decoder_out)
        # feed the last predicted frame back in as the next decoder input
        decoder_in = decoder_out[:, -1, :].contiguous()

    # (batch_size, T, n_mels)
    decoder_outs = torch.cat(decoder_outs, 1)

    # postnet
    post_out = postnet(decoder_outs)
    s = post_out[0].cpu().data.numpy()

    print("Recontructing wav...")
    s = np.where(s < 0, 0, s)
    wav = spectrogram2wav(s**hp.power)
    # wav = griffinlim(s**hp.power)
    write("demo.wav", hp.sr, wav)