def main(args: Optional[argparse.Namespace] = None):
    if args is None:
        args = parse_arguments()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # load model & data
    target_image = utilities.preprocess_image(
        utilities.load_image(args.img_path))
    net = model.Model(args.model_path, device, target_image)

    # synthesize
    optimizer = optimize.Optimizer(net, args)
    result = optimizer.optimize()

    # save result
    final_image = utilities.postprocess_image(
        result, utilities.load_image(args.img_path))
    final_image.save(os.path.join(args.out_dir, 'output.png'))

    # plot loss
    x = list(
        range(args.checkpoint_every - 1,
              len(optimizer.losses) * args.checkpoint_every,
              args.checkpoint_every))
    plt.plot(x, optimizer.losses)
    plt.savefig(os.path.join(args.out_dir, 'loss_plot.png'))
    plt.close()

    # save intermediate images
    for i, image in enumerate(optimizer.opt_images):
        image.save(
            os.path.join(
                args.out_dir,
                'intermediate_image_{}.png'.format(i * args.checkpoint_every)))
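
For context, a minimal sketch of the parse_arguments helper this snippet relies on, covering only the attributes referenced above; the flag spellings and defaults are assumptions, not taken from the repository:

import argparse

def parse_arguments():
    # Hypothetical flags matching the attributes used in main() above.
    parser = argparse.ArgumentParser()
    parser.add_argument('--img-path', dest='img_path', required=True)
    parser.add_argument('--model-path', dest='model_path', required=True)
    parser.add_argument('--out-dir', dest='out_dir', default='.')
    parser.add_argument('--checkpoint-every', dest='checkpoint_every', type=int, default=100)
    return parser.parse_args()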
Example No. 2
 def setup(self):
     self.features = params['X']
     self.weights = params['Theta']
     self.ratings = dataset['Y']
     self.rated = dataset['R']
     self.optimizer = optimize.Optimizer(self.features, self.weights,
                                         self.ratings, 1.5)
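
A hedged sketch of the module-level params and dataset fixtures this setup assumes: dictionaries of NumPy arrays keyed 'X'/'Theta' and 'Y'/'R'. The shapes (5 items, 4 users, 3 features, giving the 27-element parameter vector checked in the gradient tests below) are illustrative assumptions:

import numpy as np

rng = np.random.default_rng(0)
params = {
    'X': rng.standard_normal((5, 3)),      # item features
    'Theta': rng.standard_normal((4, 3)),  # user weights
}
dataset = {
    'Y': rng.integers(0, 6, size=(5, 4)).astype(float),  # ratings matrix
    'R': rng.integers(0, 2, size=(5, 4)),                 # 1 where an item was rated
}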
Example No. 3
 def __init__(self):
     trainedSteps = 0
     if os.path.exists('AutoTrainConfig'):
         with open('AutoTrainConfig', 'r') as atconfig:
             trainedSteps = json.load(atconfig)['Steps']
     self.config = mconfig.MainConfig()
     self.sp = self_train.SelfPlay(mconfig.MainConfig())
     self.op = optimize.Optimizer(mconfig.MainConfig(), trainedSteps)
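
The AutoTrainConfig file read above is a JSON document carrying at least a 'Steps' counter; a hedged sketch of writing it back after a training run (the helper name is hypothetical):

import json

def save_trained_steps(steps):
    # Persist the step counter so the next auto-training run resumes from it.
    with open('AutoTrainConfig', 'w') as atconfig:
        json.dump({'Steps': steps}, atconfig)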
Example No. 4
 def __init__(self, arg,
              name = None):
   if name:
     self.name = name
   else:
     self.name = 'Neural-GPU'
   self.arg = arg
   
    # Fixed sizes are only used when this file is run directly;
    # otherwise the placeholders keep fully dynamic shapes.
    batch_size = 128
    sequence_size = 10
    if __name__ != '__main__':
      batch_size = sequence_size = None
   self.inputs = tf.placeholder(tf.int32,
                                shape = [batch_size, sequence_size])
   self.targets = tf.placeholder(tf.int32,
                                 shape = [batch_size, sequence_size])
   self.batch_size = tf.shape(self.inputs)[0]
   self.sequence_size = tf.shape(self.inputs)[1]
   self.training = tf.placeholder(tf.bool)
   self.keep_prob = tf.placeholder(tf.float32)
   self.learning_rate = tf.placeholder(tf.float32)
   self.embed_weight = tf.get_variable('embed_weight',
                                       shape = [self.arg.vocab_size, self.arg.embed_dim])
   s0 = tf.nn.embedding_lookup(self.embed_weight,
                               self.inputs)
   s0 = tf.reshape(s0,
                   shape = [self.batch_size, 1, sequence_size, self.arg.embed_dim])
   s0 = tf.concat([s0, tf.zeros([self.batch_size, self.arg.width - 1, sequence_size, self.arg.embed_dim])],
                  axis = 1)
   sfin = self.neural_gpu(s0)
   output = sfin[:,0,:,:]
   self.output_weight = tf.get_variable('output_weight',
                                        shape = [self.arg.embed_dim, self.arg.vocab_size])
   self.output_bias = tf.get_variable('output_bias',
                                      shape = [self.arg.vocab_size])
   self.output = tf.tensordot(output,
                              self.output_weight,
                              axes = 1) + self.output_bias
   self.predict = tf.argmax(self.output,
                            axis = -1,
                            output_type = tf.int32)
   self.loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels = tf.one_hot(self.targets,
                                                                              depth = self.arg.vocab_size),
                                                          logits = self.output,
                                                          axis = -1)
   self.loss = tf.reduce_mean(self.loss)
   self.optimizer = optimize.Optimizer(arg,
                                       loss = self.loss,
                                       learning_rate = self.learning_rate)
   self.optimizer.accuracy(self.output,
                           self.targets)
   self.train_op = self.optimizer.train_op
   self.predict = self.optimizer.predict
   self.correct_prediction = self.optimizer.correct_prediction
   self.accuracy = self.optimizer.accuracy
Example No. 5
    def test_grad(self):
        optimizer = optimize.Optimizer(self.features, self.weights,
                                       self.ratings)
        numeric_f = numerical_grad_features(self.features, self.weights,
                                            self.ratings)
        numeric_w = numerical_grad_weights(self.features, self.weights,
                                           self.ratings)
        numeric = vstack((numeric_f, numeric_w)).flatten()
        assert numeric.shape == (27, )
        assert optimizer.x.shape == (27, )
        analytical = optimizer.fprime(optimizer.x)
        assert analytical.shape == (27, )

        assert all(abs(numeric - analytical) < epsilon)
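
The numerical_grad_features and numerical_grad_weights helpers are not shown here; a minimal sketch of the central-difference idea such a gradient check relies on, assuming a scalar cost function over a flat parameter vector (the names are illustrative, not the repository's helpers):

import numpy as np

def numerical_gradient(cost, x, h=1e-4):
    # Central differences: (J(x + h*e_i) - J(x - h*e_i)) / (2h) for each component i.
    grad = np.zeros_like(x, dtype=float)
    for i in range(x.size):
        e = np.zeros_like(x, dtype=float)
        e[i] = h
        grad[i] = (cost(x + e) - cost(x - e)) / (2.0 * h)
    return grad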
Example No. 6
    def test_grad_r(self):
        optimizer = optimize.Optimizer(self.features,
                                       self.weights,
                                       self.ratings,
                                       regularization=1.5)
        numeric_f = numerical_grad_features(self.features,
                                            self.weights,
                                            self.ratings,
                                            regularization=1.5)
        numeric_w = numerical_grad_weights(self.features,
                                           self.weights,
                                           self.ratings,
                                           regularization=1.5)
        numeric = vstack((numeric_f, numeric_w)).flatten()
        analytical = optimizer.fprime(optimizer.x)

        assert all(abs(numeric - analytical) < epsilon)
Example No. 7
def run_experiment(target, client, config, tune_run_func_config, workers):

    config_identifier = hashlib.sha256(json.dumps(config, sort_keys=True, default=str).encode("utf-8")).hexdigest()
    # While experiment resuming is disabled, the experiment name includes a timestamp.
    # experiment_name = "{0}_{1}_{2}".format(client.name, target.name, config_identifier)
    experiment_name = "{0}_{1}_ts{3}_{2}".format(client.name, target.name, config_identifier, int(time.time()))
    experiment_dir = path.join(EXPERIMENT_EXPORT_RESULTS_DIRECTORY, experiment_name)
    pathlib.Path(experiment_dir).mkdir(exist_ok=True, parents=True)
    
    config_dump_filename = path.join(experiment_dir, "config_dump.json")
    is_continuation = True
    if not path.exists(config_dump_filename):
        is_continuation = False
        with open(config_dump_filename, "w") as config_dump_file:
            json.dump(config, config_dump_file, sort_keys=True, indent=2, default=str)

    search_algorithm_config = config["configuration"]["search_algorithm"]
    client_config = config["configuration"]["client"]
    target_config = config["configuration"]["target"]
    parameter_config = config["parameters"]

    opfunc = functools.partial(_run_experiment, target, client, target_config, client_config)

    op = optimize.Optimizer(
        opfunc, 
        experiment_name=experiment_name,
        parameter_config=parameter_config,
        search_algorithm_config=search_algorithm_config,
        concurrent_workers=workers
    )
    
    # Run the experiment
    with util.LogWriter(path.join(experiment_dir, "experiment_output.log")):
        result = op.run(tune_run_func_config, export_directory=experiment_dir, resume=is_continuation)

    return result.get_best_config(search_algorithm_config["objective"]["name"])
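
A hedged sketch of the config dictionary shape run_experiment expects, based only on the keys accessed above; all field values are hypothetical placeholders:

config = {
    "configuration": {
        "search_algorithm": {
            "objective": {"name": "latency"},  # hypothetical objective, read by get_best_config
            # ...remaining search-algorithm settings passed to optimize.Optimizer
        },
        "client": {},  # client-specific settings passed to _run_experiment
        "target": {},  # target-specific settings passed to _run_experiment
    },
    "parameters": {},  # the parameter search space
}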
Example No. 8
    def __init__(self, arg, name=None):
        '''
        The Transformer model, introduced in arXiv:1706.03762.
        '''
        if name:
            self.name = name
        else:
            self.name = 'Transformer'
        batch_size = 128
        input_sequence_size = 27
        output_sequence_size = 27
        if __name__ != '__main__':
            batch_size = input_sequence_size = output_sequence_size = None
        self.arg = arg
        self.inputs = tf.placeholder(
            tf.int32,
            shape=[batch_size,
                   input_sequence_size],  # (batch_size, input_sequence_size)
            name='inputs')
        if self.arg.classification:
            self.targets = tf.placeholder(
                tf.int32,
                shape=[batch_size],  # (batch_size,)
                name='targets')
        else:
            self.targets = tf.placeholder(
                tf.int32,
                shape=[batch_size, output_sequence_size
                       ],  # (batch_size, output_sequence_size)
                name='targets')
        self.training = tf.placeholder(tf.bool)
        self.keep_prob = tf.placeholder(tf.float32)
        self.learning_rate = tf.placeholder(tf.float32)
        self.batch_size = tf.shape(self.inputs)[0]
        self.input_sequence_size = tf.shape(self.inputs)[1]
        if not self.arg.classification:
            self.target_sequence_size = tf.shape(self.targets)[1]

        self.encoder_self_attention_bias = develop_bias._create_mask(
            self.input_sequence_size, self.arg.unidirectional_encoder)
        if not self.arg.classification:
            self.encoder_decoder_attention_bias = tf.zeros(
                [1, 1, self.target_sequence_size, self.input_sequence_size],
                name='encoder_decoder_attention_bias')
            self.decoder_self_attention_bias = develop_bias._create_mask(
                self.target_sequence_size, self.arg.unidirectional_decoder)

        if self.arg.mask_loss:
            if self.arg.classification:
                self.loss_mask = tf.placeholder(tf.float32,
                                                shape=[batch_size],
                                                name='loss_mask')
            else:
                self.loss_mask = tf.placeholder(
                    tf.float32,
                    shape=[batch_size, output_sequence_size
                           ],  # (batch_size, output_sequence_size)
                    name='loss_mask')
        else:
            self.loss_mask = None

        if self.arg.ffd == 'transformer_ffd':
            self.ffd = self.transformer_ffd
        elif self.arg.ffd == 'sru':
            from SRU import SRU
            self.ffd = SRU
        elif self.arg.ffd == 'sepconv':
            self.ffd = self.sepconv

        if 'stop' in self.arg.pos:
            embedding_size = self.arg.hidden_size - 1
        else:
            embedding_size = self.arg.hidden_size
        with tf.variable_scope('encoder_embedding'):
            encoder_input, enc_params = utils.embedding(
                self.inputs,
                model_dim=embedding_size,
                vocab_size=self.arg.input_vocab_size,
                name='encode')
        if not self.arg.classification:
            with tf.variable_scope('decoder_embedding'):
                decoder_input, dec_params = utils.embedding(
                    self.targets,
                    model_dim=embedding_size,
                    vocab_size=self.arg.target_vocab_size,
                    name='decode')
            if self.arg.use_decoder:
                params = dec_params
                del enc_params
            else:
                params = enc_params
                del dec_params
        else:
            params = enc_params

        with tf.variable_scope('positional_encoding'):
            with tf.variable_scope('encoder'):
                encoder_input = self.timing_position(encoder_input)
            with tf.variable_scope('decoder'):
                if not self.arg.classification:
                    decoder_input = self.timing_position(decoder_input)
        with tf.variable_scope('encoder'):
            encoder_input = self.dropout_fn(encoder_input)
            encoder_output = self.encoder(
                encoder_input,
                encoder_self_attention_bias=self.encoder_self_attention_bias)
            if self.arg.adaptive_mask:
                self.encoder_l0 = tf.reduce_sum(self.encoder_l0)
        if arg.use_decoder:
            with tf.variable_scope('decoder'):
                decoder_input = tf.pad(decoder_input,
                                       paddings=[[0, 0], [1, 0],
                                                 [0, 0]])[:, :-1, :]
                decoder_input = self.dropout_fn(decoder_input)
                decoder_output = self.decoder(
                    decoder_input,
                    encoder_output,
                    decoder_self_attention_bias=self.
                    decoder_self_attention_bias,
                    encoder_decoder_attention_bias=self.
                    encoder_decoder_attention_bias)
        if self.arg.classification:
            if self.arg.use_decoder:
                decoder_output = decoder_output[:, -1]
            else:
                encoder_output = encoder_output[:, -1]
        with tf.variable_scope('output'):
            if self.arg.use_mos:
                if self.arg.use_decoder:
                    self.logits = mos.MoS(
                        decoder_output,
                        hidden_size=self.arg.hidden_size,
                        vocab_size=self.arg.target_vocab_size)
                else:
                    self.logits = mos.MoS(
                        encoder_output,
                        hidden_size=self.arg.hidden_size,
                        vocab_size=self.arg.target_vocab_size)
                self.logits = tf.nn.softmax(self.logits)
                if self.arg.loss == 'sparse_softmax_cross_entropy_with_logits':
                    self.arg.loss = 'log_loss'
                self.loss_cl = loss.Loss(
                    self.logits,
                    self.targets,
                    self.arg.loss,
                    vocab_size=self.arg.target_vocab_size,
                    activation=tf.identity,
                    label_smoothing=self.arg.label_smoothing)
                cost = tf.reduce_sum(self.loss_cl.loss, axis=-1)
            else:
                weights = tf.get_variable(
                    'weights',
                    shape=[self.arg.hidden_size, self.arg.target_vocab_size],
                    dtype=tf.float32)
                bias = tf.get_variable('bias',
                                       shape=[self.arg.target_vocab_size],
                                       dtype=tf.float32)
                if arg.use_decoder:
                    self.logits = tf.tensordot(decoder_output, weights,
                                               axes=1) + bias
                else:
                    self.logits = tf.tensordot(encoder_output, weights,
                                               axes=1) + bias
                self.loss_cl = loss.Loss(
                    self.logits,
                    self.targets,
                    self.arg.loss,
                    vocab_size=self.arg.target_vocab_size,
                    label_smoothing=self.arg.label_smoothing)
                cost = self.loss_cl.loss

        if self.arg.mask_loss:
            self.cost = tf.reduce_mean(cost * self.loss_mask)
        else:
            self.cost = tf.reduce_mean(cost)
        if self.arg.adaptive_mask:
            self.cost += 0.0001 * self.encoder_l0
            if self.arg.use_decoder:
                self.decoder_l0 = tf.reduce_sum(self.decoder_l0)
                self.cost += 0.0001 * self.decoder_l0
        if self.arg.weight_decay_regularization:
            l2_loss = self.loss_cl.l2_loss(tf.trainable_variables())
            l2_loss *= self.arg.weight_decay_hyperparameter
            self.cost += l2_loss
        self.optimizer = optimize.Optimizer(arg,
                                            loss=self.cost,
                                            learning_rate=self.learning_rate)
        self.optimizer.accuracy(self.logits, self.targets, mask=self.loss_mask)
        self.train_op = self.optimizer.train_op
        self.predict = self.optimizer.predict
        self.correct_prediction = self.optimizer.correct_prediction
        self.accuracy = self.optimizer.accuracy
        self.optimizer.sequential_accuracy(self.logits,
                                           self.targets,
                                           mask=self.loss_mask)
        self.sequential_accuracy = self.optimizer.sequential_accuracy
        self.fetches = [encoder_input, encoder_output, self.logits]
Example No. 9
    def __init__(self, arg, name=None):
        '''
        An RNN model.
        '''
        batch_size = 32
        sequence_size = 10
        if __name__ != '__main__':
            batch_size = sequence_size = None
        if name:
            self.name = name
        else:
            self.name = 'RNN'
        self.arg = arg
        self.inputs = tf.placeholder(tf.int32,
                                     shape=[batch_size, sequence_size],
                                     name='inputs')
        if self.arg.classification:
            self.targets = tf.placeholder(tf.int32,
                                          shape=[batch_size],
                                          name='targets')
            self.loss_mask = tf.placeholder(tf.float32,
                                            shape=[batch_size],
                                            name='loss_mask')
        else:
            self.targets = tf.placeholder(tf.int32,
                                          shape=[batch_size, sequence_size],
                                          name='targets')
            self.loss_mask = tf.placeholder(
                tf.float32,
                shape=[batch_size,
                       sequence_size],  # (batch_size, sequence_size)
                name='loss_mask')
        self.keep_prob = tf.placeholder(tf.float32)
        self.training = tf.placeholder(tf.bool)
        self.learning_rate = tf.placeholder(tf.float32)
        self.batch_size = tf.shape(self.inputs)[0]
        self.sequence_size = tf.shape(self.inputs)[1]
        with tf.variable_scope('embedding'):
            embedded_input = self.embedding()
        with tf.variable_scope('RNN'):
            if self.arg.cell == 'lstm':
                cell = functools.partial(tf.nn.rnn_cell.LSTMCell,
                                         num_units=self.arg.hidden_dim)
                #dtype = tf.float32)
            elif self.arg.cell == 'gru':
                cell = functools.partial(tf.nn.rnn_cell.GRUCell,
                                         num_units=self.arg.hidden_dim)
                #dtype = tf.float32)
            cells = []
            for layer in range(1, self.arg.layers + 1):
                with tf.variable_scope('layer_{}'.format(layer)):
                    cells.append(cell(name='cell_{}'.format(layer)))
                    if layer == 1:
                        rnn_output, rnn_state = tf.nn.dynamic_rnn(
                            cells[-1], embedded_input, dtype=tf.float32)
                    else:
                        if self.arg.unidirectional:
                            rnn_output, rnn_state = tf.nn.dynamic_rnn(
                                cells[-1], rnn_output, dtype=tf.float32)
                        else:
                            rnn_output, rnn_state = tf.nn.dynamic_rnn(
                                cells[-1], rnn_output, initial_state=rnn_state)
        with tf.variable_scope('output'):
            if self.arg.classification:
                if self.arg.cell == 'lstm':
                    rnn_state = rnn_state[-1]
                elif self.arg.cell == 'gru':
                    rnn_state = rnn_state
                self.logits = self.output(rnn_state)
            else:
                self.logits = self.output(rnn_output)

        with tf.variable_scope('loss'):
            self.loss_cl = loss.Loss(self.logits,
                                     self.targets,
                                     self.arg.loss,
                                     vocab_size=self.arg.target_vocab_size,
                                     label_smoothing=self.arg.label_smoothing)
            cost = self.loss_cl.loss
            if self.arg.mask_loss:
                self.cost = tf.reduce_mean(cost * self.loss_mask)
            else:
                self.cost = tf.reduce_mean(cost)
            if self.arg.weight_decay_regularization:
                l2_loss = self.loss_cl.l2_loss(tf.trainable_variables())
                l2_loss *= self.arg.weight_decay_hyperparameter
                self.cost += l2_loss
            self.optimizer = optimize.Optimizer(
                arg, loss=self.cost, learning_rate=self.learning_rate)
            self.optimizer.accuracy(self.logits,
                                    self.targets,
                                    mask=self.loss_mask)
            self.train_op = self.optimizer.train_op
            self.predict = self.optimizer.predict
            self.correct_prediction = self.optimizer.correct_prediction
            self.accuracy = self.optimizer.accuracy
Example No. 10
import optimize as op

if __name__ == "__main__":

	opt = op.Optimizer()


	# the number has to be at least 3, since the goal object consists of 3 cubes
	opt.get_layout(4)
Example No. 11
#!/usr/bin/env python
import sys, pair_trading, optimize

if __name__ == '__main__':
    # Read the CSV and keep only the rows flagged 'True' in the third column.
    with open(sys.argv[1], 'r') as f:
        data = [line.split(',') for line in f]
    data = [x for x in data if x[2].strip() == 'True']
    print(len(data))
    results = []
    for d in data:
        o = optimize.Optimizer(pair_trading, d[0].split('/'), 3, 10)
        t = o.run()
        if len(t) > 0 and len(d) > 0:
            results.append((t[-1], d[0]))
    results = sorted(results)
    print(results)
    with open('batch_out.txt', 'w') as out:
        for r in results:
            out.write('%r, %r\n' % r)
Example No. 12
    def __init__(self, arg, name=None):
        '''
        Transformer-XL, introduced in arXiv:1901.02860.
        The code is based on https://github.com/kimiyoung/transformer-xl/blob/master/tf/model.py, adjusted for simplicity.
        '''
        if name:
            self.name = name
        else:
            self.name = 'Transformer-XL'
        batch_size = 128
        input_sequence_size = 10
        memory_sequence_size = 15
        if __name__ != '__main__':
            batch_size = input_sequence_size = memory_sequence_size = None
        self.arg = arg
        self.inputs = tf.placeholder(
            tf.int32,
            shape=[batch_size,
                   input_sequence_size],  # (batch_size, input_sequence_size)
            name='inputs')
        if self.arg.classification:
            self.targets = tf.placeholder(
                tf.int32,
                shape=[batch_size],  # (batch_size,)
                name='targets')
        else:
            self.targets = tf.placeholder(
                tf.int32,
                shape=[batch_size, input_sequence_size
                       ],  # (batch_size, input_sequence_size)
                name='targets')
        self.memory = tf.placeholder(tf.float32,
                                     shape=[
                                         self.arg.encoder_layers, batch_size,
                                         memory_sequence_size,
                                         self.arg.hidden_size
                                     ],
                                     name='memory')
        self.training = tf.placeholder(tf.bool)
        self.keep_prob = tf.placeholder(tf.float32)
        self.learning_rate = tf.placeholder(tf.float32)
        self.batch_size = tf.shape(self.inputs)[0]
        self.input_sequence_size = tf.shape(self.inputs)[1]
        self.memory_sequence_size = tf.shape(self.memory)[2]

        self.encoder_self_attention_bias = develop_bias._create_mask(
            self.input_sequence_size, self.arg.unidirectional_encoder)
        self.encoder_self_attention_bias = tf.concat([
            tf.zeros([
                1, 1, self.input_sequence_size, self.memory_sequence_size
            ]), self.encoder_self_attention_bias
        ],
                                                     axis=-1)

        if self.arg.mask_loss:
            if self.arg.classification:
                self.loss_mask = tf.placeholder(tf.float32,
                                                shape=[batch_size],
                                                name='loss_mask')
            else:
                self.loss_mask = tf.placeholder(
                    tf.float32,
                    shape=[batch_size, input_sequence_size
                           ],  # (batch_size, input_sequence_size)
                    name='loss_mask')
        else:
            self.loss_mask = None

        self.new_mems = []

        if self.arg.ffd == 'transformer_ffd':
            self.ffd = self.transformer_ffd
        elif self.arg.ffd == 'sru':
            from SRU import SRU
            self.ffd = SRU
        elif self.arg.ffd == 'sepconv':
            self.ffd = self.sepconv

        if 'stop' in self.arg.pos:
            embedding_size = self.arg.hidden_size - 1
        else:
            embedding_size = self.arg.hidden_size
        with tf.variable_scope('encoder_embedding'):
            encoder_input, enc_params = utils.embedding(
                self.inputs,
                model_dim=embedding_size,
                vocab_size=self.arg.input_vocab_size,
                name='encode')
            params = enc_params

        with tf.variable_scope('positional_encoding'):
            with tf.variable_scope('encoder'):
                encoder_input = self.timing_position(encoder_input)

        with tf.variable_scope('encoder'):
            encoder_input = self.dropout_fn(encoder_input)
            encoder_output = self.encoder(
                encoder_input,
                encoder_self_attention_bias=self.encoder_self_attention_bias)
            self.new_mems = tf.stack(self.new_mems)

        if self.arg.classification:
            encoder_output = encoder_output[:, -1]
        with tf.variable_scope('output'):
            if self.arg.use_mos:
                self.logits = mos.MoS(encoder_output,
                                      hidden_size=self.arg.hidden_size,
                                      vocab_size=self.arg.target_vocab_size)
                self.logits = tf.nn.softmax(self.logits)
                if self.arg.loss == 'sparse_softmax_cross_entropy_with_logits':
                    self.arg.loss = 'log_loss'
                self.loss_cl = loss.Loss(
                    self.logits,
                    self.targets,
                    self.arg.loss,
                    vocab_size=self.arg.target_vocab_size,
                    activation=tf.identity,
                    label_smoothing=self.arg.label_smoothing)
                cost = tf.reduce_sum(self.loss_cl.loss, axis=-1)
            else:
                weights = tf.get_variable(
                    'weights',
                    shape=[self.arg.hidden_size, self.arg.target_vocab_size],
                    dtype=tf.float32)
                bias = tf.get_variable('bias',
                                       shape=[self.arg.target_vocab_size],
                                       dtype=tf.float32)
                self.logits = tf.tensordot(encoder_output, weights,
                                           axes=1) + bias
                self.loss_cl = loss.Loss(
                    self.logits,
                    self.targets,
                    self.arg.loss,
                    vocab_size=self.arg.target_vocab_size,
                    label_smoothing=self.arg.label_smoothing)
                cost = self.loss_cl.loss
        if self.arg.mask_loss:
            self.cost = tf.reduce_mean(cost * self.loss_mask)
        else:
            self.cost = tf.reduce_mean(cost)
        if self.arg.weight_decay_regularization:
            l2_loss = self.loss_cl.l2_loss(tf.trainable_variables())
            l2_loss *= self.arg.weight_decay_hyperparameter
            self.cost += l2_loss
        self.optimizer = optimize.Optimizer(arg,
                                            loss=self.cost,
                                            learning_rate=self.learning_rate)
        self.optimizer.accuracy(self.logits, self.targets, mask=self.loss_mask)
        self.train_op = self.optimizer.train_op
        self.predict = self.optimizer.predict
        self.correct_prediction = self.optimizer.correct_prediction
        self.accuracy = self.optimizer.accuracy
        self.optimizer.sequential_accuracy(self.logits,
                                           self.targets,
                                           mask=self.loss_mask)
        self.sequential_accuracy = self.optimizer.sequential_accuracy
        self.fetches = [encoder_input, encoder_output, self.logits]
Example No. 13
    def __init__(self, arg, name=None):
        '''
        A Seq2Seq model based on the model described in arXiv:1804.00946;
        the stop-feature mechanism, in particular, is taken from that work.
        '''
        if name:
            self.name = name
        else:
            self.name = 'Seq2Seq'
        batch_size = 32
        input_sequence_size = 10
        output_sequence_size = 12
        if __name__ != '__main__':
            batch_size = input_sequence_size = output_sequence_size = None
        self.arg = arg
        self.inputs = tf.placeholder(tf.int32,
                                     shape=[batch_size, input_sequence_size],
                                     name='inputs')
        self.targets = tf.placeholder(tf.int32,
                                      shape=[batch_size, output_sequence_size],
                                      name='targets')
        self.training = tf.placeholder(tf.bool, name='training')
        self.learning_rate = tf.placeholder(tf.float32, name='learning_rate')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        self.input_stop_feature = tf.placeholder(
            tf.float32,
            shape=[batch_size, input_sequence_size, 1],
            name='input_stop_feature')
        self.target_stop_feature = tf.placeholder(
            tf.float32,
            shape=[batch_size, output_sequence_size, 1],
            name='target_stop_feature')
        self.batch_size = tf.shape(self.inputs)[0]
        self.input_sequence_size = tf.shape(self.inputs)[1]
        self.target_sequence_size = tf.shape(self.targets)[1]

        if self.arg.mask_loss:
            self.loss_mask = tf.placeholder(
                tf.float32,
                shape=[batch_size, output_sequence_size
                       ],  # (batch_size, output_sequence_size)
                name='loss_mask')
        else:
            self.loss_mask = None

        with tf.variable_scope('embedding'):
            embedded_inputs, embedded_targets = self.embedding()
            embedded_inputs = tf.concat(
                [embedded_inputs, self.input_stop_feature], axis=2)
            embedded_targets = tf.concat(
                [embedded_targets, self.target_stop_feature], axis=2)
        with tf.variable_scope('encode'):
            encoder_output, encoder_state = self.encode(embedded_inputs)
            encoder_output = self.dropout_fn(encoder_output)
        with tf.variable_scope('decode'):
            decoder_output, _ = self.decode(encoder_output, encoder_state,
                                            embedded_targets)
            decoder_output = self.dropout_fn(decoder_output)
        with tf.variable_scope('output'):
            self.logits = utils.dense(decoder_output,
                                      output_dim=self.arg.target_vocab_size,
                                      name='logits')
        with tf.variable_scope('loss'):
            self.loss_cl = loss.Loss(self.logits,
                                     self.targets,
                                     self.arg.loss,
                                     vocab_size=self.arg.target_vocab_size,
                                     label_smoothing=self.arg.label_smoothing)
            cost = self.loss_cl.loss
            if self.arg.mask_loss:
                self.cost = tf.reduce_mean(cost * self.loss_mask)
            else:
                self.cost = tf.reduce_mean(cost)
            if self.arg.weight_decay_regularization:
                l2_loss = self.loss_cl.l2_loss(tf.trainable_variables())
                l2_loss *= self.arg.weight_decay_hyperparameter
                self.cost += l2_loss
            self.optimizer = optimize.Optimizer(
                arg, loss=self.cost, learning_rate=self.learning_rate)
            self.optimizer.accuracy(self.logits,
                                    self.targets,
                                    mask=self.loss_mask)
            self.train_op = self.optimizer.train_op
            self.predict = self.optimizer.predict
            self.correct_prediction = self.optimizer.correct_prediction
            self.accuracy = self.optimizer.accuracy
            self.optimizer.sequential_accuracy(self.logits,
                                               self.targets,
                                               mask=self.loss_mask)
            self.sequential_accuracy = self.optimizer.sequential_accuracy
            self.fetches = [embedded_inputs, encoder_output, self.logits]