Example #1
 def __init__(self, corpus_data, *, params):
     self.fast_text = FastText(corpus_data.model).to(GPU)
     self.discriminator = Discriminator(
         params.emb_dim,
         n_layers=params.d_n_layers,
         n_units=params.d_n_units,
         drop_prob=params.d_drop_prob,
         drop_prob_input=params.d_drop_prob_input,
         leaky=params.d_leaky,
         batch_norm=params.d_bn).to(GPU)
     self.ft_optimizer = optim.SGD(self.fast_text.parameters(),
                                   lr=params.ft_lr)
     self.d_optimizer = optim.SGD(self.discriminator.parameters(),
                                  lr=params.d_lr,
                                  weight_decay=params.d_wd)
     self.a_optimizer = optim.SGD([{
         "params": self.fast_text.u.parameters()
     }, {
         "params": self.fast_text.v.parameters()
     }],
                                  lr=params.a_lr)
     self.smooth = params.smooth
     self.loss_fn = nn.BCEWithLogitsLoss(reduction="mean")
     self.corpus_data_queue = _data_queue(corpus_data,
                                          n_threads=params.n_threads,
                                          n_sentences=params.n_sentences,
                                          batch_size=params.ft_bs)
     self.vocab_size = params.vocab_size
     self.d_bs = params.d_bs
     self.split = params.split
     self.align_output = params.align_output
Example #2
 def fast_text_step(self):
     self.ft_optimizer.zero_grad()
     u_b, v_b = next(self.corpus_data_queue)
     s = self.fast_text(u_b, v_b)
     loss = FastText.loss_fn(s)
     loss.backward()
     self.ft_optimizer.step()
     return loss.item()
Example #3
 def fast_text_step(self):
     losses = []
     for id in [0, 1]:
         self.ft_optimizer[id].zero_grad()
         u_b, v_b = next(self.corpus_data_queue[id])
         s = self.fast_text[id](u_b, v_b)
         loss = FastText.loss_fn(s)
         loss.backward()
         self.ft_optimizer[id].step()
         losses.append(loss.item())
     return losses[0], losses[1]
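
A minimal driver sketch for this two-corpus variant, assuming a trainer object exposing the method above has already been built; the step count and logging interval are illustrative, not from the original repository:

    n_steps, log_every = 10000, 100                  # assumed values
    sum_0, sum_1 = 0.0, 0.0
    for step in range(1, n_steps + 1):
        loss_0, loss_1 = trainer.fast_text_step()    # `trainer` constructed elsewhere
        sum_0, sum_1 = sum_0 + loss_0, sum_1 + loss_1
        if step % log_every == 0:
            print(f"step {step}: loss_0 {sum_0 / log_every:.4f}, loss_1 {sum_1 / log_every:.4f}")
            sum_0, sum_1 = 0.0, 0.0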
Example #4
    def __init__(self, corpus_data_0, corpus_data_1, *, params, n_samples=10000000):
        self.fast_text = [FastText(corpus_data_0.model).to(GPU), FastText(corpus_data_1.model).to(GPU)]
        self.discriminator = Discriminator(params.emb_dim, n_layers=params.d_n_layers, n_units=params.d_n_units,
                                           drop_prob=params.d_drop_prob, drop_prob_input=params.d_drop_prob_input,
                                           leaky=params.d_leaky, batch_norm=params.d_bn).to(GPU)
        self.mapping = nn.Linear(params.emb_dim, params.emb_dim, bias=False)
        self.mapping.weight.data.copy_(torch.diag(torch.ones(params.emb_dim)))
        self.mapping = self.mapping.to(GPU)
        self.ft_optimizer, self.ft_scheduler = [], []
        for id in [0, 1]:
            optimizer, scheduler = optimizers.get_sgd_adapt(self.fast_text[id].parameters(),
                                                            lr=params.ft_lr, mode="max", factor=params.ft_lr_decay,
                                                            patience=params.ft_lr_patience)
            self.ft_optimizer.append(optimizer)
            self.ft_scheduler.append(scheduler)
        self.a_optimizer, self.a_scheduler = [], []
        for id in [0, 1]:
            optimizer, scheduler = optimizers.get_sgd_adapt(
                [{"params": self.fast_text[id].u.parameters()}, {"params": self.fast_text[id].v.parameters()}],
                lr=params.a_lr, mode="max", factor=params.a_lr_decay, patience=params.a_lr_patience)
            self.a_optimizer.append(optimizer)
            self.a_scheduler.append(scheduler)
        if params.d_optimizer == "SGD":
            self.d_optimizer, self.d_scheduler = optimizers.get_sgd_adapt(self.discriminator.parameters(),
                                                                          lr=params.d_lr, mode="max", wd=params.d_wd)

        elif params.d_optimizer == "RMSProp":
            self.d_optimizer, self.d_scheduler = optimizers.get_rmsprop_linear(self.discriminator.parameters(),
                                                                               params.n_steps,
                                                                               lr=params.d_lr, wd=params.d_wd)
        else:
            raise Exception(f"Optimizer {params.d_optimizer} not found.")
        if params.m_optimizer == "SGD":
            self.m_optimizer, self.m_scheduler = optimizers.get_sgd_adapt(self.mapping.parameters(),
                                                                          lr=params.m_lr, mode="max", wd=params.m_wd,
                                                                          factor=params.m_lr_decay,
                                                                          patience=params.m_lr_patience)
        elif params.m_optimizer == "RMSProp":
            self.m_optimizer, self.m_scheduler = optimizers.get_rmsprop_linear(self.mapping.parameters(),
                                                                               params.n_steps,
                                                                               lr=params.m_lr, wd=params.m_wd)
        else:
            raise Exception(f"Optimizer {params.m_optimizer} not found")
        self.m_beta = params.m_beta
        self.smooth = params.smooth
        self.wgan = params.wgan
        self.d_clip_mode = params.d_clip_mode
        if params.wgan:
            self.loss_fn = _wasserstein_distance
        else:
            self.loss_fn = nn.BCEWithLogitsLoss(reduction="mean")
        self.corpus_data_queue = [
            _data_queue(corpus_data_0, n_threads=(params.n_threads + 1) // 2, n_sentences=params.n_sentences,
                        batch_size=params.ft_bs),
            _data_queue(corpus_data_1, n_threads=(params.n_threads + 1) // 2, n_sentences=params.n_sentences,
                        batch_size=params.ft_bs)
        ]
        self.sampler = [
            WordSampler(corpus_data_0.dic, n_urns=n_samples, alpha=params.a_sample_factor, top=params.a_sample_top),
            WordSampler(corpus_data_1.dic, n_urns=n_samples, alpha=params.a_sample_factor, top=params.a_sample_top)]
        self.d_bs = params.d_bs
        self.dic_0, self.dic_1 = corpus_data_0.dic, corpus_data_1.dic
        self.d_gp = params.d_gp
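
The constructor above pulls every hyperparameter from a single `params` object. A hedged sketch of such an object, using types.SimpleNamespace and covering only the attributes this snippet reads; all values are illustrative placeholders rather than the repository's defaults:

    from types import SimpleNamespace

    params = SimpleNamespace(
        # embedding and discriminator architecture
        emb_dim=300,
        d_n_layers=2, d_n_units=2048, d_drop_prob=0.1, d_drop_prob_input=0.1,
        d_leaky=0.2, d_bn=False,
        # discriminator training
        d_optimizer="SGD", d_lr=0.1, d_wd=0.0, d_bs=32, d_gp=0.0, d_clip_mode="clip",
        # fastText and alignment optimisers
        ft_lr=0.05, ft_lr_decay=0.5, ft_lr_patience=2, ft_bs=64,
        a_lr=0.05, a_lr_decay=0.5, a_lr_patience=2,
        a_sample_factor=0.75, a_sample_top=10000,
        # mapping optimiser
        m_optimizer="SGD", m_lr=0.1, m_wd=0.0, m_lr_decay=0.5, m_lr_patience=2, m_beta=0.001,
        # misc
        smooth=0.1, wgan=False, n_steps=100000, n_threads=4, n_sentences=1000000,
    )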
Example #5
def train(config):
    print('parameters: ')
    print(json.dumps(config, indent=4, ensure_ascii=False))

    # load data
    print('load data .....')
    X, y = data_helper.process_data(config)

    # make vocab
    print('make vocab .....')
    word_to_index, label_to_index = data_helper.generate_vocab(X, y, config)

    # padding data
    print('padding data .....')
    input_x, input_y = data_helper.padding(X, y, config, word_to_index, label_to_index)

    # split data
    print('split data .....')
    x_train, y_train, x_test, y_test, x_dev, y_dev = data_helper.split_data(input_x, input_y, config)

    print('length train: {}'.format(len(x_train)))
    print('length test: {}'.format(len(x_test)))
    print('length dev: {}'.format(len(x_dev)))
    print('training .....')
    with tf.Graph().as_default():
        sess_config = tf.ConfigProto(
            allow_soft_placement=config['allow_soft_placement'],
            log_device_placement=config['log_device_placement']
        )
        with tf.Session(config=sess_config) as sess:
            fast_text = FastText(config)

            # training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(config['learning_rate'])
            grads_and_vars = optimizer.compute_gradients(fast_text.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # keep track of gradient values and sparsity
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram('{}/grad/hist'.format(v.name), g)
                    sparsity_summary = tf.summary.scalar('{}/grad/sparsity'.format(v.name), tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # output dir for models and summaries
            timestamp = str(int(time.time()))
            outdir = os.path.abspath(os.path.join(os.path.curdir, 'runs', timestamp))
            print('writing to {}'.format(outdir))

            # summary for loss and accuracy
            loss_summary = tf.summary.scalar('loss', fast_text.loss)
            acc_summary = tf.summary.scalar('accuracy', fast_text.accuracy)

            # train summary
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(outdir, 'summaries', 'train')
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # dev summary
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(outdir, 'summaries', 'dev')
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # checkpoint directory
            checkpoint_dir = os.path.abspath(os.path.join(outdir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model.bin')

            if not os.path.exists(checkpoint_dir):
                os.mkdir(checkpoint_dir)

            saver = tf.train.Saver(tf.global_variables(), max_to_keep=config['num_checkpoints'])

            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                feed_dict = {
                    fast_text.input_x: x_batch,
                    fast_text.input_y: y_batch,
                }

                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, fast_text.loss, fast_text.accuracy],
                    feed_dict=feed_dict
                )

                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                feed_dic = {
                    fast_text.input_x: x_batch,
                    fast_text.input_y: y_batch,
                    fast_text.dropout_keep_prob: 1.0
                }

                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, fast_text.loss, fast_text.accuracy],
                    feed_dict=feed_dic
                )

                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # generate batches
            batches = data_helper.generate_batchs(x_train, y_train, config)
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % config['evaluate_every'] == 0:
                    print('Evaluation:')
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)

                if current_step % config['checkpoint_every'] == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print('save model checkpoint to {}'.format(path))

            # test accuracy
            test_accuracy = sess.run([fast_text.accuracy], feed_dict={
                fast_text.input_x: x_test, fast_text.input_y: y_test, fast_text.dropout_keep_prob: 1.0})
            print('Test dataset accuracy: {}'.format(test_accuracy))
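
`train` takes a plain `config` dict. A minimal sketch with only the keys this snippet reads directly; the values are placeholders, and `data_helper` and the `FastText` model will typically require additional keys (vocabulary size, sequence length, and so on):

    config = {
        'allow_soft_placement': True,   # let TF fall back to CPU if an op has no GPU kernel
        'log_device_placement': False,
        'learning_rate': 1e-3,
        'num_checkpoints': 5,           # how many checkpoints tf.train.Saver keeps
        'evaluate_every': 100,          # dev evaluation cadence (in steps)
        'checkpoint_every': 100,        # checkpoint cadence (in steps)
    }
    # train(config)  # plus whatever keys data_helper.* and FastText(config) expect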
Example #6
    # Augmenting x_train and x_test with n-gram features
    x_train = add_ngram(x_train, token_indice, ngram_range)
    x_test = add_ngram(x_test, token_indice, ngram_range)
    print('Average train sequence length: {}'.format(
        np.mean(list(map(len, x_train)), dtype=int)))
    print('Average test sequence length: {}'.format(
        np.mean(list(map(len, x_test)), dtype=int)))

print('Pad sequences (samples x time)...')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('Build model...')
model = FastText(maxlen, max_features, embedding_dims)
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

print('Train...')
early_stopping = EarlyStopping(monitor='val_accuracy', patience=3, mode='max')
model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=[early_stopping],
          validation_data=(x_test, y_test))

print('Test...')
result = model.predict(x_test)
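
Since the model is compiled with binary_crossentropy, `model.predict` returns one probability per sample. A short follow-up sketch (assuming `y_test` holds 0/1 labels) that thresholds the probabilities and reports accuracy:

    import numpy as np

    # Threshold the predicted probabilities at 0.5 and compare with the labels.
    y_pred = (result > 0.5).astype(int).ravel()
    test_acc = np.mean(y_pred == np.asarray(y_test).ravel())
    print('Test accuracy: {:.4f}'.format(test_acc))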
Example #7
class Trainer:
    def __init__(self, corpus_data, *, params):
        self.fast_text = FastText(corpus_data.model).to(GPU)
        self.discriminator = Discriminator(
            params.emb_dim,
            n_layers=params.d_n_layers,
            n_units=params.d_n_units,
            drop_prob=params.d_drop_prob,
            drop_prob_input=params.d_drop_prob_input,
            leaky=params.d_leaky,
            batch_norm=params.d_bn).to(GPU)
        self.ft_optimizer = optim.SGD(self.fast_text.parameters(),
                                      lr=params.ft_lr)
        self.d_optimizer = optim.SGD(self.discriminator.parameters(),
                                     lr=params.d_lr,
                                     weight_decay=params.d_wd)
        self.a_optimizer = optim.SGD([{
            "params": self.fast_text.u.parameters()
        }, {
            "params": self.fast_text.v.parameters()
        }],
                                     lr=params.a_lr)
        self.smooth = params.smooth
        self.loss_fn = nn.BCEWithLogitsLoss(reduction="mean")
        self.corpus_data_queue = _data_queue(corpus_data,
                                             n_threads=params.n_threads,
                                             n_sentences=params.n_sentences,
                                             batch_size=params.ft_bs)
        self.vocab_size = params.vocab_size
        self.d_bs = params.d_bs
        self.split = params.split
        self.align_output = params.align_output

    def fast_text_step(self):
        self.ft_optimizer.zero_grad()
        u_b, v_b = next(self.corpus_data_queue)
        s = self.fast_text(u_b, v_b)
        loss = FastText.loss_fn(s)
        loss.backward()
        self.ft_optimizer.step()
        return loss.item()

    def get_adv_batch(self, *, reverse, fix_embedding):
        vocab_split, bs_split = int(self.vocab_size * self.split), int(
            self.d_bs * self.split)
        x = (torch.randint(0, vocab_split, size=(bs_split, ),
                           dtype=torch.long).tolist() +
             torch.randint(vocab_split,
                           self.vocab_size,
                           size=(self.d_bs - bs_split, ),
                           dtype=torch.long).tolist())
        if self.align_output:
            x = torch.LongTensor(x).view(self.d_bs, 1).to(GPU)
            if fix_embedding:
                with torch.no_grad():
                    x = self.fast_text.v(x).view(self.d_bs, -1)
            else:
                x = self.fast_text.v(x).view(self.d_bs, -1)
        else:
            x = self.fast_text.model.get_bag(x, self.fast_text.u.weight.device)
            if fix_embedding:
                with torch.no_grad():
                    x = self.fast_text.u(x[0], x[1]).view(self.d_bs, -1)
            else:
                x = self.fast_text.u(x[0], x[1]).view(self.d_bs, -1)
        y = torch.FloatTensor(self.d_bs).to(GPU).uniform_(0.0, self.smooth)
        if reverse:
            y[:bs_split] = 1 - y[:bs_split]
        else:
            y[bs_split:] = 1 - y[bs_split:]
        return x, y

    def discriminator_step(self):
        self.d_optimizer.zero_grad()
        self.discriminator.train()
        with torch.no_grad():
            x, y = self.get_adv_batch(reverse=False, fix_embedding=True)
        y_hat = self.discriminator(x)
        loss = self.loss_fn(y_hat, y)
        loss.backward()
        self.d_optimizer.step()
        return loss.item()

    def adversarial_step(self):
        self.a_optimizer.zero_grad()
        self.discriminator.eval()
        x, y = self.get_adv_batch(reverse=True, fix_embedding=False)
        y_hat = self.discriminator(x)
        loss = self.loss_fn(y_hat, y)
        loss.backward()
        self.a_optimizer.step()
        return loss.item()
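
A hedged sketch of how these steps might be interleaved into a training loop; the schedule, the step counts, and the construction of `corpus_data` and `params` are assumptions rather than part of the original code:

    trainer = Trainer(corpus_data, params=params)      # inputs prepared elsewhere
    n_steps, d_steps_per_iter = 10000, 5               # assumed schedule
    for step in range(n_steps):
        ft_loss = trainer.fast_text_step()             # update the fastText embeddings
        d_losses = [trainer.discriminator_step() for _ in range(d_steps_per_iter)]
        adv_loss = trainer.adversarial_step()          # push embeddings to fool the discriminator
        if step % 100 == 0:
            print(f"step {step}: ft {ft_loss:.4f}, "
                  f"d {sum(d_losses) / d_steps_per_iter:.4f}, adv {adv_loss:.4f}")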