Example #1
def load_tf_model(path, embedding_dim=128, graph_size=20, n_encode_layers=2):
    """Load model weights from hd5 file
    """
    # https://stackoverflow.com/questions/51806852/cant-save-custom-subclassed-model
    CAPACITIES = {10: 20., 20: 30., 50: 40., 100: 50.}

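    # Dummy batch (depot coords, node coords, demands scaled by capacity); a
    # single forward pass builds the subclassed model's variables so that
    # load_weights can restore them.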
    data_random = [
        tf.random.uniform((2, 2), minval=0, maxval=1, dtype=tf.float32),
        tf.random.uniform((2, graph_size, 2),
                          minval=0, maxval=1, dtype=tf.float32),
        tf.cast(
            tf.random.uniform((2, graph_size),
                              minval=1, maxval=10, dtype=tf.int32),
            tf.float32) / tf.cast(CAPACITIES[graph_size], tf.float32)
    ]

    model_loaded = AttentionModel(embedding_dim,
                                  n_encode_layers=n_encode_layers)
    set_decode_type(model_loaded, "greedy")
    _, _ = model_loaded(data_random)

    model_loaded.load_weights(path)

    return model_loaded
Example #2
def __init__(self, input_dim, second_hidden_size, minute_hidden_size,
             rnn_layers, batch_size, bidirectional, use_lstm):
    super(SafetyModel, self).__init__()
    self.batch_size = batch_size
    self.rnn_layers = rnn_layers
    self.use_lstm = use_lstm
    self.second_hidden_size = second_hidden_size
    self.minute_hidden_size = minute_hidden_size
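    # Hierarchical attention: encode per-second features first, then feed the
    # (doubled, if bidirectional) outputs to a minute-level attention encoder.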
    self.second_att_net = AttentionModel(input_dim, second_hidden_size,
                                         bidirectional, use_lstm)
    if bidirectional:
        second_hidden_size *= 2
    self.minute_att_net = AttentionModel(second_hidden_size,
                                         minute_hidden_size, bidirectional,
                                         use_lstm)
    if bidirectional:
        minute_hidden_size *= 2
    self.fc = nn.Linear(minute_hidden_size, 2)
    self.init_hidden()
Example #3
def copy_of_tf_model(model, embedding_dim=128, graph_size=20):
    """Copy model weights to new model
    """
    # https://stackoverflow.com/questions/56841736/how-to-copy-a-network-in-tensorflow-2-0
    CAPACITIES = {10: 20., 20: 30., 50: 40., 100: 50.}

    data_random = [
        tf.random.uniform((2, 2), minval=0, maxval=1, dtype=tf.float32),
        tf.random.uniform((2, graph_size, 2),
                          minval=0, maxval=1, dtype=tf.float32),
        tf.cast(
            tf.random.uniform((2, graph_size),
                              minval=1, maxval=10, dtype=tf.int32),
            tf.float32) / tf.cast(CAPACITIES[graph_size], tf.float32)
    ]

    new_model = AttentionModel(embedding_dim)
    set_decode_type(new_model, "sampling")
    _, _ = new_model(data_random)

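    # Copy weights variable-by-variable; both models were built with the same
    # architecture, so their variable lists line up one-to-one.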
    for a, b in zip(new_model.variables, model.variables):
        a.assign(b)

    return new_model
Example #4
def main():
    args = parse_args()
    print(args)
    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    if args.enable_ce:
        fluid.default_main_program().random_seed = 102
        framework.default_startup_program().random_seed = 102

    train_program = fluid.Program()
    startup_program = fluid.Program()

    with fluid.program_guard(train_program, startup_program):
        # Training process

        if args.attention:
            model = AttentionModel(hidden_size,
                                   src_vocab_size,
                                   tar_vocab_size,
                                   batch_size,
                                   num_layers=num_layers,
                                   init_scale=init_scale,
                                   dropout=dropout)
        else:
            model = BaseModel(hidden_size,
                              src_vocab_size,
                              tar_vocab_size,
                              batch_size,
                              num_layers=num_layers,
                              init_scale=init_scale,
                              dropout=dropout)
        loss = model.build_graph()
        inference_program = train_program.clone(for_test=True)
        clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=max_grad_norm)
        lr = args.learning_rate
        opt_type = args.optimizer
        if opt_type == "sgd":
            optimizer = fluid.optimizer.SGD(lr, grad_clip=clip)
        elif opt_type == "adam":
            optimizer = fluid.optimizer.Adam(lr, grad_clip=clip)
        else:
            print("only support [sgd|adam]")
            raise Exception("opt type not support")

        optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(startup_program)

    device_count = len(fluid.cuda_places()) if args.use_gpu else len(
        fluid.cpu_places())

    compiled_program = fluid.CompiledProgram(train_program).with_data_parallel(
        loss_name=loss.name)

    train_data_prefix = args.train_data_prefix
    eval_data_prefix = args.eval_data_prefix
    test_data_prefix = args.test_data_prefix
    vocab_prefix = args.vocab_prefix
    src_lang = args.src_lang
    tar_lang = args.tar_lang
    print("begin to load data")
    raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
                               train_data_prefix, eval_data_prefix,
                               test_data_prefix, args.max_len)
    print("finished load data")
    train_data, valid_data, test_data, _ = raw_data

    def prepare_input(batch, epoch_id=0, with_lr=True):
        src_ids, src_mask, tar_ids, tar_mask = batch
        res = {}
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
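        # Teacher forcing: the decoder consumes tar_ids[:, :-1] and is trained
        # to predict the shifted target tar_ids[:, 1:].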
        in_tar = tar_ids[:, :-1]
        label_tar = tar_ids[:, 1:]

        in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
        label_tar = label_tar.reshape(
            (label_tar.shape[0], label_tar.shape[1], 1))

        res['src'] = src_ids
        res['tar'] = in_tar
        res['label'] = label_tar
        res['src_sequence_length'] = src_mask
        res['tar_sequence_length'] = tar_mask

        return res, np.sum(tar_mask)

    # evaluate perplexity on a data set
    def eval(data, epoch_id=0):
        eval_data_iter = reader.get_data_iter(data, batch_size, mode='eval')
        total_loss = 0.0
        word_count = 0.0
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed, word_num = prepare_input(batch,
                                                      epoch_id,
                                                      with_lr=False)
            fetch_outs = exe.run(inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])

            total_loss += cost_train * batch_size
            word_count += word_num

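        # Perplexity: exp of the average per-word cross-entropy.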
        ppl = np.exp(total_loss / word_count)

        return ppl

    def train():
        ce_time = []
        ce_ppl = []
        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            start_time = time.time()
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(train_data,
                                                       batch_size,
                                                       enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data, batch_size)

            total_loss = 0
            word_count = 0.0
            batch_times = []
            time_interval = 0.0
            batch_start_time = time.time()
            epoch_word_count = 0.0
            total_reader_cost = 0.0
            batch_read_start = time.time()
            for batch_id, batch in enumerate(train_data_iter):
                input_data_feed, word_num = prepare_input(batch,
                                                          epoch_id=epoch_id)
                word_count += word_num
                total_reader_cost += time.time() - batch_read_start
                fetch_outs = exe.run(program=compiled_program,
                                     feed=input_data_feed,
                                     fetch_list=[loss.name],
                                     use_program_cache=True)

                cost_train = np.mean(fetch_outs[0])
                # print(cost_train)
                total_loss += cost_train * batch_size
                batch_end_time = time.time()
                batch_time = batch_end_time - batch_start_time
                batch_times.append(batch_time)
                time_interval += batch_time
                epoch_word_count += word_num

                if batch_id > 0 and batch_id % 100 == 0:
                    print(
                        "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f; reader cost: %0.5f s; ips: %0.5f tokens/sec"
                        % (epoch_id, batch_id, batch_time,
                           np.exp(total_loss / word_count), total_reader_cost /
                           100, word_count / time_interval))
                    ce_ppl.append(np.exp(total_loss / word_count))
                    total_loss = 0.0
                    word_count = 0.0
                    time_interval = 0.0
                    total_reader_cost = 0.0

                # profiler tools
                if args.profile and epoch_id == 0 and batch_id == 100:
                    profiler.reset_profiler()
                elif args.profile and epoch_id == 0 and batch_id == 105:
                    return
                batch_start_time = time.time()
                batch_read_start = time.time()

            end_time = time.time()
            epoch_time = end_time - start_time
            ce_time.append(epoch_time)
            print(
                "\nTrain epoch:[%d]; Epoch Time: %.5f; avg_time: %.5f s/step; ips: %0.5f tokens/sec\n"
                % (epoch_id, epoch_time, sum(batch_times) / len(batch_times),
                   epoch_word_count / sum(batch_times)))

            if not args.profile:
                save_path = os.path.join(args.model_path,
                                         "epoch_" + str(epoch_id),
                                         "checkpoint")
                print("begin to save", save_path)
                fluid.save(train_program, save_path)
                print("save finished")
                dev_ppl = eval(valid_data)
                print("dev ppl", dev_ppl)
                test_ppl = eval(test_data)
                print("test ppl", test_ppl)

        if args.enable_ce:
            card_num = get_cards()
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            except IndexError:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (card_num, _ppl))

    with profile_context(args.profile, args.profiler_path):
        train()
Example #5
def train():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    if args.enable_ce:
        fluid.default_main_program().random_seed = 102
        framework.default_startup_program().random_seed = 102

    # Training process

    if args.attention:
        model = AttentionModel(hidden_size,
                               src_vocab_size,
                               tar_vocab_size,
                               batch_size,
                               num_layers=num_layers,
                               init_scale=init_scale,
                               dropout=dropout)
    else:
        model = BaseModel(hidden_size,
                          src_vocab_size,
                          tar_vocab_size,
                          batch_size,
                          num_layers=num_layers,
                          init_scale=init_scale,
                          dropout=dropout)

    loss = model.build_graph()
    # clone from default main program and use it as the validation program
    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)

    fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(
        clip_norm=max_grad_norm))

    lr = args.learning_rate
    opt_type = args.optimizer
    if opt_type == "sgd":
        optimizer = fluid.optimizer.SGD(lr)
    elif opt_type == "adam":
        optimizer = fluid.optimizer.Adam(lr)
    else:
        print("only support [sgd|adam]")
        raise Exception("opt type not support")

    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    train_data_prefix = args.train_data_prefix
    eval_data_prefix = args.eval_data_prefix
    test_data_prefix = args.test_data_prefix
    vocab_prefix = args.vocab_prefix
    src_lang = args.src_lang
    tar_lang = args.tar_lang
    print("begin to load data")
    raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
                               train_data_prefix, eval_data_prefix,
                               test_data_prefix, args.max_len)
    print("finished load data")
    train_data, valid_data, test_data, _ = raw_data

    def prepare_input(batch, epoch_id=0, with_lr=True):
        src_ids, src_mask, tar_ids, tar_mask = batch
        res = {}
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1], 1))
        in_tar = tar_ids[:, :-1]
        label_tar = tar_ids[:, 1:]

        in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1], 1))
        label_tar = label_tar.reshape(
            (label_tar.shape[0], label_tar.shape[1], 1))

        res['src'] = src_ids
        res['tar'] = in_tar
        res['label'] = label_tar
        res['src_sequence_length'] = src_mask
        res['tar_sequence_length'] = tar_mask

        return res, np.sum(tar_mask)

    # evaluate perplexity on a data set
    def eval(data, epoch_id=0):
        eval_data_iter = reader.get_data_iter(data, batch_size, mode='eval')
        total_loss = 0.0
        word_count = 0.0
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed, word_num = prepare_input(batch,
                                                      epoch_id,
                                                      with_lr=False)
            fetch_outs = exe.run(inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])

            total_loss += cost_train * batch_size
            word_count += word_num

        ppl = np.exp(total_loss / word_count)

        return ppl

    ce_time = []
    ce_ppl = []
    max_epoch = args.max_epoch
    for epoch_id in range(max_epoch):
        start_time = time.time()
        print("epoch id", epoch_id)
        if args.enable_ce:
            train_data_iter = reader.get_data_iter(train_data,
                                                   batch_size,
                                                   enable_ce=True)
        else:
            train_data_iter = reader.get_data_iter(train_data, batch_size)

        total_loss = 0
        word_count = 0.0
        for batch_id, batch in enumerate(train_data_iter):

            input_data_feed, word_num = prepare_input(batch, epoch_id=epoch_id)
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=True)

            cost_train = np.array(fetch_outs[0])

            total_loss += cost_train * batch_size
            word_count += word_num

            if batch_id > 0 and batch_id % 100 == 0:
                print("ppl", batch_id, np.exp(total_loss / word_count))
                ce_ppl.append(np.exp(total_loss / word_count))
                total_loss = 0.0
                word_count = 0.0
        end_time = time.time()
        time_gap = end_time - start_time
        ce_time.append(time_gap)

        dir_name = args.model_path + "/epoch_" + str(epoch_id)
        print("begin to save", dir_name)
        fluid.io.save_params(exe, dir_name)
        print("save finished")
        dev_ppl = eval(valid_data)
        print("dev ppl", dev_ppl)
        test_ppl = eval(test_data)
        print("test ppl", test_ppl)

    if args.enable_ce:
        card_num = get_cards()
        _ppl = 0
        _time = 0
        try:
            _time = ce_time[-1]
            _ppl = ce_ppl[-1]
        except IndexError:
            print("ce info error")
        print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
        print("kpis\ttrain_ppl_card%s\t%f" % (card_num, _ppl))
Example #6
import sys

from attention_model import AttentionModel
from telegram.ext import CommandHandler, PicklePersistence, Updater

if __name__ == '__main__':
    debug = 0
    if len(sys.argv) > 1:
        if sys.argv[1] == '--debug':
            debug = 1

    print('-> Starting Bot!')
    f = open("token.txt", "r")
    token = f.read().strip()

    if debug == 0:
        checkpoint_path = "models/char_att7/"
        global model
        model = AttentionModel(checkpoint_path=checkpoint_path,
                               load_model=True).model
    elif debug == 1:
        model = 'yolo'
    # with CustomObjectScope({'SeqSelfAttention': SeqSelfAttention,
    #                         'MultiHead': MultiHead,
    #                         'root_mean_squared_error': root_mean_squared_error}):
    #     model = keras.models.load_model('models/char_18_epoch_5.h5')
    # global graph
    # graph = tf.get_default_graph()
    graph = None

    pp = PicklePersistence(filename='data/conversationbot')
    updater = Updater(token, persistence=pp, use_context=True)
    bot = NeuralBot(updater, model, graph)

    updater.dispatcher.add_handler(CommandHandler('hello', bot.hello))
Example #7
    def train(self):
        self.max_acc = 1
        self.is_training = True
        with tf.Graph().as_default():
            data_processor = DataProcessor()

            vocab_size = data_processor.get_vocabulary_size(FLAGS.vocab_path)
            vocab, revocab = DataProcessor.initialize_vocabulary(
                FLAGS.vocab_path)
            data_processor.get_init(FLAGS.input_training_data_path,
                                    FLAGS.input_validation_data_path, vocab,
                                    vocab_size, FLAGS.max_length, revocab)
            models = AttentionModel()

            input_q = tf.placeholder(tf.int32,
                                     shape=(None, FLAGS.max_length),
                                     name="input_x1")  # FLAGS.train_batch_size
            input_ap = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))
            input_an = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))
            # input_k = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))
            # input_v = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))
            q_encode = models.embed(
                inputs=input_q,
                vocab_size=vocab_size + 1,
                num_units=hp.hidden_units)  # vocab size plus 1 for padding
            ap_encode = models.embed(inputs=input_ap,
                                     vocab_size=vocab_size + 1,
                                     num_units=hp.hidden_units)
            an_encode = models.embed(inputs=input_an,
                                     vocab_size=vocab_size + 1,
                                     num_units=hp.hidden_units)
            # k_encode = models.embed(input_k, vocab_size=vocab_size,num_units=hp.hidden_units)
            # v_encode = models.embed(input_v, vocab_size=vocab_size,num_units=hp.hidden_units)
            # apply dropout
            q_encode = tf.layers.dropout(q_encode,
                                         hp.dropout_rate,
                                         training=tf.convert_to_tensor(
                                             self.is_training))
            ap_encode = tf.layers.dropout(ap_encode,
                                          hp.dropout_rate,
                                          training=tf.convert_to_tensor(
                                              self.is_training))
            an_encode = tf.layers.dropout(an_encode,
                                          hp.dropout_rate,
                                          training=tf.convert_to_tensor(
                                              self.is_training))
            # k_encode = tf.layers.dropout(k_encode, hp.dropout_rate, training=tf.convert_to_tensor(self.is_training))
            # v_encode = tf.layers.dropout(v_encode, hp.dropout_rate, training=tf.convert_to_tensor(self.is_training))

            # multihead blocks
            for i in range(hp.num_blocks):
                with tf.variable_scope("num_blocks_{}".format(i)):
                    q_encode = models.multihead_attention(
                        query=q_encode,
                        key=q_encode,
                        value=q_encode,
                        num_heads=hp.num_heads,
                        is_training=tf.convert_to_tensor(self.is_training),
                        dropout_rate=hp.dropout_rate,
                        mask_future=False)
                    q_encode = models.feed_forward(
                        q_encode, units=[hp.hidden_units * 4, hp.hidden_units])
                    ap_encode = models.multihead_attention(
                        query=ap_encode,
                        key=ap_encode,
                        value=ap_encode,
                        num_heads=hp.num_heads,
                        is_training=tf.convert_to_tensor(self.is_training),
                        dropout_rate=hp.dropout_rate,
                        mask_future=False)
                    ap_encode = models.feed_forward(
                        ap_encode,
                        units=[hp.hidden_units * 4, hp.hidden_units])
                    an_encode = models.multihead_attention(
                        query=an_encode,
                        key=an_encode,
                        value=an_encode,
                        num_heads=hp.num_heads,
                        is_training=tf.convert_to_tensor(self.is_training),
                        dropout_rate=hp.dropout_rate,
                        mask_future=False)
                    an_encode = models.feed_forward(
                        an_encode,
                        units=[hp.hidden_units * 4, hp.hidden_units])

            ## output layer
            with tf.name_scope('output_layer'):
                dims = q_encode.get_shape().as_list()
                q_encode = tf.reshape(q_encode, [-1, dims[1] * dims[2]])
                ap_encode = tf.reshape(ap_encode, [-1, dims[1] * dims[2]])
                an_encode = tf.reshape(an_encode, [-1, dims[1] * dims[2]])
                weight = tf.get_variable(
                    'output_weight',
                    [q_encode.get_shape().as_list()[-1], hp.hidden_units])

                q_encode = tf.matmul(q_encode, weight)
                ap_encode = tf.matmul(ap_encode, weight)
                an_encode = tf.matmul(an_encode, weight)

            q_encode = models.vec_normalize(q_encode)
            ap_encode = models.vec_normalize(ap_encode)
            an_encode = models.vec_normalize(an_encode)

            ## calculate similarity and loss
            cos_12 = tf.reduce_sum(tf.multiply(q_encode, ap_encode),
                                   1)  # element-wise product summed = cosine similarity
            cos_13 = tf.reduce_sum(tf.multiply(q_encode, an_encode), 1)

            zero = tf.constant(0,
                               shape=[FLAGS.train_batch_size],
                               dtype=tf.float32)
            margin = tf.constant(FLAGS.loss_margin,
                                 shape=[FLAGS.train_batch_size],
                                 dtype=tf.float32)

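            # Pairwise hinge (triplet) loss: the positive cosine must exceed
            # the negative cosine by at least `margin`.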
            losses = tf.maximum(
                zero, tf.subtract(margin, tf.subtract(cos_12, cos_13)))
            loss_sum = tf.reduce_sum(losses)
            loss_avg = tf.div(loss_sum, FLAGS.train_batch_size)
            correct = tf.equal(zero, losses)
            accuracy = tf.reduce_mean(tf.cast(correct, "float"),
                                      name="accuracy")

            global_step = tf.Variable(
                0, name="global_step", trainable=False
            )  # The global step will be automatically incremented by one every time you execute a train loop
            optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
            # optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            grads_and_vars = optimizer.compute_gradients(loss_avg)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
            saver = tf.train.Saver(tf.global_variables())

            # session start point
            with tf.Session() as session:
                session.run(tf.local_variables_initializer())
                session.run(tf.global_variables_initializer())
                session.run(tf.tables_initializer())
                # meta_path = FLAGS.output_model_path + '/step6500_loss0.0_trainAcc1.0_evalAcc0.36.meta'
                # model_path = FLAGS.output_model_path + '/step6500_loss0.0_trainAcc1.0_evalAcc0.36'
                # saver = tf.train.import_meta_graph(meta_path)
                # print('graph imported')
                # saver.restore(session, model_path)
                # print('variables restored!')

                # Load pre-trained model
                ckpt = tf.train.get_checkpoint_state(
                    FLAGS.input_previous_model_path)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(session, ckpt.model_checkpoint_path)
                    print("Load Model From ", ckpt.model_checkpoint_path)
                else:
                    print("No model found")

                print("Begin to train model.")
                max_acc = 0
                for step in range(FLAGS.training_steps):
                    train_data_batch = data_processor.next_batch_train_random(
                        FLAGS.train_batch_size)
                    train_q_vec_b, train_q_vec_len_b, train_d_vec_b, train_d_vec_len_b, train_dneg_vec_b, train_dneg_vec_len_b = train_data_batch
                    feed_dict = {
                        input_q: train_q_vec_b,
                        input_ap: train_d_vec_b,
                        input_an: train_dneg_vec_b
                    }
                    _, loss_avg_, accuracy_, step_ = session.run(
                        [train_op, loss_avg, accuracy, global_step],
                        feed_dict=feed_dict)
                    print('=' * 10 + 'step{}, loss_avg = {}, acc={}'.format(
                        step_, loss_avg_, accuracy_))  # loss for all batches
                    if step_ % FLAGS.eval_every == 0:
                        print('\n============================> begin to test ')
                        eval_size = FLAGS.validation_size

                        def test_step(input_y1, input_y2, input_y3, label_list,
                                      sess):
                            feed_dict = {
                                input_q: input_y1,
                                input_ap: input_y2,
                                input_an: input_y3
                            }

                            correct_flag = 0
                            cos_12_ = sess.run(cos_12, feed_dict)
                            cos_max = max(cos_12_)
                            index_max = list(cos_12_).index(cos_max)
                            if label_list[index_max] == '1':
                                correct_flag = 1
                            # cos_pos_, cos_neg_, accuracy_ = sess.run([cos_12, cos_13, accuracy], feed_dict)
                            # data_processor.saveFeatures(cos_pos_, cos_neg_, test_loss_, accuracy_)
                            return correct_flag

                        def evaluate(eval_size):
                            correct_num = int(0)
                            for i in range(eval_size):
                                print('evaluation step %d ' % i)
                                batches = data_processor.loadValData_step(
                                    vocab, vocab_size,
                                    FLAGS.input_validation_data_path,
                                    FLAGS.max_length,
                                    eval_size)  # batch_size*seq_len
                                # display/save the test data
                                # save_test_data(batch_y1, batch_y2, label_list)
                                batch_y1, batch_y2, label_list = batches[i]
                                correct_flag = test_step(
                                    batch_y1, batch_y2, batch_y2, label_list,
                                    session)
                                correct_num += correct_flag
                            print('correct_num', correct_num)
                            acc = correct_num / float(eval_size)
                            return acc

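                        # Caveat: is_training was baked into the graph via
                        # tf.convert_to_tensor at build time, so flipping the
                        # flag here does not actually disable dropout; a
                        # placeholder would be needed for that.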
                        self.is_training = False
                        acc_ = evaluate(eval_size=eval_size)
                        self.is_training = True
                        print(
                            '--------The test result among the test data sets: acc = {0}, test size = {1}----------'
                            .format(acc_, eval_size))
                        if acc_ >= max_acc:
                            max_acc = acc_
                            # # acc = test_for_bilstm.test()
                            path = saver.save(
                                session, FLAGS.output_model_path + '/step' +
                                str(step_) + '_loss' + str(loss_avg_) +
                                '_trainAcc' + str(accuracy_) + '_evalAcc' +
                                str(acc_))
                            saver.export_meta_graph(FLAGS.output_model_path +
                                                    '/meta_' + 'step' +
                                                    str(step_) + '_loss' +
                                                    str(loss_avg_) +
                                                    '_trainAcc' +
                                                    str(accuracy_) +
                                                    '_evalAcc' + str(acc_))
                            print("Save checkpoint(model) to {}".format(path))
Example #8
def main():
    args = parse_args()
    print(args)
    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        #args.enable_ce = True
        if args.enable_ce:
            fluid.default_startup_program().random_seed = 102
            fluid.default_main_program().random_seed = 102
            np.random.seed(102)
            random.seed(102)

        # Training process

        if args.attention:
            model = AttentionModel(hidden_size,
                                   src_vocab_size,
                                   tar_vocab_size,
                                   batch_size,
                                   num_layers=num_layers,
                                   init_scale=init_scale,
                                   dropout=dropout)
        else:
            model = BaseModel(hidden_size,
                              src_vocab_size,
                              tar_vocab_size,
                              batch_size,
                              num_layers=num_layers,
                              init_scale=init_scale,
                              dropout=dropout)
        global_norm_clip = GradientClipByGlobalNorm(max_grad_norm)
        lr = args.learning_rate
        opt_type = args.optimizer
        if opt_type == "sgd":
            optimizer = fluid.optimizer.SGD(lr,
                                            parameter_list=model.parameters(),
                                            grad_clip=global_norm_clip)
        elif opt_type == "adam":
            optimizer = fluid.optimizer.Adam(lr,
                                             parameter_list=model.parameters(),
                                             grad_clip=global_norm_clip)
        else:
            print("only [sgd|adam] optimizers are supported")
            raise Exception("optimizer type not supported")

        train_data_prefix = args.train_data_prefix
        eval_data_prefix = args.eval_data_prefix
        test_data_prefix = args.test_data_prefix
        vocab_prefix = args.vocab_prefix
        src_lang = args.src_lang
        tar_lang = args.tar_lang
        print("begin to load data")
        raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
                                   train_data_prefix, eval_data_prefix,
                                   test_data_prefix, args.max_len)
        print("finished load data")
        train_data, valid_data, test_data, _ = raw_data

        def prepare_input(batch, epoch_id=0):
            src_ids, src_mask, tar_ids, tar_mask = batch
            res = {}
            src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
            in_tar = tar_ids[:, :-1]
            label_tar = tar_ids[:, 1:]

            in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
            label_tar = label_tar.reshape(
                (label_tar.shape[0], label_tar.shape[1], 1))
            inputs = [src_ids, in_tar, label_tar, src_mask, tar_mask]
            return inputs, np.sum(tar_mask)

        # evaluate perplexity on a data set
        def eval(data, epoch_id=0):
            model.eval()
            eval_data_iter = reader.get_data_iter(data,
                                                  batch_size,
                                                  mode='eval')
            total_loss = 0.0
            word_count = 0.0
            for batch_id, batch in enumerate(eval_data_iter):
                input_data_feed, word_num = prepare_input(batch, epoch_id)
                loss = model(input_data_feed)

                total_loss += loss * batch_size
                word_count += word_num
            ppl = np.exp(total_loss.numpy() / word_count)
            model.train()
            return ppl

        ce_time = []
        ce_ppl = []
        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            epoch_start = time.time()

            model.train()
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(train_data,
                                                       batch_size,
                                                       enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data, batch_size)

            total_loss = 0
            word_count = 0.0
            batch_times = []
            total_reader_cost = 0.0
            interval_time_start = time.time()

            batch_start = time.time()
            for batch_id, batch in enumerate(train_data_iter):
                batch_reader_end = time.time()
                total_reader_cost += batch_reader_end - batch_start

                input_data_feed, word_num = prepare_input(batch,
                                                          epoch_id=epoch_id)
                word_count += word_num
                loss = model(input_data_feed)
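                # Dygraph-style step: backprop, apply the update, then clear
                # gradients manually (they are not zeroed automatically).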
                loss.backward()
                optimizer.minimize(loss)
                model.clear_gradients()
                total_loss += loss * batch_size
                total_loss_value = total_loss.numpy()

                batch_times.append(time.time() - batch_start)
                if batch_id > 0 and batch_id % 100 == 0:
                    print(
                        "-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, batch_cost: %.5f sec, reader_cost: %.5f sec, ips: %.5f words/sec"
                        % (epoch_id, batch_id,
                           np.exp(total_loss_value / word_count),
                           (time.time() - interval_time_start) / 100,
                           total_reader_cost / 100, word_count /
                           (time.time() - interval_time_start)))
                    ce_ppl.append(np.exp(total_loss_value / word_count))
                    total_loss = 0.0
                    word_count = 0.0
                    total_reader_cost = 0.0
                    interval_time_start = time.time()
                batch_start = time.time()

            train_epoch_cost = time.time() - epoch_start
            print(
                "\nTrain epoch:[%d]; epoch_cost: %.5f sec; avg_batch_cost: %.5f s/step\n"
                % (epoch_id, train_epoch_cost,
                   sum(batch_times) / len(batch_times)))
            ce_time.append(train_epoch_cost)

            dir_name = os.path.join(args.model_path, "epoch_" + str(epoch_id))
            print("begin to save", dir_name)
            paddle.fluid.save_dygraph(model.state_dict(), dir_name)
            print("save finished")
            dev_ppl = eval(valid_data)
            print("dev ppl", dev_ppl)
            test_ppl = eval(test_data)
            print("test ppl", test_ppl)

        if args.enable_ce:
            card_num = get_cards()
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            except IndexError:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (card_num, _ppl))
Example #9
    # Set up the models, loss criterion, and optimizer

    Exp_model = TextEncoder(embedding_dim=1024,
                            hidden_size=256,
                            num_layers=1,
                            bidir=True,
                            dropout1=0.5)
    Query_model = TextEncoder(embedding_dim=1024,
                              hidden_size=256,
                              num_layers=1,
                              bidir=True,
                              dropout1=0.5)
    Attn_model = AttentionModel(para_encoder_input_dim=512,
                                query_dim=512,
                                output_dim=256)
    para_encoder_attn_model = AttentionModel(para_encoder_input_dim=512,
                                             query_dim=512,
                                             output_dim=512)

    para_encoder = ParaEncoder(input_dim=1024,
                               hidden_size=256,
                               num_layers=1,
                               attn_model=para_encoder_attn_model,
                               bidir=True,
                               dropout1=0.5)
    linearfc = LinearFC(num_classes=2,
                        encoded_embedding_dim=512,
                        context_dim=512,
                        dropout1=0.2)
Example #10
def infer():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size
    # inference process

    print("src", src_vocab_size)

    # Dropout uses the upscale_in_train strategy, so at inference time it can
    # be removed by setting dropout to 0.
    if args.attention:
        model = AttentionModel(hidden_size,
                               src_vocab_size,
                               tar_vocab_size,
                               batch_size,
                               num_layers=num_layers,
                               init_scale=init_scale,
                               dropout=0.0)
    else:
        model = BaseModel(hidden_size,
                          src_vocab_size,
                          tar_vocab_size,
                          batch_size,
                          num_layers=num_layers,
                          init_scale=init_scale,
                          dropout=0.0)

    beam_size = args.beam_size
    trans_res = model.build_graph(mode='beam_search', beam_size=beam_size)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    source_vocab_file = args.vocab_prefix + "." + args.src_lang
    infer_file = args.infer_file

    infer_data = reader.raw_mono_data(source_vocab_file, infer_file)

    def prepare_input(batch, epoch_id=0, with_lr=True):
        src_ids, src_mask, tar_ids, tar_mask = batch
        res = {}
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1], 1))
        in_tar = tar_ids[:, :-1]
        label_tar = tar_ids[:, 1:]

        in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1], 1))
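        # Beam search generates the target sequence itself, so decoder inputs
        # and labels are fed as zeroed placeholders of the right shape.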
        in_tar = np.zeros_like(in_tar, dtype='int64')
        label_tar = label_tar.reshape(
            (label_tar.shape[0], label_tar.shape[1], 1))
        label_tar = np.zeros_like(label_tar, dtype='int64')

        res['src'] = src_ids
        res['tar'] = in_tar
        res['label'] = label_tar
        res['src_sequence_length'] = src_mask
        res['tar_sequence_length'] = tar_mask

        return res, np.sum(tar_mask)

    dir_name = args.reload_model
    print("dir name", dir_name)
    fluid.io.load_params(exe, dir_name)

    train_data_iter = reader.get_data_iter(infer_data, 1, mode='eval')

    tar_id2vocab = []
    tar_vocab_file = args.vocab_prefix + "." + args.tar_lang
    with open(tar_vocab_file, "r") as f:
        for line in f.readlines():
            tar_id2vocab.append(line.strip())

    infer_output_file = args.infer_output_file

    out_file = open(infer_output_file, 'w')

    for batch_id, batch in enumerate(train_data_iter):
        input_data_feed, word_num = prepare_input(batch, epoch_id=0)

        fetch_outs = exe.run(feed=input_data_feed,
                             fetch_list=[trans_res.name],
                             use_program_cache=False)

        res = [tar_id2vocab[e] for e in fetch_outs[0].reshape(-1)]

        res = res[1:]

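        # Truncate the hypothesis at the first end-of-sentence marker
        # (res[0], presumably the start token, was dropped above).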
        new_res = []
        for ele in res:
            if ele == "</s>":
                break
            new_res.append(ele)

        out_file.write(' '.join(new_res))
        out_file.write('\n')

    out_file.close()
Example #11
def main():
    h5_list = [
        h5py.File('data/processed0.h5', 'r'),
        h5py.File('data/processed1.h5', 'r'),
        h5py.File('data/processed3.h5', 'r'),
        h5py.File('data/processed4.h5', 'r')
    ]
    h5_list_test = [h5py.File('data/processed2.h5', 'r')]

    batch_size = 100

    bg_train = BatchGenerator(h5_list, batch_size)
    bg_test = BatchGenerator(h5_list_test, batch_size)  #, maxlen = 400000)

    # if we want to change batch size during training
    # dynamic_batch = True
    dynamic_batch = False

    n_epochs = 30
    opt = keras.optimizers.Adam()
    # opt = keras.optimizers.Adadelta()
    # opt = keras.optimizers.RMSprop(lr=0.001)

    # which iteration of models to load
    # next_it = 2
    #     # encoder_output, attention_weights = SelfAttention(size=50,
    #     #                                                   num_hops=16,
    #     #                                                   use_penalization=False)(x)

    checkpoint_path = "models/char_att7/"

    # Create checkpoint callback
    cp_callback = keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                  monitor='val_accuracy',
                                                  verbose=1,
                                                  save_weights_only=True,
                                                  mode='max',
                                                  save_best_only=True)

    load_model = False
    # load_model = True

    am = AttentionModel(
        checkpoint_path=checkpoint_path,
        rnn_size=512,
        rnn_style='GRU',  #'CuDNNLSTM',
        # bidirectional = True,
        dropout_rate=0.4,
        load_model=load_model)
    # am.model.save(checkpoint_path + 'model.h5')
    # am.build_model()
    am.model.compile(
        optimizer=opt,
        loss='categorical_crossentropy',
        metrics=['accuracy', perplexity, categorical_accuracy],
    )
    # print(am.model.summary())
    am.save_config()

    # am.model.fit(X_train,
    #              y_train,
    #              batch_size = batch_size,
    #              validation_data=(X_test, y_test),
    #              callbacks = [cp_callback],
    #              epochs=n_epochs)
    # fit using the batch generators
    am.model.fit_generator(
        bg_train,
        validation_data=bg_test,
        callbacks=[cp_callback],
        # use_multiprocessing=True,
        # workers=4,
        epochs=n_epochs)
Example #12
                                                    default_value=0)
train_x, train_y = load_data(train_df)
splitVal = int(len(train_y) * VAL_RATE)
val_x, val_y = train_x[splitVal:], train_y[splitVal:]
src_dataset = tf.contrib.data.Dataset.from_tensor_slices(
    (train_x, train_y))
val_dataset = tf.contrib.data.Dataset.from_tensor_slices((val_x, val_y))
src_iterator = train_iterator(src_dataset,
                              src_vocab_table,
                              BATCH_SIZE,
                              max_length=500)
val_iterator = train_iterator(val_dataset,
                              src_vocab_table,
                              BATCH_SIZE,
                              max_length=500)
attention_model = AttentionModel(src_iterator, val_iterator, 4, 200, 200,
                                 False)
sess_config = tf.ConfigProto(log_device_placement=False,
                             allow_soft_placement=True)
sess_config.gpu_options.allow_growth = True
with tf.Session(config=sess_config) as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    for i in range(MAX_EPOCHS):
        sess.run(src_iterator.initializer)
        sess.run(val_iterator.initializer)
        print("epoch:", i)
        step = 0
        while True:
            try:
                attention_model.train(sess, step)
                step += 1
            except tf.errors.OutOfRangeError:
                # assumed completion: the iterator raises OutOfRangeError
                # once the epoch's data is exhausted
                break
Example #13
def infer():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size
    # inference process

    print("src", src_vocab_size)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        # Dropout uses the upscale_in_train strategy, so at inference time it
        # can be removed by setting dropout to 0.
        if args.attention:
            model = AttentionModel(hidden_size,
                                   src_vocab_size,
                                   tar_vocab_size,
                                   batch_size,
                                   beam_size=args.beam_size,
                                   num_layers=num_layers,
                                   init_scale=init_scale,
                                   dropout=0.0,
                                   mode='beam_search')
        else:
            model = BaseModel(hidden_size,
                              src_vocab_size,
                              tar_vocab_size,
                              batch_size,
                              beam_size=args.beam_size,
                              num_layers=num_layers,
                              init_scale=init_scale,
                              dropout=0.0,
                              mode='beam_search')

        source_vocab_file = args.vocab_prefix + "." + args.src_lang
        infer_file = args.infer_file

        infer_data = reader.raw_mono_data(source_vocab_file, infer_file)

        def prepare_input(batch, epoch_id=0):
            src_ids, src_mask, tar_ids, tar_mask = batch
            res = {}
            src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
            in_tar = tar_ids[:, :-1]
            label_tar = tar_ids[:, 1:]

            in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
            label_tar = label_tar.reshape(
                (label_tar.shape[0], label_tar.shape[1], 1))
            inputs = [src_ids, in_tar, label_tar, src_mask, tar_mask]
            return inputs, np.sum(tar_mask)

        dir_name = args.reload_model
        print("dir name", dir_name)
        state_dict, _ = fluid.dygraph.load_dygraph(dir_name)
        model.set_dict(state_dict)
        model.eval()

        train_data_iter = reader.get_data_iter(infer_data,
                                               batch_size,
                                               mode='infer')

        tar_id2vocab = []
        tar_vocab_file = args.vocab_prefix + "." + args.tar_lang
        with io.open(tar_vocab_file, "r", encoding='utf-8') as f:
            for line in f.readlines():
                tar_id2vocab.append(line.strip())

        infer_output_file = args.infer_output_file
        infer_output_dir = os.path.dirname(infer_output_file)
        if infer_output_dir and not os.path.exists(infer_output_dir):
            os.makedirs(infer_output_dir)

        with io.open(infer_output_file, 'w', encoding='utf-8') as out_file:

            for batch_id, batch in enumerate(train_data_iter):
                input_data_feed, word_num = prepare_input(batch, epoch_id=0)
                outputs = model(input_data_feed)
                for i in range(outputs.shape[0]):
                    ins = outputs[i].numpy()
                    res = [tar_id2vocab[int(e)] for e in ins[:, 0].reshape(-1)]
                    new_res = []
                    for ele in res:
                        if ele == "</s>":
                            break
                        new_res.append(ele)

                    out_file.write(space_tok.join(new_res))
                    out_file.write(line_tok)
Example #14
def main():
    # load test data
    filename = "data/mixed.txt"
    with open(filename, 'r', encoding='utf-8') as f:
        raw_text = f.read()
    print('-> Raw text length:', len(raw_text))
    raw_text = raw_text.lower()[:700000]
    raw_text = re.sub('\n', " ", raw_text)

    # create mapping of unique chars to integers
    chars = sorted(list(set(raw_text)))
    # print('-> chars:', chars)
    char_to_int = dict((c, i) for i, c in enumerate(chars))
    int_to_char = dict((i, c) for i, c in enumerate(chars))
    print('-> int to char:', int_to_char)
    # print('-> char to int:', char_to_int)
    # print(char_to_int)
    # char_to_int = {' ': 0, '!': 1, '%': 2, '&': 3, "'": 4, ',': 5, '-': 6, '.': 7, '/': 8, '0': 9, '1': 10, '2': 11, '3': 12, '4': 13, '5': 14, '6': 15, '7': 16, '8': 17, '9': 18, ':': 19, ';': 20, '<': 21, '>': 22, '?': 23, 'a': 24, 'b': 25, 'c': 26, 'd': 27, 'e': 28, 'f': 29, 'g': 30, 'h': 31, 'i': 32, 'j': 33, 'k': 34, 'l': 35, 'm': 36, 'n': 37, 'o': 38, 'p': 39, 'q': 40, 'r': 41, 's': 42, 't': 43, 'u': 44, 'v': 45, 'w': 46, 'x': 47, 'y': 48, 'z': 49, '~': 50, '—': 51}
    char_list = list(char_to_int.keys())
    raw_text = ''.join([i for i in raw_text if i in char_list])
    print('-> char to int:', char_to_int)
    # summarize the loaded data
    n_chars = len(raw_text)
    n_vocab = len(char_list)
    print("-> Total Characters: ", n_chars)
    print("-> Total Vocab: ", n_vocab)
    # prepare the dataset of input to output pairs encoded as integers
    seq_length = 100
    sentences = []
    next_chars = []
    for i in range(0, n_chars - seq_length, 1):
        sentences.append(raw_text[i:i + seq_length])
        next_chars.append(raw_text[i + seq_length])

    n_patterns = len(sentences)
    print("Total Patterns: ", n_patterns)

    X = np.zeros((n_patterns, seq_length, n_vocab), dtype=bool)
    y = np.zeros((n_patterns, n_vocab), dtype=bool)

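    # One-hot encode each character window (inputs) and the following
    # character (target).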
    for i, sentence in enumerate(sentences):
        for t, char in enumerate(sentence):
            X[i, t, char_to_int[char]] = 1
        y[i, char_to_int[next_chars[i]]] = 1

    print('- Input:', X[0, :, :].shape)
    print('- Output:', y[0].shape)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=42)

    batch_size = 100

    # if we want to change batch size during training
    # dynamic_batch = True
    dynamic_batch = False

    n_epochs = 40
    # opt = keras.optimizers.Adam()
    # opt = keras.optimizers.Adadelta()
    opt = keras.optimizers.RMSprop(lr=0.001)

    sen_len = seq_length
    emb_len = n_vocab

    # which iteration of models to load
    # next_it = 2
    #     # encoder_output, attention_weights = SelfAttention(size=50,
    #     #                                                   num_hops=16,
    #     #                                                   use_penalization=False)(x)

    checkpoint_path = "models/char_att4/"

    # Create checkpoint callback
    cp_callback = keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                  monitor='val_accuracy',
                                                  verbose=1,
                                                  save_weights_only=True,
                                                  mode='max',
                                                  save_best_only=True)

    load_model = False
    # load_model = True

    am = AttentionModel(checkpoint_path=checkpoint_path,
                        rnn_size=512,
                        rnn_style='CuDNNLSTM',
                        dropout_rate=0.3,
                        load_model=load_model)
    # am.build_model()
    am.model.compile(
        optimizer=opt,
        loss='categorical_crossentropy',
        metrics=['accuracy', perplexity, categorical_accuracy],
    )
    # print(am.model.summary())
    am.save_config()

    am.model.fit(X_train,
                 y_train,
                 batch_size=batch_size,
                 validation_data=(X_test, y_test),
                 callbacks=[cp_callback],
                 epochs=n_epochs)
Example #15
def main():
    start_time = time.time()
    random.seed(42)
    # os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', type=int, help='Preprocess or execute the data.', default=None)
    args = vars(parser.parse_args())  # Convert the arguments to a dict
    if args['mode'] == 1:
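        # Mode 1: preprocess the proppy train/test/dev splits and cache them as TSV.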
        train = pd.read_csv('../data/proppy_1.0.train.tsv', sep='\t', header=None)
        train_processed = Preprocessing.pipeline(train[train.columns[0]])
        train_processed_df = pd.DataFrame(columns=['text_stem', 'text_join', 'text', 'label'])
        train_processed_df['text_stem'], train_processed_df['text_join'] = train_processed
        train_processed_df['label'] = train[train.columns[len(train.columns) - 1]]
        train_processed_df['text'] = Preprocessing.pipeline_simple(train[train.columns[0]])
        # train_processed_df['embedding'] = train_embeddings
        train_processed_df.to_csv('../data/train_preprocessed.tsv', sep='\t', index=False,
                                  index_label=False)
        test = pd.read_csv('../data/proppy_1.0.test.tsv', sep='\t', header=None)
        test_processed = Preprocessing.pipeline(test[test.columns[0]])
        test_processed_df = pd.DataFrame(columns=['text_stem', 'text_join', 'text', 'label'])
        test_processed_df['text_stem'], test_processed_df['text_join'] = test_processed
        test_processed_df['label'] = test[test.columns[-1]]
        test_processed_df['text'] = Preprocessing.pipeline_simple(test[test.columns[0]])
        # test_processed_df['embedding'] = test_embeddings
        test_processed_df.to_csv('../data/test_preprocessed.tsv', sep='\t', index=False,
                                 index_label=False)
        dev = pd.read_csv('../data/proppy_1.0.dev.tsv', sep='\t', header=None)
        dev_processed = Preprocessing.pipeline(dev[dev.columns[0]])
        dev_processed_df = pd.DataFrame(columns=['text_stem', 'text_join', 'text', 'label'])
        dev_processed_df['text_stem'], dev_processed_df['text_join'] = dev_processed
        dev_processed_df['label'] = dev[dev.columns[-1]]
        dev_processed_df['text'] = Preprocessing.pipeline_simple(dev[dev.columns[0]])
        # dev_processed_df['embedding'] = dev_embeddings
        dev_processed_df.to_csv('../data/dev_preprocessed.tsv', sep='\t', index=False,
                                index_label=False)
    elif args['mode'] == 2:
        # Create the model with fasttext or glove embeddings
        config = ModelConfig.AttentionConfig.value
        model = AttentionModel(batch_size=config['batch_size'], epochs=config['epochs'],
                               vocab_size=config['vocab_size'],
                               max_len=config['max_len'], filters=config['filters'], kernel_size=config['kernel_size'],
                               optimizer=config['optimizer'], learning_rate=config['learning_rate'],
                               max_sequence_len=config['max_sequence_len'], lstm_units=config['lstm_units'],
                               embedding_size=config['embedding_size'], load_embeddings=config['load_embeddings'],
                               pool_size=config['pool_size'], path_train=config['path_train'],
                               path_test=config['path_test'], path_dev=config['path_dev'], emb_type=config['emb_type'],
                               buffer_size=config['buffer_size'], rate=config['rate'],
                               length_type=config['length_type'],
                               dense_units=config['dense_units'],
                               att_units=config['att_units']
                               )
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('About to fit.')
        model.fit_as_tensors(with_validation=False)
        print('About to predict.')
        model.predict_test_dev()
    elif args['mode'] == 3:
        # Create the model with fasttext or glove embeddings
        config = ModelConfig.TrainEmbeddings.value
        model = BiLSTMModel(batch_size=config['batch_size'], epochs=config['epochs'], vocab_size=config['vocab_size'],
                            max_len=config['max_len'], filters=config['filters'], kernel_size=config['kernel_size'],
                            optimizer=config['optimizer'], learning_rate=config['learning_rate'],
                            max_sequence_len=config['max_sequence_len'], lstm_units=config['lstm_units'],
                            embedding_size=config['embedding_size'], load_embeddings=config['load_embeddings'],
                            pool_size=config['pool_size'], path_train=config['path_train'],
                            path_test=config['path_test'], path_dev=config['path_dev'], emb_type=config['emb_type'],
                            buffer_size=config['buffer_size'], rate=config['rate'], length_type=config['length_type'],
                            dense_units=config['dense_units'], concat=config['concat']
                            )
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('About to fit.')
        # model.fit_as_tensors(with_validation=False)
        print('About to predict.')
        # model.predict_test_dev()
        # print('Saving the loss history:')
        # model.save_plot_history()
    elif args['mode'] == 4:
        config = ModelConfig.SecondExperiment.value
        model = BiLSTMModel(batch_size=config['batch_size'], epochs=config['epochs'], vocab_size=config['vocab_size'],
                            max_len=config['max_len'], filters=config['filters'], kernel_size=config['kernel_size'],
                            optimizer=config['optimizer'], learning_rate=config['learning_rate'],
                            max_sequence_len=config['max_sequence_len'], lstm_units=config['lstm_units'],
                            embedding_size=config['embedding_size'], load_embeddings=config['load_embeddings'],
                            pool_size=config['pool_size'], path_train=config['path_train'],
                            path_test=config['path_test'], path_dev=None, emb_type=config['emb_type'],
                            buffer_size=config['buffer_size'], rate=config['rate'], length_type=config['length_type'],
                            dense_units=config['dense_units'], concat=config['concat']
                            )
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('About to fit.')
        model.fit_as_tensors(with_validation=False)
        print('About to predict.')
        model.predict()
    elif args['mode'] == 5:
        config = ModelConfig.BertConfig.value
        model = BertModel(max_len=config['max_len'], path_train=config['path_train'], path_test=config['path_test'],
                          path_dev=config['path_dev'], epochs=config['epochs'], optimizer=config['optimizer'],
                          load_embeddings=False, batch_size=config['batch_size'],
                          max_sequence_len=config['max_sequence_len'],
                          rate=config['rate'], learning_rate=config['learning_rate'], length_type=config['length_type']
                          )
        print('Loading the data.')
        model.load_data()
        print('Creating the model.')
        model.call()
        print('Fitting the model.')
        # model.fit(with_validation=True)
        print('Predict the test set.')
        # model.predict()
    elif args['mode'] == 6:
        config = ModelConfig.TransformerConfig.value
        model = TransformerModel(batch_size=config['batch_size'], epochs=config['epochs'],
                                 vocab_size=config['vocab_size'],
                                 max_len=config['max_len'], filters=config['filters'],
                                 kernel_size=config['kernel_size'],
                                 optimizer=config['optimizer'], learning_rate=config['learning_rate'],
                                 max_sequence_len=config['max_sequence_len'], lstm_units=config['lstm_units'],
                                 embedding_size=config['embedding_size'], load_embeddings=config['load_embeddings'],
                                 pool_size=config['pool_size'], path_train=config['path_train'],
                                 path_test=config['path_test'], path_dev=config['path_dev'],
                                 emb_type=config['emb_type'],
                                 buffer_size=config['buffer_size'], rate=config['rate'],
                                 length_type=config['length_type'], dense_units=config['dense_units'],
                                 attheads=config['attheads'], att_layers=config['att_layers']
                                 )
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('About to fit.')
        model.fit_as_tensors(with_validation=True)
        print('About to predict.')
        model.predict()
    elif args['mode'] == 7:
        # Create the model with fasttext or glove embeddings
        config = ModelConfig.MeanModelConfig.value
        model = LocalAttentionModel(batch_size=config['batch_size'], epochs=config['epochs'],
                                    vocab_size=config['vocab_size'],
                                    max_len=config['max_len'], filters=config['filters'],
                                    kernel_size=config['kernel_size'],
                                    optimizer=config['optimizer'], learning_rate=config['learning_rate'],
                                    max_sequence_len=config['max_sequence_len'], lstm_units=config['lstm_units'],
                                    embedding_size=config['embedding_size'], load_embeddings=config['load_embeddings'],
                                    pool_size=config['pool_size'], path_train=config['path_train'],
                                    path_test=config['path_test'], path_dev=config['path_dev'],
                                    emb_type=config['emb_type'],
                                    buffer_size=config['buffer_size'], rate=config['rate'],
                                    length_type=config['length_type'],
                                    dense_units=config['dense_units']
                                    )
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('About to fit.')
        model.fit_as_tensors(with_validation=False)
        print('About to predict.')
        model.predict_test_dev()
        print('Showing the attention:')
        # model.plot_attention()
    elif args['mode'] == 8:
        config = ModelConfig.SecondExperiment.value
        model = AttentionModel(batch_size=config['batch_size'], epochs=config['epochs'],
                               vocab_size=config['vocab_size'],
                               max_len=config['max_len'], filters=config['filters'], kernel_size=config['kernel_size'],
                               optimizer=config['optimizer'], learning_rate=config['learning_rate'],
                               max_sequence_len=config['max_sequence_len'], lstm_units=config['lstm_units'],
                               embedding_size=config['embedding_size'], load_embeddings=config['load_embeddings'],
                               pool_size=config['pool_size'], path_train=config['path_train'],
                               path_test=config['path_test'], path_dev=None, emb_type=config['emb_type'],
                               buffer_size=config['buffer_size'], rate=config['rate'],
                               length_type=config['length_type'],
                               dense_units=config['dense_units']
                               )
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('About to fit.')
        model.fit_as_tensors(with_validation=True)
        print('About to predict.')
        model.predict()
    elif args['mode'] == 9:
        config = ModelConfig.AttentionConfig.value
        model = AttentionModel(batch_size=config['batch_size'], epochs=config['epochs'],
                               vocab_size=config['vocab_size'],
                               max_len=config['max_len'], filters=config['filters'], kernel_size=config['kernel_size'],
                               optimizer=config['optimizer'], learning_rate=config['learning_rate'],
                               max_sequence_len=config['max_sequence_len'], lstm_units=config['lstm_units'],
                               embedding_size=config['embedding_size'], load_embeddings=config['load_embeddings'],
                               pool_size=config['pool_size'], path_train=config['path_train'],
                               path_test=config['path_test'], path_dev=config['path_dev'], emb_type=config['emb_type'],
                               buffer_size=config['buffer_size'], rate=config['rate'],
                               length_type=config['length_type'],
                               dense_units=config['dense_units'], both_embeddings=config['both_embeddings'],
                               att_units=config['att_units']
                               )
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('About to fit.')
        model.fit_as_tensors(with_validation=False)
        print('About to predict.')
        model.predict_test_dev()
    elif args['mode'] == 10:
        config = ModelConfig.BertConfigSecondExp.value
        model = BertModel(max_len=config['max_len'], path_train=config['path_train'], path_test=config['path_test'],
                          epochs=config['epochs'], optimizer=config['optimizer'],
                          load_embeddings=False, batch_size=config['batch_size'],
                          max_sequence_len=config['max_sequence_len'],
                          rate=config['rate'], learning_rate=config['learning_rate'], length_type=config['length_type']
                          )
        print('Loading the data.')
        model.load_data()
        print('Creating the model.')
        model.call()
        print('Fitting the model.')
        model.fit(with_validation=False)
        print('Predict the test set.')
        model.predict()
    elif args['mode'] == 11:
        config = ModelConfig.SecondExperiment.value
        model = LocalAttentionModel(batch_size=config['batch_size'], epochs=config['epochs'],
                                    vocab_size=config['vocab_size'],
                                    max_len=config['max_len'], filters=config['filters'],
                                    kernel_size=config['kernel_size'],
                                    optimizer=config['optimizer'], learning_rate=config['learning_rate'],
                                    max_sequence_len=config['max_sequence_len'], lstm_units=config['lstm_units'],
                                    embedding_size=config['embedding_size'], load_embeddings=config['load_embeddings'],
                                    pool_size=config['pool_size'], path_train=config['path_train'],
                                    path_test=config['path_test'], path_dev=None, emb_type=config['emb_type'],
                                    buffer_size=config['buffer_size'], rate=config['rate'],
                                    length_type=config['length_type'],
                                    dense_units=config['dense_units']
                                    )
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('About to fit.')
        model.fit_as_tensors(with_validation=False)
        print('About to predict.')
        model.predict()
    elif args['mode'] == 12:
        config = ModelConfig.MeanModelConfig.value
        model = LocalAttentionModelNela(batch_size=config['batch_size'], epochs=config['epochs'],
                                        vocab_size=config['vocab_size'],
                                        max_len=config['max_len'], filters=config['filters'],
                                        kernel_size=config['kernel_size'],
                                        optimizer=config['optimizer'], learning_rate=config['learning_rate'],
                                        max_sequence_len=config['max_sequence_len'], lstm_units=config['lstm_units'],
                                        embedding_size=config['embedding_size'],
                                        load_embeddings=config['load_embeddings'],
                                        pool_size=config['pool_size'], path_train=config['path_train'],
                                        path_test=config['path_test'], path_dev=config['path_dev'],
                                        emb_type=config['emb_type'],
                                        buffer_size=config['buffer_size'], rate=config['rate'],
                                        length_type=config['length_type'],
                                        dense_units=config['dense_units']
                                        )
        model.prepare_data()
        print('Building the model.')
        model.call()
        print('About to fit.')
        model.fit(with_validation=False)
        print('About to predict.')
        model.predict_test_dev()
    else:
        print('No other mode implemented yet.')

    elapsed_time = time.time() - start_time

    print('The execution took: ' + str(elapsed_time) + ' seconds.')
    print('End of execution.')
Example #16
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)
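

# A minimal generation sketch built on sample() above. It assumes `model`,
# `seq_length`, `n_vocab`, `char_to_int`, and `int_to_char` are defined as in
# the surrounding examples; generate() itself is illustrative, not from the source.
def generate(model, seed_text, n_chars=200, temperature=0.8):
    generated = seed_text
    for _ in range(n_chars):
        # One-hot encode the most recent seq_length characters.
        x = np.zeros((1, seq_length, n_vocab))
        for t, char in enumerate(generated[-seq_length:]):
            x[0, t, char_to_int[char]] = 1
        preds = model.predict(x, verbose=0)[0]
        generated += int_to_char[sample(preds, temperature)]
    return generated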


if __name__ == '__main__':
    print('-> Loading att model')

    checkpoint_path = "models/char_att7/"
    model = AttentionModel(checkpoint_path=checkpoint_path,
                           load_model=True).model

    filename = "data/mixed.txt"
    with open(filename, 'r', encoding='utf-8') as f:
        raw_text = f.read()
    raw_text = raw_text.lower()
    raw_text = re.sub('\n', " ", raw_text)[:10000]

    # create mapping of unique chars to integers
    chars = sorted(list(set(raw_text)))
    # char_to_int = dict((c, i) for i, c in enumerate(chars))
    # int_to_char = dict((i, c) for i, c in enumerate(chars))
    int_to_char = {
        0: ' ',
        1: '!',
        2: '"',
        3: "'",
Example #17
    def eval(self):
        self.max_acc = 1
        self.is_training = False
        with tf.Graph().as_default():
            data_processor = DataProcessor()

            vocab_size = data_processor.get_vocabulary_size(FLAGS.vocab_path)
            vocab, revocab = DataProcessor.initialize_vocabulary(
                FLAGS.vocab_path)
            data_processor.get_init(FLAGS.input_training_data_path,
                                    FLAGS.input_validation_data_path, vocab,
                                    vocab_size, FLAGS.max_length, revocab)
            models = AttentionModel()

            input_q = tf.placeholder(tf.int32,
                                     shape=(None, FLAGS.max_length),
                                     name="input_x1")  # FLAGS.train_batch_size
            input_ap = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))
            input_an = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))
            q_encode = models.embed(
                inputs=input_q,
                vocab_size=vocab_size + 1,
                num_units=hp.hidden_units)  # vocab size plus 1 for padding
            ap_encode = models.embed(inputs=input_ap,
                                     vocab_size=vocab_size + 1,
                                     num_units=hp.hidden_units)
            an_encode = models.embed(inputs=input_an,
                                     vocab_size=vocab_size + 1,
                                     num_units=hp.hidden_units)

            # multihead blocks
            for i in range(hp.num_blocks):
                with tf.variable_scope("num_blocks_{}".format(i)):
                    q_encode = models.multihead_attention(
                        query=q_encode,
                        key=q_encode,
                        value=q_encode,
                        num_heads=hp.num_heads,
                        mask_future=False)
                    q_encode = models.feed_forward(
                        q_encode, units=[hp.hidden_units * 4, hp.hidden_units])
                    ap_encode = models.multihead_attention(
                        query=ap_encode,
                        key=ap_encode,
                        value=ap_encode,
                        num_heads=hp.num_heads,
                        mask_future=False)
                    ap_encode = models.feed_forward(
                        ap_encode,
                        units=[hp.hidden_units * 4, hp.hidden_units])
                    an_encode = models.multihead_attention(
                        query=an_encode,
                        key=an_encode,
                        value=an_encode,
                        num_heads=hp.num_heads,
                        mask_future=False)
                    an_encode = models.feed_forward(
                        an_encode,
                        units=[hp.hidden_units * 4, hp.hidden_units])

            ## output layer
            with tf.name_scope('output_layer'):
                dims = q_encode.get_shape().as_list()
                q_encode = tf.reshape(q_encode, [-1, dims[1] * dims[2]])
                ap_encode = tf.reshape(ap_encode, [-1, dims[1] * dims[2]])
                an_encode = tf.reshape(an_encode, [-1, dims[1] * dims[2]])
                weight = tf.get_variable(
                    'output_weight',
                    [q_encode.get_shape().as_list()[-1], hp.hidden_units])

                q_encode = tf.matmul(q_encode, weight)
                ap_encode = tf.matmul(ap_encode, weight)
                an_encode = tf.matmul(an_encode, weight)

            q_encode = models.vec_normalize(q_encode)
            ap_encode = models.vec_normalize(ap_encode)
            an_encode = models.vec_normalize(an_encode)
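            # vec_normalize presumably L2-normalizes each encoding, so the
            # reduce_sum of element-wise products below is cosine similarity.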

            ## calculate similarity and loss
            cos_12 = tf.reduce_sum(tf.multiply(q_encode, ap_encode),
                                   1)  # element-wise multiply vectors, then sum
            cos_13 = tf.reduce_sum(tf.multiply(q_encode, an_encode), 1)

            zero = tf.constant(0,
                               shape=[FLAGS.train_batch_size],
                               dtype=tf.float32)
            margin = tf.constant(FLAGS.loss_margin,
                                 shape=[FLAGS.train_batch_size],
                                 dtype=tf.float32)

            losses = tf.maximum(
                zero, tf.subtract(margin, tf.subtract(cos_12, cos_13)))
            loss_sum = tf.reduce_sum(losses)
            loss_avg = tf.div(loss_sum, FLAGS.train_batch_size)
            correct = tf.equal(zero, losses)
            accuracy = tf.reduce_mean(tf.cast(correct, "float"),
                                      name="accuracy")

            global_step = tf.Variable(
                0, name="global_step", trainable=False
            )  # The global step will be automatically incremented by one every time you execute a train loop
            optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
            # optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            grads_and_vars = optimizer.compute_gradients(loss_avg)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
            saver = tf.train.Saver(tf.global_variables())

            # session start point
            with tf.Session() as session:
                session.run(tf.local_variables_initializer())
                session.run(tf.global_variables_initializer())
                session.run(tf.tables_initializer())

                # Load pre-trained model
                ckpt = tf.train.get_checkpoint_state(
                    FLAGS.input_previous_model_path)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(session, ckpt.model_checkpoint_path)
                    print("Load Model From ", ckpt.model_checkpoint_path)
                else:
                    print("No model found and exit.")
                    exit()

                print(
                    '\n============================> begin to evaluate model. '
                )
                eval_size = FLAGS.evaluation_size

                def evaluate_all():
                    correct_num = 0
                    batches = data_processor.loadValData_step(
                        vocab,
                        vocab_size,
                        FLAGS.input_validation_data_path,
                        FLAGS.max_length,
                        eval_size=0)  # batch_size*seq_len
                    for i in range(eval_size):
                        # Display/save the test data
                        # save_test_data(batch_y1, batch_y2, label_list)
                        batch_y1, batch_y2, label_list = batches[i]
                        correct_flag = test_step(batch_y1, batch_y2, batch_y2,
                                                 label_list, session)
                        correct_num += correct_flag
                        if correct_flag == 1:
                            print('step %d ==== correct prediction' % i)
                        else:
                            print('step %d ==== wrong prediction' % i)
                    print('correct_num', correct_num)
                    acc = correct_num / float(eval_size)
                    return acc

                def test_step(input_y1, input_y2, input_y3, label_list, sess):
                    feed_dict = {
                        input_q: input_y1,
                        input_ap: input_y2,
                        input_an: input_y3
                    }

                    correct_flag = 0
                    cos_12_ = sess.run(cos_12, feed_dict)
                    cos_max = max(cos_12_)
                    index_max = list(cos_12_).index(cos_max)
                    if label_list[index_max] == '1':
                        correct_flag = 1
                    return correct_flag

                def evaluate(eval_size):
                    correct_num = 0
                    batches = data_processor.loadValData_step(
                        vocab, vocab_size, FLAGS.input_validation_data_path,
                        FLAGS.max_length, eval_size)  # batch_size*seq_len
                    for i in range(eval_size):
                        # Display/save the test data
                        # save_test_data(batch_y1, batch_y2, label_list)
                        batch_y1, batch_y2, label_list = batches[i]
                        correct_flag = test_step(batch_y1, batch_y2, batch_y2,
                                                 label_list, session)
                        correct_num += correct_flag
                        if correct_flag == 1:
                            print('step %d ==== correct prediction' % i)
                        else:
                            print('step %d ==== wrong prediction' % i)
                    print('correct_num', correct_num)
                    acc = correct_num / float(eval_size)
                    return acc

                acc_ = evaluate(eval_size=eval_size)
                print(
                    '--------The test result among the test data sets: acc = {0}, test size = {1}----------'
                    .format(acc_, eval_size))
                exit()
Example #18
        json.dump(vars(opts), f, indent=True)

    # Load data from load_path
    load_data = {}
    if opts.load_path is not None:
        print('  [*] Loading data from {}'.format(opts.load_path))
        load_data = torch.load(
            opts.load_path,
            map_location=lambda storage, loc: storage)  # Load on CPU

    # Initialize model
    model = maybe_cuda_model(
        AttentionModel(opts.embedding_dim,
                       opts.hidden_dim,
                       problem,
                       n_encode_layers=opts.n_encode_layers,
                       mask_inner=True,
                       mask_logits=True,
                       normalization=opts.normalization,
                       tanh_clipping=opts.tanh_clipping), opts.use_cuda)

    # Overwrite model parameters by parameters to load
    model.load_state_dict({**model.state_dict(), **load_data.get('model', {})})
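    # Merging the model's own state_dict with the loaded one keeps freshly
    # initialized parameters for keys absent from the checkpoint and overwrites
    # everything else with the loaded values.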

    # Initialize baseline
    if opts.baseline == 'exponential':
        baseline = ExponentialBaseline(opts.exp_beta)
    elif opts.baseline == 'critic':
        baseline = CriticBaseline(
            maybe_cuda_model(
                CriticNetwork(problem.NODE_DIM, opts.embedding_dim,
                              opts.hidden_dim, opts.n_encode_layers,
        wordlist = Word2VecUtil.review_to_wordlist(review)
        train_x.append(' '.join(wordlist))
    return train_x


if __name__ == '__main__':
    test_df = pd.read_csv("data/testData.tsv", delimiter="\t", quoting=3)
    src_vocab_table = lookup_ops.index_table_from_file('data/vocab.txt',
                                                       default_value=0)
    test_x = load_data(test_df)
    test_dataset = tf.contrib.data.Dataset.from_tensor_slices(test_x)
    test_iterator = infer_iterator(test_dataset,
                                   src_vocab_table,
                                   BATCH_SIZE,
                                   max_length=500)
    attention_model = AttentionModel(None, test_iterator, 4, 200, 200, False)
    sess_config = tf.ConfigProto(log_device_placement=False,
                                 allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    with tf.Session(config=sess_config) as sess:
        attention_model.load_model(sess, "model/attention_model.ckpt")
        sess.run(tf.tables_initializer())
        sess.run(test_iterator.initializer)
        test_preds = []
        while True:
            try:
                batch_preds = attention_model.test_infer(sess)
                test_preds.append(batch_preds)
            except tf.errors.OutOfRangeError:
                test_preds = (np.concatenate(test_preds, axis=0))[:, 1]
                submission = pd.DataFrame({