Example #1
    def __init__(self, args, word_dict, char_dict):

        self.args = args
        self.word_dict = word_dict
        self.char_dict = char_dict
        self.network = DSSM(args, word_dict, char_dict)
        if args.cuda:
            self.network.cuda()
        self.optimizer = optim.Adamax(self.network.parameters(),
                                      weight_decay=0)
Example #2
class CrossModal(nn.Module):
    def __init__(self,
                 vocab_size=250000,
                 embed_size=128,
                 hidden_size=512,
                 pretrain_path=None):
        super(CrossModal, self).__init__()
        # image
        resnet = models.resnet18(pretrained=True)
        modules = list(resnet.children())[:-1]
        self.resnet = nn.Sequential(*modules)
        self.resnet_linear = nn.Linear(resnet.fc.in_features, hidden_size)
        self.resnet_bn = nn.BatchNorm1d(hidden_size, momentum=0.01)

        # text
        self.dssm = DSSM(vocab_size=vocab_size)
        if pretrain_path is not None:
            self.dssm.load_state_dict(torch.load(pretrain_path))
        self.dssm_linear = nn.Linear(embed_size, hidden_size)
        self.dssm_bn = nn.BatchNorm1d(hidden_size, momentum=0.01)

        # Function
        self.tanh = nn.Tanh()

    def forward(self, query, pos_img, neg_img):
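        # Triplet-style forward pass: embed the query text and both images, then
        # return cosine similarities against the positive and the negative image.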
        #with torch.no_grad():
        text_feature = self.dssm.predict(query)
        pos_img_feature = self.resnet(pos_img)
        neg_img_feature = self.resnet(neg_img)

        text_feature = self.tanh(self.dssm_bn(self.dssm_linear(text_feature)))

        pos_img_feature = pos_img_feature.reshape(pos_img_feature.size(0), -1)
        pos_img_feature = self.tanh(
            self.resnet_bn(self.resnet_linear(pos_img_feature)))

        neg_img_feature = neg_img_feature.reshape(neg_img_feature.size(0), -1)
        neg_img_feature = self.tanh(
            self.resnet_bn(self.resnet_linear(neg_img_feature)))

        left = torch.cosine_similarity(text_feature, pos_img_feature)
        right = torch.cosine_similarity(text_feature, neg_img_feature)
        return left, right

    def query_emb(self, query):
        text_feature = self.dssm.predict(query)
        text_feature = self.tanh(self.dssm_bn(self.dssm_linear(text_feature)))
        return text_feature

    def img_emb(self, pos_img):
        pos_img_feature = self.resnet(pos_img)
        pos_img_feature = pos_img_feature.reshape(pos_img_feature.size(0), -1)
        pos_img_feature = self.tanh(
            self.resnet_bn(self.resnet_linear(pos_img_feature)))
        return pos_img_feature
Example #3
def predict(data_params):
    meta_path = "./model/dssm.ckpt.meta"
    ckpt_path = "./model/dssm.ckpt"
    data_file = "./data/train.txt.10"
    dssm = DSSM()
    data_iterator = DataIterator(data_params)
    iterator = data_iterator.input_fn(data_file)
    # config
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(meta_path)
        # Initialize first, then restore so the checkpoint weights are not overwritten.
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, ckpt_path)
        sess.run(iterator.initializer)
        # Create the get_next op once; calling it inside the loop keeps adding ops to the graph.
        (query_features, creative_ids, labels) = iterator.get_next()
        s = time.time()
        while True:
            try:
                (batch_query, batch_creative_ids, batch_labels) = sess.run(
                    [query_features, creative_ids, labels])
                prediction = sess.run(dssm.score,
                                      feed_dict={
                                          dssm.query: batch_query,
                                          dssm.doc: batch_creative_ids
                                      })
                print(prediction)
            except tf.errors.OutOfRangeError:
                break
        e = time.time()
        # ~0.0001 s per example on average
        print(e - s)
Example #4
def train():
    dssm = DSSM()
    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        iterator = data_iterator.input_fn(data_file)

        sess.run(iterator.initializer)
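        # One full pass over the dataset: run a train step per batch and print the scores.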
        # Create the get_next op once; calling it inside the loop keeps adding ops to the graph.
        (query_features, creative_ids, labels) = iterator.get_next()
        while True:
            try:
                (batch_query, batch_creative_ids, batch_labels) = sess.run(
                    [query_features, creative_ids, labels])
                # print(sess.run([query_features, creative_ids, labels]))
                # print('loss:', sess.run(dssm.loss, feed_dict={dssm.query : batch_query, dssm.doc : batch_creative_ids, dssm.label : batch_labels}))
                sess.run(dssm.train_step,
                         feed_dict={
                             dssm.query: batch_query,
                             dssm.doc: batch_creative_ids,
                             dssm.label: batch_labels
                         })
                print(
                    'score:',
                    sess.run(dssm.score,
                             feed_dict={
                                 dssm.query: batch_query,
                                 dssm.doc: batch_creative_ids
                             }))
            except tf.errors.OutOfRangeError:
                break
        saver.save(sess, model_path)
Example #5
def train():
    dssm = DSSM()
    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
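        # Reload the raw training data each epoch and iterate over it in fixed-size
        # batches, printing the loss before every training step.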
        for i in range(FLAGS.epoch):
            train_raw_data = data_utils.load_all_dataset("0107")
            if train_raw_data is None:
                continue
            epoch_steps = int(len(train_raw_data) / FLAGS.batch_size)
            for step in range(epoch_steps):
                query_batch, doc_batch, label_batch = get_batch_data(
                    step, FLAGS.batch_size, train_raw_data)
                # print(query_batch)
                #print('label:', label_batch)

                print(
                    'loss:',
                    sess.run(dssm.loss,
                             feed_dict={
                                 dssm.query: query_batch,
                                 dssm.doc: doc_batch,
                                 dssm.label: label_batch
                             }))
                # print('score:', sess.run(dssm.score, feed_dict={dssm.query : query_batch, dssm.doc : doc_batch}))
                sess.run(dssm.train_step,
                         feed_dict={
                             dssm.query: query_batch,
                             dssm.doc: doc_batch,
                             dssm.label: label_batch
                         })
        saver.save(sess, model_path)
Example #6
def debug():
    query, doc, Y = fake_train_data()
    dssm = DSSM()
    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
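        # Feed one (query, doc, label) example at a time and print every intermediate
        # tensor of the DSSM graph to inspect the forward computation.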
        for i in range(len(Y)):
            q = query[i:i + 1]
            d = doc[i:i + 1]
            label = Y[i:i + 1]
            print('query:', sess.run(dssm.query, feed_dict={dssm.query: q}))
            print('doc:', sess.run(dssm.doc, feed_dict={dssm.doc: d}))
            print('label:', sess.run(dssm.label, feed_dict={dssm.label:
                                                            label}))

            # embedding table
            print('embedding:', sess.run(dssm.embedding))

            # debug query
            print('query_embedding:',
                  sess.run(dssm.query_embeddings, feed_dict={dssm.query: q}))
            print('query_flatten:',
                  sess.run(dssm.query_flatten, feed_dict={dssm.query: q}))

            # debug doc
            # The original ran dssm.doc here, which only echoes the fed input; by symmetry
            # with the query branch the intended tensor is presumably dssm.doc_embeddings.
            print('doc_embedding:',
                  sess.run(dssm.doc_embeddings, feed_dict={dssm.doc: d}))
            print('doc_flatten:',
                  sess.run(dssm.doc_flatten, feed_dict={dssm.doc: d}))

            # debug dense layer
            print('query_layer_1_out:',
                  sess.run(dssm.query_layer_1_out, feed_dict={dssm.query: q}))
            print('doc_layer_1_out:',
                  sess.run(dssm.doc_layer_1_out, feed_dict={dssm.doc: d}))

            # debug cosine_similarity, score, loss
            print(
                'cosine_similarity:',
                sess.run(dssm.cosine_similarity,
                         feed_dict={
                             dssm.query: q,
                             dssm.doc: d
                         }))
            print('score:',
                  sess.run(dssm.score, feed_dict={
                      dssm.query: q,
                      dssm.doc: d
                  }))
            print(
                'loss:',
                sess.run(dssm.loss,
                         feed_dict={
                             dssm.query: q,
                             dssm.doc: d,
                             dssm.label: label
                         }))
Example #7
    def __init__(self,
                 vocab_size=250000,
                 embed_size=128,
                 hidden_size=512,
                 pretrain_path=None):
        super(CrossModal, self).__init__()
        # image
        resnet = models.resnet18(pretrained=True)
        modules = list(resnet.children())[:-1]
        self.resnet = nn.Sequential(*modules)
        self.resnet_linear = nn.Linear(resnet.fc.in_features, hidden_size)
        self.resnet_bn = nn.BatchNorm1d(hidden_size, momentum=0.01)

        # text
        self.dssm = DSSM(vocab_size=vocab_size)
        if pretrain_path is not None:
            self.dssm.load_state_dict(torch.load(pretrain_path))
        self.dssm_linear = nn.Linear(embed_size, hidden_size)
        self.dssm_bn = nn.BatchNorm1d(hidden_size, momentum=0.01)

        # Function
        self.tanh = nn.Tanh()
Example #8
class TMmodel(object):
    """
    """
    def __init__(self, args, word_dict, char_dict):

        self.args = args
        self.word_dict = word_dict
        self.char_dict = char_dict
        self.network = DSSM(args, word_dict, char_dict)
        if args.cuda:
            self.network.cuda()
        self.optimizer = optim.Adamax(self.network.parameters(),
                                      weight_decay=0)

    def update(self, ex):
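        # One optimization step on batch `ex`: ex[0] holds the labels, ex[1:9] the network inputs.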
        self.network.train()
        if self.args.cuda:
            # `async` became a reserved word in Python 3.7; use non_blocking instead.
            inputs = [
                e if e is None else Variable(e.cuda(non_blocking=True))
                for e in ex[1:9]
            ]
            label = Variable(ex[0].cuda(non_blocking=True))
Example #9
def main(cfg):
    set_seed(7)

    file_num = cfg.filenum
    cfg.result_path = './result/'
    print('load dict')
    news_dict = json.load(
        open('./{}/news.json'.format(cfg.root), 'r', encoding='utf-8'))
    cfg.news_num = len(news_dict)
    print('load words dict')
    word_dict = json.load(
        open('./{}/word.json'.format(cfg.root), 'r', encoding='utf-8'))
    cfg.word_num = len(word_dict)

    if cfg.model == 'dssm':
        model = DSSM(cfg)
    elif cfg.model == 'gru':
        model = GRURec(cfg)

    saved_model_path = os.path.join('./checkpoint/',
                                    'model.ep{0}'.format(cfg.epoch))
    print("Load from:", saved_model_path)
    if not os.path.exists(saved_model_path):
        print("Not Exist: {}".format(saved_model_path))
        return []
    model.cpu()
    pretrained_model = torch.load(saved_model_path, map_location='cpu')
    print(model.load_state_dict(pretrained_model, strict=False))

    for point_num in range(file_num):
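        # Score this test shard with one worker process per GPU, then merge the per-rank results.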
        print("processing {}/raw/test-{}.npy".format(cfg.root, point_num))
        valid_dataset = FMData(
            np.load("{}/raw/test-{}.npy".format(cfg.root, point_num)))

        dataset_list = split_dataset(valid_dataset, cfg.gpus)

        processes = []
        for rank in range(cfg.gpus):
            cur_device = torch.device("cuda:{}".format(rank))

            p = mp.Process(target=run,
                           args=(cfg, rank, dataset_list[rank], cur_device,
                                 model))
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

        gather(cfg, point_num)

    gather_all(cfg.result_path, file_num, validate=True, save=True)
Example #10
nwords = dataset._vocab_size
trainData, evalData = dataset.dataGen()

train_epoch_steps = int(len(trainData) / Config.batchSize) - 1
eval_epoch_steps = int(len(evalData) / Config.batchSize) - 1

# Build the computation graph
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False,
                                  device_count={"CPU": 78})
    sess = tf.Session(config=session_conf)

    # Set up the session
    with sess.as_default():
        dssm = DSSM(config, nwords)

        globalStep = tf.Variable(0, name="globalStep", trainable=False)
        # Define the optimizer, passing in the learning rate
        optimizer = tf.train.AdamOptimizer(config.learningRate)
        # Compute gradients, obtaining (gradient, variable) pairs
        gradsAndVars = optimizer.compute_gradients(dssm.losses)
        # Apply the gradients to the variables to build the train op
        trainOp = optimizer.apply_gradients(gradsAndVars,
                                            global_step=globalStep)

        # Log summaries for visualization in TensorBoard
        gradSummaries = []
        for g, v in gradsAndVars:
            if g is not None:
                tf.summary.histogram("{}/grad/hist".format(v.name), g)
Example #11
dictionary = data.Dictionary()
train_corpus = data.Corpus(args.data, 'session_train.txt', dictionary)
print('train set size = ', len(train_corpus.data))
print('vocabulary size = ', len(dictionary))
dev_corpus = data.Corpus(args.data, 'session_dev.txt', dictionary, is_test_corpus=True)
print('dev set size = ', len(dev_corpus.data))

# save the dictionary object to use during testing
helper.save_object(dictionary, args.save_path + 'dictionary.p')

# ###############################################################################
# # Build the model
# ###############################################################################

model = DSSM(dictionary, args)
optimizer = optim.SGD(model.parameters(), args.lr)
best_loss = -1

param_dict = helper.count_parameters(model)
print('Number of trainable parameters = ', numpy.sum(list(param_dict.values())))

# for training on multiple GPUs. use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
if 'CUDA_VISIBLE_DEVICES' in os.environ:
    cuda_visible_devices = [int(x) for x in os.environ['CUDA_VISIBLE_DEVICES'].split(',')]
    if len(cuda_visible_devices) > 1:
        model = torch.nn.DataParallel(model, device_ids=cuda_visible_devices)
if args.cuda:
    model = model.cuda()

if args.resume:
Example #12
                              shuffle=False,
                              drop_last=False,
                              collate_fn=my_collate_fn)
x_raw_path = save_path / 'x_raw.pkl'
kmeans_path = save_path / 'kmeans.pkl'
# Check for cached clustering results
if x_raw_path.exists() and kmeans_path.exists():
    print('Loading precomputed clustering')
    with open(x_raw_path, 'rb') as f:
        x_raw = pickle.load(f)
    with open(kmeans_path, 'rb') as f:
        kmeans = pickle.load(f)
else:
    print('Clustering')
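    # No cached results: embed the (s, s') transitions with the trained DSSM and
    # cluster the embeddings with k-means.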

    model = DSSM(**model_kwargs)
    model.eval()
    model.load_state_dict(
        torch.load(experiment_path_base / experiment_name / 'best_model.pth'))

    embeds = []
    with torch.no_grad():
        for (s, s_prime), (_, _) in tqdm(train_dataloader):
            if isinstance(model, DSSM):
                embeds.append(model.phi2(s_prime - s).numpy())
            else:
                embeds.append(
                    model.phi2(model.embed(s_prime) - model.embed(s)).numpy())

    x_raw = np.concatenate(embeds)
    kmeans = KMeans(n_clusters=n_clusters, verbose=0, random_state=42)
Example #13
import os
import time

import tensorflow as tf

from dssm import DSSM
import data_utils

# saver = tf.train.Saver()

batch_size = 100

# config
sess = tf.Session()

dssm = DSSM()

meta_path = "./model/dssm.ckpt.meta"
ckpt_path = "./model/dssm.ckpt"

saver = tf.train.import_meta_graph(meta_path)
# Initialize first, then restore so the checkpoint weights are not overwritten.
sess.run(tf.global_variables_initializer())
saver.restore(sess, ckpt_path)

# graph = tf.get_default_graph()


def get_batch_data(step, batch_size, raw_data):
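    # Slice the `step`-th batch out of the in-memory raw data.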
    start = step * batch_size
    end = (step + 1) * batch_size
Example #14
    map = map / num_batches
    ndcg_1 = ndcg_1 / num_batches
    ndcg_3 = ndcg_3 / num_batches
    ndcg_10 = ndcg_10 / num_batches

    print('MAP - ', map)
    print('NDCG@1 - ', ndcg_1)
    print('NDCG@3 - ', ndcg_3)
    print('NDCG@10 - ', ndcg_10)


if __name__ == "__main__":
    # Load the saved pre-trained model
    dictionary = helper.load_object(args.save_path + 'dictionary.p')
    model = DSSM(dictionary, args)

    if 'CUDA_VISIBLE_DEVICES' in os.environ:
        cuda_visible_devices = [
            int(x) for x in os.environ['CUDA_VISIBLE_DEVICES'].split(',')
        ]
        if len(cuda_visible_devices) > 1:
            model = torch.nn.DataParallel(model,
                                          device_ids=cuda_visible_devices)
    if args.cuda:
        model = model.cuda()

    helper.load_model_states_from_checkpoint(
        model, os.path.join(args.save_path, 'model_best.pth.tar'),
        'state_dict')
    print('Model and dictionary loaded.')
Example #15
def run(cfg, rank, device, finished, train_dataset_path, valid_dataset):
    """
    train and evaluate
    :param args: config
    :param rank: process id
    :param device: device
    :param train_dataset: dataset instance of a process
    :return:
    """

    set_seed(7)
    print("Worker %d is setting dataset ... " % rank)
    # Build Dataloader
    train_dataset = FMData(np.load(train_dataset_path))
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=cfg.batch_size,
                                   shuffle=True,
                                   drop_last=True)
    valid_data_loader = DataLoader(valid_dataset,
                                   batch_size=cfg.batch_size,
                                   shuffle=False)

    # Build model.
    if cfg.model == 'dssm':
        model = DSSM(cfg)
    elif cfg.model == 'gru':
        model = GRURec(cfg)
    else:
        raise Exception('model error')
    model.to(device)
    # Build optimizer.
    steps_one_epoch = len(train_data_loader)
    train_steps = cfg.epoch * steps_one_epoch
    print("Total train steps: ", train_steps)
    optimizer = torch.optim.Adam(params=model.parameters(),
                                 lr=cfg.lr,
                                 weight_decay=cfg.weight_decay)
    print("Worker %d is working ... " % rank)
    # Fast check the validation process
    if (cfg.gpus < 2) or (cfg.gpus > 1 and rank == 0):
        validate(cfg,
                 -1,
                 model,
                 device,
                 rank,
                 valid_data_loader,
                 fast_dev=True)
        logging.warning(model)
        gather_all(cfg.result_path, 1, validate=True, save=False)

    # Training and validation
    for epoch in range(cfg.epoch):
        # print(model.match_prediction_layer.state_dict()['2.bias'])
        train(cfg, epoch, rank, model, train_data_loader, optimizer,
              steps_one_epoch, device)

        validate(cfg, epoch, model, device, rank, valid_data_loader)
        # add finished count
        finished.value += 1

        if (cfg.gpus < 2) or (cfg.gpus > 1 and rank == 0):
            save_checkpoint_by_epoch(model.state_dict(), epoch,
                                     cfg.checkpoint_path)

            while finished.value < cfg.gpus:
                time.sleep(1)
            gather_all(cfg.result_path, cfg.gpus, validate=True, save=False)
            finished.value = 0