Example #1
0
def test_sex():
    """Evaluate the saved gender-classification model on the Chinese test set.

    Loads the vocabulary, rebuilds the model graph without dropout, restores
    the checkpoint from './save_model_sex_c.ckpt' and prints the mean batch
    accuracy over all full test batches (the trailing partial batch is dropped).
    """
    from config import config
    from model_sex_chinese import model
    # Context manager guarantees the vocabulary file is closed.
    with open('word2idx_chinese.pickle', 'rb') as f:
        word2idx = pickle.load(f)
    V = len(word2idx)  # vocabulary size parameterizes the model config
    config = config(V)
    model = model(config)
    batch_size = config.batch_size
    test_data = np.load('test_data_c.npy')
    test_data_words = np.load('test_data_words_c.npy')
    test_ans = np.load('test_ans_c.npy')
    blogs, blogs_vec, ans, loss, accuracy, _ = model.build(is_dropout=False)
    configs = tf.ConfigProto()
    sess = tf.InteractiveSession(config=configs)
    saver = tf.train.Saver()
    saver.restore(sess, './save_model_sex_c.ckpt')
    lens = test_data.shape[0]
    T = lens // batch_size  # number of full batches; remainder is skipped
    fin_ac = 0.0
    for i in range(T):
        data_list = test_data[i * batch_size:(i + 1) * batch_size]
        data_list_words = test_data_words[i * batch_size:(i + 1) * batch_size]
        # Column 1 of the answer array holds the gender label (see train_sex).
        ans_list = test_ans[i * batch_size:(i + 1) * batch_size, 1]
        ac = sess.run(accuracy,
                      feed_dict={
                          blogs: data_list,
                          blogs_vec: data_list_words,
                          ans: ans_list
                      })
        # Accumulate the running mean: each batch contributes ac / T.
        fin_ac += ac / T
    print('acc on sex: %f' % fin_ac)
def sex():
    """Build and restore the gender model for single-example inference.

    Rebuilds the graph with batch size 1, restores the checkpoint from
    './saving/save_model_sex_c.ckpt', and returns the pieces a caller needs
    to run predictions.

    Returns:
        (blogs, blogs_vec, prediction, sess, dr): the two input placeholders,
        an argmax-over-classes prediction tensor, the live session, and the
        dropout-rate placeholder.
    """
    from config import config
    from model_sex_chinese import model
    # Context manager guarantees the vocabulary file is closed.
    with open('word2idx_chinese.pickle', 'rb') as f:
        word2idx = pickle.load(f)
    V = len(word2idx)
    config = config(V)
    config.batch_size = 1  # inference serves one sample at a time
    model = model(config)
    [dr, blogs, blogs_vec, ans, loss, accuracy,
     pans] = model.build(is_dropout=False)
    configs = tf.ConfigProto()
    # Grow GPU memory on demand instead of grabbing it all up front.
    configs.gpu_options.allow_growth = True
    sess = tf.InteractiveSession(config=configs)
    saver = tf.train.Saver()
    saver.restore(sess, './saving/save_model_sex_c.ckpt')
    # tf.argmax replaces the deprecated tf.arg_max alias; behavior is identical.
    return blogs, blogs_vec, tf.argmax(pans, 1), sess, dr
Example #3
0
def train_career():
    """Train the career-classification model on the Chinese training set.

    Samples random mini-batches for 5001 steps with Adam (lr=0.001), logs
    accuracy/loss every 10 steps on the current training batch, and writes a
    checkpoint to './save_model_career_c.ckpt' every 5000 steps.
    """
    from config import config
    from model_career_chinese import model
    # Context manager guarantees the vocabulary file is closed.
    with open('word2idx_chinese.pickle', 'rb') as f:
        word2idx = pickle.load(f)
    V = len(word2idx)  # vocabulary size parameterizes the model config
    config = config(V)
    model = model(config)
    batch_size = config.batch_size
    train_data = np.load('train_data_c.npy')
    train_data_words = np.load('train_data_words_c.npy')
    train_ans = np.load('train_ans_c.npy')
    blogs, blogs_vec, ans, loss, accuracy, _ = model.build(is_dropout=True)
    var = tf.trainable_variables()
    optim = tf.train.AdamOptimizer(0.001).minimize(loss, var_list=var)
    saver = tf.train.Saver()
    configs = tf.ConfigProto()
    sess = tf.InteractiveSession(config=configs)
    # tf.global_variables_initializer() replaces the long-deprecated
    # tf.initialize_all_variables() alias.
    init = tf.global_variables_initializer()
    sess.run(init)
    epoch = 5001
    for i in range(epoch):
        # Sample a random mini-batch (with replacement) from the training set.
        seed = np.random.randint(0, train_data.shape[0], size=(batch_size))
        data_list = np.array([train_data[w] for w in seed])
        # Column 0 of the answer array holds the career label.
        ans_list = np.array([train_ans[w][0] for w in seed])
        data_list_words = np.array([train_data_words[w] for w in seed])
        sess.run(optim,
                 feed_dict={
                     blogs: data_list,
                     blogs_vec: data_list_words,
                     ans: ans_list
                 })
        if i % 10 == 0:
            # Metrics are reported on the training batch itself, so they are
            # optimistic relative to held-out accuracy.
            ac, ls = sess.run([accuracy, loss],
                              feed_dict={
                                  blogs: data_list,
                                  blogs_vec: data_list_words,
                                  ans: ans_list
                              })
            print("i: %d, acc: %f ,loss: %f" % (i, ac, ls))
        if i % 5000 == 0:
            saver.save(sess, './save_model_career_c.ckpt')
def train_sex():
    """Train the gender-classification model on the Chinese training set.

    Samples random mini-batches for 10001 steps with Adam (lr=0.001) and
    dropout keep-rate 0.5, logs training-batch accuracy/loss every 100 steps,
    checkpoints to './save_model_sex_c.ckpt' every 5000 steps, and evaluates
    mean accuracy on the full test set every 1000 steps.
    """
    from config import config
    from model_sex_chinese import model
    # Context manager guarantees the vocabulary file is closed.
    with open('word2idx_chinese.pickle', 'rb') as f:
        word2idx = pickle.load(f)
    V = len(word2idx)  # vocabulary size parameterizes the model config
    config = config(V)
    model = model(config)
    batch_size = config.batch_size
    train_data = np.load('train_data_c.npy')
    train_data_words = np.load('train_data_words_c.npy')
    train_ans = np.load('train_ans_c.npy')
    test_data = np.load('test_data_c.npy')
    test_data_words = np.load('test_data_words_c.npy')
    test_ans = np.load('test_ans_c.npy')
    drop_rate, blogs, blogs_vec, ans, loss, accuracy, _ = model.build(
        is_dropout=True)
    var = tf.trainable_variables()
    optim = tf.train.AdamOptimizer(0.001).minimize(loss, var_list=var)
    saver = tf.train.Saver()
    configs = tf.ConfigProto()
    sess = tf.InteractiveSession(config=configs)
    # tf.global_variables_initializer() replaces the long-deprecated
    # tf.initialize_all_variables() alias.
    init = tf.global_variables_initializer()
    sess.run(init)
    epoch = 10001
    for i in range(epoch):
        # Sample a random mini-batch (with replacement) from the training set.
        seed = np.random.randint(0, train_data.shape[0], size=(batch_size))
        data_list = np.array([train_data[w] for w in seed])
        # Column 1 of the answer array holds the gender label.
        ans_list = np.array([train_ans[w, 1] for w in seed])
        data_list_words = np.array([train_data_words[w] for w in seed])
        sess.run(optim,
                 feed_dict={
                     blogs: data_list,
                     blogs_vec: data_list_words,
                     ans: ans_list,
                     drop_rate: 0.5
                 })
        if i % 100 == 0:
            # Evaluate on the training batch with dropout disabled (rate 1.0).
            ac, ls = sess.run(
                [accuracy, loss],
                feed_dict={
                    blogs: data_list,
                    blogs_vec: data_list_words,
                    ans: ans_list,
                    drop_rate: 1.0
                })
            print("i: %d, acc: %f ,loss: %f" % (i, ac, ls))
        if i % 5000 == 0:
            saver.save(sess, './save_model_sex_c.ckpt')
        if i % 1000 == 0 and i > 0:
            # Periodic held-out evaluation over all full test batches; the
            # trailing partial batch is dropped.
            lens = test_data.shape[0]
            T = lens // batch_size
            fin_ac = 0.0
            for j in range(T):
                data_list = test_data[j * batch_size:(j + 1) * batch_size]
                data_list_words = test_data_words[j * batch_size:(j + 1) *
                                                  batch_size]
                ans_list = test_ans[j * batch_size:(j + 1) * batch_size, 1]
                ac = sess.run(accuracy,
                              feed_dict={
                                  blogs: data_list,
                                  blogs_vec: data_list_words,
                                  ans: ans_list,
                                  drop_rate: 1.0
                              })
                # Accumulate the running mean over T batches.
                fin_ac += ac / T
            print('%d acc on sex: %f' % (i, fin_ac))