Example #1
        optimizer = tf.train.AdamOptimizer(learning_rate=0.02)
        train_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        train_op = optimizer.minimize(loss=loss, var_list=train_var)

gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options,
                        allow_soft_placement=True,
                        log_device_placement=False)
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    sess.run(tf.tables_initializer())
    #sess.graph.finalize()
    for num in range(epoch_num):
        print(getNow(), 'now shuffle begin')
        train_data = dataClass.samplingagain()
        x_data = train_data[dataClass.col_names]
        y_data = train_data[['label']]
        train_x, test_x, train_y, test_y = train_test_split(x_data,
                                                            y_data,
                                                            test_size=0.0005)
        print('alldata.shape:', train_data.shape)
        print('train_x.shape:', train_x.shape, 'train_y.shape:', train_y.shape)
        print('test_x.shape:', test_x.shape, 'test_y.shape:', test_y.shape)
        print(getNow(), 'now shuffle and split end')
        for i in range(int(train_x.shape[0] / batch_size)):
            input_x = train_x[i * batch_size:(i + 1) * batch_size]
            input_y = train_y[i * batch_size:(i + 1) * batch_size]
            sess.run(train_op, feed_dict={x: input_x, y: input_y})
            if i % 100 == 0:
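                # NOTE: the original snippet is truncated here; a plausible (assumed)
                # continuation is to log the training loss every 100 mini-batches.
                cur_loss = sess.run(loss, feed_dict={x: input_x, y: input_y})
                print(getNow(), 'epoch:', num, 'batch:', i, 'loss:', cur_loss)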
Example #2
         model_dir = model_dir, \
         config    = run_config, \
         params    = params)
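
# The helper get_input_from_pd is defined outside this snippet. A possible sketch of it
# (an assumption, not the original implementation) built on the TF 1.x helper
# tf.estimator.inputs.pandas_input_fn:
#
#   def get_input_from_pd(x_df, y_df, shuffle):
#       return tf.estimator.inputs.pandas_input_fn(
#           x=x_df,              # features as a pandas DataFrame
#           y=y_df['label'],     # label column as a pandas Series
#           batch_size=batch_size,
#           num_epochs=1,
#           shuffle=shuffle)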

model_dir = '../model/'
shutil.rmtree(model_dir, ignore_errors=True)  # tolerate the directory not existing yet
os.mkdir(model_dir)
model = build_estimator(model_dir)
train_input_fn = get_input_from_pd(train_x, train_y, shuffle=True)
test_input_fn  = get_input_from_pd(test_x, test_y, shuffle=False)
print('debug, train_x.shape:', train_x.shape, 'train_y.shape:', train_y.shape)
print('debug, test_x.shape :', test_x.shape,  'test_y.shape :', test_y.shape)
epoch_per_eval = 1

for n in range(epoch_num):
  model.train(input_fn = train_input_fn, hooks=[])
  res = model.evaluate(input_fn = test_input_fn)
  print(getNow(), 'epoch:', n, 'step:', res['global_step'], 'test accuracy:', res['accuracy'])
  #for key in sorted(res): print(key, res[key])

'''
notes:
  1) hand-written training code runs faster than tf.estimator
  2) batch_size needs to be set large enough to make full use of the GPU
  3) negative sampling has to be redone every epoch, especially hand-rolled negative sampling
  4) with tf.estimator, accuracy can be computed via tf.metrics.accuracy (see the sketch
     after this block); in hand-written code it looks like:
     # correct_prediction = tf.equal( tf.argmax(y_, 1), tf.argmax(output, 1))
     # accuracy = tf.reduce_mean( tf.cast(correct_prediction, tf.float32))
'''
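
# A minimal standalone sketch (not from the original code) of what note 4 refers to:
# in TF 1.x, tf.metrics.accuracy returns an (accuracy, update_op) pair backed by local
# variables, so tf.local_variables_initializer() must be run before using it.
import tensorflow as tf

labels_ph = tf.placeholder(tf.int64, [None])
preds_ph  = tf.placeholder(tf.int64, [None])
acc, acc_update = tf.metrics.accuracy(labels=labels_ph, predictions=preds_ph)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    # update_op accumulates correct/total counts; acc reads the running ratio
    sess.run(acc_update, feed_dict={labels_ph: [1, 0, 1], preds_ph: [1, 1, 1]})
    print(sess.run(acc))  # 2 of 3 correct -> ~0.6667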
Example #3
        #correct_prediction = tf.equal(labels, plabel)
        #accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        optimizer = tf.train.AdamOptimizer(learning_rate=0.002)
        train_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        train_op = optimizer.minimize(loss=loss, var_list=train_var)
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options,
                        allow_soft_placement=True,
                        log_device_placement=False)
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    sess.run(tf.tables_initializer())
    #sess.graph.finalize()
    for num in range(epoch_num):
        print(getNow(), 'now shuffle begin')
        train_data = dataClass.shuffle()
        x_data = train_data[[
            'feat_' + str(i) for i in range(1 + seq_len * filed_len)
        ]]
        y_data = train_data[['label_' + str(i) for i in range(seq_len)]]
        #train_x, test_x, train_y, test_y = train_test_split(x_data, y_data, test_size=0.0005)
        train_x, train_y = x_data, y_data
        #test_data = testClass.data_read
        #test_x = test_data[['feat_'+str(i) for i in range(1+seq_len*filed_len)]]
        #test_y = test_data[['label_'+str(i) for i in range(seq_len)]]
        print('alldata.shape:', train_data.shape)
        print('train_x.shape:', train_x.shape, 'train_y.shape:', train_y.shape)
        #print('test_x.shape:', test_x.shape, 'test_y.shape:', test_y.shape)
        #test_data_raw = testClassRaw.data_read
        #test_x_raw = test_data_raw[['feat_'+str(i) for i in range(1+seq_len*filed_len)]]
Example #4
except:
    topK = 30

index = faiss.IndexFlatIP(d)
index.add(id2emb)
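
# Note (assumption about intent): IndexFlatIP ranks by raw inner product. If cosine
# similarity is what is actually wanted, L2-normalize both id2emb and the query
# embeddings (e.g. with faiss.normalize_L2) before add()/search(). faiss expects
# contiguous float32 matrices of shape (n, d) for both calls.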

#quantizer    = faiss.IndexFlatL2(d)
#index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_INNER_PRODUCT)
#assert not index.is_trained
#index.train(np.ascontiguousarray(id2emb))
#index.train(id2emb)
#index.add(id2emb)
#assert index.is_trained

import time
if os.path.isfile(file_save):
    os.rename(file_save, file_save + '.' + getNow())
handle = open(file_save, 'a')  # 'aw' is not a valid mode; plain append is 'a'

for user, user_emb in user_emb_map.items():
    #print 'user', user, 'user_emb', user_emb
    S, I = index.search(user_emb, topK)
    #print 'type(I)', I, I.shape
    Id = [str(i) for i in I[0]]
    Tag = [id2tag[i] for i in Id]
    Score = [str(i) for i in S[0]]
    res = [Tag[i] + '\2' + Score[i] for i in range(topK)]
    handle.write(user + '\t' + '\1'.join(res) + '\n')

handle.close()
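
# A possible follow-up optimization (sketch; assumes every value in user_emb_map is a
# float vector of length d, possibly already shaped (1, d) as used above): stacking all
# user embeddings into one float32 matrix lets faiss answer every query with a single
# index.search call, which is usually much faster than one search per user.
import numpy as np

if user_emb_map:
    users = list(user_emb_map.keys())
    user_mat = np.ascontiguousarray(
        np.vstack([np.asarray(user_emb_map[u], dtype='float32').reshape(-1) for u in users]))
    S_all, I_all = index.search(user_mat, topK)  # both results have shape (len(users), topK)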