def test(train_file=train_file,
         test_file=test_file,
         uid_voc=uid_voc,
         mid_voc=mid_voc,
         cat_voc=cat_voc,
         batch_size=128,
         maxlen=100):
    """Build the DIEN test graph, evaluate it once and print the metrics.

    All arguments are forwarded unchanged to ``SampleIO``. The model is
    always ``Model_DIEN``, configured from the module-level size constants
    and the result of ``use_rocket_training()``.

    Prints one line with test AUC, loss, accuracy and auxiliary loss, taken
    from the 4-tuple returned by ``eval_model``. Returns nothing.
    """
    # Input pipeline first, then the model (same construction order as
    # before, in case either registers graph state).
    sample_io = SampleIO(
        train_file, test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen,
        embedding_dim=EMBEDDING_DIM,
        light_embedding_dim=LIGHT_EMBEDDING_DIM)
    model = Model_DIEN(
        EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE,
        LIGHT_EMBEDDING_DIM, LIGHT_HIDDEN_SIZE, LIGHT_ATTENTION_SIZE,
        use_rocket_training=use_rocket_training())

    # Wire the test batch through the embedding layer into the TF test model.
    test_batch = sample_io.next_test()
    embedded = model.xdl_embedding(
        test_batch, EMBEDDING_DIM, LIGHT_EMBEDDING_DIM, *sample_io.get_n())
    test_ops = tf_test_model(*embedded)

    eval_sess = xdl.TrainSession()
    report = 'test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f'
    metrics = eval_model(eval_sess, test_ops)
    print(report % metrics)
def test(train_file=train_file,
         test_file=test_file,
         uid_voc=uid_voc,
         mid_voc=mid_voc,
         cat_voc=cat_voc,
         batch_size=128,
         maxlen=100):
    """Evaluate the configured DIN or DIEN model on the test set.

    The model type is read from ``xdl.get_config('model')`` and must be
    ``'din'`` or ``'dien'``. All arguments are forwarded unchanged to
    ``SampleIO``.

    Prints one line with test AUC, loss, accuracy and auxiliary loss, taken
    from the 4-tuple returned by ``eval_model``. Returns nothing.

    Raises:
        Exception: if the configured model is neither ``'din'`` nor
            ``'dien'``.
    """
    sample_io = SampleIO(
        train_file, test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen,
        EMBEDDING_DIM)

    # Pick the model class, then construct it once; both variants take the
    # same three size arguments.
    if xdl.get_config('model') == 'din':
        model_cls = Model_DIN
    elif xdl.get_config('model') == 'dien':
        model_cls = Model_DIEN
    else:
        raise Exception('only support din and dien model')
    model = model_cls(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)

    # Wire the test batch through the embedding layer into the TF test model.
    test_batch = sample_io.next_test()
    embedded = model.xdl_embedding(
        test_batch, EMBEDDING_DIM, *sample_io.get_n())
    test_ops = tf_test_model(*embedded)

    eval_sess = xdl.TrainSession()
    report = 'test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f'
    metrics = eval_model(eval_sess, test_ops)
    print(report % metrics)
def test(train_file=train_file,
         test_file=test_file,
         uid_voc=uid_voc,
         mid_voc=mid_voc,
         cat_voc=cat_voc,
         batch_size=128,
         maxlen=100,
         checkpoint_version="ckpt-...............12000"):
    """Restore a saved checkpoint and evaluate DIN or DIEN on the test set.

    The model type is read from ``xdl.get_config('model')`` and must be
    ``'din'`` or ``'dien'``. The test graph is built with
    ``model.build_final_net(..., is_train=False)``, the checkpoint is
    restored through ``xdl.Saver``, and one evaluation is run.

    Args:
        train_file, test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen:
            Forwarded unchanged to ``SampleIO``.
        checkpoint_version: Version string passed to
            ``xdl.Saver().restore``. Defaults to the value that used to be
            hard-coded here, so existing callers behave as before.

    Prints a banner and one line with test AUC, loss, accuracy and auxiliary
    loss, taken from the 4-tuple returned by ``eval_model``. Returns nothing.

    Raises:
        Exception: if the configured model is neither ``'din'`` nor
            ``'dien'``.
    """
    sample_io = SampleIO(
        train_file, test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen,
        EMBEDDING_DIM)

    model_name = xdl.get_config('model')
    if model_name == 'din':
        model = Model_DIN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    elif model_name == 'dien':
        model = Model_DIEN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    else:
        raise Exception('only support din and dien model')

    test_ops = model.build_final_net(EMBEDDING_DIM, sample_io, is_train=False)
    print('=' * 10 + 'start test' + '=' * 10)

    # Restore after the graph is built and before the session is created,
    # matching the original statement order.
    saver = xdl.Saver()
    saver.restore(version=checkpoint_version)

    eval_sess = xdl.TrainSession()
    print(
        'test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f'
        % eval_model(eval_sess, test_ops))
def test(train_file=train_file,
         test_file=test_file,
         uid_voc=uid_voc,
         mid_voc=mid_voc,
         cat_voc=cat_voc,
         batch_size=128,
         maxlen=100):
    """Run one evaluation pass of the DIEN model and print its metrics.

    All arguments are forwarded unchanged to ``SampleIO``. The model is
    always ``Model_DIEN``, built from the module-level size constants with
    ``use_rocket_training`` set from ``use_rocket_training()``.

    Prints one line with test AUC, loss, accuracy and auxiliary loss, taken
    from the 4-tuple returned by ``eval_model``. Returns nothing.
    """
    # Data reader.
    reader = SampleIO(
        train_file, test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen,
        embedding_dim=EMBEDDING_DIM,
        light_embedding_dim=LIGHT_EMBEDDING_DIM)

    # Model.
    rocket = use_rocket_training()
    dien = Model_DIEN(
        EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE,
        LIGHT_EMBEDDING_DIM, LIGHT_HIDDEN_SIZE, LIGHT_ATTENTION_SIZE,
        use_rocket_training=rocket)

    # Test graph: batch -> embeddings -> TF test model.
    batch = reader.next_test()
    net_inputs = dien.xdl_embedding(
        batch, EMBEDDING_DIM, LIGHT_EMBEDDING_DIM, *reader.get_n())
    ops = tf_test_model(*net_inputs)

    session = xdl.TrainSession()
    print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f'
          % eval_model(session, ops))
def train(train_file=train_file,
          test_file=test_file,
          uid_voc=uid_voc,
          mid_voc=mid_voc,
          cat_voc=cat_voc,
          item_info=item_info,
          reviews_info=reviews_info,
          batch_size=128,
          maxlen=100,
          test_iter=700):
    """Build the DIEN train and test graphs and hand them to ``model.run``.

    Args:
        train_file, test_file, uid_voc, mid_voc, cat_voc, item_info,
        reviews_info, batch_size, maxlen: Forwarded unchanged to
            ``SampleIO``.
        test_iter: Forwarded to ``model.run`` as ``test_iter``.

    The train graph is optimised with ``xdl.Adam`` at a learning rate of
    0.001. The task with index 0 additionally gets a checkpoint hook whose
    interval comes from the ``checkpoint.save_interval`` config entry.
    """
    model = Model_DIEN(
        EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE,
        LIGHT_EMBEDDING_DIM, LIGHT_HIDDEN_SIZE, LIGHT_ATTENTION_SIZE,
        use_rocket_training=use_rocket_training())
    sample_io = SampleIO(
        train_file, test_file, uid_voc, mid_voc, cat_voc,
        item_info, reviews_info, batch_size, maxlen,
        embedding_dim=EMBEDDING_DIM,
        light_embedding_dim=LIGHT_EMBEDDING_DIM)

    with xdl.model_scope('train'):
        train_ops = model.build_final_net(
            EMBEDDING_DIM, LIGHT_EMBEDDING_DIM, sample_io)

        # Adam Adagrad
        learning_rate = 0.001
        train_ops.append(xdl.Adam(learning_rate).optimize())

        log_format = "[%(time)s] lstep[%(lstep)s] gstep[%(gstep)s] lqps[%(lqps)s] gqps[%(gqps)s] loss[%(loss)s]"
        hooks = [QpsMetricsHook(), MetricsPrinterHook(log_format)]
        # Only task 0 writes checkpoints.
        if xdl.get_task_index() == 0:
            save_interval = xdl.get_config('checkpoint', 'save_interval')
            hooks.append(xdl.CheckpointHook(save_interval))
        train_sess = xdl.TrainSession(hooks=hooks)

    with xdl.model_scope('test'):
        test_ops = model.build_final_net(
            EMBEDDING_DIM, LIGHT_EMBEDDING_DIM, sample_io, is_train=False)
        test_sess = xdl.TrainSession()

    model.run(train_ops, train_sess, test_ops, test_sess, test_iter=test_iter)
def train(train_file=train_file,
          test_file=test_file,
          uid_voc=uid_voc,
          mid_voc=mid_voc,
          cat_voc=cat_voc,
          item_info=item_info,
          reviews_info=reviews_info,
          batch_size=128,
          maxlen=100,
          test_iter=700):
    """Build train and test graphs for DIN or DIEN and hand them to ``model.run``.

    The model type is read from ``xdl.get_config('model')`` and must be
    ``'din'`` or ``'dien'``.

    Args:
        train_file, test_file, uid_voc, mid_voc, cat_voc, item_info,
        reviews_info, batch_size, maxlen: Forwarded unchanged to
            ``SampleIO``.
        test_iter: Forwarded to ``model.run`` as ``test_iter``.

    The train graph is optimised with ``xdl.Adam`` at a learning rate of
    0.001. The task with index 0 additionally gets a checkpoint hook whose
    interval comes from the ``checkpoint.save_interval`` config entry.

    Raises:
        Exception: if the configured model is neither ``'din'`` nor
            ``'dien'``.
    """
    # Pick the model class, then construct it once; both variants take the
    # same three size arguments.
    if xdl.get_config('model') == 'din':
        model_cls = Model_DIN
    elif xdl.get_config('model') == 'dien':
        model_cls = Model_DIEN
    else:
        raise Exception('only support din and dien')
    model = model_cls(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)

    sample_io = SampleIO(
        train_file, test_file, uid_voc, mid_voc, cat_voc,
        item_info, reviews_info, batch_size, maxlen, EMBEDDING_DIM)

    with xdl.model_scope('train'):
        train_ops = model.build_final_net(EMBEDDING_DIM, sample_io)

        # Adam Adagrad
        learning_rate = 0.001
        train_ops.append(xdl.Adam(learning_rate).optimize())

        log_format = "[%(time)s] lstep[%(lstep)s] gstep[%(gstep)s] lqps[%(lqps)s] gqps[%(gqps)s] loss[%(loss)s]"
        hooks = [QpsMetricsHook(), MetricsPrinterHook(log_format)]
        # Only task 0 writes checkpoints.
        if xdl.get_task_index() == 0:
            save_interval = xdl.get_config('checkpoint', 'save_interval')
            hooks.append(xdl.CheckpointHook(save_interval))
        train_sess = xdl.TrainSession(hooks=hooks)

    with xdl.model_scope('test'):
        test_ops = model.build_final_net(
            EMBEDDING_DIM, sample_io, is_train=False)
        test_sess = xdl.TrainSession()

    print('=' * 10 + 'start train' + '=' * 10)
    model.run(train_ops, train_sess, test_ops, test_sess, test_iter=test_iter)
def test(train_file=train_file,
         test_file=test_file,
         uid_voc=uid_voc,
         mid_voc=mid_voc,
         cat_voc=cat_voc,
         item_info=item_info,
         reviews_info=reviews_info,
         batch_size=99,
         maxlen=100,
         checkpoint_version="ckpt-...............20000"):
    """Score KNN candidates with DIN/DIEN and save each target's rank.

    Steps:
      1. Select the model from ``xdl.get_config('model')`` and restore the
         checkpoint ``checkpoint_version``.
      2. Load the pickled test set and the pickled KNN table. For every test
         record, write one candidate line per KNN neighbour of the last
         history item to ``../data/test_knn``.
      3. Build the test graph over the ``test_knn`` file under
         ``get_data_prefix()`` and evaluate it.
      4. For every test record, sort its neighbours by predicted score
         (descending) and record the 1-based position of the true target,
         or 100 when the target is not among the neighbours.
      5. Pickle ``(user, history, last_item, target, rank)`` tuples to
         ``'ali_dien_rank_4days' + test_file[-11:]``.

    Args:
        train_file, uid_voc, mid_voc, cat_voc, item_info, reviews_info,
        batch_size, maxlen: Forwarded unchanged to ``SampleIO``.
        test_file: Path of the pickled test set. Its 5th-from-last character
            selects the KNN table file and its last 11 characters become the
            suffix of the output file name.
        checkpoint_version: Version string passed to
            ``xdl.Saver().restore``. Defaults to the value that used to be
            hard-coded here.

    Raises:
        Exception: if the configured model is neither ``'din'`` nor
            ``'dien'``.
    """
    model_name = xdl.get_config('model')
    if model_name == 'din':
        model = Model_DIN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    elif model_name == 'dien':
        model = Model_DIEN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    else:
        raise Exception('only support din and dien model')

    # Item id -> category lookup.
    # NOTE(review): ``item_c`` is not defined in this function; presumably a
    # module-level iterable of "item<TAB>category" lines -- confirm.
    item_to_cat = {}
    for line in item_c:
        fields = line.strip().split('\t')
        item_to_cat[fields[0]] = fields[1]

    saver = xdl.Saver()
    saver.restore(version=checkpoint_version)

    # NOTE(review): pickle must only be used on trusted files.
    with open(test_file, 'rb') as fin:
        test_set = pkl.load(fin)
    knn_path = ('../data/ali_knn_table/knn' + str(test_file[-5])
                + '_no_pro2.pkl')
    with open(knn_path, 'rb') as fin:
        knn_table = pkl.load(fin)
    print('length before deal with : ', len(test_set))

    last_hist = []    # last history item of every test record
    target_list = []  # true target item of every test record
    seq = []          # (uid, full history string) of every test record
    candidate_count = 0
    # ``with`` guarantees the candidate file is flushed and closed before
    # SampleIO reads it, even if a record is malformed.
    with open('../data/test_knn', 'w') as test_knn:
        for record in test_set:
            cols = record.strip().split('\t')
            uid, target, hist = cols[1], cols[2], cols[4]
            last = hist.split('/')[-1]
            last_hist.append(last)
            target_list.append(target)
            seq.append((uid, hist))
            # One candidate line per neighbour; unknown items get 'UNK' as
            # their category. cols[5] is copied through unchanged
            # (presumably the category history -- confirm).
            for neighbour in knn_table[last]:
                candidate_count += 1
                category = item_to_cat.get(neighbour, 'UNK')
                row = '\t'.join(
                    ['1', uid, neighbour, category, hist, cols[5]])
                # file.write replaces the Python 2 only ``print >> f`` form.
                test_knn.write(row + '\n')
    print('after last_hist :', len(last_hist))
    print('all test_knn length :', candidate_count)

    # NOTE(review): candidates are written to '../data/test_knn' but read
    # from get_data_prefix()/test_knn -- confirm both name the same file.
    test_knn_f = os.path.join(get_data_prefix(), 'test_knn')
    sample_io = SampleIO(train_file, test_knn_f, uid_voc, mid_voc, cat_voc,
                         item_info, reviews_info, batch_size, maxlen,
                         EMBEDDING_DIM)
    print('all length:', len(last_hist))

    test_ops = model.build_final_net(EMBEDDING_DIM, sample_io, is_train=False)
    print('=' * 10 + 'start test' + '=' * 10)
    eval_sess = xdl.TrainSession()
    pro_all, test_auc, loss_sum, accuracy_sum, aux_loss_sum = eval_model(
        eval_sess, test_ops)
    print(
        'test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f'
        % (test_auc, loss_sum, accuracy_sum, aux_loss_sum))
    print('after pro length :', len(pro_all))
    print('=' * 50)

    # Re-rank every record's neighbours by predicted score.
    # Assumes pro_all[i] holds the scores of record i's neighbours in
    # knn_table order (one batch per record) -- TODO confirm.
    missing_rank = 100  # rank recorded when the target is not a neighbour
    rank = []
    for idx, (last, target) in enumerate(zip(last_hist, target_list)):
        scored = sorted(zip(knn_table[last], pro_all[idx]),
                        key=lambda pair: pair[1], reverse=True)
        ranked_items = [item for item, _ in scored]
        try:
            rank.append(ranked_items.index(target) + 1)
        except ValueError:
            rank.append(missing_rank)

    # Save the re-rank result.
    user = [entry[0] for entry in seq]
    hist = [entry[1] for entry in seq]
    assert len(last_hist) == len(user)
    results = list(zip(user, hist, last_hist, target_list, rank))
    with open('ali_dien_rank_4days' + test_file[-11:], 'wb') as d:
        pkl.dump(results, d)