def test(train_file="local_train_splitByUser", test_file="local_test_splitByUser", uid_voc="uid_voc.pkl", mid_voc="mid_voc.pkl", cat_voc="cat_voc.pkl", batch_size=128, maxlen=100, model_type='DNN', seed=2): model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) model.restore(sess, model_path) print( 'test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, model_path))
def test(train_file="local_train_splitByUser", test_file="local_test_splitByUser", uid_voc="uid_voc.pkl", mid_voc="mid_voc.pkl", cat_voc="cat_voc.pkl", batch_size=128, maxlen=20, model_type='DNN', seed=2): model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() if model_type == 'DNN': model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'PNN': model = Model_PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Wide': model = Model_WideDeep(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-att-gru': model = Model_DIN_V2_Gru_att_Gru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-gru-att': model = Model_DIN_V2_Gru_Gru_att(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-qa-attGru': model = Model_DIN_V2_Gru_QA_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-vec-attGru': model = Model_DIN_V2_Gru_Vec_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DMIN': model = Model_DNN_Multi_Head(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) else: print("Invalid model_type : %s", model_type) return model.restore(sess, model_path) print( 'test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, model_path, maxlen=maxlen))
def test(files, batch_size=1024, max_len=100, seed=2, shuffle_each_epoch=False):
    train_file, test_file, uid_voc, mid_voc, cat_voc = files[0], files[1], files[2], files[3], files[4]
    if shuffle_each_epoch:
        model_path = "best_model_SGD/ckpt_shuffle" + str(seed)
    else:
        model_path = "best_model_SGD/ckpt_noshuffle" + str(seed)
    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, max_len)
        test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, max_len)
        n_uid, n_mid, n_cat = train_data.get_n()
        model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
        model.restore(sess, model_path)
        print('test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f'
              % eval(sess, test_data, model, model_path))
def test(buckets,
         train_file="local_train_splitByUser",
         test_file="local_test_splitByUser",
         uid_voc="uid_voc.pkl",
         mid_voc="mid_voc.pkl",
         cat_voc="cat_voc.pkl",
         model_folder="dnn_best_model/ckpt_noshuff",
         batch_size=128,
         maxlen=100,
         model_type='DNN',
         seed=2):
    model_path = model_folder + model_type + str(seed)
    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_data = DataIterator(train_file, buckets, uid_voc, mid_voc, cat_voc, batch_size, maxlen)
        test_data = DataIterator(test_file, buckets, uid_voc, mid_voc, cat_voc, batch_size, maxlen)
        n_uid, n_mid, n_cat = train_data.get_n()
        if model_type == 'PNN':
            model = Model_PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
        elif model_type == 'Wide':
            model = Model_WideDeep(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
        elif model_type == 'DIN':
            model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
        elif model_type == 'DIEN':
            model = Model_DIEN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
        elif model_type == 'DHAN':
            model = Model_DHAN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
        else:
            print("Invalid model_type : %s" % model_type)
            return
        model.restore(sess, model_path)
        print('test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f'
              % eval(sess, test_data, model, model_path))
def test( train_file = "local_train", test_file = "local_test", uid_voc = "uid_voc_large.pkl", mid_voc = "mid_voc_large.pkl", cat_voc = "cat_voc_large.pkl", batch_size = 128, maxlen = 100, model_type = 'ASVD', seed = 2 ): model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() #Baselines if model_type == 'ASVD': model = Model_ASVD(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'LSTM': model = Model_LSTM(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'LSTMPP': model = Model_LSTMPP(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'NARM': model = Model_NARM(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'CARNN': model = Model_CARNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Time1LSTM': model = Model_Time1LSTM(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Time2LSTM': model = Model_Time2LSTM(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Time3LSTM': model = Model_Time3LSTM(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = Model_DIEN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) #Our models elif model_type == 'A2SVD': model = Model_A2SVD(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'T_SeqRec': model = Model_T_SeqRec(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'TC_SeqRec_I': model = Model_TC_SeqRec_I(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'TC_SeqRec_G': model = Model_TC_SeqRec_G(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'TC_SeqRec': model = Model_TC_SeqRec(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'SLi_Rec_Fixed': model = Model_SLi_Rec_Fixed(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'SLi_Rec_Adaptive': model = Model_SLi_Rec_Adaptive(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) else: print ("Invalid model_type : %s", model_type) return model.restore(sess, model_path) print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, model_path))
def test(train_file="local_train_splitByUser", test_file="local_test_splitByUser", batch_size=BATCH_SIZE, maxlen=MAXLEN, model_type='DNN', seed=2): model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, FEATURE_COUNT, QUERY_COUNT, voc_list, batch_size, maxlen) test_data = DataIterator(test_file, FEATURE_COUNT, QUERY_COUNT, voc_list, batch_size, maxlen) n_query, n = train_data.get_n() if model_type == 'DNN': model = Model_DNN(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'PNN': model = Model_PNN(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Wide': model = Model_WideDeep(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-att-gru': model = Model_DIN_V2_Gru_att_Gru(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-gru-att': model = Model_DIN_V2_Gru_Gru_att(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-qa-attGru': model = Model_DIN_V2_Gru_QA_attGru(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-vec-attGru': model = Model_DIN_V2_Gru_Vec_attGru(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = Model_DIN_V2_Gru_Vec_attGru_Neg(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) else: print("Invalid model_type : %s", model_type) return with tf.summary.FileWriter('./test_log') as writer: writer.add_graph(sess.graph) model.restore(sess, model_path) print( 'test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, model_path)) writer.flush()
def train(train_file = "data/local_train_splitByUser", test_file = "data/local_test_splitByUser", uid_voc = "data/uid_voc.pkl", mid_voc = "data/mid_voc.pkl", cat_voc = "data/cat_voc.pkl", batch_size = 128, maxlen = 100, test_iter = 100, save_iter = 100, model_type = 'DNN', seed = 2, ): model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed) best_model_path = "dnn_bast_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file,uid_voc,mid_voc,cat_voc,batch_size,maxlen) test_data = DataIterator(test_file,uid_voc,mid_voc,cat_voc,batch_size,maxlen) n_uid,n_mid,n_cat = train_data.get_n() model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid,n_mid,n_cat,EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) iter = 0 lr = 0.001 for itr in range(3): loss_sum = 0.0 accuracy_sum = 0.0 aux_loss_sum = 0.0 for src,tgt in train_data: uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats = prepare_data(src,tgt,maxlen,return_neg=True) loss, acc, aux_loss = model.train(sess, [uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr,noclk_mids, noclk_cats]) loss_sum += loss accuracy_sum += acc aux_loss_sum += aux_loss iter += 1 if (iter % test_iter) == 0: print('iter: %d ----> train_loss: %.8f ---- train_accuracy: %.4f ---- tran_aux_loss: %.4f' % \ (iter, loss_sum / test_iter, accuracy_sum / test_iter, aux_loss_sum / test_iter)) print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, best_model_path)) loss_sum = 0.0 accuracy_sum = 0.0 aux_loss_sum = 0.0 if (iter % save_iter) == 0: print('save model iter: %d' % (iter)) model.save(sess, model_path + "--" + str(iter)) lr *= 0.5
def test( train_file = "local_train_splitByUser", test_file = "local_test_splitByUser", uid_voc = "uid_voc.pkl", mid_voc = "mid_voc.pkl", cat_voc = "cat_voc.pkl", batch_size = 128, maxlen = 100, model_type = 'DNN', seed = 2 ): model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() if model_type == 'DNN': model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'PNN': model = Model_PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Wide': model = Model_WideDeep(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-att-gru': model = Model_DIN_V2_Gru_att_Gru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-gru-att': model = Model_DIN_V2_Gru_Gru_att(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-qa-attGru': model = Model_DIN_V2_Gru_QA_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-vec-attGru': model = Model_DIN_V2_Gru_Vec_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) else: print ("Invalid model_type : %s", model_type) return model.restore(sess, model_path) print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, model_path))
def test( train_file = "local_train_splitByUser", test_file = "local_test_splitByUser", uid_voc = "uid_voc.pkl", mid_voc = "mid_voc.pkl", cat_voc = "cat_voc.pkl", batch_size = 128, maxlen = 100, model_type = 'DNN', seed = 2 ): model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) model.restore(sess, model_path) print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, model_path))
def test( train_file = "local_train_sample_sorted_by_time", test_file = "local_test_sample_sorted_by_time", uid_voc = "uid_voc.pkl", mid_voc = "mid_voc.pkl", cat_voc = "cat_voc.pkl", batch_size = 128, user_maxlen = 50, maxlen = 20, model_type = 'DNN', seed = 2 ): model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed)+ "_"+str(user_maxlen) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() if model_type == 'DNN': model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'SVDPP': model = Model_SVDPP(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'GRU4REC': model = Model_GRU4REC(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'PNN': model = Model_PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DUMN': model = Model_DUMN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Wide': model = Model_WideDeep(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = DIEN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) else: print ("Invalid model_type : %s", model_type) return model.restore(sess, model_path) print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- Logloss: %.4f' % eval(sess, test_data, model, model_path, maxlen,user_maxlen))
def train( train_file="local_train_splitByUser", test_file="local_test_splitByUser", uid_voc="uid_voc.pkl", mid_voc="mid_voc.pkl", cat_voc="cat_voc.pkl", batch_size=128, maxlen=100, test_iter=100, save_iter=100, model_type='DNN', seed=2, ): model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed) best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) # config = tf.ConfigProto() custom_op = config.graph_options.rewrite_options.custom_optimizers.add() custom_op.name = "NpuOptimizer" custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes( "allow_mix_precision") custom_op.parameter_map["use_off_line"].b = True config.graph_options.rewrite_options.remapping = RewriterConfig.OFF # with tf.Session(config=config) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, shuffle_each_epoch=False) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() if model_type == 'DNN': model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'PNN': model = Model_PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Wide': model = Model_WideDeep(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-att-gru': model = Model_DIN_V2_Gru_att_Gru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-gru-att': model = Model_DIN_V2_Gru_Gru_att(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-qa-attGru': model = Model_DIN_V2_Gru_QA_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-vec-attGru': model = Model_DIN_V2_Gru_Vec_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) else: print("Invalid model_type : %s", model_type) return # model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) sys.stdout.flush() print( ' test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, best_model_path)) sys.stdout.flush() start_time = time.time() iter = 0 lr = 0.001 for itr in range(3): loss_sum = 0.0 accuracy_sum = 0. aux_loss_sum = 0. 
for src, tgt in train_data: uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats = prepare_data( src, tgt, maxlen, return_neg=True) loss, acc, aux_loss = model.train(sess, [ uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr, noclk_mids, noclk_cats ]) loss_sum += loss accuracy_sum += acc aux_loss_sum += aux_loss iter += 1 sys.stdout.flush() if (iter % test_iter) == 0: print('iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- tran_aux_loss: %.4f' % \ (iter, loss_sum / test_iter, accuracy_sum / test_iter, aux_loss_sum / test_iter)) print( ' test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, best_model_path)) loss_sum = 0.0 accuracy_sum = 0.0 aux_loss_sum = 0.0 if (iter % save_iter) == 0: print('save model iter: %d' % (iter)) model.save(sess, model_path + "--" + str(iter)) lr *= 0.5
def train( train_file="local_train_splitByUser", test_file="local_test_splitByUser", uid_voc="uid_voc.pkl", mid_voc="mid_voc.pkl", cat_voc="cat_voc.pkl", batch_size=128, maxlen=100, test_iter=50, save_iter=300, model_type='DNN', seed=2, ): model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed) best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, shuffle_each_epoch=False) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() if model_type == 'DNN': model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'PNN': model = Model_PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = Model_DIEN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN_with_InnerAtt': model = Model_DIEN_with_InnerAtt(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DHIN_without_InnerAtt': model = Model_DHIN_without_InnerAtt(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DHIN': model = Model_DHIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) else: print("Invalid model_type : %s", model_type) return sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) sys.stdout.flush() print( ' test_auc: %.4f --- test_loss: %.4f --- test_acc: %.4f --- test_aux_loss: %.4f' % eval(sess, test_data, model, best_model_path)) sys.stdout.flush() #start_time = time.time() iter = 0 lr = 0.001 for itr in range(3): loss_sum = 0.0 accuracy_sum = 0. aux_loss_sum = 0. for source, target in train_data: uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats = prepare_data( source, target, maxlen, return_neg=True) loss, acc, aux_loss = model.train(sess, [ uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr, noclk_mids, noclk_cats ]) loss_sum += loss accuracy_sum += acc aux_loss_sum += aux_loss iter += 1 sys.stdout.flush() if (iter % test_iter) == 0: print('iter: %d ---> train_loss: %.4f --- train_acc: %.4f --- tran_aux_loss: %.4f' % \ (iter, loss_sum / test_iter, accuracy_sum / test_iter, aux_loss_sum / test_iter)) print( ' test_auc: %.4f ---test_loss: %.4f --- test_acc: %.4f --- test_aux_loss: %.4f' % eval(sess, test_data, model, best_model_path)) loss_sum = 0.0 accuracy_sum = 0.0 aux_loss_sum = 0.0 if (iter % save_iter) == 0: print('save model iter: %d' % (iter)) model.save(sess, model_path + "--" + str(iter)) lr *= 0.5
def train(files, batch_size=1024, max_len=100, test_iter=30, seed=2, shuffle_each_epoch=False):
    # Create a csv to record auc within "similar" communication round numbers.
    round_num_simulated = 0
    auc_path = "./central_model_auc.csv"
    create_csv(auc_path)
    train_file, test_file, uid_voc, mid_voc, cat_voc = files[0], files[1], files[2], files[3], files[4]
    if shuffle_each_epoch:
        best_model_path = "best_model_SGD/ckpt_shuffle" + str(seed)
    else:
        best_model_path = "best_model_SGD/ckpt_noshuffle" + str(seed)
    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, max_len,
                                  shuffle_each_epoch)
        test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, max_len)
        n_uid, n_mid, n_cat = train_data.get_n()
        model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sys.stdout.flush()
        test_auc, test_loss, test_accuracy = eval(sess, test_data, model, best_model_path)
        print('Initial test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f'
              % (test_auc, test_loss, test_accuracy))
        write_csv(auc_path, round_num_simulated, test_auc, test_loss, test_accuracy)
        round_num_simulated += 1
        sys.stdout.flush()
        iter = 0
        lr = 1.0
        decay_rate = 0.999
        loss_sum = 0.0
        accuracy_sum = 0.0
        for epoch in range(50):
            start_time = time.time()
            for src, tgt in train_data:
                uids, mids, cats, mid_his, cat_his, mid_mask, target, sl = prepare_data(src, tgt, max_len)
                loss, acc = model.train(sess, [uids, mids, cats, mid_his, cat_his, mid_mask,
                                               target, sl, lr])
                loss_sum += loss
                accuracy_sum += acc
                iter += 1
                sys.stdout.flush()
                if (iter % test_iter) == 0:
                    print('Epoch: %d ----> iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f'
                          % (epoch, iter, loss_sum / test_iter, accuracy_sum / test_iter))
                    test_auc, test_loss, test_accuracy = eval(sess, test_data, model, best_model_path)
                    print(' test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f'
                          % (test_auc, test_loss, test_accuracy))
                    write_csv(auc_path, round_num_simulated, test_auc, test_loss, test_accuracy)
                    round_num_simulated += 1
                    lr *= decay_rate
                    loss_sum = 0.0
                    accuracy_sum = 0.0
            '''
            if epoch == 1:
                lr *= 0.1
            '''
            print('Epoch %d finished: Used Time: %d s, Best auc: %.4f'
                  % (epoch, time.time() - start_time, best_auc))
            print('')
            sys.stdout.flush()
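# create_csv / write_csv are project-local helpers used above; a minimal
# sketch of compatible implementations (the column layout is an assumption
# inferred from the call sites):
import csv

def create_csv(path):
    # Start a fresh file with a header row.
    with open(path, 'w') as f:
        csv.writer(f).writerow(['round', 'test_auc', 'test_loss', 'test_accuracy'])

def write_csv(path, round_num, test_auc, test_loss, test_accuracy):
    # Append one metrics row per simulated communication round.
    with open(path, 'a') as f:
        csv.writer(f).writerow([round_num, test_auc, test_loss, test_accuracy])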
def train( datasetdir="data/Electronics/", train_file="/training_set", test_file="/test_set", uid_voc="/uid_voc.pkl", mid_voc="/mid_voc.pkl", cat_voc="/cat_voc.pkl", batch_size=128, maxlen=30, matrix_width=36, test_iter=100, save_iter=4000000, model_type='DRIIN', seed=2, ): train_file = datasetdir + train_file test_file = datasetdir + test_file uid_voc = datasetdir + uid_voc mid_voc = datasetdir + mid_voc cat_voc = datasetdir + cat_voc model_path = datasetdir + "/dnn_save_path/ckpt_noshuff" + model_type + str( seed) best_model_path = datasetdir + "/dnn_best_model/ckpt_noshuff" + model_type + str( seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, shuffle_each_epoch=True, datasetdir=datasetdir) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, datasetdir=datasetdir) n_uid, n_mid, n_cat = train_data.get_n() model = Model(n_uid, n_mid, n_cat, EMBEDDING_DIM, matrix_width=matrix_width) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) start_time = time.time() file1 = logging.FileHandler( filename=datasetdir + '/my_logs/' + "model_" + str(time.asctime(time.localtime(start_time))) + '.txt', mode='a', encoding='utf-8') logger_accuracy = logging.Logger(name='name_accuracy', level=logging.INFO) logger_accuracy.addHandler(file1) logger_accuracy.info("start_time:" + time.asctime(time.localtime(start_time)) + "\r\n") logger_accuracy.info(model_type + " " + datasetdir + " maxlen:" + str(maxlen) + " batch_size:" + str(batch_size) + "\r\n") file2 = logging.FileHandler( filename=datasetdir + '/loss_logs/' + "model_test_" + str(time.asctime(time.localtime(start_time))) + '.txt', mode='a', encoding='utf-8') logger_test_loss = logging.Logger(name='name_loss', level=logging.INFO) logger_test_loss.addHandler(file2) file3 = logging.FileHandler( filename=datasetdir + '/loss_logs/' + "model_train_" + str(time.asctime(time.localtime(start_time))) + '.txt', mode='a', encoding='utf-8') logger_train_loss = logging.Logger(name='name_loss', level=logging.INFO) logger_train_loss.addHandler(file3) iter = 0 lr = 0.01 global best_auc breakflag = False test_auc_log, loss_sum_log, accuracy_sum_log, aux_loss_sum_log, loss_without_aux = eval_DRIIN( sess, test_data, model, best_model_path, maxlen) logger_accuracy.info( 'test_auc: %.4f - test_loss: %.4f - test_accuracy: %.4f - test_aux_loss: %.4f - loss_without_aux: %.4f *best_auc: %.4f \r\n' % (test_auc_log, loss_sum_log, accuracy_sum_log, aux_loss_sum_log, loss_without_aux, best_auc)) # writer.add_summary(summary, iter) print( 'test_auc: %.4f - test_loss: %.4f - test_accuracy: %.4f - test_aux_loss: %.4f - loss_without_aux: %.4f *best_auc: %.4f' % (test_auc_log, loss_sum_log, accuracy_sum_log, aux_loss_sum_log, loss_without_aux, best_auc)) logger_test_loss.info( '%d,%.4f,%.4f,%.4f' % \ (iter, loss_sum_log, aux_loss_sum_log, loss_without_aux)) logger_train_loss.info( '%d,%.4f,%.4f,%.4f' % \ (iter, loss_sum_log, aux_loss_sum_log, loss_without_aux)) for epoch in range(5): loss_sum = 0.0 accuracy_sum = 0. aux_loss_sum = 0. 
if breakflag: break print("epoch:", epoch) logger_accuracy.info('epoch: %d\r\n' % epoch) for src, tgt in train_data: iter += 1 uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats = prepare_data_DRIIN( src, tgt, maxlen, return_neg=True) loss, acc, aux_loss = model.train(sess, [ uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr, noclk_mids, noclk_cats ]) loss_sum += loss accuracy_sum += acc aux_loss_sum += aux_loss sys.stdout.flush() if (iter % test_iter) == 0: logger_accuracy.info( 'iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- tran_aux_loss: %.4f \r\n' % \ (iter, loss_sum / test_iter, accuracy_sum / test_iter, aux_loss_sum / test_iter)) print('iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- tran_aux_loss: %.4f' % \ (iter, loss_sum / test_iter, accuracy_sum / test_iter, aux_loss_sum / test_iter)) logger_train_loss.info( '%d,%.4f,%.4f,%.4f' % \ (iter, loss_sum / test_iter, aux_loss_sum / test_iter, (loss_sum - aux_loss_sum) / test_iter, )) test_auc_log, loss_sum_log, accuracy_sum_log, aux_loss_sum_log, loss_without_aux = eval_DRIIN( sess, test_data, model, best_model_path, maxlen) logger_accuracy.info( 'test_auc: %.4f -test_loss: %.4f -test_accuracy: %.4f -test_aux_loss: %.4f -loss_without_aux: %.4f *best_auc: %.4f \r\n' % (test_auc_log, loss_sum_log, accuracy_sum_log, aux_loss_sum_log, loss_without_aux, best_auc)) print( 'test_auc: %.4f - test_loss: %.4f - test_accuracy: %.4f - test_aux_loss: %.4f - loss_without_aux: %.4f *best_auc: %.4f' % (test_auc_log, loss_sum_log, accuracy_sum_log, aux_loss_sum_log, loss_without_aux, best_auc)) logger_test_loss.info( '%d,%.4f,%.4f,%.4f' % \ (iter, loss_sum_log, aux_loss_sum_log, loss_without_aux)) loss_sum = 0.0 accuracy_sum = 0.0 aux_loss_sum = 0.0 # if test_auc_log > 0.87: # test_iter = 10 # if iter >= test_iter: # test_iter = 10 # if iter == 2500: # test_iter = 100 # if iter == 6000: # breakflag = True # break if (iter % save_iter) == 0: print('save model iter: %d' % (iter)) model.save(sess, model_path + "--" + str(iter)) # if iter == 3000: # lr *= 2 test_time = time.time() print("test interval: " + str((test_time - start_time) / 60.0) + " min") logger_accuracy.info("test interval: " + str((test_time - start_time) / 60.0) + " min" + "\r\n") logger_accuracy.info("end_time:" + time.asctime(time.localtime(time.time())) + "\r\n")
def train( train_file = "local_train", test_file = "local_test", uid_voc = "uid_voc_large.pkl", mid_voc = "mid_voc_large.pkl", cat_voc = "cat_voc_large.pkl", batch_size = 128, maxlen = 100, test_iter = 1000, save_iter = 1000, model_type = 'ASVD', seed = 2, ): model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed) best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, shuffle_each_epoch=True) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() #Baselines if model_type == 'ASVD': model = Model_ASVD(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'LSTM': model = Model_LSTM(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'LSTMPP': model = Model_LSTMPP(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'NARM': model = Model_NARM(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'CARNN': model = Model_CARNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Time1LSTM': model = Model_Time1LSTM(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Time2LSTM': model = Model_Time2LSTM(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Time3LSTM': model = Model_Time3LSTM(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = Model_DIEN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) #Our models elif model_type == 'A2SVD': model = Model_A2SVD(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'T_SeqRec': model = Model_T_SeqRec(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'TC_SeqRec_I': model = Model_TC_SeqRec_I(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'TC_SeqRec_G': model = Model_TC_SeqRec_G(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'TC_SeqRec': model = Model_TC_SeqRec(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'SLi_Rec_Fixed': model = Model_SLi_Rec_Fixed(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'SLi_Rec_Adaptive': model = Model_SLi_Rec_Adaptive(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) else: print ("Invalid model_type : %s", model_type) return sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) sys.stdout.flush() print('test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, best_model_path)) sys.stdout.flush() start_time = time.time() iter = 0 lr = 0.001 for itr in range(10): loss_sum = 0.0 accuracy_sum = 0. aux_loss_sum = 0. 
for src, tgt in train_data: uids, mids, cats, mid_his, cat_his, time_his, time_last_his, time_now_his, mid_mask, target, sl, noclk_mids, noclk_cats = prepare_data(src, tgt, maxlen, return_neg=True) loss, acc, aux_loss = model.train(sess, [uids, mids, cats, mid_his, cat_his, time_his, time_last_his, time_now_his, mid_mask, target, sl, lr, noclk_mids, noclk_cats]) loss_sum += loss accuracy_sum += acc aux_loss_sum += aux_loss iter += 1 sys.stdout.flush() if (iter % test_iter) == 0: print('iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- tran_aux_loss: %.4f' % \ (iter, loss_sum / test_iter, accuracy_sum / test_iter, aux_loss_sum / test_iter)) print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, best_model_path)) loss_sum = 0.0 accuracy_sum = 0.0 aux_loss_sum = 0.0
def train( train_file="local_train_splitByUser", test_file="local_test_splitByUser", uid_voc="uid_voc.pkl", mid_voc="mid_voc.pkl", cat_voc="cat_voc.pkl", batch_size=128, maxlen=100, test_iter=100, save_iter=100, model_type='DNN', seed=2, ): print("batch_size: ", batch_size) print("model: ", model_type) model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed) best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, shuffle_each_epoch=False) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() if model_type == 'DNN': model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'PNN': model = Model_PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Wide': model = Model_WideDeep(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-att-gru': model = Model_DIN_V2_Gru_att_Gru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-gru-att': model = Model_DIN_V2_Gru_Gru_att(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-qa-attGru': model = Model_DIN_V2_Gru_QA_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-vec-attGru': model = Model_DIN_V2_Gru_Vec_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) else: print("Invalid model_type : %s", model_type) return # model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) sys.stdout.flush() print( ' test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f ---- eval_time: %.3f' % eval(sess, test_data, model, best_model_path)) sys.stdout.flush() iter = 0 lr = 0.001 train_size = 0 approximate_accelerator_time = 0 for itr in range(1): loss_sum = 0.0 accuracy_sum = 0. aux_loss_sum = 0. 
for src, tgt in train_data: uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats = prepare_data( src, tgt, maxlen, return_neg=True) start_time = time.time() loss, acc, aux_loss = model.train(sess, [ uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr, noclk_mids, noclk_cats ]) end_time = time.time() approximate_accelerator_time += end_time - start_time loss_sum += loss accuracy_sum += acc aux_loss_sum += aux_loss iter += 1 train_size += batch_size sys.stdout.flush() if (iter % test_iter) == 0: print('iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- tran_aux_loss: %.4f' % \ (iter, loss_sum / test_iter, accuracy_sum / test_iter, aux_loss_sum / test_iter)) print( ' test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f ---- eval_time: %.3f' % eval(sess, test_data, model, best_model_path)) loss_sum = 0.0 accuracy_sum = 0.0 aux_loss_sum = 0.0 if (iter % save_iter) == 0: print('save model iter: %d' % (iter)) model.save(sess, model_path + "--" + str(iter)) if train_size >= TOTAL_TRAIN_SIZE: break lr *= 0.5 if train_size >= TOTAL_TRAIN_SIZE: break print("Approximate accelerator time in seconds is %.3f" % approximate_accelerator_time)
def train(data_location, batch_size=128, maxlen=100, test_iter=100, save_iter=100,
          data_type='FP32', seed=2):
    print("batch_size: ", batch_size)
    model_type = "DIEN"
    print("model: ", model_type)
    model_path = os.path.join(data_location, "dnn_save_path/ckpt_noshuff" + model_type + str(seed))
    best_model_path = os.path.join(data_location, "dnn_best_model/ckpt_noshuff" + model_type + str(seed))
    train_file = os.path.join(data_location, "local_train_splitByUser")
    test_file = os.path.join(data_location, "local_test_splitByUser")
    uid_voc = os.path.join(data_location, "uid_voc.pkl")
    mid_voc = os.path.join(data_location, "mid_voc.pkl")
    cat_voc = os.path.join(data_location, "cat_voc.pkl")
    session_config = tf.compat.v1.ConfigProto()
    if args.num_intra_threads and args.num_inter_threads:
        session_config.intra_op_parallelism_threads = args.num_intra_threads
        session_config.inter_op_parallelism_threads = args.num_inter_threads
    with tf.compat.v1.Session(config=session_config) as sess:
        train_data = DataIterator(data_location, train_file, uid_voc, mid_voc, cat_voc,
                                  batch_size, maxlen, shuffle_each_epoch=False)
        test_data = DataIterator(data_location, test_file, uid_voc, mid_voc, cat_voc,
                                 batch_size, maxlen)
        n_uid, n_mid, n_cat = train_data.get_n()
        # Number of uid = 543060, mid = 367983, cat = 1601 for Amazon dataset
        print("Number of uid = %i, mid = %i, cat = %i" % (n_uid, n_mid, n_cat))
        model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE,
                                                ATTENTION_SIZE, data_type,
                                                batch_size=batch_size, max_length=maxlen,
                                                device='cpu')
        sess.run(tf.compat.v1.global_variables_initializer())
        sess.run(tf.compat.v1.local_variables_initializer())
        sys.stdout.flush()
        iter = 0
        lr = 0.001
        train_size = 0
        approximate_accelerator_time = 0
        for itr in range(1):
            loss_sum = 0.0
            accuracy_sum = 0.
            aux_loss_sum = 0.
            if args.timeline:
                sample_freq = 200
                options = tf.compat.v1.RunOptions(trace_level=tf.compat.v1.RunOptions.FULL_TRACE)
                run_metadata = tf.compat.v1.RunMetadata()
            # Materialize the whole epoch up front so the timed loop below
            # measures only the session runs.
            total_data = []
            for src, tgt in train_data:
                uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats = \
                    prepare_data(src, tgt, maxlen, return_neg=True)
                total_data.append([uids, mids, cats, mid_his, cat_his, mid_mask, target, sl,
                                   noclk_mids, noclk_cats])
            elapsed_time_records = []
            nums = 0
            for i in range(len(total_data)):
                nums += 1
                uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats = \
                    tuple(total_data[i])
                start_time = time.time()
                if args.timeline and nums == sample_freq:
                    loss, acc, aux_loss = model.train(
                        sess,
                        [uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr,
                         noclk_mids, noclk_cats],
                        timeline_flag=True, options=options, run_metadata=run_metadata,
                        step=nums)
                else:
                    loss, acc, aux_loss = model.train(sess, [uids, mids, cats, mid_his, cat_his,
                                                             mid_mask, target, sl, lr,
                                                             noclk_mids, noclk_cats])
                end_time = time.time()
                approximate_accelerator_time += end_time - start_time
                elapsed_time_records.append(end_time - start_time)
                loss_sum += loss
                accuracy_sum += acc
                aux_loss_sum += aux_loss
                iter += 1
                train_size += batch_size
                sys.stdout.flush()
                if (iter % test_iter) == 0:
                    # print("train_size: %d" % train_size)
                    print("approximate_accelerator_time: %.3f" % approximate_accelerator_time)
                    print('iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- train_aux_loss: %.4f'
                          % (iter, loss_sum / test_iter, accuracy_sum / test_iter, aux_loss_sum / test_iter))
                    # Per-100-iteration eval skipped: not needed during timed training.
                    # print(' test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f ---- eval_time: %.3f ---- num_iters: %d' % eval(sess, test_data, model, best_model_path))
                    loss_sum = 0.0
                    accuracy_sum = 0.0
                    aux_loss_sum = 0.0
                if (iter % save_iter) == 0:
                    print('save model iter: %d' % (iter))
                    model.save(sess, model_path + "--" + str(iter))
                if train_size >= TOTAL_TRAIN_SIZE:
                    break
            print("iteration: ", nums)
            lr *= 0.5
            if train_size >= TOTAL_TRAIN_SIZE:
                break
        print("iter: %d" % iter)
        print("Total recommendations: %d" % TOTAL_TRAIN_SIZE)
        print("Approximate accelerator time in seconds is %.3f" % approximate_accelerator_time)
        print("Approximate accelerator performance in recommendations/second is %.3f"
              % (float(TOTAL_TRAIN_SIZE) / float(approximate_accelerator_time)))
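# For reference, a model.train(..., timeline_flag=True) call is expected to
# dump a Chrome trace built from the RunMetadata collected above. A minimal
# sketch of that dump step using TF1's timeline module (the function name and
# file naming are illustrative):
from tensorflow.python.client import timeline

def dump_timeline(run_metadata, step, path_prefix='timeline'):
    # Convert the profiled step stats into Chrome trace-event JSON.
    tl = timeline.Timeline(run_metadata.step_stats)
    trace = tl.generate_chrome_trace_format()
    with open('%s-%d.json' % (path_prefix, step), 'w') as f:
        f.write(trace)  # load this file in chrome://tracing to inspect op timing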
def train(batch_size=BATCH_SIZE, ubh_maxlen=MAXLEN, ibh_maxlen=SEQ_USER_MAXLEN, test_iter=100,
          save_iter=10000, model_type='DNN', seq_user_t=50, seed=2, learning_rate=0.001,
          epoch=2, dataset='Amazon_Clothing', emb=32, hidden_units='256,128,1'):
    train_file = datafilename(dataset, "local_train_by_time")
    test_file = datafilename(dataset, "local_test_by_time")
    test_file1 = datafilename(dataset, "local_test_u1")
    test_file2 = datafilename(dataset, "local_test_u2")
    test_file3 = datafilename(dataset, "local_test_u3")
    uid_voc = datafilename(dataset, "uid_voc.pkl")
    mid_voc = datafilename(dataset, "mid_voc.pkl")
    cat_voc = datafilename(dataset, "cat_voc.pkl")
    model_path = "dnn_save_path/{}_seed{}/".format(model_type, str(seed))
    best_model_path = "dnn_best_path/{}_seed_{}/".format(model_type, str(seed))
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    if not os.path.exists(best_model_path):
        os.makedirs(best_model_path)
    train_writer = tf.summary.FileWriter(model_path + '/train')
    eval_writer = tf.summary.FileWriter(model_path + '/eval')
    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, ubh_maxlen)
        test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size * 100, ubh_maxlen)
        n_uid, n_mid, n_cat = train_data.get_n()
        print("uid count : %d" % n_uid)
        print("mid count : %d" % n_mid)
        print("cat count : %d" % n_cat)
        EMBEDDING_DIM = emb
        HIDDEN_UNITS = hidden_units.split(',')
        if model_type == 'DNN':
            model = DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM)
        elif model_type == 'PNN':
            model = PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM)
        elif model_type == 'SVDPP':
            model = SVDPP(n_uid, n_mid, n_cat, EMBEDDING_DIM, user_fnum=1, item_fnum=2)
        elif model_type == 'GRU4REC':
            model = GRU4REC(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE)
        elif model_type == 'DIN':
            model = DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE)
        elif model_type == 'ATRANK':
            model = ATRANK(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE)
        elif model_type == 'CASER':
            model = CASER(n_uid, n_mid, n_cat, EMBEDDING_DIM, user_fnum=1, item_fnum=2)
        elif model_type == 'DIEN':
            model = DIEN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE)
        elif model_type == 'UBGRUA':
            model = UBGRUA(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE)
        elif model_type == 'TopoLSTM':
            model = TopoLSTM(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, SEQ_USER_T=seq_user_t)
        elif model_type == 'DIB':
            model = DIB(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, SEQ_USER_T=seq_user_t)
        elif model_type == 'IBGRUA':
            model = IBGRUA(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, SEQ_USER_T=seq_user_t)
        elif model_type == 'TIEN_sumagg':
            model = TIEN_sumagg(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, SEQ_USER_T=seq_user_t)
        elif model_type == 'TIEN_timeatt':
            model = TIEN_timeatt(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, SEQ_USER_T=seq_user_t)
        elif model_type == 'TIEN_robust':
            model = TIEN_robust(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, SEQ_USER_T=seq_user_t)
        elif model_type == 'TIEN_timeaware':
            model = TIEN_timeaware(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, SEQ_USER_T=seq_user_t)
        elif model_type == 'TIEN':
            model = TIEN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, SEQ_USER_T=seq_user_t,
                         HIDDEN_UNITS=HIDDEN_UNITS)
        # incorporation
        elif model_type == 'GRU4REC_TIEN':
            model = GRU4REC_TIEN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE)
        elif model_type == 'ATRANK_TIEN':
            model = ATRANK_TIEN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE)
        elif model_type == 'CASER_TIEN':
            model = CASER_TIEN(n_uid, n_mid, n_cat, EMBEDDING_DIM, user_fnum=1, item_fnum=2)
        elif model_type == 'DIEN_TIEN':
            model = DIEN_TIEN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE)
        else:
            print("Invalid model_type : %s" % model_type)
            return
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sys.stdout.flush()
        test_auc, test_loss, test_accuracy, test_f1 = eval(sess, test_data, model,
                                                           best_model_path, eval_writer)
        print_metric('test', 0, 0, test_loss, test_accuracy, test_auc, test_f1)
        sys.stdout.flush()
        iter = 0
        iter_epoch = 0
        lr = learning_rate
        EPOCH = epoch
        for itr in range(EPOCH):
            y_true = []
            y_pred = []
            for src, tgt in train_data:
                (uids, mids, cats, mid_his, cat_his, mid_mask, target, ubh_len, user_his,
                 ibh_len, user_mask, user_his_t, mid_his_t) = data_utils.prepare_data(
                    src, tgt, ubh_maxlen, ibh_maxlen)
                probs, loss, acc, summary = model.train(sess, [uids, mids, cats, mid_his, cat_his,
                                                               mid_mask, target, ubh_len, lr,
                                                               user_his, ibh_len, user_mask,
                                                               user_his_t, mid_his_t])
                # Accumulate predictions for the running train metrics.
                prob_1 = probs[:, 0].tolist()
                target_1 = target[:, 0].tolist()
                for p, t in zip(prob_1, target_1):
                    y_true.append(t)
                    y_pred.append(p)
                train_writer.add_summary(summary, global_step=model.global_step.eval())
                iter += 1
                iter_epoch += 1
                sys.stdout.flush()
                if (iter % test_iter) == 0 or ((itr == EPOCH - 1) and iter % (test_iter / 1) == 0):
                    train_auc = metrics.roc_auc_score(y_true, y_pred)
                    train_f1 = metrics.f1_score(numpy.round(y_true), numpy.round(y_pred))
                    train_loss = metrics.log_loss(y_true, y_pred)
                    train_acc = metrics.accuracy_score(numpy.round(y_true), numpy.round(y_pred))
                    print_metric('train', itr, iter, train_loss, train_acc, train_auc, train_f1)
                    test_auc, test_loss, test_accuracy, test_f1 = eval(sess, test_data, model,
                                                                       best_model_path, eval_writer)
                    print_metric('test', itr, iter, test_loss, test_accuracy, test_auc, test_f1)
                    print_best_metric(itr, iter, test_loss, test_accuracy, test_auc, test_f1)
                    sys.stdout.flush()
                    y_true = []
                    y_pred = []
                if (iter % save_iter) == 0:
                    print('save model iter: %d' % (iter))
                    model.save(sess, model_path)
                # if itr == EPOCH - 1:
                #     if iter_epoch >= test_iter * 10:
                #         break
            print('*****************************************')
            test_auc, test_loss, test_accuracy, test_f1 = eval(sess, test_data, model,
                                                               best_model_path, eval_writer)
            print_metric('test', itr, iter, test_loss, test_accuracy, test_auc, test_f1)
            print_best_metric(itr, iter, test_loss, test_accuracy, test_auc, test_f1)
            sys.stdout.flush()
            print('*****************************************')
            lr *= 0.5
            iter_epoch = 0
def test( train_file = "local_train_splitByUser", test_file = "local_test_splitByUser", uid_voc = "uid_voc.pkl", mid_voc = "mid_voc.pkl", cat_voc = "cat_voc.pkl", batch_size = 128, maxlen = 100, model_type = 'DNN', data_type = 'FP32', seed = 2 ): print("batch_size: ", batch_size) print("model: ", model_type) model_path = "dnn_best_model_trained/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() if model_type == 'DNN': model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'PNN': model = Model_PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Wide': model = Model_WideDeep(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-att-gru': model = Model_DIN_V2_Gru_att_Gru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-gru-att': model = Model_DIN_V2_Gru_Gru_att(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-qa-attGru': model = Model_DIN_V2_Gru_QA_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-vec-attGru': model = Model_DIN_V2_Gru_Vec_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, data_type) else: print ("Invalid model_type : %s", model_type) return # for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES): # print("global variable: ", var) if data_type == 'FP32': model.restore(sess, model_path) if data_type == 'FP16': fp32_variables = [var_name for var_name, _ in tf.contrib.framework.list_variables(model_path)] #print("fp32_variables: ", fp32_variables) sess.run(tf.compat.v1.global_variables_initializer()) sess.run(tf.compat.v1.local_variables_initializer()) for variable in tf.global_variables(): #print("variable: ", variable) if variable.op.name in fp32_variables: var = tf.contrib.framework.load_variable(model_path, variable.op.name) # print("var: ", var) # print("var.dtype: ", var.dtype) if(variable.dtype == 'float16_ref'): tf.add_to_collection('assignOps', variable.assign(tf.cast(var, tf.float16))) # print("var value: ", sess.run(tf.cast(var, tf.float16))) else: tf.add_to_collection('assignOps', variable.assign(var)) else: raise ValueError("Variable %s is missing from checkpoint!" 
% variable.op.name) sess.run(tf.get_collection('assignOps')) # for variable in sess.run(tf.get_collection('assignOps')): # print("after load checkpoint: ", variable) # for variable in tf.global_variables(): # print("after load checkpoint: ", sess.run(variable)) approximate_accelerator_time = 0 test_auc, test_loss, test_accuracy, test_aux_loss, eval_time, num_iters = eval(sess, test_data, model, model_path) approximate_accelerator_time += eval_time print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.9f ---- test_aux_loss: %.4f ---- eval_time: %.3f' % (test_auc, test_loss, test_accuracy, test_aux_loss, eval_time)) test_auc, test_loss, test_accuracy, test_aux_loss, eval_time, num_iters = eval(sess, test_data, model, model_path) approximate_accelerator_time += eval_time print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.9f ---- test_aux_loss: %.4f ---- eval_time: %.3f' % (test_auc, test_loss, test_accuracy, test_aux_loss, eval_time)) test_auc, test_loss, test_accuracy, test_aux_loss, eval_time, num_iters = eval(sess, test_data, model, model_path) approximate_accelerator_time += eval_time print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.9f ---- test_aux_loss: %.4f ---- eval_time: %.3f' % (test_auc, test_loss, test_accuracy, test_aux_loss, eval_time)) test_auc, test_loss, test_accuracy, test_aux_loss, eval_time, num_iters = eval(sess, test_data, model, model_path) approximate_accelerator_time += eval_time print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.9f ---- test_aux_loss: %.4f ---- eval_time: %.3f' % (test_auc, test_loss, test_accuracy, test_aux_loss, eval_time)) test_auc, test_loss, test_accuracy, test_aux_loss, eval_time, num_iters = eval(sess, test_data, model, model_path) approximate_accelerator_time += eval_time print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.9f ---- test_aux_loss: %.4f ---- eval_time: %.3f' % (test_auc, test_loss, test_accuracy, test_aux_loss, eval_time)) print("Total recommendations: %d" % (num_iters*batch_size)) print("Approximate accelerator time in seconds is %.3f" % approximate_accelerator_time) print("Approximate accelerator performance in recommendations/second is %.3f" % (float(5*num_iters*batch_size)/float(approximate_accelerator_time)))
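# Side note: the tf.contrib.framework.list_variables / load_variable calls
# above also exist as tf.train.list_variables / tf.train.load_variable in
# contrib-free TF builds. A small sketch of the same checkpoint inspection
# with those (the helper name is illustrative):
def list_fp32_variables(model_path):
    # Each entry is a (name, shape) pair read from the checkpoint index.
    return [name for name, shape in tf.train.list_variables(model_path)]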
def train( train_file = "local_train_splitByUser", test_file = "local_test_splitByUser", uid_voc = "uid_voc.pkl", mid_voc = "mid_voc.pkl", cat_voc = "cat_voc.pkl", batch_size = 128, maxlen = 100, test_iter = 100, save_iter = 100, model_type = 'DNN', data_type = 'FP32', seed = 2,): print("model: ", model_type) model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed) best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) print("batch_size: ", batch_size) end_2_end_start_time = time.time() read_size = TOTAL_TRAIN_SIZE #1086120 train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, read_size, maxlen, shuffle_each_epoch=False) n_uid, n_mid, n_cat = train_data.get_n() print("n_uid:{}, n_mid:{}, n_cat:{}".format(n_uid, n_mid, n_cat)) config = {} config['total_steps'] = 40000 config['n_uid'] = n_uid config['n_mid'] = n_mid config['n_cat'] = n_cat config['maxlen'] = maxlen config['model_type'] = model_type config['data_type'] = data_type config['batch_size']= batch_size # create classifier estimator = tf.estimator.Estimator( model_dir='/workspace/save_models/', model_fn=model_fn, params=config) features = collections.OrderedDict() tgt_list = [] feature_keys = ['uids', 'mids', 'cats', 'mid_his', 'cat_his', 'mid_mask', 'sl', 'noclk_mids', 'noclk_cats'] for k in feature_keys: features[k] = [] for src, tgt in train_data: uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats = prepare_data(src, tgt, maxlen, return_neg=True) features['uids'].append(uids) features['mids'].append(mids) features['cats' ].append(cats) features['mid_his'].append(mid_his) features['cat_his'].append(cat_his) features['mid_mask'].append(mid_mask) features['sl'].append(sl) features['noclk_mids'].append(noclk_mids) features['noclk_cats'].append(noclk_cats) tgt_list.append(tgt) break for k in features: features[k] = np.squeeze(np.vstack(features[k])) if features[k].dtype == 'float64': features[k] = np.float32(features[k]) if features[k].dtype == 'int64': features[k] = np.int32(features[k]) y = np.squeeze(np.vstack(tgt_list)) if y.dtype == 'int64': y = np.int32(y) if y.dtype == 'float64': y = np.float32(y) input_fn = tf.estimator.inputs.numpy_input_fn(x=features, y=y, batch_size=2048*2, shuffle=True) start_time = time.time() estimator.train(input_fn, hooks=None, steps=None, max_steps=None, saving_listeners=None) end_time = time.time() approximate_accelerator_time = end_time - start_time end2end_time = end_time - end_2_end_start_time print("Approximate accelerator time in seconds is %.3f" % approximate_accelerator_time) print("Approximate end2end accelerator time in seconds is %.3f" % end2end_time) print("Approximate accelerator performance in recommendations/second is %.3f" % (float(TOTAL_TRAIN_SIZE)/float(approximate_accelerator_time))) import pdb pdb.set_trace() train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(),max_steps=config["total_steps"]) eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn()) # setup train spec #train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, True), # max_steps=config["total_steps"]) #eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen,False)) # run train and evaluate tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
def train( train_file="local_train_splitByUser", test_file="local_test_splitByUser", batch_size=BATCH_SIZE, maxlen=MAXLEN, test_iter=TEST_ITER, save_iter=100, model_type='DNN', seed=2, ): model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed) best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, FEATURE_COUNT, QUERY_COUNT, voc_list, batch_size, maxlen, shuffle_each_epoch=False) test_data = DataIterator(test_file, FEATURE_COUNT, QUERY_COUNT, voc_list, batch_size, maxlen) n_query, n = train_data.get_n() if model_type == 'DNN': model = Model_DNN(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'PNN': model = Model_PNN(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Wide': model = Model_WideDeep(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-att-gru': model = Model_DIN_V2_Gru_att_Gru(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-gru-att': model = Model_DIN_V2_Gru_Gru_att(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-qa-attGru': model = Model_DIN_V2_Gru_QA_attGru(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-vec-attGru': model = Model_DIN_V2_Gru_Vec_attGru(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = Model_DIN_V2_Gru_Vec_attGru_Neg(n, n_query, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) else: print("Invalid model_type : %s", model_type) return # model = Model_DNN(n_query, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) sys.stdout.flush() print( ' test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, best_model_path)) sys.stdout.flush() iter = 0 lr = 0.001 for itr in range(3): print('iter start: ') loss_sum = 0.0 accuracy_sum = 0. aux_loss_sum = 0. 
stored_arr = [] for src, tgt in train_data: uids, item, item_his, mid_mask, target, sl, noclk_his = prepare_data( src, tgt, maxlen, return_neg=True) loss, acc, aux_loss = model.train(sess, [ uids, item, item_his, mid_mask, target, sl, lr, noclk_his ]) loss_sum += loss accuracy_sum += acc aux_loss_sum += aux_loss prob, _, _, _ = model.calculate( sess, [uids, item, item_his, mid_mask, target, sl, noclk_his]) prob_1 = prob[:, 0].tolist() target_1 = target[:, 0].tolist() for p, t in zip(prob_1, target_1): stored_arr.append([p, t]) iter += 1 sys.stdout.flush() if (iter % test_iter) == 0: train_auc = calc_auc(stored_arr) print('iter: %d ----> train_auc: %.4f ---- train_loss: %.4f ---- train_accuracy: %.4f ---- train_aux_loss: %.4f' % \ (iter, train_auc, loss_sum / test_iter, accuracy_sum / test_iter, aux_loss_sum / test_iter)) print( ' test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, best_model_path)) train_loss.append(loss_sum / test_iter) train_accuracy.append(accuracy_sum / test_iter) train_aux_loss.append(aux_loss_sum / test_iter) train_auc_list.append(train_auc) stored_arr = [] loss_sum = 0.0 accuracy_sum = 0.0 aux_loss_sum = 0.0 if (iter % save_iter) == 0: print('save model iter: %d' % (iter)) model.save(sess, model_path + "--" + str(iter)) lr *= LEARNING_RATE_DECAY print('iter end')
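# A self-contained sketch of the rank-based AUC that calc_auc (imported from
# this repo's utilities) computes over the accumulated [probability, label]
# pairs in stored_arr above; the repo's implementation may differ in details.
def calc_auc_sketch(raw_arr):
    # Mann-Whitney U statistic: sort by score ascending, average ranks over
    # tied scores, then count how often a positive outranks a negative.
    arr = sorted(raw_arr, key=lambda d: d[0])
    pos = sum(1 for _, t in arr if t >= 0.5)
    neg = len(arr) - pos
    if pos == 0 or neg == 0:
        return 0.5
    rank_sum = 0.0
    i = 0
    while i < len(arr):
        j = i
        while j < len(arr) and arr[j][0] == arr[i][0]:
            j += 1
        avg_rank = (i + 1 + j) / 2.0  # ranks are 1-based
        for k in range(i, j):
            if arr[k][1] >= 0.5:
                rank_sum += avg_rank
        i = j
    return (rank_sum - pos * (pos + 1) / 2.0) / (pos * neg)

# e.g. calc_auc_sketch([[0.9, 1.0], [0.1, 0.0]]) -> 1.0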
def train( train_file="local_train_splitByUser", test_file="local_test_splitByUser", uid_voc="uid_voc.pkl", mid_voc="mid_voc.pkl", cat_voc="cat_voc.pkl", batch_size=128, maxlen=100, test_iter=8400, save_iter=8400, model_type='DNN', seed=2, ): model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed) best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: label_type = 1 train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, shuffle_each_epoch=False, label_type=label_type) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, label_type=label_type) n_uid, n_mid, n_cat, n_carte = train_data.get_n() if model_type == 'DNN': model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False) elif model_type == 'Cartesion': model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False, use_cartes=True) elif model_type == 'CAN+Cartesion': model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_coaction=True, use_cartes=True) elif model_type == 'CAN': model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_coaction=True) elif model_type == 'PNN': model = Model_PNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False) elif model_type == 'ONN': model = Model_ONN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False) elif model_type == 'Wide': model = Model_WideDeep(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'NCF': model = Model_NCF(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'FM': model = Model_FM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False) elif model_type == 'FFM': model = Model_FFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False) elif model_type == 'DeepFM': model = Model_DeepFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False) elif model_type == 'DeepFFM': model = Model_DeepFFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False) elif model_type == 'xDeepFM': model = Model_xDeepFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False) elif model_type == 'ONN': model = Model_ONN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'CAN+DIEN': model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_coaction=True) else: print("Invalid model_type : %s" % model_type) return print("Model: ", model_type) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) sys.stdout.flush() count() start_time = time.time() iter = 0 lr = 0.001 for itr in range(1): loss_sum = 0.0 accuracy_sum = 0. aux_loss_sum = 0. 
for src, tgt in train_data: uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats, carte = prepare_data( src, tgt, maxlen, return_neg=True) loss, acc, aux_loss = model.train(sess, [ uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr, noclk_mids, noclk_cats, carte ]) loss_sum += loss accuracy_sum += acc aux_loss_sum += aux_loss iter += 1 sys.stdout.flush() if (iter % 100) == 0: print( 'iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- train_aux_loss: %.4f' % (iter, loss_sum / 100, accuracy_sum / 100, aux_loss_sum / 100)) loss_sum = 0.0 accuracy_sum = 0.0 aux_loss_sum = 0.0 if (iter % test_iter) == 0: auc_, loss_, acc_, aux_ = eval(sess, test_data, model, best_model_path) print( 'iter: %d --- test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % (iter, auc_, loss_, acc_, aux_)) loss_sum = 0.0 accuracy_sum = 0.0 aux_loss_sum = 0.0 if (iter % save_iter) == 0: print('save model iter: %d' % (iter)) model.save(sess, model_path + "--" + str(iter)) lr *= 0.5
def train( train_file = "local_train_sample_sorted_by_time", test_file = "local_test_sample_sorted_by_time", uid_voc = "uid_voc.pkl", mid_voc = "mid_voc.pkl", cat_voc = "cat_voc.pkl", batch_size = 128, maxlen = 20, user_maxlen = 50, test_iter = 100, save_iter = 100, model_type = 'DNN', seed = 2, ): model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed)+"_"+str(user_maxlen) best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed)+"_"+str(user_maxlen) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, shuffle_each_epoch=False) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() print(n_uid, n_mid, n_cat) if model_type == 'DNN': model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'SVDPP': model = Model_SVDPP(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'GRU4REC': model = Model_GRU4REC(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DUMN': model = Model_DUMN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'PNN': model = Model_PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Wide': model = Model_WideDeep(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = DIEN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) else: print ("Invalid model_type : %s", model_type) return # model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) sys.stdout.flush() print(' test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- Logloss: %.4f' % eval(sess, test_data, model, best_model_path,maxlen,user_maxlen)) sys.stdout.flush() start_time = time.time() iter = 0 lr = 0.001 for itr in range(3): loss_sum = 0.0 accuracy_sum = 0. log_loss_sum = 0. for src, tgt in train_data: uids, mids, cats, mid_his, cat_his, mid_mask, item_user_his, item_user_his_mask, item_user_his_mid, item_user_his_cat, item_user_his_mid_mask, target, sl = prepare_data(src, tgt, maxlen,user_maxlen) loss, acc, log_loss = model.train(sess, [uids, mids, cats, mid_his, cat_his, mid_mask, item_user_his, item_user_his_mask, item_user_his_mid, item_user_his_cat, item_user_his_mid_mask, target, sl, lr]) loss_sum += loss accuracy_sum += acc log_loss_sum += log_loss iter += 1 sys.stdout.flush() if (iter % test_iter) == 0: print('iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- Logloss: %.4f' % \ (iter, loss_sum / test_iter, accuracy_sum / test_iter, log_loss_sum / test_iter)) print(' test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- Logloss: %.4f' % eval(sess, test_data, model, best_model_path, maxlen,user_maxlen)) loss_sum = 0.0 accuracy_sum = 0.0 aux_loss_sum = 0.0 if (iter % save_iter) == 0: print('save model iter: %d' %(iter)) model.save(sess, model_path+"--"+str(iter)) lr *= 0.5
class SampleIO(object): def __init__(self, train_file="local_train_splitByUser", test_file="local_test_splitByUser", uid_voc="uid_voc.pkl", mid_voc="mid_voc.pkl", cat_voc="cat_voc.pkl", item_info='item-info', reviews_info='reviews-info', batch_size=128, maxlen=100, embedding_dim=18, light_embedding_dim=4, return_neg=True): self.maxlen = maxlen self.return_neg = return_neg self.train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, item_info, reviews_info, batch_size, maxlen, shuffle_each_epoch=False) self.test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, item_info, reviews_info, batch_size, maxlen) self.n_uid, self.n_mid, self.n_cat = self.train_data.get_n() self.embedding_dim = embedding_dim self.light_embedding_dim = light_embedding_dim def get_n(self): return self.n_uid, self.n_mid, self.n_cat def next_train(self): if self.return_neg: return self._py_func(self._next_train) else: return self._py_func(self._next_train, sparse_cnt=5) def next_test(self): if self.return_neg: return self._py_func(self._next_test) else: return self._py_func(self._next_test, sparse_cnt=5) def _next_train(self): try: src, tgt = self.train_data.next() except StopIteration: self.src = self.tgt = None raise OutOfRange("train end") return self.prepare_data(src, tgt, self.maxlen, return_neg=self.return_neg) def _next_test(self): try: src, tgt = self.test_data.next() except StopIteration: self.src = self.tgt = None raise OutOfRange("test end") return self.prepare_data(src, tgt, self.maxlen, return_neg=self.return_neg) def _py_func(self, fn, sparse_cnt=7): types = [] for _ in range(sparse_cnt): types.extend([np.int64, np.float32, np.int32]) types.extend([np.float32, np.float32, np.int32]) types.extend([np.int32 for _ in range(8)]) datas = xdl.py_func(fn, [], output_type=types) sparse_tensors = [] for i in range(sparse_cnt): sparse_tensors.append( xdl.SparseTensor(datas[3 * i], datas[3 * i + 1], datas[3 * i + 2])) return sparse_tensors + datas[sparse_cnt * 3:] def prepare_data(self, input, target, maxlen=None, return_neg=False): # x: a list of sentences lengths_x = [len(s[4]) for s in input] seqs_mid = [inp[3] for inp in input] seqs_cat = [inp[4] for inp in input] noclk_seqs_mid = [inp[5] for inp in input] noclk_seqs_cat = [inp[6] for inp in input] if maxlen is not None: new_seqs_mid = [] new_seqs_cat = [] new_noclk_seqs_mid = [] new_noclk_seqs_cat = [] new_lengths_x = [] for l_x, inp in zip(lengths_x, input): if l_x > maxlen: new_seqs_mid.append(inp[3][l_x - maxlen:]) new_seqs_cat.append(inp[4][l_x - maxlen:]) new_noclk_seqs_mid.append(inp[5][l_x - maxlen:]) new_noclk_seqs_cat.append(inp[6][l_x - maxlen:]) new_lengths_x.append(maxlen) else: new_seqs_mid.append(inp[3]) new_seqs_cat.append(inp[4]) new_noclk_seqs_mid.append(inp[5]) new_noclk_seqs_cat.append(inp[6]) new_lengths_x.append(l_x) lengths_x = new_lengths_x seqs_mid = new_seqs_mid seqs_cat = new_seqs_cat noclk_seqs_mid = new_noclk_seqs_mid noclk_seqs_cat = new_noclk_seqs_cat if len(lengths_x) < 1: return None, None, None, None n_samples = len(seqs_mid) maxlen_x = np.max(lengths_x) + 1 neg_samples = len(noclk_seqs_mid[0][0]) mid_his = np.zeros((n_samples, maxlen_x)).astype('int64') cat_his = np.zeros((n_samples, maxlen_x)).astype('int64') noclk_mid_his = np.zeros( (n_samples, maxlen_x, neg_samples)).astype('int64') noclk_cat_his = np.zeros( (n_samples, maxlen_x, neg_samples)).astype('int64') mid_mask = np.zeros((n_samples, maxlen_x)).astype('float32') for idx, [s_x, s_y, no_sx, no_sy] in enumerate( zip(seqs_mid, seqs_cat, noclk_seqs_mid, 
noclk_seqs_cat)): mid_mask[idx, :lengths_x[idx] + 1] = 1. mid_his[idx, :lengths_x[idx]] = s_x cat_his[idx, :lengths_x[idx]] = s_y noclk_mid_his[idx, :lengths_x[idx], :] = no_sx noclk_cat_his[idx, :lengths_x[idx], :] = no_sy uids = np.array([inp[0] for inp in input], dtype=np.int64) mids = np.array([inp[1] for inp in input], dtype=np.int64) cats = np.array([inp[2] for inp in input], dtype=np.int64) id_values = np.ones([n_samples], np.float32) his_values = np.ones([n_samples * maxlen_x], np.float32) neg_his_values = np.ones([n_samples * maxlen_x * neg_samples], np.float32) id_seg = np.array([i + 1 for i in range(n_samples)], dtype=np.int32) his_seg = np.array([i + 1 for i in range(n_samples * maxlen_x)], dtype=np.int32) neg_his_seg = np.array( [i + 1 for i in range(n_samples * maxlen_x * neg_samples)], dtype=np.int32) results = [] for e in [uids, mids, cats]: results.append(np.reshape(e, (-1))) results.append(id_values) results.append(id_seg) for e in [mid_his, cat_his]: results.append(np.reshape(e, (-1))) results.append(his_values) results.append(his_seg) if return_neg: for e in [noclk_mid_his, noclk_cat_his]: results.append(np.reshape(e, (-1))) results.append(neg_his_values) results.append(neg_his_seg) results.extend([ mid_mask, np.array(target, dtype=np.float32), np.array(lengths_x, dtype=np.int32) ]) # for split results.append(np.array([n_samples, n_samples], dtype=np.int32)) # shape results.extend([ np.array([-1, self.embedding_dim], dtype=np.int32), np.array([-1, maxlen_x, self.embedding_dim], dtype=np.int32), np.array([-1, maxlen_x, neg_samples, self.embedding_dim], dtype=np.int32), np.array([-1, self.light_embedding_dim], dtype=np.int32), np.array([-1, maxlen_x, self.light_embedding_dim], dtype=np.int32), np.array([-1, maxlen_x, neg_samples, self.light_embedding_dim], dtype=np.int32), np.array([-1, maxlen_x], dtype=np.int32) ]) return results
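# Hypothetical usage sketch (not part of this repo): how a training loop under
# XDL would consume SampleIO. With return_neg=True, next_train() returns seven
# xdl.SparseTensor objects for the id features first, followed by the dense
# tensors (mask, target, lengths, plus the split sizes and reshape hints that
# prepare_data above appends).
def sample_io_usage_sketch():
    sample_io = SampleIO(batch_size=128, maxlen=100,
                         embedding_dim=18, light_embedding_dim=4)
    n_uid, n_mid, n_cat = sample_io.get_n()
    datas = sample_io.next_train()
    sparse_inputs, dense_inputs = datas[:7], datas[7:]
    return sparse_inputs, dense_inputs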
def train( train_file="local_train_splitByUser", test_file="local_test_splitByUser", valid_file="local_valid_splitByUser", uid_voc="uid_voc.pkl", mid_voc="mid_voc.pkl", cat_voc="cat_voc.pkl", batch_size=128, maxlen=20, test_iter=100, save_iter=100, model_type='DNN', seed=2, ): model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed) best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, shuffle_each_epoch=False) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) valid_data = DataIterator(valid_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() print('n_uid', n_uid) print('n_mid', n_mid) if model_type == 'DNN': model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'PNN': model = Model_PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Wide': model = Model_WideDeep(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-att-gru': model = Model_DIN_V2_Gru_att_Gru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-gru-att': model = Model_DIN_V2_Gru_Gru_att(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-qa-attGru': model = Model_DIN_V2_Gru_QA_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-vec-attGru': model = Model_DIN_V2_Gru_Vec_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DMIN': model = Model_DNN_Multi_Head(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) else: print("Invalid model_type : %s", model_type) return sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) sys.stdout.flush() print( ' test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, best_model_path, maxlen)) sys.stdout.flush() start_time = time.time() iter = 0 lr = 0.001 print(lr) for itr in range(3): loss_sum = 0.0 accuracy_sum = 0. aux_loss_sum = 0. for src, tgt in train_data: uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats = prepare_data( src, tgt, maxlen, return_neg=True) loss, acc, aux_loss = model.train(sess, [ uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr, noclk_mids, noclk_cats ]) loss_sum += loss accuracy_sum += acc aux_loss_sum += aux_loss iter += 1 sys.stdout.flush() if (iter % test_iter) == 0: print('itr: %d --->iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- tran_aux_loss: %.4f' % \ ( itr,iter, loss_sum / test_iter, accuracy_sum / test_iter, aux_loss_sum / test_iter)) print( ' test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, valid_data, model, best_model_path, maxlen=maxlen)) loss_sum = 0.0 accuracy_sum = 0.0 aux_loss_sum = 0.0
def train( train_file="local_train_splitByUser", test_file="local_test_splitByUser", uid_voc="uid_voc.pkl", mid_voc="mid_voc.pkl", cat_voc="cat_voc.pkl", batch_size=3, # 128, maxlen=10, #100, test_iter=100, save_iter=100, model_type='DNN', seed=2, ): model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed) best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, shuffle_each_epoch=False) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() if model_type == 'DNN': model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'PNN': model = Model_PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Wide': model = Model_WideDeep(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-att-gru': model = Model_DIN_V2_Gru_att_Gru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-gru-att': model = Model_DIN_V2_Gru_Gru_att(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-qa-attGru': model = Model_DIN_V2_Gru_QA_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-vec-attGru': model = Model_DIN_V2_Gru_Vec_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) else: print("Invalid model_type : %s", model_type) return # model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) sys.stdout.flush() print( 'test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, best_model_path)) sys.stdout.flush() start_time = time.time() iter = 0 lr = 0.001 for itr in range(200): loss_sum = 0.0 accuracy_sum = 0. aux_loss_sum = 0. for src, tgt in train_data: uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats = prepare_data( src, tgt, maxlen, return_neg=True) # uids:[0 0 0] # mids:[ 93004 250395 165045] # cats:[1 1 1] # mid_his:[[ 10858 206369 42674 281185 375 3980 158652 231 158893 842] # [327714 92818 137383 26048 237287 119547 112743 94936 345191 0] # [327714 92818 137383 26048 237287 119547 112743 94936 345191 0]] # cat_his:[[ 1 1 1 1 1 1 39 1 1 1] # [ 1 1 1 1 1 1 1 1 1 0] # [ 1 1 1 1 1 1 1 1 1 0]] # mid_mask:[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]...[1. 1. 1. 1. 1. 1. 1. 1. 1. 0.]] # target:[[1.0, 0.0], [1.0, 0.0], [0.0, 1.0]] # lengths_x:[10, 9, 9] # noclk_mid_his:[... # [[ 7471 6 161152 8051 1688] # [ 67996 656 2911 9389 198959] # [ 36573 169400 327841 28416 7625] # [ 100 795 48951 208851 352026] # [ 10325 173799 1004 78434 132] # [ 23248 126527 72156 47462 4704] # [ 74930 48823 275672 14529 174552] # [230953 13807 79895 987 30795] # [ 7843 142992 46036 368 101692] # [ 0 0 0 0 0]] # ...] # noclk_cat_his:[... 
# [[ 1 1 1 2 1] # [ 1 1 1 1 1] # [ 1 5 1510 71 2] # [ 1 1 1 1 1] # [ 1 541 1 2 1] # [ 1 1 1 2 1] # [ 5 1 4 1 1] # [ 1 1 1 1 2] # [ 1 1 1 1 1] # [ 0 0 0 0 0]] # ...] loss, acc, aux_loss = model.train(sess, [ uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr, noclk_mids, noclk_cats ]) loss_sum += loss accuracy_sum += acc aux_loss_sum += aux_loss iter += 1 sys.stdout.flush() if (iter % test_iter) == 0: print('iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- tran_aux_loss: %.4f' % \ (iter, loss_sum / test_iter, accuracy_sum / test_iter, aux_loss_sum / test_iter)) print( 'test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, best_model_path)) loss_sum = 0.0 accuracy_sum = 0.0 aux_loss_sum = 0.0 if (iter % save_iter) == 0: print('save model iter: %d' % (iter)) model.save(sess, model_path + "--" + str(iter)) lr *= 0.5
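# A minimal numpy sketch of the padding/masking convention visible in the
# commented example batch above: histories are left-aligned into a
# [batch, max_len] matrix and mid_mask marks the valid positions.
# pad_histories is a hypothetical helper, not part of prepare_data.
import numpy as np

def pad_histories(seqs):
    lengths = [len(s) for s in seqs]
    max_len = max(lengths)
    his = np.zeros((len(seqs), max_len), dtype=np.int64)
    mask = np.zeros((len(seqs), max_len), dtype=np.float32)
    for i, s in enumerate(seqs):
        his[i, :lengths[i]] = s
        mask[i, :lengths[i]] = 1.0
    return his, mask, lengths

his, mask, sl = pad_histories([[10858, 206369, 42674], [327714, 92818]])
# his  -> [[ 10858 206369  42674], [327714  92818      0]]
# mask -> [[1. 1. 1.], [1. 1. 0.]]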
class SampleIO(object): def __init__(self, train_file="local_train_splitByUser", test_file="local_test_splitByUser", uid_voc="uid_voc.pkl", mid_voc="mid_voc.pkl", cat_voc="cat_voc.pkl", item_info='item-info', reviews_info='reviews-info', batch_size=128, maxlen=100, embedding_dim=None, return_neg=True): self.maxlen = maxlen self.embedding_dim = embedding_dim self.return_neg = return_neg self.train_data = DataIterator( train_file, uid_voc, mid_voc, cat_voc, item_info, reviews_info, batch_size, maxlen, shuffle_each_epoch=False) self.test_data = DataIterator( test_file, uid_voc, mid_voc, cat_voc, item_info, reviews_info, batch_size, maxlen) self.n_uid, self.n_mid, self.n_cat = self.train_data.get_n() def get_n(self): return self.n_uid, self.n_mid, self.n_cat def next_train(self): if self.return_neg: return self._py_func(self._next_train) else: return self._py_func(self._next_train, sparse_cnt=5) def next_test(self): if self.return_neg: return self._py_func(self._next_test) else: return self._py_func(self._next_test, sparse_cnt=5) def _next_train(self): try: src, tgt = self.train_data.next() except StopIteration: self.src = self.tgt = None raise OutOfRange("train end") return self.prepare_data(src, tgt, self.maxlen, return_neg=self.return_neg) def _next_test(self): try: src, tgt = self.test_data.next() except StopIteration: self.src = self.tgt = None raise OutOfRange("test end") return self.prepare_data(src, tgt, self.maxlen, return_neg=self.return_neg) def _py_func(self, fn, sparse_cnt=7): types = [] for _ in range(sparse_cnt): types.extend([np.int64, np.float32, np.int32]) types.extend([np.float32, np.float32, np.int32]) types.extend([np.int32 for _ in range(5)]) datas = xdl.py_func(fn, [], output_type=types) sparse_tensors = [] for i in range(sparse_cnt): sparse_tensors.append(xdl.SparseTensor( datas[3 * i], datas[3 * i + 1], datas[3 * i + 2])) return sparse_tensors + datas[sparse_cnt * 3:] def prepare_data(self, input, target, maxlen=None, return_neg=False): # x: a list of sentences lengths_x = [len(s[4]) for s in input] seqs_mid = [inp[3] for inp in input] seqs_cat = [inp[4] for inp in input] noclk_seqs_mid = [inp[5] for inp in input] noclk_seqs_cat = [inp[6] for inp in input] if maxlen is not None: new_seqs_mid = [] new_seqs_cat = [] new_noclk_seqs_mid = [] new_noclk_seqs_cat = [] new_lengths_x = [] for l_x, inp in zip(lengths_x, input): if l_x > maxlen: new_seqs_mid.append(inp[3][l_x - maxlen:]) new_seqs_cat.append(inp[4][l_x - maxlen:]) new_noclk_seqs_mid.append(inp[5][l_x - maxlen:]) new_noclk_seqs_cat.append(inp[6][l_x - maxlen:]) new_lengths_x.append(maxlen) else: new_seqs_mid.append(inp[3]) new_seqs_cat.append(inp[4]) new_noclk_seqs_mid.append(inp[5]) new_noclk_seqs_cat.append(inp[6]) new_lengths_x.append(l_x) lengths_x = new_lengths_x seqs_mid = new_seqs_mid seqs_cat = new_seqs_cat noclk_seqs_mid = new_noclk_seqs_mid noclk_seqs_cat = new_noclk_seqs_cat if len(lengths_x) < 1: return None, None, None, None n_samples = len(seqs_mid) maxlen_x = np.max(lengths_x) + 1 neg_samples = len(noclk_seqs_mid[0][0]) mid_his = np.zeros((n_samples, maxlen_x)).astype('int64') cat_his = np.zeros((n_samples, maxlen_x)).astype('int64') noclk_mid_his = np.zeros( (n_samples, maxlen_x, neg_samples)).astype('int64') noclk_cat_his = np.zeros( (n_samples, maxlen_x, neg_samples)).astype('int64') mid_mask = np.zeros((n_samples, maxlen_x)).astype('float32') for idx, [s_x, s_y, no_sx, no_sy] in enumerate(zip(seqs_mid, seqs_cat, noclk_seqs_mid, noclk_seqs_cat)): mid_mask[idx, :lengths_x[idx] + 1] = 1. 
mid_his[idx, :lengths_x[idx]] = s_x cat_his[idx, :lengths_x[idx]] = s_y noclk_mid_his[idx, :lengths_x[idx], :] = no_sx noclk_cat_his[idx, :lengths_x[idx], :] = no_sy uids = np.array([inp[0] for inp in input], dtype=np.int64) mids = np.array([inp[1] for inp in input], dtype=np.int64) cats = np.array([inp[2] for inp in input], dtype=np.int64) id_values = np.ones([n_samples], np.float32) his_values = np.ones([n_samples * maxlen_x], np.float32) neg_his_values = np.ones( [n_samples * maxlen_x * neg_samples], np.float32) id_seg = np.array([i + 1 for i in range(n_samples)], dtype=np.int32) his_seg = np.array( [i + 1 for i in range(n_samples * maxlen_x)], dtype=np.int32) neg_his_seg = np.array( [i + 1 for i in range(n_samples * maxlen_x * neg_samples)], dtype=np.int32) results = [] for e in [uids, mids, cats]: results.append(np.reshape(e, (-1))) results.append(id_values) results.append(id_seg) for e in [mid_his, cat_his]: results.append(np.reshape(e, (-1))) results.append(his_values) results.append(his_seg) if return_neg: for e in [noclk_mid_his, noclk_cat_his]: results.append(np.reshape(e, (-1))) results.append(neg_his_values) results.append(neg_his_seg) results.extend( [mid_mask, np.array(target, dtype=np.float32), np.array(lengths_x, dtype=np.int32)]) # for split results.append(np.array([n_samples, n_samples], dtype=np.int32)) # shape results.extend([np.array([-1, self.embedding_dim], dtype=np.int32), np.array([-1, maxlen_x, self.embedding_dim], dtype=np.int32), np.array( [-1, maxlen_x, neg_samples, self.embedding_dim], dtype=np.int32), np.array([-1, maxlen_x], dtype=np.int32)]) return results
def train( train_file = "local_train_splitByUser", test_file = "local_test_splitByUser", uid_voc = "uid_voc.pkl", mid_voc = "mid_voc.pkl", cat_voc = "cat_voc.pkl", batch_size = 128, maxlen = 100, test_iter = 100, save_iter = 100, model_type = 'DNN', seed = 2, ): model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed) best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, shuffle_each_epoch=False) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() if model_type == 'DNN': model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'PNN': model = Model_PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Wide': model = Model_WideDeep(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-att-gru': model = Model_DIN_V2_Gru_att_Gru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-gru-att': model = Model_DIN_V2_Gru_Gru_att(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-qa-attGru': model = Model_DIN_V2_Gru_QA_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-vec-attGru': model = Model_DIN_V2_Gru_Vec_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) else: print ("Invalid model_type : %s", model_type) return # model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) sys.stdout.flush() print(' test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, best_model_path)) sys.stdout.flush() start_time = time.time() iter = 0 lr = 0.001 for itr in range(3): loss_sum = 0.0 accuracy_sum = 0. aux_loss_sum = 0. for src, tgt in train_data: uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats = prepare_data(src, tgt, maxlen, return_neg=True) loss, acc, aux_loss = model.train(sess, [uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr, noclk_mids, noclk_cats]) loss_sum += loss accuracy_sum += acc aux_loss_sum += aux_loss iter += 1 sys.stdout.flush() if (iter % test_iter) == 0: print('iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- tran_aux_loss: %.4f' % \ (iter, loss_sum / test_iter, accuracy_sum / test_iter, aux_loss_sum / test_iter)) print(' test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, best_model_path)) loss_sum = 0.0 accuracy_sum = 0.0 aux_loss_sum = 0.0 if (iter % save_iter) == 0: print('save model iter: %d' %(iter)) model.save(sess, model_path+"--"+str(iter)) lr *= 0.5
def train( train_file = "local_train_splitByUser", test_file = "local_test_splitByUser", uid_voc = "uid_voc.pkl", mid_voc = "mid_voc.pkl", cat_voc = "cat_voc.pkl", batch_size = 128, maxlen = 100, test_iter = 100, save_iter = 100, model_type = 'DNN', data_type = 'FP32', seed = 2, ): print("batch_size: ", batch_size) print("model: ", model_type) model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed) best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed) gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) as sess: train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, shuffle_each_epoch=False) test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen) n_uid, n_mid, n_cat = train_data.get_n() print("Number of uid = %i, mid = %i, cat = %i" % (n_uid, n_mid, n_cat)) #Number of uid = 543060, mid = 367983, cat = 1601 for Amazon dataset if model_type == 'DNN': model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, data_type = data_type, batch_size = batch_size, max_length = maxlen) elif model_type == 'PNN': model = Model_PNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'Wide': model = Model_WideDeep(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN': model = Model_DIN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-att-gru': model = Model_DIN_V2_Gru_att_Gru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-gru-att': model = Model_DIN_V2_Gru_Gru_att(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-qa-attGru': model = Model_DIN_V2_Gru_QA_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIN-V2-gru-vec-attGru': model = Model_DIN_V2_Gru_Vec_attGru(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) elif model_type == 'DIEN': model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, data_type, batch_size = batch_size, max_length = maxlen) else: print ("Invalid model_type : %s", model_type) return # for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES): # print("global variable dtype: ", var.dtype) # if var.dtype == 'float32_ref': # print("global variable: ", var) # model = Model_DNN(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE) sess.run(tf.compat.v1.global_variables_initializer()) sess.run(tf.compat.v1.local_variables_initializer()) sys.stdout.flush() #print('test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f ---- eval_time: %.3f ---- num_iters: %d' % eval(sess, test_data, model, best_model_path)) sys.stdout.flush() iter = 0 lr = 0.001 train_size = 0 approximate_accelerator_time = 0 for itr in range(1): loss_sum = 0.0 accuracy_sum = 0. aux_loss_sum = 0. 
for src, tgt in train_data: uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats = prepare_data(src, tgt, maxlen, return_neg=True) start_time = time.time() loss, acc, aux_loss = model.train(sess, [uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr, noclk_mids, noclk_cats]) end_time = time.time() # print("training time of one batch: %.3f" % (end_time - start_time)) approximate_accelerator_time += end_time - start_time loss_sum += loss accuracy_sum += acc aux_loss_sum += aux_loss iter += 1 train_size += batch_size sys.stdout.flush() if (iter % test_iter) == 0: # print("train_size: %d" % train_size) # print("approximate_accelerator_time: %.3f" % approximate_accelerator_time) print('iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- train_aux_loss: %.4f' % \ (iter, loss_sum / test_iter, accuracy_sum / test_iter, aux_loss_sum / test_iter)) print(' test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f ---- eval_time: %.3f ---- num_iters: %d' % eval(sess, test_data, model, best_model_path)) loss_sum = 0.0 accuracy_sum = 0.0 aux_loss_sum = 0.0 if (iter % save_iter) == 0: print('save model iter: %d' %(iter)) model.save(sess, model_path+"--"+str(iter)) if train_size >= TOTAL_TRAIN_SIZE: break lr *= 0.5 if train_size >= TOTAL_TRAIN_SIZE: break print("iter: %d" % iter) print("Total recommendations: %d" % TOTAL_TRAIN_SIZE) print("Approximate accelerator time in seconds is %.3f" % approximate_accelerator_time) print("Approximate accelerator performance in recommendations/second is %.3f" % (float(TOTAL_TRAIN_SIZE)/float(approximate_accelerator_time)))
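# A small sketch of the timing pattern used above, factored into a context
# manager so per-batch accelerator time can be accumulated without explicit
# start/end bookkeeping. This helper is hypothetical, not part of this repo.
import time
from contextlib import contextmanager

@contextmanager
def accumulate_time(totals, key):
    start = time.time()
    yield
    totals[key] = totals.get(key, 0.0) + (time.time() - start)

totals = {}
with accumulate_time(totals, 'train'):
    pass  # model.train(sess, feed) would go here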
def train(train_file="local_train_splitByUser",
          test_file="local_test_splitByUser",
          uid_voc="uid_voc.pkl",
          mid_voc="mid_voc.pkl",
          cat_voc="cat_voc.pkl",
          batch_size=128,
          maxlen=100,
          test_iter=100,
          save_iter=100,
          model_type='DNN',
          data_type='FP32',
          seed=2,
          saved_models=args.saved_models):
    print("model: ", model_type)
    model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed)
    best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed)
    end_2_end_start_time = time.time()

    tf_config = json.loads(os.environ.get("TF_CONFIG", "{}"))
    task_env = tf_config.get("task", {})
    task_type = task_env.get("type", "")
    task_index = task_env.get("index", 0)
    # parameter servers only need a token read; workers read the full set
    if task_type == 'ps':
        read_total_size = 128
    else:
        read_total_size = TOTAL_TRAIN_SIZE  # 1086120
    print("batch_size: ", batch_size)
    print("Training size:{}".format(read_total_size))
    print("Loading and preprocessing data ...")
    train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc,
                              read_total_size, maxlen, shuffle_each_epoch=False)
    n_uid, n_mid, n_cat = train_data.get_n()
    print("n_uid:{}, n_mid:{}, n_cat:{}".format(n_uid, n_mid, n_cat))

    ps_nodes = tf_config.get("cluster", {}).get('ps', [])
    worker_nodes = tf_config.get("cluster", {}).get('worker', [])
    rpc_layer = tf_config.get("rpc_layer", {})
    if task_type == 'ps':
        gpu_count = 0
    else:
        gpu_count = len(worker_nodes)
    num_accelerators = {"GPU": gpu_count}

    # DistributedStrategy: cluster membership is resolved from TF_CONFIG
    strategy = tf.distribute.experimental.ParameterServerStrategy()

    # Create Estimator
    config = {
        'total_steps': 40000,
        'n_uid': n_uid,
        'n_mid': n_mid,
        'n_cat': n_cat,
        'maxlen': maxlen,
        'model_type': model_type,
        'data_type': data_type,
        'batch_size': batch_size,
    }
    run_config = tf.estimator.RunConfig(train_distribute=strategy,
                                        session_config=tf.ConfigProto(
                                            allow_soft_placement=True,
                                            log_device_placement=True),
                                        protocol='grpc+verbs')
    estimator = tf.estimator.Estimator(model_dir=saved_models,
                                       model_fn=model_fn,
                                       config=run_config,
                                       params=config)

    start_time = time.time()
    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: input_fn_v3(train_data, maxlen, batch_size=batch_size),
        max_steps=config["total_steps"])
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: input_fn_v3(train_data, maxlen, batch_size=batch_size))
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    end_time = time.time()
    approximate_accelerator_time = end_time - start_time
    end2end_time = end_time - end_2_end_start_time
    print("Approximate accelerator time in seconds is %.3f" % approximate_accelerator_time)
    print("Approximate end2end accelerator time in seconds is %.3f" % end2end_time)
    print("Approximate accelerator performance in recommendations/second is %.3f" %
          (float(TOTAL_TRAIN_SIZE) / float(approximate_accelerator_time)))
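# The ParameterServerStrategy above reads cluster membership from the
# TF_CONFIG environment variable. A minimal two-node example; the host names
# and ports are placeholders:
import json
import os

os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {
        "ps": ["ps0.example.com:2222"],
        "worker": ["worker0.example.com:2222"],
    },
    "task": {"type": "worker", "index": 0},
})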
def train(
        train_file=train_file,
        test_file=test_file,
        uid_voc=uid_voc,
        mid_voc=mid_voc,
        cat_voc=cat_voc,
        batch_size=128,
        maxlen=100,
        test_iter=100,
        save_iter=100,
        model_type='DNN',
        seed=2,
):
    model_path = "../ckpt/" + model_type + str(seed)
    best_model_path = model_path
    gpu_options = tf.GPUOptions(allow_growth=True)
    print("========model type is {0}=====".format(model_type))
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc,
                                  batch_size, maxlen, shuffle_each_epoch=False)
        test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc,
                                 batch_size, maxlen)
        n_uid, n_mid, n_cat = train_data.get_n()
        model_classes = {
            'DNN': Model_DNN,
            'PNN': Model_PNN,
            'Wide': Model_WideDeep,
            'DIN': Model_DIN,
            'DIN-V2-gru-att-gru': Model_DIN_V2_Gru_att_Gru,
            'DIN-V2-gru-gru-att': Model_DIN_V2_Gru_Gru_att,
            'DIN-V2-gru-qa-attGru': Model_DIN_V2_Gru_QA_attGru,
            'DIN-V2-gru-vec-attGru': Model_DIN_V2_Gru_Vec_attGru,
            'DIEN': Model_DIN_V2_Gru_Vec_attGru_Neg,
        }
        if model_type not in model_classes:
            print("Invalid model_type : %s" % model_type)
            return
        model = model_classes[model_type](n_uid, n_mid, n_cat, EMBEDDING_DIM,
                                          HIDDEN_SIZE, ATTENTION_SIZE)
        # add tensorboard & timeline
        train_writer = tf.summary.FileWriter(model_path, sess.graph)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sys.stdout.flush()
        print('======start to train =========')
        sys.stdout.flush()
        start_time = time.time()
        iter = 0
        lr = 0.001
        for itr in range(3):
            loss_sum = 0.0
            accuracy_sum = 0.
            aux_loss_sum = 0.
            for src, tgt in train_data:
                uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats = prepare_data(
                    src, tgt, maxlen, return_neg=True)
                loss, acc, aux_loss = model.train(sess, [
                    uids, mids, cats, mid_his, cat_his, mid_mask, target, sl,
                    lr, noclk_mids, noclk_cats
                ])
                loss_sum += loss
                accuracy_sum += acc
                aux_loss_sum += aux_loss
                iter += 1
                sys.stdout.flush()
                if (iter % test_iter) == 0:
                    print('iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- train_aux_loss: %.4f' %
                          (iter, loss_sum / test_iter, accuracy_sum / test_iter, aux_loss_sum / test_iter))
                    loss_sum = 0.0
                    accuracy_sum = 0.0
                    aux_loss_sum = 0.0
                if (iter % save_iter) == 0:
                    print('==print run metadata and timeline ')
                    # run one extra training step with full tracing enabled
                    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    loss, acc, aux_loss = model.train_with_metadata(sess, [
                        uids, mids, cats, mid_his, cat_his, mid_mask, target,
                        sl, lr, noclk_mids, noclk_cats
                    ], run_options, run_metadata)
                    train_writer.add_run_metadata(run_metadata, 'step%03d' % iter)
                    trace = timeline.Timeline(step_stats=run_metadata.step_stats)
                    with open(model_path + '/chrome-trace-' + str(iter), 'w') as trace_writer:
                        trace_writer.write(
                            trace.generate_chrome_trace_format(show_memory=True))
                    iter += 1
                    print('save model iter: %d' % (iter))
                    model.save(sess, model_path + "--" + str(iter))
            lr *= 0.5
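# The 'chrome-trace-<iter>' files written above are standard Chrome trace
# JSON; load them in chrome://tracing (or https://ui.perfetto.dev) to inspect
# per-op device timelines alongside the TensorBoard run metadata. A quick
# sanity check from Python (the path below is a placeholder):
import json

with open('../ckpt/DNN2/chrome-trace-100') as f:
    print(len(json.load(f)['traceEvents']), 'trace events')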
def train_generator(train_file="local_train_splitByUser",
                    test_file="local_test_splitByUser",
                    uid_voc="uid_voc.pkl",
                    mid_voc="mid_voc.pkl",
                    cat_voc="cat_voc.pkl",
                    batch_size=128,
                    maxlen=100,
                    test_iter=100,
                    save_iter=100,
                    model_type='DNN',
                    data_type='FP32',
                    seed=2,
                    saved_models=args.saved_models):
    print("model: ", model_type)
    model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed)
    best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed)
    end_2_end_start_time = time.time()

    # with generator input the DataIterator below is only consulted for the
    # vocabulary sizes, so a token read of 128 samples is enough on every node
    read_total_size = 128  # TOTAL_TRAIN_SIZE # 1086120
    print("batch_size: ", batch_size)
    print("Training size:{}".format(read_total_size))
    print("Loading and preprocessing data ...")
    train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc,
                              read_total_size, maxlen, shuffle_each_epoch=False)
    n_uid, n_mid, n_cat = train_data.get_n()
    print("n_uid:{}, n_mid:{}, n_cat:{}".format(n_uid, n_mid, n_cat))

    # DistributedStrategy
    strategy = tf.distribute.experimental.ParameterServerStrategy()

    # Create Estimator
    config = {
        'total_steps': 40000,
        'n_uid': n_uid,
        'n_mid': n_mid,
        'n_cat': n_cat,
        'maxlen': maxlen,
        'model_type': model_type,
        'data_type': data_type,
        'batch_size': batch_size,
    }
    run_config = tf.estimator.RunConfig(train_distribute=strategy,
                                        session_config=tf.ConfigProto(
                                            allow_soft_placement=True,
                                            log_device_placement=True))
    estimator = tf.estimator.Estimator(model_dir=saved_models,
                                       model_fn=model_fn,
                                       config=run_config,
                                       params=config)

    start_time = time.time()

    # Generator Input
    def train_input_fn():
        return input_fn_v2(train_file, uid_voc, mid_voc, cat_voc,
                           batch_size, maxlen, True)

    def eval_input_fn():
        return input_fn_v2(train_file, uid_voc, mid_voc, cat_voc,
                           batch_size, maxlen, False)

    # Training only: pass the function itself so the Estimator calls it
    estimator.train(input_fn=train_input_fn, hooks=None, steps=None,
                    max_steps=None, saving_listeners=None)

    # Train and Evaluate (alternative):
    # train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
    #                                     max_steps=config["total_steps"])
    # eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
    # tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    end_time = time.time()
    approximate_accelerator_time = end_time - start_time
    end2end_time = end_time - end_2_end_start_time
    print("Approximate accelerator time in seconds is %.3f" % approximate_accelerator_time)
    print("Approximate end2end accelerator time in seconds is %.3f" % end2end_time)
    print("Approximate accelerator performance in recommendations/second is %.3f" %
          (float(TOTAL_TRAIN_SIZE) / float(approximate_accelerator_time)))
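# input_fn_v2 is defined elsewhere in this repo. A plausible sketch of its
# shape, assuming it wraps DataIterator in tf.data.Dataset.from_generator;
# the feature subset and output signature below are illustrative assumptions,
# not the repo's actual definition.
def input_fn_v2_sketch(train_file, uid_voc, mid_voc, cat_voc,
                       batch_size, maxlen, is_training):
    def gen():
        data = DataIterator(train_file, uid_voc, mid_voc, cat_voc,
                            batch_size, maxlen,
                            shuffle_each_epoch=is_training)
        for src, tgt in data:
            uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, \
                noclk_mids, noclk_cats = prepare_data(src, tgt, maxlen,
                                                      return_neg=True)
            # DataIterator already yields whole batches, so each generator
            # element is one batch of features plus its targets
            yield {'uids': uids, 'mids': mids, 'cats': cats}, target
    return tf.data.Dataset.from_generator(
        gen,
        output_types=({'uids': tf.int64, 'mids': tf.int64,
                       'cats': tf.int64}, tf.float32))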