# NFM training script (chunk). NOTE(review): this chunk relies on names bound
# earlier in the file -- create_criteo_dataset, NFM, binary_crossentropy,
# Adam, AUC -- whose imports are outside this view; the model.fit(...) call
# for the "Fit" section below also continues past this chunk. Confirm against
# the full file.

# --------------------------- Hyper-parameters ----------------------------
file = '../dataset/Criteo/train.txt'  # path to the raw Criteo training data
read_part = True                      # read only a sample of the file, not all rows
sample_num = 100000                   # rows to read when read_part is True
test_size = 0.2                       # fraction held out for the test split
embed_dim = 8                         # embedding size per sparse feature
dnn_dropout = 0.5                     # dropout rate for the DNN part of NFM
hidden_units = [256, 128, 64]         # DNN hidden-layer widths
learning_rate = 0.001
batch_size = 512
epochs = 5

# ========================== Create dataset =======================
feature_columns, train, test = create_criteo_dataset(file=file,
                                                     embed_dim=embed_dim,
                                                     read_part=read_part,
                                                     sample_num=sample_num,
                                                     test_size=test_size)
train_X, train_y = train
test_X, test_y = test

# ============================Build Model==========================
model = NFM(feature_columns, hidden_units, dropout_rate=dnn_dropout)
model.summary()

# ============================model checkpoint======================
# NOTE(review): `period=` on ModelCheckpoint is a deprecated argument
# (replaced by `save_freq=`) -- update if this block is re-enabled.
# check_path = 'save/nfm_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
# checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
#                                                 verbose=1, period=5)

# =========================Compile============================
model.compile(loss=binary_crossentropy,
              optimizer=Adam(learning_rate=learning_rate),
              metrics=[AUC()])
# ===========================Fit==============================
# Author : junchaoli # File : train.py ''' from model import CCPM from utils import create_criteo_dataset import tensorflow as tf from tensorflow.keras import optimizers, losses from sklearn.metrics import accuracy_score if __name__ == '__main__': file_path = 'E:\\PycharmProjects\\推荐算法\\data\\train.txt' feature_columns, (X_train, y_train), (X_test, y_test) = create_criteo_dataset(file_path, test_size=0.2) hidden_units = [128] dropout = 0.2 filters = [4, 4] kernel_width = [6, 5] model = CCPM(feature_columns, hidden_units, dropout=dropout, filters=filters, kernel_width=kernel_width) optimizer = optimizers.SGD(0.01) train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)) train_dataset = train_dataset.batch(32).prefetch( tf.data.experimental.AUTOTUNE)
'''
# AutoInt training script (chunk). NOTE(review): the leading ''' above may
# close a string opened in an earlier chunk, making the code below live; if
# this chunk is a standalone file it instead opens an unterminated string.
# Confirm against the full file.
from model import AutoInt
from utils import create_criteo_dataset

import numpy as np
import tensorflow as tf
from tensorflow.keras import losses, optimizers
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    file = 'E:\\PycharmProjects\\推荐算法\\data\\train.txt'
    test_size = 0.1
    hidden_units = [256, 128, 64]  # DNN hidden-layer widths

    feature_columns, (X_train, y_train), (X_test, y_test) = create_criteo_dataset(
        file, test_size=test_size)

    # 4 attention heads of width 16, dropout on both the DNN and attention parts
    model = AutoInt(feature_columns, hidden_units, dnn_dropout=0.2,
                    n_heads=4, head_dim=16, att_dropout=0.2)
    optimizer = optimizers.SGD(0.01)

    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    train_dataset = train_dataset.batch(32).prefetch(
        tf.data.experimental.AUTOTUNE)

    summary_writer = tf.summary.create_file_writer(
        'E:\\PycharmProjects\\tensorboard')
# FM training script driven by command-line hyper-parameters.
# FIX: the original called FM(...) without ever importing it; every sibling
# script imports its model via `from model import X`, so the same is added here.
from model import FM
from utils import create_criteo_dataset

import argparse

import tensorflow as tf
from tensorflow.keras import optimizers, losses, metrics
from sklearn.metrics import accuracy_score

# Command-line hyper-parameters (parsed at import time, as in the original;
# runtime help strings kept byte-identical).
parser = argparse.ArgumentParser(description='命令行参数')
parser.add_argument('-k', type=int, help='v_dim', default=8)        # latent dim of V
parser.add_argument('-w_reg', type=float, help='w正则', default=1e-4)  # L2 reg on w
parser.add_argument('-v_reg', type=float, help='v正则', default=1e-4)  # L2 reg on V
args = parser.parse_args()

if __name__ == '__main__':
    file_path = 'train.txt'
    # NOTE(review): unlike the other scripts, this create_criteo_dataset call
    # returns only the two splits (no feature_columns) -- presumably an older
    # utils signature; verify against utils.py.
    (X_train, y_train), (X_test, y_test) = create_criteo_dataset(file_path, test_size=0.5)

    k = args.k
    w_reg = args.w_reg
    v_reg = args.v_reg

    model = FM(k, w_reg, v_reg)
    optimizer = optimizers.SGD(0.01)

    # train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    # train_dataset = train_dataset.batch(32).prefetch(tf.data.experimental.AUTOTUNE)

    # model.compile(optimizer='rmsprop',loss='binary_crossentropy',metrics=['accuracy'])
    # model.fit(train_dataset, epochs=200)
    # print(model.evaluate(X_test, y_test))
    # model.summary()

    summary_writer = tf.summary.create_file_writer('E:\\PycharmProjects\\tensorboard')
from sklearn.metrics import confusion_matrix, roc_curve import matplotlib.pyplot as plt import seaborn as sns file = './criteo_sampled_data.csv' read_part = True sample_num = 100000 test_size = 0.2 k = 32 learning_rate = 0.001 batch_size = 500 epochs = 100 feature_columns, train, test, val = create_criteo_dataset( file=file, read_part=read_part, sample_num=sample_num, test_size=test_size) train_X, train_y = train test_X, test_y = test val_X, val_y = val model = FM(feature_columns=feature_columns, k=k) model.summary() early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_auc', verbose=1, patience=10, mode='max', restore_best_weights=True) model.compile(loss=tf.keras.losses.binary_crossentropy, optimizer=tf.keras.optimizers.Adam(lr=learning_rate),
# Author : junchaoli # File : train.py ''' from model import DCN from utils import create_criteo_dataset import tensorflow as tf from tensorflow.keras import losses, optimizers from sklearn.metrics import accuracy_score if __name__ == '__main__': file = 'E:\\PycharmProjects\\推荐算法\\data\\train.txt' test_size = 0.4 hidden_units = [256, 128, 64] feature_columns, (X_train, y_train), (X_test, y_test) = create_criteo_dataset(file, test_size=test_size) model = DCN(feature_columns, hidden_units, 1, activation='relu', layer_num=6) optimizer = optimizers.SGD(0.01) train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)) train_dataset = train_dataset.batch(32).prefetch(tf.data.experimental.AUTOTUNE) # model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy']) # model.fit(train_dataset, epochs=100) # logloss, auc = model.evaluate(X_test, y_test) # print('logloss {}\nAUC {}'.format(round(logloss,2), round(auc,2))) # model.summary() summary_writer = tf.summary.create_file_writer('E:\\PycharmProjects\\tensorboard') for i in range(100):
# -*- encoding: utf-8 -*-
'''
@File    : data_test.py
@Time    : 2020/09/13 16:34:13
@Author  : Peng He
@Version : 1.0
@Contact : [email protected]
@IDE     : Visual Studio Code
@License : (C)Copyright 2020, CUG
@Desc    : Smoke test for create_criteo_dataset: parse the local 1M-row
           Criteo sample into feature columns plus train/test splits, and
           print the feature-column description.
'''
from utils import create_criteo_dataset

if __name__ == "__main__":
    # Load and split the sampled Criteo data from its fixed local path.
    dataset = create_criteo_dataset(
        'D:/data/Chrome-Download/198459_438654_bundle_archive/train_1m.txt')
    feature_columns, (train_X, train_Y), (test_X, test_Y) = dataset

    # Show what the parser produced for the sparse/dense features.
    print(feature_columns)