def build_model(features_dict, vocabularies):
    """Construct a DCN model configured entirely from command-line FLAGS.

    Args:
        features_dict: mapping of feature names to feature specs, forwarded
            verbatim to the DCN constructor.
        vocabularies: per-feature vocabularies, forwarded verbatim.

    Returns:
        A DCN instance whose hyperparameters all come from the global FLAGS
        object — this wrapper exists so callers never touch FLAGS directly.
    """
    return DCN(
        features_dict=features_dict,
        vocabularies=vocabularies,
        num_cross_layers=FLAGS.num_cross_layers,
        num_deep_layers=FLAGS.num_deep_layers,
        deep_layer_size=FLAGS.deep_layer_size,
        model_structure=FLAGS.model_structure,
        embedding_dim=FLAGS.embedding_dim,
        projection_dim=FLAGS.projection_dim,
        l2_penalty=FLAGS.l2_penalty,
    )
def main(learning_rate, epochs, hidden_units):
    """Train and evaluate a DCN model on the dataset from create_dataset().

    feature_columns is a list and contains two dict:
    - dense_features:  {feat: dense_feature_name}
    - sparse_features: {feat: sparse_feature_name,
                        feat_num: the number of this feature,
                        embed_dim: the embedding dimension of this feature}
    train_X: [dense_train_X, sparse_train_X]
    test_X:  [dense_test_X, sparse_test_X]

    Args:
        learning_rate: learning rate for the Adam optimizer.
        epochs: number of training epochs.
        hidden_units: deep-layer sizes forwarded to the DCN constructor.
    """
    feature_columns, train_X, test_X, train_y, test_y = create_dataset()
    # ============================Build Model==========================
    model = DCN(feature_columns, hidden_units)
    model.summary()
    # =============================Tensorboard=========================
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = 'logs/' + current_time
    # FIX: write_grads / embeddings_layer_names / embeddings_metadata /
    # embeddings_data were removed from tf.keras.callbacks.TensorBoard in
    # TF 2.x and raise TypeError there. The values previously passed were
    # the defaults anyway, so dropping them does not change behavior.
    tensorboard = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir,
        histogram_freq=1,
        write_graph=True,
        write_images=True,
        embeddings_freq=0,
        update_freq=500,  # log metrics every 500 batches
    )
    # ============================model checkpoint======================
    check_path = 'save/dcn_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
    # NOTE(review): `period` (save every 4 epochs) is deprecated in TF 2.x in
    # favor of `save_freq`, but save_freq counts batches, not epochs — kept
    # as-is; confirm against the TF version in use.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path,
                                                    save_weights_only=True,
                                                    verbose=1,
                                                    period=4)
    # =========================Compile============================
    model.compile(loss=binary_crossentropy,
                  optimizer=Adam(learning_rate=learning_rate),
                  metrics=[AUC()])
    # ===========================Fit==============================
    model.fit(
        train_X,
        train_y,
        epochs=epochs,
        callbacks=[tensorboard, checkpoint],
        batch_size=128,
        validation_split=0.2,
    )
    # ===========================Test==============================
    # evaluate() returns [loss, auc]; index 1 is the AUC metric.
    print('test AUC: %f' % model.evaluate(test_X, test_y)[1])
# Hyperparameters for this training run.
dnn_dropout = 0.5              # dropout rate for the deep (DNN) part of DCN
hidden_units = [256, 128, 64]  # deep-layer sizes
learning_rate = 0.001
batch_size = 4096
epochs = 10
# ========================== Create dataset =======================
# create_criteo_dataset returns feature metadata plus pre-split
# (X, y) tuples for train and test.
feature_columns, train, test = create_criteo_dataset(file=file,
                                                     embed_dim=embed_dim,
                                                     read_part=read_part,
                                                     sample_num=sample_num,
                                                     test_size=test_size)
train_X, train_y = train
test_X, test_y = test
# ============================Build Model==========================
model = DCN(feature_columns, hidden_units, dnn_dropout=dnn_dropout)
model.summary()
# ============================model checkpoint======================
# check_path = 'save/dcn_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
# checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
#                                                 verbose=1, period=5)
# =========================Compile============================
model.compile(loss=binary_crossentropy,
              optimizer=Adam(learning_rate=learning_rate),
              metrics=[AUC()])
# ===========================Fit==============================
model.fit(
    train_X,
    train_y,
    epochs=epochs,
    callbacks=[
# NOTE(review): this chunk is truncated here in the visible source —
# the remainder of the fit() call is outside this view.
def run_DCN():
    """Build a DCN model and, when not in online mode, fit it.

    Relies on module-level globals: field_size, feature_sizes, num_epoch,
    online, and the Xi/Xv/y train/test arrays.
    """
    dcn = DCN.DCN(field_size, feature_sizes, batch_size=32 * 8, verbose=True,
                  use_cuda=True, weight_decay=0.00002, use_inner_product=True,
                  n_epochs=num_epoch)
    # BUG FIX: the original read `if online = False:` — an assignment inside a
    # condition, which is a SyntaxError in Python. The intent was an equality
    # test for offline mode.
    if not online:
        # NOTE(review): `ealry_stopping` is spelled to match the underlying
        # library's fit() keyword (its own typo) — confirm before renaming.
        dcn.fit(Xi_train, Xv_train, y_train, Xi_test, Xv_test, y_test,
                ealry_stopping=True, refit=True)
import tensorflow as tf
from tensorflow.keras import losses, optimizers
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    # Hard-coded Windows path to the local Criteo training data.
    file = 'E:\\PycharmProjects\\推荐算法\\data\\train.txt'
    test_size = 0.4
    hidden_units = [256, 128, 64]
    # create_criteo_dataset returns feature metadata plus train/test splits.
    feature_columns, (X_train, y_train), (X_test, y_test) = create_criteo_dataset(
        file, test_size=test_size)
    # layer_num=6 — presumably the number of cross layers; confirm in DCN.
    model = DCN(feature_columns, hidden_units, 1, activation='relu', layer_num=6)
    optimizer = optimizers.SGD(0.01)
    # Batch and prefetch the training data for the (manual) training loop.
    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    train_dataset = train_dataset.batch(32).prefetch(
        tf.data.experimental.AUTOTUNE)
    # Alternative Keras-API training path, kept for reference:
    # model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])
    # model.fit(train_dataset, epochs=100)
    # logloss, auc = model.evaluate(X_test, y_test)
    # print('logloss {}\nAUC {}'.format(round(logloss,2), round(auc,2)))
    # model.summary()
    summary_writer = tf.summary.create_file_writer(
    # NOTE(review): this chunk is truncated here in the visible source —
    # the rest of the create_file_writer() call is outside this view.
# Hyperparameters for this training run.
dnn_dropout = 0.5              # dropout rate for the deep (DNN) part of DCN
hidden_units = [256, 128, 64]  # deep-layer sizes
learning_rate = 0.001
batch_size = 512
epochs = 5
# ========================== Create dataset =======================
# create_criteo_dataset returns feature metadata plus pre-split
# (X, y) tuples for train and test.
feature_columns, train, test = create_criteo_dataset(file=file,
                                                     embed_dim=embed_dim,
                                                     read_part=read_part,
                                                     sample_num=sample_num,
                                                     test_size=test_size)
train_X, train_y = train
test_X, test_y = test
# ============================Build Model==========================
# dnn_dropout passed positionally here (keyword in the sibling script).
model = DCN(feature_columns, hidden_units, dnn_dropout)
model.summary()
# ============================model checkpoint======================
# check_path = 'save/dcn_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
# checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
#                                                 verbose=1, period=5)
# =========================Compile============================
model.compile(loss=binary_crossentropy,
              optimizer=Adam(learning_rate=learning_rate),
              metrics=[AUC()])
# ===========================Fit==============================
model.fit(
    train_X,
    train_y,
    epochs=epochs,
    # callbacks=[tensorboard, checkpoint],
# NOTE(review): this chunk is truncated here in the visible source —
# the remainder of the fit() call is outside this view.
# Standardize the current dense feature to zero mean / unit variance.
# NOTE(review): `data` and `feat` come from an enclosing scope not visible
# here — presumably this sits inside a loop over dense features; confirm.
mean = data[feat].mean()
std = data[feat].std()
data[feat] = (data[feat] - mean) / (std + 1e-12)  # eps guards against std == 0
# print(data.shape)
# print(data.head())

# Fixed-seed 90/10 train/validation split.
train, valid = train_test_split(data, test_size=0.1, random_state=42)
# print(train.shape)  # (540000, 40)
# print(valid.shape)  # (60000, 40)

# Wrap the splits as (sparse ints, dense floats, label) tensor datasets.
train_dataset = TensorDataset(
    torch.LongTensor(train[sparse_features].values),
    torch.FloatTensor(train[dense_features].values),
    torch.FloatTensor(train['label'].values))
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=args.train_batch_size,
                          shuffle=True)
valid_dataset = TensorDataset(
    torch.LongTensor(valid[sparse_features].values),
    torch.FloatTensor(valid[dense_features].values),
    torch.FloatTensor(valid['label'].values))
valid_loader = DataLoader(dataset=valid_dataset,
                          batch_size=args.eval_batch_size,
                          shuffle=False)

# Cardinality of each categorical feature — presumably used to size the
# model's embedding tables; verify against the DCN constructor.
cat_fea_unique = [data[f].nunique() for f in sparse_features]
model = DCN(cat_fea_unique, num_fea_size=len(dense_features))
train_model(model)