def main(dataPath, dataPath_val, batch_size):
    """Train a binary-classification DeepFM model from CSV shards.

    Globs every 5th CSV file under ``dataPath`` (training) and
    ``dataPath_val`` (validation), counts examples, builds generators,
    trains for 10 epochs with Adam, and checkpoints the best weights by
    validation binary cross-entropy.

    Args:
        dataPath: Directory containing training ``*.csv`` shards.
        dataPath_val: Directory containing validation ``*.csv`` shards.
        batch_size: Examples per batch for both generators.

    Returns:
        The Keras ``History`` object from training.
    """
    # must have list of training files; [::5] subsamples every 5th shard
    files = glob.glob(dataPath + "/*.csv")[::5]
    # validation files
    files_val = glob.glob(dataPath_val + "/*.csv")[::5]

    # Count number of examples in training data
    nexs = get_total_examples(files)
    print("Number of training examples: ", nexs)
    nexs_val = get_total_examples(files_val)
    print("Number of validation examples: ", nexs_val)

    # Create data generators; feature columns are derived by the
    # training generator and shared with the model.
    train_gen = DataGenerator(files, nexs, batch_size=batch_size)
    val_gen = DataGenerator(files_val, nexs_val, batch_size=batch_size)
    linear_feature_columns = train_gen.linear_feature_columns
    dnn_feature_columns = train_gen.dnn_feature_columns

    # 4. Define model, train, predict and evaluate
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    # NOTE(review): `lr`/`decay` are legacy Keras argument names; kept for
    # compatibility with the Keras version this file targets (fit_generator).
    optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                                      decay=0.0)
    model.compile(
        optimizer,
        "binary_crossentropy",
        metrics=['binary_crossentropy', auroc],
    )

    weights_file = "model-5-lr0p001.h5"
    model_checkpoint = ModelCheckpoint(weights_file,
                                       monitor="val_binary_crossentropy",
                                       save_best_only=True,
                                       save_weights_only=True,
                                       verbose=1)

    # BUG FIX: steps were passed as floats, and validation_steps used the
    # *training* example count (nexs) instead of the validation count.
    # Integer ceiling division without importing math:
    steps_per_epoch = (nexs + batch_size - 1) // batch_size
    validation_steps = (nexs_val + batch_size - 1) // batch_size

    # (Removed an unused ProgbarLogger instance that was never passed to
    # fit_generator.)
    history = model.fit_generator(train_gen,
                                  epochs=10,
                                  verbose=1,
                                  steps_per_epoch=steps_per_epoch,
                                  validation_data=val_gen,
                                  validation_steps=validation_steps,
                                  callbacks=[model_checkpoint])
    return history
monitor='val_loss', verbose=1, save_best_only=True, mode='min', save_weights_only=True) #history = model.fit_generator(generate_arrays_from_file('./data/sample/feature_mapped_valid.data', batch_size=batch_size), # steps_per_epoch=int(np.ceil(num_train/batch_size)), callbacks=[checkpoint], epochs=50, verbose=1, # validation_data=generate_arrays_from_file('./data/sample/feature_mapped_valid.data', batch_size=batch_size), # validation_steps=int(np.ceil(num_valid/batch_size))) history = model.fit_generator( generate_arrays_from_file( './data/feature_mapped_combined_train.data', batch_size=batch_size), steps_per_epoch=int(np.ceil(num_train / batch_size)), callbacks=[checkpoint], epochs=50, verbose=1, validation_data=generate_arrays_from_file( './data/feature_mapped_combined_valid.data', batch_size=batch_size), validation_steps=int(np.ceil(num_valid / batch_size))) elif mode == 'test': # model.load_weights('model_save/deep_fm_fn-ep002-loss0.148-val_loss0.174.h5') # auc: 0.718467 batch_size=6000 #model.load_weights('model_save/deep_fm_fn-ep001-loss0.149-val_loss0.175.h5') # auc: 0.714243 batch_size = 2048 # model.load_weights('model_save/deep_fm_fn-ep005-loss0.147-val_loss0.173.h5') # auc: 0.722535 batch_size = 10000 # model.load_weights('model_save/deep_fm_fn_bs10000-ep001-loss0.155-val_loss0.153.h5') # auc: 0.738023 #model.load_weights('model_save/deep_fm_fn_bs15000-ep001-loss0.156-val_loss0.152.h5') # auc: 0.739935 #model.load_weights('model_save/deep_fm_fn-ep002-loss0.154-val_loss0.154-bs15000-ee20-hz[128, 128].h5') # auc: 0.741590 model.load_weights( 'model_save/deep_fm_fn-ep020-loss0.153-val_loss0.153-bs15000-ee20-hz[5, 600].h5'
# NOTE(review): the `return dataset` below is the tail of a dataset-builder
# function whose `def` line lies above this chunk — do not move it. Indentation
# here is reconstructed; confirm against the original file layout.
    return dataset


# --- Top-level training script ---
# Both the linear (FM) part and the deep (DNN) part of DeepFM consume the
# same combined feature set.
linear_feature_columns = varlen_feature_columns + fixed_feature_columns
dnn_feature_columns = varlen_feature_columns + fixed_feature_columns

callbacks = []

# Toggle between multi-GPU distributed training and a single-device
# (eager) fallback used for debugging.
GPU = True
if GPU:
    # Data-parallel training replicated across four GPUs.
    strategy = tf.distribute.MirroredStrategy(devices=['/gpu:0', '/gpu:1', '/gpu:2', '/gpu:3'])
    # strategy = tf.distribute.MirroredStrategy(devices=['/gpu:3'])
    with strategy.scope():
        # Model must be constructed and compiled inside the strategy scope
        # so variables are mirrored onto every replica.
        model = DeepFM(linear_feature_columns, dnn_feature_columns,
                       dnn_hidden_units=[1024, 512, 256], task='binary',
                       dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False)
        model.compile("adam", "binary_crossentropy",
                      metrics=['binary_crossentropy', tf.keras.metrics.AUC()])
        # model.run_eagerly = True
        # steps_per_epoch/validation_steps are None: the dataset is expected
        # to be finite so Keras runs until exhaustion each epoch.
        model.fit_generator(generator=get_dataset(),
                            steps_per_epoch=None,
                            epochs=10,
                            verbose=2,
                            callbacks=callbacks,
                            validation_data=get_dataset(eval_data_path),
                            validation_steps=None,
                            validation_freq=1,
                            class_weight=None,
                            max_queue_size=100,
                            workers=10,
                            use_multiprocessing=False,
                            shuffle=True,
                            initial_epoch=0)
        # Export the trained model in SavedModel format for serving.
        tf.saved_model.save(model, "./models")
else:
    # Single-device fallback: identical model and training configuration,
    # but with eager execution enabled for easier debugging.
    model = DeepFM(linear_feature_columns, dnn_feature_columns,
                   dnn_hidden_units=[1024, 512, 256], task='binary',
                   dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False)
    model.compile("adam", "binary_crossentropy",
                  metrics=['binary_crossentropy', tf.keras.metrics.AUC()])
    model.run_eagerly = True
    model.fit_generator(generator=get_dataset(),
                        steps_per_epoch=None,
                        epochs=10,
                        verbose=2,
                        callbacks=callbacks,
                        validation_data=get_dataset(eval_data_path),
                        validation_steps=None,
                        validation_freq=1,
                        class_weight=None,
                        max_queue_size=100,
                        workers=10,
                        use_multiprocessing=False,
                        shuffle=True,
                        initial_epoch=0)