def run_cv(s, fold, base_folder, params, dataset, classifier_builder):
    """Run one cross-validation fold, or build the final model when ``fold == -1``.

    Nothing is done (apart from printing "will not run") when the output
    model file for the requested fold is already present under
    ``base_folder + "/models/"``.

    Args:
        s: Seed value handed to ``set_seeds``.
        fold: Fold number to run; ``-1`` selects the final-model path.
        base_folder: Root output folder; sub-paths are built by string
            concatenation, so this is expected to be a ``str``.
        params: Keyword arguments used to build the classifier.
        dataset: Dataset passed through to the classifier / CrossValidation.
        classifier_builder: Callable invoked as ``classifier_builder(**params)``.

    Returns:
        None.
    """
    final_run = fold == -1
    fold_tag = str(fold)

    # Skip work whose model output already exists on disk.
    fold_model_path = base_folder + "/models/model_" + fold_tag + "_model"
    if file_exists(fold_model_path):
        print("will not run")
        return
    if final_run and file_exists(base_folder + "/models/model_final_net"):
        print("will not run")
        return

    # set random seeds
    set_seeds(s)
    create_dir_structure(base_folder)

    if not final_run:
        # run 1 fold of 10 cv, saving each model
        CrossValidation(dataset, classifier_builder, params, base_folder).run(fold)
        return

    # build the final model
    classifier = classifier_builder(**params)
    classifier.train(dataset, lc_file=base_folder + "/lcs/lc_fold_-1")

    sample_ids, predictions, classes_d = classifier.evaluate(dataset)
    CrossValidation.write_results(
        sample_ids, predictions, classes_d, -1, base_folder + "/predictions/")
    classifier.save_model(base_folder + "/models/model_final")

    # Projection of the whole dataset first, then one projection per class column.
    sample_ids, projection, _ = classifier.get_projection(dataset)
    overall_path = base_folder + "/projections/fold_train_" + fold_tag
    cross_validation.save_projections(projection, overall_path, sample_ids)

    for class_name in classes_d.columns:
        sample_ids, projection, _ = classifier.get_projection(dataset, class_name)
        class_path = (base_folder + "/projections/class_fold_train_"
                      + class_name + "_" + fold_tag)
        cross_validation.save_projections(projection, class_path, sample_ids)
# Network configuration on `rna` (presumably created earlier in this script,
# outside the visible excerpt -- confirm).
rna.setImputDimNeurons(30)
rna.setNumberNeuronsHiddenLayer(31)
rna.setActivationFunctionHiddenLayer("tanh")
rna.setNumberNeuronsOutputLayer(1)
rna.setActivationFunctionOutputLayer("tanh")

rna_classifier = RnaClassifier()
rna_classifier.setRna(rna)

# Preprocessor for categorical attributes
categorical_columns = ['protocol_type', 'service', 'flag']
preprocessor = Preprocessor()
preprocessor.setColumnsCategory(categorical_columns)

evaluate = EvaluateModule()

cross = CrossValidation()
# Set which iteration the cross-validation is currently on
cross.setIteration(1)
cross.setPreprocessor(preprocessor)

# Input folder for the data subsets and output folder for the results.
file_path = "../../bases/sub_bases_nslkdd_30_attribute/"
result_path = "../../results/30_attribute/rna_oculta_31_time/"
cross.setFilePath(file_path)
cross.setResultPath(result_path)

cross.setClassifier(rna_classifier)
cross.setEvaluateModule(evaluate)
cross.run()
# from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
import numpy as np

from feature_engineering import FeatureEngineering
from cross_validation import CrossValidation
from multi_log_loss import multi_log_loss

# Script: extract features, split rows by whether 'age' is present, and run
# cross-validation with a multinomial naive Bayes model.

# Feature files prefixed with '#' are currently switched off.
f = FeatureEngineering('../data/gender_age_train.csv',
                       '../data/gender_age_test.csv',
                       'device_id',
                       wide_files=[
                           # '../features/apps_per_event.csv',
                           '../features/avg_position.csv',
                           # '../features/count_by_hour.csv',
                           '../features/count_by_period.csv',
                           '../features/event_counts.csv',
                           '../features/sd_position.csv'],
                       long_files=[
                           # '../features/active_app_category_counts.csv',
                           # '../features/installed_app_category_counts.csv',
                           '../features/phone_brand.csv'])
labels, features, colnames = f.extract_features()

# Re-index both frames to 0..n-1 so index labels coincide with row positions.
labels.set_index(np.arange(labels.shape[0]), inplace=True)
colnames.set_index(np.arange(colnames.shape[0]), inplace=True)

# Rows with a known age go to the training split, rows with NaN age to the
# test split (presumably the unlabeled test devices -- confirm).
ages = labels['age'].tolist()
train_filter = [i for i, x in enumerate(ages) if not np.isnan(x)]
test_filter = [i for i, x in enumerate(ages) if np.isnan(x)]

# `.ix` was removed in pandas 1.0; `.loc` is equivalent here because the
# index was just reset to 0..n-1, so labels and positions are the same.
cv = CrossValidation(features[train_filter, :],
                     labels.loc[train_filter, 'group'],
                     features[test_filter, :],
                     multi_log_loss)

model = MultinomialNB()
# Rebind predict on this instance so that callers of model.predict receive
# class probabilities (presumably what multi_log_loss expects -- confirm).
model.predict = model.predict_proba
out = cv.run(model, 'test')
# NOTE(review): the class that owns this static method starts outside the
# visible excerpt; re-indent under that class header if needed.
@staticmethod
def filter_X(X):
    """Return the ``age`` column of ``X`` as a one-column selection."""
    # pass list so a DataFrame is returned
    wanted_columns = ['age']
    return X.loc[:, wanted_columns]


def my_roc(actual, predicted):
    """Score ``predicted`` against ``actual`` with ``roc_auc_score``.

    Every element of ``actual`` is converted with ``int`` before scoring.
    """
    int_labels = list(map(int, actual))
    return roc_auc_score(int_labels, predicted)


# Training rows: flag them and encode gender as 1.0 for 'M', 0.0 otherwise.
train = pd.read_csv('gender_age_train.csv')
train['train_fl'] = True
train['gender'] = (train['gender'] == 'M').astype(float)

# Test rows: fill the label columns with placeholders and flag as non-training.
test = pd.read_csv('gender_age_test.csv')
for column, filler in (('gender', ''), ('age', -1), ('group', ''),
                       ('train_fl', False)):
    test[column] = filler

data = pd.concat([train, test])

cv = CrossValidation(data, my_roc, 'gender', 'train_fl',
                     id_col='device_id', logged=True)
model = MyLasso()
cv.run(model, 'test_age_gender')
def _train(current_experiment, train_data_folder_path, train_labels_folder_path, train_ids_path):
    """Build a ``CrossValidation`` for ``current_experiment`` and run it.

    Args:
        current_experiment: Passed to the ``CrossValidation`` constructor.
        train_data_folder_path: First argument forwarded to ``run``.
        train_labels_folder_path: Second argument forwarded to ``run``.
        train_ids_path: Third argument forwarded to ``run``.
    """
    # Deliberately imported here rather than at module level: this must be
    # imported after setting the CUDA_VISIBLE_DEVICES environment variable,
    # otherwise it won't work.
    from cross_validation import CrossValidation

    CrossValidation(current_experiment).run(
        train_data_folder_path,
        train_labels_folder_path,
        train_ids_path,
    )