def cnn(filters, pooling_size=2, epochs=15, table_folder="/", kernel_size=3,
        input_dim=34, batch_size=32, nb_filters=34, time_from=32, time_to=8,
        downsample_ratio=None, oversample=None):
    """Train and evaluate a 1D-CNN binary churn classifier.

    Loads a windowed time-series table, fits a Conv1D -> MaxPooling1D ->
    Flatten -> Dense(sigmoid) model, logs the run to CSV and returns the
    evaluation artifacts.

    Args:
        filters: filter spec forwarded to import_and_preprocess_table.
        pooling_size: pool width for MaxPooling1D.
        epochs: number of training epochs.
        table_folder: folder containing the input table.
        kernel_size: Conv1D kernel width.
        input_dim: number of features per timestep.
        batch_size: minibatch size for fit/evaluate.
        nb_filters: number of Conv1D filters.
        time_from, time_to: window bounds; timesteps = time_from - time_to.
        downsample_ratio, oversample: class-balance options forwarded to
            the preprocessing step.

    Returns:
        [score, history, churn_number, total_number, y_pred]
    """
    timesteps = time_from - time_to
    X_train, X_test, y_train, y_test, churn_number, total_number, feature_names = \
        import_and_preprocess_table(timesteps, time_from, time_to, filters,
                                    table_folder, downsample_ratio, oversample)

    print("Creating layers...")
    model = Sequential()
    model.add(Conv1D(nb_filters,
                     kernel_size=kernel_size,
                     input_shape=(timesteps, input_dim),
                     activation='relu'))
    model.add(MaxPooling1D(pooling_size))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))

    print("Compiling model...")
    # NOTE(review): MSE on a sigmoid output trains, but binary_crossentropy
    # is the conventional loss for binary classification — confirm intent.
    model.compile(loss='mean_squared_error',
                  optimizer='rmsprop',
                  metrics=['accuracy',
                           keras_metrics.precision(),
                           keras_metrics.recall(),
                           keras_metrics.f1_score()])

    print("Fitting model...")
    print(model.summary())
    # Early stopping is intentionally disabled (it was commented out in the
    # original fit call); reinstate via callbacks=[EarlyStopping(...)] if wanted.
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        batch_size=batch_size,
                        epochs=epochs)

    score = model.evaluate(X_test, y_test, batch_size=batch_size)
    y_pred = model.predict(X_test)
    log_to_csv("cnn", score, history, filters, table_folder, input_dim,
               batch_size, time_from, time_to, model.to_json(), nb_filters,
               kernel_size)
    return [score, history, churn_number, total_number, y_pred]
def svm_run(filters, c_range=1.0, kernel_type='rbf', gamma='auto',
            train_sizes=None, table_folder="/", save_file=None,
            time_from=32, time_to=8, downsample_ratio=None, oversample=None):
    """Train and evaluate an SVM churn classifier, with a training curve.

    Sequences are flattened to fixed-length vectors before fitting. For a
    linear kernel the coefficient vector is returned as feature importances;
    other kernels have no per-feature weights, so an empty list is returned.

    Args:
        filters: filter spec forwarded to import_and_preprocess_table.
        c_range: SVC regularization parameter C.
        kernel_type: SVC kernel ('rbf', 'linear', ...).
        gamma: SVC kernel coefficient.
        train_sizes: training-set sizes for the training curve; defaults to
            [15, 100, 300, 500, 800]. (None sentinel avoids a shared
            mutable default list.)
        table_folder: folder containing the input table.
        save_file: unused here; kept for call-site compatibility.
        time_from, time_to: window bounds; timesteps = time_from - time_to.
        downsample_ratio, oversample: class-balance options.

    Returns:
        [y_pred, y_test, feature_importances, scores, train_sizes,
         train_scores, validation_scores, churn_number, total_number,
         feature_names]
        where scores = [accuracy, precision, recall, hinge_loss, f1].
    """
    if train_sizes is None:
        train_sizes = [15, 100, 300, 500, 800]

    timesteps = time_from - time_to
    X_train, X_test, y_train, y_test, churn_number, total_number, feature_names = \
        import_and_preprocess_table(timesteps, time_from, time_to, filters,
                                    table_folder, downsample_ratio, oversample)

    # SVC expects 2-D input: flatten each (timesteps, features) window.
    X_train = list(map(lambda x: x.flatten(), X_train))
    X_test = list(map(lambda x: x.flatten(), X_test))

    clf = svm.SVC(kernel=kernel_type, gamma=gamma, C=c_range)
    train_sizes, train_scores, validation_scores = training_curve(
        clf, X_train, y_train, X_test, y_test, train_sizes=train_sizes,
        shuffle=True, scoring='precision', train_last=True)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Per-feature weights only exist for the linear kernel.
    if kernel_type == 'linear':
        feature_importances = clf.coef_.flatten()
    else:
        feature_importances = []

    # NOTE(review): hinge_loss on hard 0/1 predictions (not decision-function
    # margins) is of limited meaning — confirm this is intended.
    scores = [
        accuracy_score(y_test, y_pred),
        precision_score(y_test, y_pred),
        recall_score(y_test, y_pred),
        hinge_loss(y_test, y_pred),
        f1_score(y_test, y_pred)
    ]

    print(y_pred)
    return [
        y_pred, y_test, feature_importances, scores, train_sizes,
        train_scores, validation_scores, churn_number, total_number,
        feature_names
    ]
def lstm5(filters, epochs=15, table_folder="/", save_file=None, input_dim=34,
          batch_size=32, time_from=32, time_to=8, downsample_ratio=None,
          oversample=None):
    """Train and evaluate a stacked (3-layer) LSTM churn classifier.

    Architecture: LSTM -> Dropout -> LSTM -> Dropout -> LSTM -> Dropout ->
    Dense(1, sigmoid). Logs the run to CSV and returns evaluation artifacts.

    Args:
        filters: filter spec forwarded to import_and_preprocess_table.
        epochs: number of training epochs.
        table_folder: folder containing the input table.
        save_file: unused here; kept for call-site compatibility.
        input_dim: number of features per timestep (also the LSTM width).
        batch_size: minibatch size for fit/evaluate.
        time_from, time_to: window bounds; timesteps = time_from - time_to.
        downsample_ratio, oversample: class-balance options.

    Returns:
        [score, history, churn_number, total_number, y_pred]
    """
    timesteps = time_from - time_to
    X_train, X_test, y_train, y_test, churn_number, total_number, feature_names = \
        import_and_preprocess_table(timesteps, time_from, time_to, filters,
                                    table_folder, downsample_ratio, oversample)

    print("Creating layers...")
    model = Sequential()
    # Fix: the first layer previously hard-coded 34 instead of using the
    # input_dim parameter (identical result at the default input_dim=34).
    model.add(LSTM(input_dim, input_length=timesteps, input_dim=input_dim,
                   return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(input_dim, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(input_dim))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid'))

    print("Compiling model...")
    # NOTE(review): MSE on a sigmoid output trains, but binary_crossentropy
    # is the conventional loss for binary classification — confirm intent.
    model.compile(loss='mean_squared_error',
                  optimizer='rmsprop',
                  metrics=['accuracy',
                           keras_metrics.precision(),
                           keras_metrics.recall(),
                           keras_metrics.f1_score()])

    print("Fitting model...")
    print(model.summary())
    # Early stopping is intentionally disabled (it was commented out in the
    # original fit call); reinstate via callbacks=[EarlyStopping(...)] if wanted.
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        batch_size=batch_size,
                        epochs=epochs)

    score = model.evaluate(X_test, y_test, batch_size=batch_size)
    y_pred = model.predict(X_test)
    log_to_csv("lstm", score, history, filters, table_folder, input_dim,
               batch_size, time_from, time_to, model.to_json())
    return [score, history, churn_number, total_number, y_pred]
def optimize_svm_hyperparameters(filters, train_sizes=None, table_folder="/",
                                 save_file=None, time_from=32, time_to=8,
                                 downsample_ratio=None, oversample=None):
    """Grid-search SVM hyperparameters and print the result.

    Args:
        filters: filter spec forwarded to import_and_preprocess_table.
        train_sizes: kept for signature compatibility with the other runners;
            defaults to [15, 100, 300, 500, 800] and is not used here.
            (None sentinel avoids a shared mutable default list.)
        table_folder: folder containing the input table.
        save_file: unused here; kept for call-site compatibility.
        time_from, time_to: window bounds; timesteps = time_from - time_to.
        downsample_ratio, oversample: class-balance options.

    Returns:
        None; prints the grid-search result.
    """
    if train_sizes is None:
        train_sizes = [15, 100, 300, 500, 800]

    timesteps = time_from - time_to
    X_train, X_test, y_train, y_test, churn_number, total_number, feature_names = \
        import_and_preprocess_table(timesteps, time_from, time_to, filters,
                                    table_folder, downsample_ratio, oversample)

    # SVC expects 2-D input: flatten each (timesteps, features) window.
    X_train = list(map(lambda x: x.flatten(), X_train))
    X_test = list(map(lambda x: x.flatten(), X_test))

    clf = svm.SVC()
    # NOTE(review): c_ranges, gammas and kernels are presumably module-level
    # grids defined elsewhere in this file — verify they exist at call time.
    grid_result = grid_search_svm(clf, c_ranges, gammas, kernels,
                                  X_train, y_train, X_test, y_test)
    print(grid_result)
def optimize_rf_hyperparameters(filters, train_sizes=None, table_folder="/",
                                save_file=None, time_from=32, time_to=8,
                                downsample_ratio=None, oversample=None):
    """Grid-search random-forest hyperparameters and print the result.

    Args:
        filters: filter spec forwarded to import_and_preprocess_table.
        train_sizes: kept for signature compatibility with the other runners;
            defaults to [15, 100, 300, 500, 800] and is not used here.
            (None sentinel avoids a shared mutable default list.)
        table_folder: folder containing the input table.
        save_file: unused here; kept for call-site compatibility.
        time_from, time_to: window bounds; timesteps = time_from - time_to.
        downsample_ratio, oversample: class-balance options.

    Returns:
        None; prints the grid-search result.
    """
    if train_sizes is None:
        train_sizes = [15, 100, 300, 500, 800]

    timesteps = time_from - time_to
    X_train, X_test, y_train, y_test, churn_number, total_number, feature_names = \
        import_and_preprocess_table(timesteps, time_from, time_to, filters,
                                    table_folder, downsample_ratio, oversample)

    # The forest expects 2-D input: flatten each (timesteps, features) window.
    X_train = list(map(lambda x: x.flatten(), X_train))
    X_test = list(map(lambda x: x.flatten(), X_test))

    clf = RandomForestClassifier()
    # NOTE(review): n_estimators, max_depth, min_samples_splits,
    # min_samples_leafs and max_featuress are presumably module-level grids
    # defined elsewhere in this file — verify they exist at call time.
    grid_result = grid_search_rf(clf, n_estimators, max_depth,
                                 min_samples_splits, min_samples_leafs,
                                 max_featuress, X_train, y_train,
                                 X_test, y_test)
    print(grid_result)
def rf_run(filters, n_estimators=10, max_depth=None, min_samples_split=2,
           min_samples_leaf=1, max_features="auto", train_sizes=None,
           epochs=15, table_folder="/", save_file=None, input_dim=34,
           batch_size=32, time_from=32, time_to=8, downsample_ratio=None,
           oversample=None):
    """Train and evaluate a random-forest churn classifier, with a training curve.

    Sequences are flattened to fixed-length vectors before fitting. The
    forest's feature_importances_ are returned alongside the scores.

    Args:
        filters: filter spec forwarded to import_and_preprocess_table.
        n_estimators, max_depth, min_samples_split, min_samples_leaf,
        max_features: RandomForestClassifier hyperparameters.
        train_sizes: training-set sizes for the training curve; defaults to
            [15, 100, 300, 500, 800]. (None sentinel avoids a shared
            mutable default list.)
        epochs, input_dim, batch_size: unused here; kept for signature
            compatibility with the neural-network runners.
        table_folder: folder containing the input table.
        save_file: unused here; kept for call-site compatibility.
        time_from, time_to: window bounds; timesteps = time_from - time_to.
        downsample_ratio, oversample: class-balance options.

    Returns:
        [y_pred, y_test, feature_importances, scores, train_sizes,
         train_scores, validation_scores, churn_number, total_number,
         feature_names]
        where scores = [accuracy, precision, recall, hinge_loss, f1].
    """
    if train_sizes is None:
        train_sizes = [15, 100, 300, 500, 800]

    timesteps = time_from - time_to
    X_train, X_test, y_train, y_test, churn_number, total_number, feature_names = \
        import_and_preprocess_table(timesteps, time_from, time_to, filters,
                                    table_folder, downsample_ratio, oversample)

    # The forest expects 2-D input: flatten each (timesteps, features) window.
    X_train = list(map(lambda x: x.flatten(), X_train))
    X_test = list(map(lambda x: x.flatten(), X_test))

    clf = RandomForestClassifier(n_estimators=n_estimators,
                                 max_depth=max_depth,
                                 min_samples_split=min_samples_split,
                                 min_samples_leaf=min_samples_leaf,
                                 max_features=max_features)
    train_sizes, train_scores, validation_scores = training_curve(
        clf, X_train, y_train, X_test, y_test, train_sizes=train_sizes,
        shuffle=True, scoring='precision', train_last=True)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    feature_importances = clf.feature_importances_

    # NOTE(review): hinge_loss on hard 0/1 predictions (not margins) is of
    # limited meaning for a forest — confirm this is intended.
    scores = [
        accuracy_score(y_test, y_pred),
        precision_score(y_test, y_pred),
        recall_score(y_test, y_pred),
        hinge_loss(y_test, y_pred),
        f1_score(y_test, y_pred)
    ]

    print(len(y_pred))
    print(len(y_test))
    return [
        y_pred, y_test, feature_importances, scores, train_sizes,
        train_scores, validation_scores, churn_number, total_number,
        feature_names
    ]
def lstm(filters, epochs=15, table_folder="/", save_file=None, input_dim=34,
         batch_size=32, time_from=32, time_to=8, downsample_ratio=None,
         oversample=None):
    """Train and evaluate a 2-layer LSTM churn classifier with k-fold splits.

    Runs a stratified 5-fold loop over the training set, fitting a fresh
    model per fold, evaluating each against the fixed hold-out test set,
    logging every run to CSV, then plotting and pretty-printing the scores.

    Args:
        filters: filter spec forwarded to import_and_preprocess_table.
        epochs: number of training epochs per fold.
        table_folder: folder containing the input table.
        save_file: unused here; kept for call-site compatibility.
        input_dim: number of features per timestep (also the LSTM width).
        batch_size: minibatch size for fit/evaluate.
        time_from, time_to: window bounds; timesteps = time_from - time_to.
        downsample_ratio, oversample: class-balance options.

    Returns:
        None; results are logged, plotted and printed.
    """
    timesteps = time_from - time_to
    # Fix: import_and_preprocess_table returns 7 values everywhere else in
    # this file; the original unpacked only 6 (missing feature_names), which
    # would raise ValueError at runtime.
    X_train, X_test, y_train, y_test, churn_number, total_number, feature_names = \
        import_and_preprocess_table(timesteps, time_from, time_to, filters,
                                    table_folder, downsample_ratio, oversample)

    print("Creating layers...")
    kfold = StratifiedKFold(n_splits=5, shuffle=True)
    scores = []
    histories = []
    churn_numbers = []
    total_numbers = []
    history_names = [i for i in range(0, kfold.get_n_splits())]

    for train, test in kfold.split(np.zeros(len(y_train)), y_train):
        model = Sequential()
        model.add(LSTM(input_dim, input_length=timesteps,
                       input_dim=input_dim, return_sequences=True))
        model.add(LSTM(input_dim))
        model.add(Dense(1, activation='sigmoid'))

        print("Compiling model...")
        model.compile(loss='mean_squared_error',
                      optimizer='rmsprop',
                      metrics=['accuracy'])

        print("Fitting model...")
        print(model.summary())
        # Fix: the original fitted on X_train[test] (the small held-out fold)
        # and never used the train indices — fit on the training fold instead.
        # NOTE(review): validation still uses the fixed global test set, not
        # the held-out fold — confirm that is the intended protocol.
        history = model.fit(X_train[train], y_train[train],
                            validation_data=(X_test, y_test),
                            batch_size=batch_size,
                            epochs=epochs)

        score = model.evaluate(X_test, y_test, batch_size=batch_size)
        scores.append(score)
        histories.append(history)
        churn_numbers.append(churn_number)
        total_numbers.append(total_number)
        log_to_csv("lstm", score, history, filters, table_folder, input_dim,
                   batch_size, time_from, time_to, model.to_json())

    plot_to_file(histories=histories,
                 history_names=history_names,
                 plot_types=["acc", "loss"],
                 algorithm="lstm",
                 save_file="regular")
    pretty_print_scores(scores, history_names, churn_numbers, total_numbers)