from fusion import cv10
from fusion import dt
from fusion import lr_feature_selection
from fusion import knn
from standardized_data import *
from thematic_data_combined import combine_data_from_feature_selection
from svms import svm_selected_for_features_fusion

# Interactive driver: loads the project data, builds the combined dataset for
# the percentage typed by the user, then repeats the selected run 100 times.
if __name__ == "__main__":
    workbook = Spreadsheet(project_data_file)
    loaded = Data(workbook)
    targets = loaded.targets
    ids = loaded.ids

    percentage = float(raw_input("Enter percentage."))
    combined_dataset, targets = combine_data_from_feature_selection(targets, percentage)

    alg = raw_input("Enter algorithm. Choose lr, dt, knn, svm")
    fusion_algorithm = raw_input("Enter algorithm. Choose between maj, wmaj, svm, nn")

    # For each non-SVM choice: the callable handed to cv10 and the prefix of
    # the result file name.
    runners = {
        "lr": (lr_feature_selection, "best_lr_"),
        "dt": (dt, "best_dt_"),
        "knn": (knn, "best_knn_"),
    }
    selected = runners.get(alg)

    for _ in range(100):
        if selected is not None:
            classifier, prefix = selected
            # The algorithm name appears twice in the file name (once in the
            # prefix, once after the percentage); this matches the original.
            out_name = prefix + str(percentage) + alg + "_" + fusion_algorithm + ".txt"
            cv10(combined_dataset, targets, fusion_algorithm, ids, classifier,
                 prt=True, file_name=out_name)
        elif alg == "svm":
            # NOTE(review): the standardized dataset is not used by any
            # statement visible here -- this branch may be cut off; confirm
            # against the full file before relying on the "svm" option.
            std = StandardizedData(targets)
            dataset = std.standardize_dataset(combined_dataset)
""" Logistic Regression Classification Combine LR for themes Feature selection is applied before """
# The docstring above is emitted by print(__doc__) below, so its text is
# program output and is kept verbatim.
print(__doc__)

import sys

# Prepend the helper directories to the module search path. Each insert goes
# to position 0, so the directory inserted last is searched first.
for helper_dir in ('utils/', 'feature context/'):
    sys.path.insert(0, helper_dir)

from load_data import *
from project_data import *
from fusion import cv10
from fusion import lr_feature_selection
from thematic_data_combined import combine_data_from_feature_selection
from parameters import CV_PERCENTAGE_OCCURENCE_THRESHOLD

if __name__ == "__main__":
    workbook = Spreadsheet(project_data_file)
    loaded = Data(workbook)
    raw_targets = loaded.targets
    ids = loaded.ids

    # The helper returns both the combined dataset and a targets value; the
    # returned targets are the ones passed on to cv10.
    combined_dataset, targets = combine_data_from_feature_selection(
        raw_targets, CV_PERCENTAGE_OCCURENCE_THRESHOLD)

    fusion_algorithm = raw_input("Enter algorithm. Choose between maj, wmaj, svm, nn")

    cv10(combined_dataset, targets, fusion_algorithm, ids, lr_feature_selection)
C_ideo, g_ideo = params()

# One list of candidate values per theme (net, ill, ideo), for C and for g.
C_range = [C_net, C_ill, C_ideo]
g_range = [g_net, g_ill, g_ideo]

# Exhaustive search: every combination of C values is paired with every
# combination of g values. The C combination varies slowest, exactly as in a
# nested "for cs ... for gs ..." loop.
all_settings = itertools.product(itertools.product(*C_range),
                                 itertools.product(*g_range))
for cs, gs in all_settings:
    c_net, c_ill, c_ideo = cs
    # Rebinding g_net/g_ill/g_ideo to scalars does not affect g_range, which
    # still holds the original candidate lists.
    g_net, g_ill, g_ideo = gs

    best_svm = BestSVM(c_net, g_net, c_ill, g_ill, c_ideo, g_ideo)

    # NOTE(review): the dataset is rebuilt and re-standardized on every
    # iteration, and `targets` is fed back into the helper each time. If
    # combine_data_from_feature_selection is idempotent this work could be
    # done once before the loop -- confirm before hoisting.
    combined_dataset, targets = combine_data_from_feature_selection(targets, 0.9)
    std = StandardizedData(targets)
    dataset = std.standardize_dataset(combined_dataset)

    error, f1 = cross_validation(dataset, targets, best_svm)

    # Only configurations with error <= 0.33 and a positive f1 are recorded.
    if not (error <= 0.33 and f1 > 0):
        continue

    # Append the whole record through a single file handle instead of
    # reopening result.txt once per line.
    with open("result.txt", "a") as myfile:
        myfile.write('\n##############################\n')
        myfile.write(best_svm.to_string())
        myfile.write('\nerror_maj %f' % error)
        myfile.write('\nf1 %f' % f1)