def once_a_hour():
    """Emit an hourly snapshot line: the first 20 entries of the
    module-level ``values`` sequence, joined as ``v0;v1;...;v19;``.

    NOTE(review): relies on module globals ``log`` and ``values`` defined
    elsewhere in this file; assumes ``values`` has at least 20 entries —
    confirm against the producer.
    """
    log.info("main", "once_a_hour")
    # Build the line in one pass with join instead of repeated `+=`
    # (the original stripped spaces from the concatenated string; stripping
    # per element is equivalent since the ';' separator contains no spaces).
    logline = "".join(
        str(values[i]).replace(' ', '') + ";" for i in range(20)
    )
    log.line(logline)
def load_data(feature, classes, components_cnt):
    """Load the train/test split for *feature* and *classes*, with logging.

    ``components_cnt`` is only echoed into the configuration log entry here.
    Returns (train_data, train_labels, test_data, test_labels).
    """
    log.line()
    log.dataset_configuration(feature, classes, components_cnt)
    log.message('Loading data...')

    started = time.time()
    loaded = d.read_test_and_train_data(feature, classes)
    log.time(time.time() - started)

    train_data, train_labels, test_data, test_labels = loaded
    log.dataset_summary(len(train_data), len(test_data))
    return train_data, train_labels, test_data, test_labels
def transform_data(data, components_cnt):
    """Run PCA (down to ``components_cnt`` components) followed by
    normalization on a (train_data, train_labels, test_data, test_labels)
    tuple. Labels pass through untouched; each stage is timed and logged.
    """
    train_data, train_labels, test_data, test_labels = data
    log.line()

    started = time.time()
    log.message('pca...')
    train_data, test_data = transform.perform_pca(
        train_data, test_data, components_cnt)
    log.time(time.time() - started)

    started = time.time()
    log.message('normalize...')
    train_data, test_data = transform.normalize_data(train_data, test_data)
    log.time(time.time() - started)

    log.dataset_summary(len(train_data), len(test_data))
    return train_data, train_labels, test_data, test_labels
def transform_data_kernel(data, components_cnt, kernel_params):
    """Kernel-PCA variant of transform_data.

    For the 'chi2' kernel (``kernel_params[0]``) the data is normalized
    *before* the kernel PCA as well as after; other kernels are normalized
    only afterwards. Labels pass through untouched; stages are timed.
    """
    train_data, train_labels, test_data, test_labels = data
    log.line()

    started = time.time()
    log.message('pca...')
    if kernel_params[0] == 'chi2':
        # chi2 expects pre-normalized inputs.
        train_data, test_data = transform.normalize_data(train_data, test_data)
    train_data, test_data = transform.perform_kernel_pca(
        train_data, test_data, components_cnt, kernel_params)
    log.time(time.time() - started)

    started = time.time()
    log.message('normalize...')
    train_data, test_data = transform.normalize_data(train_data, test_data)
    log.time(time.time() - started)

    log.dataset_summary(len(train_data), len(test_data))
    return train_data, train_labels, test_data, test_labels
def parameters_search(score, params, data, pair, n_jobs=8):
    """Grid-search SVC hyper-parameters over a Chi2-kernel pipeline.

    Args:
        score: scoring name passed to GridSearchCV.
        params: parameter grid for the pipeline.
        data: (A_train, y_train, A_test, y_test) tuple.
        pair: identifier written to the results file header.
        n_jobs: parallel workers for the grid search.

    Returns the fitted GridSearchCV object. Results are printed and
    appended to ``const.RESULTS_FILE_NAME``.

    NOTE(review): a later definition of ``parameters_search`` in this file
    rebinds the name and shadows this one at import time — confirm which
    version callers are meant to get.
    """
    # Create a pipeline where our custom predefined kernel Chi2Kernel
    # is run before SVC.
    pipe = Pipeline([
        ('chi2', Chi2Kernel()),
        ('svm', svm.SVC()),
    ])
    A_train, y_train, A_test, y_test = data
    log.line()
    print("# Tuning hyper-parameters for %s\n" % score)

    clf = GridSearchCV(pipe, params, cv=5, scoring=score, n_jobs=n_jobs)
    start = time.time()
    clf.fit(A_train, y_train)
    log.time(time.time() - start)

    print("Best parameters set found on development set:\n%s\n" % clf.best_params_)
    print("\nGrid scores on development set:\n")
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    # Loop variable renamed from `params`: the original shadowed the
    # function argument of the same name.
    for mean, std, cv_params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, cv_params))
    print()

    y_pred = clf.predict(A_test)
    report = classification_report(y_test, y_pred)
    print(report)

    with open(const.RESULTS_FILE_NAME, "a") as file:
        file.write(str(pair) + "\n")
        for mean, std, cv_params in zip(means, stds, clf.cv_results_['params']):
            file.write("%0.3f (+/-%0.03f) for %r\n" % (mean, std * 2, cv_params))
        file.write("\n%s\n\n" % clf.best_params_)
        file.write(report + "\n\n")
    return clf
def parameters_search(score, params, data, pair, n_jobs=8):
    """Grid-search hyper-parameters for a plain SVC classifier.

    Args:
        score: scoring name passed to GridSearchCV.
        params: parameter grid for SVC.
        data: (train_data, train_labels, test_data, test_labels) tuple.
        pair: identifier written to the results file header.
        n_jobs: parallel workers for the grid search.

    Returns the fitted GridSearchCV object. Results are printed and
    appended to ``const.RESULTS_FILE_NAME``.

    NOTE(review): this redefines ``parameters_search`` and shadows the
    earlier Chi2-pipeline version at import time — confirm this is
    intentional, otherwise one of the two should be renamed.
    """
    train_data, train_labels, test_data, test_labels = data
    log.line()
    print("# Tuning hyper-parameters for %s\n" % score)

    clf = GridSearchCV(
        svm.SVC(C=1), params, cv=5, scoring=score, n_jobs=n_jobs
    )
    start = time.time()
    clf.fit(train_data, train_labels)
    log.time(time.time() - start)

    print("Best parameters set found on development set:\n%s\n" % clf.best_params_)
    print("\nGrid scores on development set:\n")
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    # Loop variable renamed from `params`: the original shadowed the
    # function argument of the same name.
    for mean, std, cv_params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, cv_params))

    pred_labels = clf.predict(test_data)
    report = classification_report(test_labels, pred_labels)
    print(report)

    with open(const.RESULTS_FILE_NAME, "a") as file:
        file.write(str(pair) + "\n")
        for mean, std, cv_params in zip(means, stds, clf.cv_results_['params']):
            file.write("%0.3f (+/-%0.03f) for %r\n" % (mean, std * 2, cv_params))
        file.write("\n%s\n\n" % clf.best_params_)
        file.write(report + "\n\n")
    return clf