def do_cross_validation():
    summaries = []
    for pipeline in pipelines:
        for (classifier, classifier_name) in classifiers:
            print('Using pipeline %s with classifier %s' % (pipeline.get_name(), classifier_name))
            scores = []
            for target in targets:
                print('Processing %s (classifier %s)' % (target, classifier_name))
                task_core = TaskCore(cached_data_loader=cached_data_loader,
                                     data_dir=data_dir,
                                     target=target,
                                     pipeline=pipeline,
                                     classifier_name=classifier_name,
                                     classifier=classifier,
                                     normalize=should_normalize(classifier),
                                     gen_preictal=pipeline.gen_preictal,
                                     cv_ratio=cv_ratio)
                data = CrossValidationScoreTask(task_core).run()
                score = data.score
                scores.append(score)
                print('%.3f' % score)
            if len(scores) > 0:
                name = pipeline.get_name() + '_' + classifier_name
                summary = get_score_summary(name, scores)
                summaries.append((summary, np.mean(scores)))
                print(summary)
    print_results(summaries)
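
# get_score_summary() and print_results() are not defined in this section.
# Minimal sketches of what do_cross_validation() appears to assume follow;
# the exact formatting in the original codebase may differ.

def get_score_summary(name, scores):
    # One-line summary of the per-target CV scores for a pipeline/classifier pair.
    return '%s: min=%.3f mean=%.3f max=%.3f' % (name, np.min(scores), np.mean(scores), np.max(scores))

def print_results(summaries):
    # Print all summaries ordered by mean score so the best combination is easy to spot.
    for summary, mean_score in sorted(summaries, key=lambda s: s[1]):
        print(summary)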

def train_full_model(make_predictions):
    for pipeline in pipelines:
        for (classifier, classifier_name) in classifiers:
            print('Using pipeline %s with classifier %s' % (pipeline.get_name(), classifier_name))
            guesses = ['clip,seizure,early']
            classifier_filenames = []
            for target in targets:
                task_core = TaskCore(cached_data_loader=cached_data_loader,
                                     data_dir=data_dir,
                                     target=target,
                                     pipeline=pipeline,
                                     classifier_name=classifier_name,
                                     classifier=classifier,
                                     normalize=should_normalize(classifier),
                                     gen_ictal=pipeline.gen_ictal,
                                     cv_ratio=cv_ratio)
                if make_predictions:
                    predictions = MakePredictionsTask(task_core).run()
                    guesses.append(predictions.data)
                else:
                    task = TrainClassifierTask(task_core)
                    task.run()
                    classifier_filenames.append(task.filename())
            if make_predictions:
                filename = 'submission%d-%s_%s.csv' % (ts, classifier_name, pipeline.get_name())
                filename = os.path.join(submission_dir, filename)
                with open(filename, 'w') as f:
                    print('\n'.join(guesses), file=f)
                print('wrote', filename)
            else:
                print('Trained classifiers ready in %s' % cache_dir)
                for filename in classifier_filenames:
                    print(os.path.join(cache_dir, filename + '.pickle'))
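
# should_normalize() is called throughout but not shown here. A plausible
# sketch, assuming it flags scale-sensitive classifiers by class name
# (the real implementation may key on the classifier differently):

def should_normalize(classifier):
    # Feature scaling matters for margin- and distance-based models;
    # tree ensembles such as random forests are scale-invariant.
    return type(classifier).__name__ in ('SVC', 'LinearSVC', 'LogisticRegression', 'KNeighborsClassifier')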

def do_cross_validation():
    for pipeline in pipelines:
        for (classifier, classifier_name) in classifiers:
            print('Using pipeline %s with classifier %s' % (pipeline.get_name(), classifier_name))
            scores = []
            for target in targets:
                print('Processing %s (classifier %s)' % (target, classifier_name))
                task_core = TaskCore(cached_data_loader=cached_data_loader,
                                     data_dir=data_dir,
                                     target=target,
                                     pipeline=pipeline,
                                     classifier_name=classifier_name,
                                     classifier=classifier,
                                     normalize=should_normalize(classifier),
                                     gen_ictal=pipeline.gen_ictal,
                                     cv_ratio=cv_ratio)
                data = CrossValidationScoreTask(task_core).run()
                score = data.score
                scores.append(score)
                print(target, 'Seizure_AUC=', data.S_auc, 'Early_AUC=', data.E_auc)

def do_cross_validation():
    summaries = []
    for pipeline in pipelines:
        for (classifier, classifier_name) in classifiers:
            print('Using pipeline %s with classifier %s' % (pipeline.get_name(), classifier_name))
            scores = []
            S_scores = []
            E_scores = []
            for target in targets:
                print('Processing %s (classifier %s)' % (target, classifier_name))
                task_core = TaskCore(cached_data_loader=cached_data_loader,
                                     data_dir=data_dir,
                                     target=target,
                                     pipeline=pipeline,
                                     classifier_name=classifier_name,
                                     classifier=classifier,
                                     normalize=should_normalize(classifier),
                                     gen_ictal=pipeline.gen_ictal,
                                     cv_ratio=cv_ratio)
                data = CrossValidationScoreTask(task_core).run()
                score = data.score
                scores.append(score)
                print('%.3f' % score, 'S=%.4f' % data.S_auc, 'E=%.4f' % data.E_auc)
                S_scores.append(data.S_auc)
                E_scores.append(data.E_auc)
            if len(scores) > 0:
                name = pipeline.get_name() + '_' + classifier_name
                summary = get_score_summary(name, scores)
                summaries.append((summary, np.mean(scores)))
                print(summary)
            if len(S_scores) > 0:
                name = pipeline.get_name() + '_' + classifier_name
                summary = get_score_summary(name, S_scores)
                print('S', summary)
            if len(E_scores) > 0:
                name = pipeline.get_name() + '_' + classifier_name
                summary = get_score_summary(name, E_scores)
                print('E', summary)
    print_results(summaries)
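
# CrossValidationScoreTask is defined elsewhere; data.S_auc and data.E_auc are
# taken here to be per-task ROC AUCs (seizure and early-seizure). A sketch of
# how such scores are typically computed with scikit-learn; the column layout
# of proba is an assumption:

from sklearn.metrics import roc_auc_score

def auc_scores(y_seizure, y_early, proba):
    # proba[:, 0]: predicted seizure probability; proba[:, 1]: early-seizure probability
    return roc_auc_score(y_seizure, proba[:, 0]), roc_auc_score(y_early, proba[:, 1])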

def train_full_model(make_predictions):
    for pipeline in pipelines:
        for (classifier, classifier_name) in classifiers:
            print('Using pipeline %s with classifier %s' % (pipeline.get_name(), classifier_name))
            guesses = ['clip,preictal']
            classifier_filenames = []
            plot2file = PdfPages(os.path.join(figure_dir, ('figure%d-_%s_%s_.pdf' % (ts, classifier_name, pipeline.get_name()))))
            for target in targets:
                task_core = TaskCore(cached_data_loader=cached_data_loader,
                                     data_dir=data_dir,
                                     target=target,
                                     pipeline=pipeline,
                                     classifier_name=classifier_name,
                                     classifier=classifier,
                                     normalize=should_normalize(classifier),
                                     gen_preictal=pipeline.gen_preictal,
                                     cv_ratio=cv_ratio,
                                     plot2file=plot2file)
                if make_predictions:
                    predictions = MakePredictionsTask(task_core).run()
                    guesses.append(predictions.data)
                else:
                    task = TrainClassifierTask(task_core)
                    task.run()
                    classifier_filenames.append(task.filename())
            if make_predictions:
                filename = 'submission%d-%s_%s.csv' % (ts, classifier_name, pipeline.get_name())
                filename = os.path.join(submission_dir, filename)
                with open(filename, 'w') as f:
                    print('\n'.join(guesses), file=f)
                print('wrote', filename)
            else:
                print('Trained classifiers ready in %s' % cache_dir)
                for filename in classifier_filenames:
                    print(os.path.join(cache_dir, filename + '.pickle'))
            plot2file.close()
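
# PdfPages above comes from matplotlib; the import (not shown in this section)
# would be:
#
#     from matplotlib.backends.backend_pdf import PdfPages
#
# Each target's diagnostic plots are appended to one multi-page PDF per
# pipeline/classifier pair, and plot2file.close() finalizes that file.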

def train_full_model(make_predictions):
    for pipeline in pipelines:
        for classifier in classifiers:
            print('Using pipeline %s with classifier %s' % (pipeline.get_name(), classifier))
            guesses = ['File,Class']
            classifier_filenames = []
            # plot2file = PdfPages(os.path.join(figure_dir, ('figure%d-_%s_%s_.pdf' % (ts, classifier, pipeline.get_name()))))
            for target in targets:
                task_core = TaskCore(cached_data_loader=cached_data_loader,
                                     data_dir=data_dir,
                                     target=target,
                                     pipeline=pipeline,
                                     classifier=classifier,
                                     normalize=should_normalize(classifier),
                                     gen_preictal=pipeline.gen_preictal,
                                     cv_ratio=cv_ratio,
                                     bin_size=bin_size)
                if make_predictions:
                    predictions = MakePredictionsTask(task_core).run()
                    guesses.append(predictions.data)
                else:
                    # task = TrainClassifierTask(task_core)
                    # task.run()
                    # classifier_filenames.append(task.filename())
                    print('not implemented')
            if make_predictions:
                filename = 'submission%d-%s_%s.csv' % (ts, classifier, pipeline.get_name())
                filename = os.path.join(submission_dir, filename)
                with open(filename, 'w') as f:
                    print('\n'.join(guesses), file=f)
                print('wrote', filename)
            else:
                # Training is not implemented above, so classifier_filenames is empty here.
                print('Trained classifiers ready in %s' % cache_dir)
                for filename in classifier_filenames:
                    print(os.path.join(cache_dir, filename + '.pickle'))
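
# A sketch of how these entry points might be wired up, assuming a simple
# flag-based CLI (no driver code is shown in this section):

if __name__ == '__main__':
    import sys
    if '--cv' in sys.argv:
        do_cross_validation()
    else:
        # --predict writes a submission CSV; otherwise classifiers are trained and cached
        train_full_model(make_predictions='--predict' in sys.argv)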

def predict_all(make_predictions):
    for pipeline in pipelines:
        for (classifier, classifier_name) in classifiers:
            print('Using pipeline %s with classifier %s' % (pipeline.get_name(), classifier_name))
            lines = ['clip,preictal']
            subjectID = 0
            # Accumulate data across subjects; test_size tracks per-subject test counts.
            X_train = y_train = X_test = None
            test_size = []
            for target in targets:
                task_core = TaskCore(cached_data_loader=cached_data_loader,
                                     data_dir=data_dir,
                                     target=target,
                                     pipeline=pipeline,
                                     classifier_name=classifier_name,
                                     classifier=classifier,
                                     normalize=should_normalize(classifier),
                                     gen_preictal=pipeline.gen_preictal,
                                     cv_ratio=cv_ratio)
                data = GetCrossSubjectDataTask(task_core).run()
                test_size.append(np.shape(data.X_test)[0])
                if subjectID > 0:
                    X_train = np.concatenate((X_train, data.X_train), axis=0)
                    y_train = np.concatenate((y_train, data.y_train), axis=0)
                    X_test = np.concatenate((X_test, data.X_test), axis=0)
                else:
                    X_train = data.X_train
                    y_train = data.y_train
                    X_test = data.X_test
                subjectID += 1

            # Training (note: this task_core is built but not used below)
            task_core = TaskCore(cached_data_loader=cached_data_loader,
                                 data_dir=data_dir,
                                 target=[],
                                 pipeline=pipeline,
                                 classifier_name=classifier_name,
                                 classifier=classifier,
                                 normalize=should_normalize(classifier),
                                 gen_preictal=pipeline.gen_preictal,
                                 cv_ratio=cv_ratio)
            # Collapse graded labels to binary (assumes raw label values lie in 0..10).
            y_train = np.ceil(0.1 * y_train)
            y_train = y_train.astype('int_')  # astype returns a copy; assign it back
            if should_normalize(classifier):
                X_train, temp = normalize_data(X_train, X_train)
            print('Training ...')
            print('Dim', np.shape(X_train), np.shape(y_train))
            start = time.get_seconds()  # time appears to be a project helper, not the stdlib module
            classifier.fit(X_train, y_train)
            elapsedSecs = time.get_seconds() - start
            print('t=%ds' % int(elapsedSecs))

            # Calibrate: fit a logistic regression on the classifier's own
            # training-set probabilities, then apply it to the test probabilities.
            y_estimate = classifier.predict_proba(X_train)
            lr = LogisticRegression(random_state=0)
            lr.fit(y_estimate, y_train)
            predictions_proba = classifier.predict_proba(X_test)
            predictions_calibrated = lr.predict_proba(predictions_proba)

            # Output: average the 12 per-clip sample predictions for each test segment.
            m = 0
            totalSample = 12
            startIdx = 0
            for target in targets:
                for i in range(test_size[m] // totalSample):  # integer division for range()
                    nstr = '%04d' % (i + 1)  # zero-pad the segment number to four digits
                    preictalOverAllSample = 0
                    for k in range(totalSample):
                        p = predictions_calibrated[i * totalSample + k + startIdx]
                        preictal = translate_prediction(p)
                        preictalOverAllSample += preictal / totalSample
                    newline = '%s_test_segment_%s.mat,%.15f' % (target, nstr, preictalOverAllSample)
                    lines.append(newline)
                    print(newline)
                startIdx = startIdx + test_size[m]
                m += 1

            filename = 'submission%d-%s_%s.csv' % (ts, classifier_name, pipeline.get_name())
            filename = os.path.join(submission_dir, filename)
            with open(filename, 'w') as f:
                print('\n'.join(lines), file=f)
            print('wrote', filename)
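
# LogisticRegression above is from sklearn.linear_model (import not shown in
# this section). translate_prediction() is assumed to reduce one row of
# predict_proba output to a scalar preictal probability; a minimal sketch
# under that assumption:

def translate_prediction(prediction):
    # prediction is one row of calibrated predict_proba output,
    # e.g. [P(interictal), P(preictal)]; return the positive-class probability.
    return prediction[-1]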