def plot_pr(self, ml_method, classifier, n_pca):
    title = compose_configuration(f'PR curves of {ml_method} {classifier}',
                                  self.config['filter_latent'],
                                  self.config['standardization'], n_pca,
                                  self.name)
    print(f"Plotting {title}")

    plt.figure(figsize=(6.5, 4.3333))
    self.plot_pr_class(ml_method, classifier, 'micro', 'micro-average', ':',
                       color_phase_dict['micro'])
    self.plot_macro_pr(ml_method, classifier, 'macro-average', ':',
                       color_phase_dict['macro'])
    for class_ in ['immediate-early', 'early', 'late']:
        self.plot_pr_class(ml_method, classifier, class_, class_, '-',
                           color_phase_dict[class_])
    plt.legend()
    # plt.title(title)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.tight_layout()
    filename = compose_filename(
        self.config['output_pr_curves_plot_directory'],
        self.config['filter_latent'], self.config['standardization'], n_pca,
        f'PR_{self.MULTI_CLASS_NAME[ml_method]}_{self.CLASSIFIER_NAME[classifier]}',
        self.name, '')
    plt.savefig(filename, dpi=150)
    plt.close()
def plot_average_of_all_pr(self, av_kind):
    title = compose_configuration(f'{av_kind} PR curves',
                                  self.config['filter_latent'],
                                  self.config['standardization'], 'no-pca',
                                  self.name)
    print(f"Plotting {title}")

    plt.figure(figsize=(6, 4.5))
    for ml_method, classifiers in self.results.items():
        for classifier, result in classifiers.items():
            # Only plot configurations that produced curve data.
            if len(result['fpr']['early']) > 0:
                label = (f'{self.MULTI_CLASS_NAME[ml_method]} '
                         f'{self.CLASSIFIER_NAME[classifier]} '
                         f'{av_kind}-average')
                if av_kind == 'macro':
                    self.plot_macro_pr(ml_method, classifier, label, '-',
                                       None, 1.5)
                else:
                    self.plot_pr_class(ml_method, classifier, av_kind, label,
                                       '-', None, 1.5)
    plt.legend()
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.tight_layout()
    filename = compose_filename(
        self.config['output_pr_curves_plot_directory'],
        self.config['filter_latent'], self.config['standardization'],
        'no-pca', f'PR_{av_kind}_average', self.name, '')
    plt.savefig(filename, dpi=150)
    plt.close()
def plot_average_of_all_roc(self, av_kind):
    title = compose_configuration('Micro and Macro average ROC curves',
                                  self.config['filter_latent'],
                                  self.config['standardization'], 'no-pca',
                                  self.name)
    print(f"Plotting {title}")

    plt.figure(figsize=(6, 4.5))
    for ml_method, classifiers in self.results.items():
        # Only the 'ML' multiclass strategy is included in this plot.
        if ml_method == 'ML':
            for classifier, result in classifiers.items():
                if len(result['fpr']['early']) > 0:
                    self.plot_roc_class(
                        ml_method, classifier, av_kind,
                        f'{self.CLASSIFIER_NAME[classifier]} {av_kind}-average',
                        '-', None, 2)
    plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend()
    plt.tight_layout()
    filename = compose_filename(
        self.config['output_roc_curves_plot_directory'],
        self.config['filter_latent'], self.config['standardization'],
        'no-pca', f'ROC_{av_kind}_average', self.name, '')
    plt.savefig(filename, dpi=150)
    plt.close()
def _extract(self, name, n_pca):
    print(
        compose_configuration('Extracting features', self.filter_phase,
                              self.standardization, n_pca, name))
    if self.filter_phase:
        self.filter_original_phases()

    features = self.feature_possibilities[name]
    if 'length' in features:
        self.add_length()
    if 'counts' in features:
        self.add_aa_counts()
    if 'relative-counts' in features:
        self.add_relative_counts()
    if 'physchem' in features:
        self.add_physchem_properties()
    if 'mass' in features:
        self.add_mass()
    if 'pI' in features:
        self.add_p_i()
    for feature in features:
        # Windowed features are encoded as '<window size>-windowed',
        # e.g. '5-windowed' (single-digit window sizes only).
        if feature[2:] == 'windowed':
            self.add_windowed(int(feature[0]))

    if self.standardization:
        self.standardize()
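# Illustrative sketch only: the kind of feature list _extract() consumes,
# assuming self.feature_possibilities maps a feature-set name to a list of
# feature keywords. The set name 'all' and the window size 5 are made-up
# examples, not values taken from this project's config:
#
#   self.feature_possibilities = {
#       'all': ['length', 'counts', 'relative-counts', 'physchem',
#               'mass', 'pI', '5-windowed'],
#   }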
def grid_search(self, ml_method, classifier_name, grid, splits, n_pca):
    print(
        compose_configuration('Grid Search', self.config['filter_latent'],
                              self.config['standardization'], n_pca,
                              self.name))
    print(ml_method, classifier_name)
    classifier = Classifier(self.config_filepath, ml_method, classifier_name)
    classifier.grid_search(self.name, grid, splits, n_pca)
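# Hypothetical call sketch: the instance name `experiment`, the classifier
# key 'rf', the grid contents and the meaning of n_pca=None are assumptions;
# 'ML' is one of the multiclass strategy keys used elsewhere in this class.
#
#   experiment.grid_search(ml_method='ML',
#                          classifier_name='rf',
#                          grid={'n_estimators': [100, 500]},
#                          splits=5,
#                          n_pca=None)  # None assumed to disable PCA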
def _fit_all(self, n_pca):
    print(
        compose_configuration('Fitting features',
                              self.config['filter_latent'],
                              self.config['standardization'], n_pca,
                              self.name))
    for ml_method in self.config['ML-method-options']:
        for classifier_name in self.config['Classifier-options']:
            classifier = Classifier(self.config_filepath, ml_method,
                                    classifier_name)
            scores = classifier.fit(self.name, n_pca)
            # Nested dict: results[ml_method][classifier_name] -> scores.
            self.results.setdefault(ml_method, {})[classifier_name] = scores
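# Shape of self.results after _fit_all (sketch): a nested dict keyed first by
# multiclass strategy, then by classifier name. The classifier keys 'rf' and
# 'svm' are placeholders; the contents of each `scores` dict come from
# Classifier.fit and are only partly visible in this file (e.g. 'fpr',
# 'features', 'permutation_importance').
#
#   self.results = {
#       'ML':   {'rf': scores_rf, 'svm': scores_svm},
#       '1vsA': {'rf': ..., 'svm': ...},
#       'RR':   {'rf': ..., 'svm': ...},
#   }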
def plot_permutation_importance_summary(self, classifier, max_n, n_pca):
    title = compose_configuration(
        f'Summarized Permutation Importances of {classifier}',
        self.config['filter_latent'], self.config['standardization'], n_pca,
        self.name)
    print(f"Plotting {title}")

    features = self.results['RR'][classifier]['features']
    # Pool the permutation importances of all three multiclass strategies.
    perm_imps = []
    for mc_technique in ['ML', '1vsA', 'RR']:
        perm_imps.extend(
            self.results[mc_technique][classifier]['permutation_importance'])

    boxplot_data = {}
    for i, f in enumerate(features):
        feature_imps = [perm_imp[i] for perm_imp in perm_imps]
        boxplot_data[f] = feature_imps
    # Sort features by median importance; the small mean-based term breaks
    # ties between equal medians.
    boxplot_data = {
        k: v
        for k, v in sorted(boxplot_data.items(),
                           key=lambda item: np.median(item[1]) +
                           0.001 * np.mean(item[1]),
                           reverse=True)
    }

    fig, ax = plt.subplots(figsize=(8.5, 4.5))
    ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
                  alpha=1)
    ax.set_axisbelow(True)
    ax.boxplot(boxplot_data.values(),
               flierprops=dict(markersize=4, markeredgewidth=0.6))
    ax.set_xticklabels(boxplot_data.keys())
    plt.xticks(rotation=45, rotation_mode='anchor', ha='right')
    plt.ylabel('Permutation Importance')
    plt.xlabel('Feature')
    plt.tight_layout()
    filename = compose_filename(self.config['output_pi_plot_directory'],
                                self.config['filter_latent'],
                                self.config['standardization'], n_pca,
                                f'pi_{classifier}', self.name, '')
    plt.savefig(filename, dpi=150)
    plt.close()
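# Standalone sketch of the ranking rule used above, with synthetic numbers
# rather than project data: features are ordered by median importance and a
# small mean-based term breaks ties between equal medians.
#
#   import numpy as np
#   data = {'mass': [0.1, 0.4, 0.4], 'length': [0.4, 0.4, 0.0]}
#   ranked = sorted(data.items(),
#                   key=lambda kv: np.median(kv[1]) + 0.001 * np.mean(kv[1]),
#                   reverse=True)
#   # -> 'mass' ranks first: both medians are 0.4, but its mean is larger.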
def plot_permutation_importance(self, ml_method, classifier, n_pca):
    title = compose_configuration(
        f'Permutation Importances of {ml_method} {classifier}',
        self.config['filter_latent'], self.config['standardization'], n_pca,
        self.name)
    print(f"Plotting {title}")

    features = self.results[ml_method][classifier]['features']
    perm_imps = self.results[ml_method][classifier]['permutation_importance']

    # Mean and standard deviation of each feature's importance across runs.
    permutation_importances = {}
    for i, f in enumerate(features):
        feature_imps = [perm_imp[i] for perm_imp in perm_imps]
        permutation_importances[f] = (np.mean(feature_imps),
                                      np.std(feature_imps))
    # Keep only the 30 highest-ranked features.
    permutation_importances = {
        k: v
        for k, v in sorted(permutation_importances.items(),
                           key=lambda item: item[1][0],
                           reverse=True)[:30]
    }

    x_size = len(permutation_importances) / 2.5
    y_size = x_size / 1.375
    plt.figure(figsize=(x_size, y_size))
    plt.bar(permutation_importances.keys(),
            [val[0] for val in permutation_importances.values()],
            yerr=[val[1] for val in permutation_importances.values()],
            width=1,
            capsize=5)
    plt.xticks(rotation=45, rotation_mode='anchor', ha='right')
    plt.ylabel('Permutation Importance')
    plt.xlabel('Feature')
    plt.title(title, wrap=True)
    plt.tight_layout()
    filename = compose_filename(
        self.config['output_pi_plot_directory'],
        self.config['filter_latent'], self.config['standardization'], n_pca,
        f'permutation_importance_{ml_method}_{classifier}', self.name, '')
    plt.savefig(filename)
    plt.close()
def plot(self, score_metric, n_pca):
    plt.figure()
    title = compose_configuration(self.TITLE[score_metric],
                                  self.config['filter_latent'],
                                  self.config['standardization'], n_pca,
                                  self.name)
    print(f"Plotting {title}")

    max_score = 0
    max_configuration = None
    bar_width = 0.25
    n_groups = len(self.results)
    index = np.arange(n_groups)
    score_name = self.SCORE_NAME[score_metric]
    for i, ml_method in enumerate(self.config["ML-method-options"]):
        classifier = self.results[ml_method]
        classifier_tuples = list(classifier.items())
        x = []
        y = []
        error = []
        for cl, res in classifier_tuples:
            if score_name in res:
                x.append(self.CLASSIFIER_NAME[cl])
                score = np.mean(res[score_name])
                y.append(score)
                error.append(np.std(res[score_name]))
                if score >= max_score:
                    max_score = score
                    max_configuration = (self.MULTI_CLASS_NAME[ml_method],
                                         self.CLASSIFIER_NAME[cl])
        if len(x) > 0:
            plt.bar(index + (i * bar_width),
                    y,
                    yerr=error,
                    width=bar_width,
                    label=self.MULTI_CLASS_NAME[ml_method],
                    capsize=5,
                    color=color_ml_dict[ml_method])
            print(f"\t{self.MULTI_CLASS_NAME[ml_method]}")
            for j, mean in enumerate(y):
                print(f"\t\t{x[j]}: {100 * mean:.2f}% +-{100 * error[j]:.2f}%")
    print(f"Maximum score: {max_configuration[0]}, {max_configuration[1]}: "
          f"{100 * max_score:.2f}%")
    # plt.title(title, wrap=True)
    plt.ylim(top=self.YMAX[score_metric])
    plt.xticks(index + bar_width, [
        self.CLASSIFIER_NAME[cl]
        for cl in list(self.results.values())[0].keys()
    ])
    plt.xlabel('Classifier')
    plt.ylabel(self.YLABEL[score_metric])
    plt.legend(title='Multiclass strategy')
    plt.tight_layout()
    filename = output_filename(self.config['output_bar_plot_directory'],
                               self.config['filter_latent'],
                               self.config['standardization'], n_pca,
                               self.SAVE_TITLE[score_metric], self.name, '')
    plt.savefig(filename)
    plt.close()
    print()
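# Layout note (illustrative numbers, not project data): with bar_width = 0.25
# and three multiclass strategies (i = 0, 1, 2), the bars of classifier group
# j are drawn at j, j + 0.25 and j + 0.5, so placing the tick at
# j + bar_width centres the label on the middle bar of the group.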