def plot_pr(self, ml_method, classifier, n_pca):
        """Plot and save the precision-recall curves of one classifier.

        The micro- and macro-averaged curves are requested with a dotted
        line style, followed by one solid curve for each of the classes
        'immediate-early', 'early' and 'late'. The figure is saved to the
        path returned by ``compose_filename`` and then closed.

        Args:
            ml_method: Key of the multiclass strategy; forwarded to the curve
                helpers and looked up in ``self.MULTI_CLASS_NAME``.
            classifier: Key of the classifier; forwarded to the curve helpers
                and looked up in ``self.CLASSIFIER_NAME``.
            n_pca: PCA setting, passed through to ``compose_configuration``
                and ``compose_filename``.
        """
        settings = self.config
        title = compose_configuration(
            f'PR curves of {ml_method} {classifier}',
            settings['filter_latent'], settings['standardization'], n_pca,
            self.name)
        # The title is only logged; it is not drawn on the figure.
        print(f"Plotting {title}")
        plt.figure(figsize=(6.5, 4.3333))

        # Averaged curves use the ':' line style.
        self.plot_pr_class(ml_method, classifier, 'micro', 'micro-average',
                           ':', color_phase_dict['micro'])
        self.plot_macro_pr(ml_method, classifier, 'macro-average', ':',
                           color_phase_dict['macro'])

        # Per-class curves use the '-' line style and the class name as label.
        for phase in ('immediate-early', 'early', 'late'):
            self.plot_pr_class(ml_method, classifier, phase, phase, '-',
                               color_phase_dict[phase])

        plt.legend()
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.tight_layout()

        plot_name = (f'PR_{self.MULTI_CLASS_NAME[ml_method]}'
                     f'_{self.CLASSIFIER_NAME[classifier]}')
        filename = compose_filename(
            settings['output_pr_curves_plot_directory'],
            settings['filter_latent'], settings['standardization'], n_pca,
            plot_name, self.name, '')
        plt.savefig(filename, dpi=150)
        plt.close()
    def plot_average_of_all_pr(self, av_kind):
        """Plot one averaged PR curve per strategy/classifier pair.

        Every entry of ``self.results`` whose ``result['fpr']['early']`` is
        non-empty contributes one curve. For ``av_kind == 'macro'`` the curve
        is drawn by ``plot_macro_pr``; for any other value it is drawn by
        ``plot_pr_class`` with ``av_kind`` as the curve key. The figure is
        saved to the path returned by ``compose_filename`` and then closed.

        Args:
            av_kind: Averaging kind; used in the legend labels, the logged
                title and the output file name.
        """
        title = compose_configuration(f'{av_kind} PR curves',
                                      self.config['filter_latent'],
                                      self.config['standardization'],
                                      'no-pca', self.name)
        print(f"Plotting {title}")

        plt.figure(figsize=(6, 4.5))

        use_macro = av_kind == 'macro'
        for ml_method, classifiers in self.results.items():
            for classifier, result in classifiers.items():
                # Skip combinations without any recorded curve data.
                if len(result['fpr']['early']) == 0:
                    continue
                label = (f'{self.MULTI_CLASS_NAME[ml_method]} '
                         f'{self.CLASSIFIER_NAME[classifier]} '
                         f'{av_kind}-average')
                if use_macro:
                    self.plot_macro_pr(ml_method, classifier, label, '-',
                                       None, 1.5)
                else:
                    self.plot_pr_class(ml_method, classifier, av_kind, label,
                                       '-', None, 1.5)

        plt.legend()
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.tight_layout()

        filename = compose_filename(
            self.config['output_pr_curves_plot_directory'],
            self.config['filter_latent'], self.config['standardization'],
            'no-pca', f'PR_{av_kind}_average', self.name, '')
        plt.savefig(filename, dpi=150)
        plt.close()
    def plot_average_of_all_roc(self, av_kind):
        """Plot the averaged ROC curve of every classifier of the 'ML' entry.

        Only the ``'ML'`` entry of ``self.results`` is considered, and within
        it only classifiers whose ``result['fpr']['early']`` is non-empty.
        A dashed black diagonal from (0, 0) to (1, 1) is added, and the
        figure is saved to the path returned by ``compose_filename`` and then
        closed.

        Args:
            av_kind: Averaging kind; forwarded to ``plot_roc_class`` as the
                curve key and used in the legend labels and file name. The
                logged title does not depend on it.
        """
        title = compose_configuration('Micro and Macro average ROC curves',
                                      self.config['filter_latent'],
                                      self.config['standardization'],
                                      'no-pca', self.name)
        print(f"Plotting {title}")

        plt.figure(figsize=(6, 4.5))

        for ml_method, classifiers in self.results.items():
            if ml_method != 'ML':
                continue
            for classifier, result in classifiers.items():
                # Skip classifiers without any recorded curve data.
                if len(result['fpr']['early']) == 0:
                    continue
                label = f'{self.CLASSIFIER_NAME[classifier]} {av_kind}-average'
                self.plot_roc_class(ml_method, classifier, av_kind, label,
                                    '-', None, 2)

        # Dashed black diagonal drawn as a reference line.
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend()
        plt.tight_layout()

        filename = compose_filename(
            self.config['output_roc_curves_plot_directory'],
            self.config['filter_latent'], self.config['standardization'],
            'no-pca', f'ROC_{av_kind}_average', self.name, '')
        plt.savefig(filename, dpi=150)
        plt.close()
    def _extract(self, name, n_pca):
        """Run the feature-extraction steps selected for feature set *name*.

        The steps are, in order: optional phase filtering (when
        ``self.filter_phase`` is truthy), the ``add_*`` method of every
        feature key found in ``self.feature_possibilities[name]``, the
        windowed features, and optional standardization (when
        ``self.standardization`` is truthy).

        Args:
            name: Key into ``self.feature_possibilities``.
            n_pca: PCA setting; only used in the logged configuration line.
        """
        print(
            compose_configuration('Extracting features', self.filter_phase,
                                  self.standardization, n_pca, name))

        if self.filter_phase:
            self.filter_original_phases()

        features = self.feature_possibilities[name]

        # Feature key -> name of the method that adds it, in execution order.
        # Methods are resolved lazily so only selected ones are looked up.
        feature_steps = (
            ('length', 'add_length'),
            ('counts', 'add_aa_counts'),
            ('relative-counts', 'add_relative_counts'),
            ('physchem', 'add_physchem_properties'),
            ('mass', 'add_mass'),
            ('pI', 'add_p_i'),
        )
        for key, method_name in feature_steps:
            if key in features:
                getattr(self, method_name)()

        # An entry is windowed when everything after its first two characters
        # is 'windowed'; its first character is parsed as the window argument
        # (so only a single leading character is read).
        for entry in features:
            if entry[2:] != 'windowed':
                continue
            self.add_windowed(int(entry[0]))

        if self.standardization:
            self.standardize()
示例#5
0
 def grid_search(self, ml_method, classifier_name, grid, splits, n_pca):
     """Run a grid search for one strategy/classifier pair.

     Logs the configuration and the selected pair, builds a ``Classifier``
     from ``self.config_filepath`` and delegates to its ``grid_search``.
     Nothing is returned.

     Args:
         ml_method: Multiclass strategy passed to ``Classifier``.
         classifier_name: Classifier passed to ``Classifier``.
         grid: Forwarded unchanged to ``Classifier.grid_search``.
         splits: Forwarded unchanged to ``Classifier.grid_search``.
         n_pca: PCA setting; logged and forwarded to the search.
     """
     banner = compose_configuration('Grid Search',
                                    self.config['filter_latent'],
                                    self.config['standardization'], n_pca,
                                    self.name)
     print(banner)
     print(ml_method, classifier_name)

     searcher = Classifier(self.config_filepath, ml_method, classifier_name)
     searcher.grid_search(self.name, grid, splits, n_pca)
示例#6
0
 def _fit_all(self, n_pca):
     """Fit every configured strategy/classifier pair and store the scores.

     For each combination of ``self.config['ML-method-options']`` and
     ``self.config['Classifier-options']`` a ``Classifier`` is built and
     fitted; the value returned by ``fit`` is stored in
     ``self.results[ml_method][classifier_name]``.

     Args:
         n_pca: PCA setting; logged and forwarded to ``Classifier.fit``.
     """
     banner = compose_configuration('Fitting features',
                                    self.config['filter_latent'],
                                    self.config['standardization'], n_pca,
                                    self.name)
     print(banner)

     for ml_method in self.config['ML-method-options']:
         for classifier_name in self.config['Classifier-options']:
             model = Classifier(self.config_filepath, ml_method,
                                classifier_name)
             scores = model.fit(self.name, n_pca)
             # Create the per-strategy dict on first use, then record scores.
             self.results.setdefault(ml_method, {})[classifier_name] = scores
    def plot_permutation_importance_summary(self, classifier, max_n, n_pca):
        """Box-plot the permutation importances of *classifier* per feature.

        The importance samples of the 'ML', '1vsA' and 'RR' entries of
        ``self.results`` are pooled, one box is drawn per feature, and the
        features are ordered by decreasing median importance. The figure is
        saved to the path returned by ``compose_filename`` and then closed.

        Args:
            classifier: Classifier key looked up under each strategy in
                ``self.results``.
            max_n: Maximum number of top-ranked features to show. ``None``
                shows every feature. (Previously this argument was accepted
                but ignored.)
            n_pca: PCA setting, passed through to ``compose_configuration``
                and ``compose_filename``.
        """
        title = compose_configuration(
            f'Summarized Permutation Importances of {classifier}',
            self.config['filter_latent'], self.config['standardization'],
            n_pca, self.name)
        print(f"Plotting {title}")

        # Feature names are taken from the 'RR' result only.
        # NOTE(review): assumes all three strategies report importances in
        # this same feature order - confirm against Classifier.fit.
        features = self.results['RR'][classifier]['features']

        perm_imps = []
        for mc_technique in ['ML', '1vsA', 'RR']:
            perm_imps.extend(self.results[mc_technique][classifier]
                             ['permutation_importance'])

        # Feature -> list with one importance value per pooled sample.
        boxplot_data = {
            f: [perm_imp[i] for perm_imp in perm_imps]
            for i, f in enumerate(features)
        }

        # Rank by median; the small mean term only breaks ties between
        # features with (nearly) equal medians.
        ranked = sorted(
            boxplot_data.items(),
            key=lambda item: np.median(item[1]) + 0.001 * np.mean(item[1]),
            reverse=True)
        if max_n is not None:
            ranked = ranked[:max_n]

        labels = [feature for feature, _ in ranked]
        samples = [values for _, values in ranked]

        fig, ax = plt.subplots(figsize=(8.5, 4.5))
        ax.yaxis.grid(True,
                      linestyle='-',
                      which='major',
                      color='lightgrey',
                      alpha=1)
        # Keep the grid lines behind the boxes.
        ax.set_axisbelow(True)
        ax.boxplot(samples,
                   flierprops=dict(markersize=4, markeredgewidth=0.6))
        ax.set_xticklabels(labels)

        plt.xticks(rotation=45, rotation_mode='anchor', ha='right')
        plt.ylabel('Permutation Importance')
        plt.xlabel('Feature')
        plt.tight_layout()
        filename = compose_filename(self.config['output_pi_plot_directory'],
                                    self.config['filter_latent'],
                                    self.config['standardization'], n_pca,
                                    f'pi_{classifier}', self.name, '')
        plt.savefig(filename, dpi=150)
        plt.close()
    def plot_permutation_importance(self, ml_method, classifier, n_pca):
        """Bar-plot the mean permutation importance of the top features.

        For each feature the mean and standard deviation over all entries of
        the stored ``'permutation_importance'`` list are computed. The 30
        features with the highest mean are drawn as bars with the standard
        deviation as error bar. The figure is saved to the path returned by
        ``compose_filename`` and then closed.

        Args:
            ml_method: Multiclass strategy key into ``self.results``.
            classifier: Classifier key into ``self.results[ml_method]``.
            n_pca: PCA setting, passed through to ``compose_configuration``
                and ``compose_filename``.
        """
        title = compose_configuration(
            f'Permutation Importances of {ml_method} {classifier}',
            self.config['filter_latent'], self.config['standardization'],
            n_pca, self.name)
        print(f"Plotting {title}")

        features = self.results[ml_method][classifier]['features']
        perm_imps = self.results[ml_method][classifier][
            'permutation_importance']

        # Feature -> (mean, std) of its importance over all stored entries.
        stats = {}
        for column, feature in enumerate(features):
            samples = [run[column] for run in perm_imps]
            stats[feature] = (np.mean(samples), np.std(samples))

        # Keep the 30 features with the highest mean importance.
        top_ranked = sorted(stats.items(),
                            key=lambda item: item[1][0],
                            reverse=True)[:30]
        permutation_importances = dict(top_ranked)

        heights = [mean for mean, _ in permutation_importances.values()]
        errors = [std for _, std in permutation_importances.values()]

        # The figure grows with the number of bars at a fixed aspect ratio.
        x_size = len(permutation_importances) / 2.5
        plt.figure(figsize=(x_size, x_size / 1.375))
        plt.bar(permutation_importances.keys(),
                heights,
                yerr=errors,
                width=1,
                capsize=5)
        plt.xticks(rotation=45, rotation_mode='anchor', ha='right')
        plt.ylabel('Permutation Importance')
        plt.xlabel('Feature')
        plt.title(title, wrap=True)
        plt.tight_layout()

        filename = compose_filename(
            self.config['output_pi_plot_directory'],
            self.config['filter_latent'], self.config['standardization'],
            n_pca, f'permutation_importance_{ml_method}_{classifier}',
            self.name, '')
        plt.savefig(filename)
        plt.close()
    def plot(self, score_metric, n_pca):
        """Plot a grouped bar chart of one score for all fitted classifiers.

        Each classifier gets one group on the x axis; within a group there is
        one bar per multiclass strategy listed in
        ``self.config["ML-method-options"]``. Bar heights are the mean of the
        stored score values and error bars their standard deviation. The
        per-bar values and the best configuration are printed, and the figure
        is saved and closed.

        Args:
            score_metric: Key into ``self.TITLE``, ``self.SCORE_NAME``,
                ``self.YMAX``, ``self.YLABEL`` and ``self.SAVE_TITLE``.
            n_pca: PCA setting, passed through to the title and file-name
                helpers.

        Raises:
            KeyError: If a configured multiclass strategy has no entry in
                ``self.results``.
        """
        plt.figure()
        title = compose_configuration(self.TITLE[score_metric],
                                      self.config['filter_latent'],
                                      self.config['standardization'], n_pca,
                                      self.name)
        # The title is only logged; it is not drawn on the figure.
        print(f"Plotting {title}")

        score_name = self.SCORE_NAME[score_metric]
        bar_width = 0.25

        # One x slot per classifier, in first-seen order across strategies.
        # Slots must be sized by classifiers, not by strategies, otherwise
        # plt.bar fails whenever the two counts differ.
        classifier_order = []
        for classifiers in self.results.values():
            for cl in classifiers:
                if cl not in classifier_order:
                    classifier_order.append(cl)
        slot_of = {cl: slot for slot, cl in enumerate(classifier_order)}
        index = np.arange(len(classifier_order))

        # -inf so that negative scores can still be reported as the maximum.
        max_score = float('-inf')
        max_configuration = None

        for i, ml_method in enumerate(self.config["ML-method-options"]):
            x = []
            slots = []
            y = []
            error = []
            for cl, res in self.results[ml_method].items():
                if score_name not in res:
                    continue
                values = res[score_name]
                score = np.mean(values)
                x.append(self.CLASSIFIER_NAME[cl])
                # Keep each bar in its classifier's slot even when another
                # classifier of this strategy lacks the score.
                slots.append(slot_of[cl])
                y.append(score)
                error.append(np.std(values))
                # '>=' keeps the last configuration among equal scores.
                if score >= max_score:
                    max_score = score
                    max_configuration = (self.MULTI_CLASS_NAME[ml_method],
                                         self.CLASSIFIER_NAME[cl])

            if not x:
                continue

            plt.bar(np.asarray(slots) + (i * bar_width),
                    y,
                    yerr=error,
                    width=bar_width,
                    label=self.MULTI_CLASS_NAME[ml_method],
                    capsize=5,
                    color=color_ml_dict[ml_method])

            print(f"\t{self.MULTI_CLASS_NAME[ml_method]}")
            for label, mean, err in zip(x, y, error):
                print(f"\t\t{label}: {100 * mean:.2f}% +-{100 * err:.2f}%")

        if max_configuration is None:
            # Nothing was plotted, so there is no best configuration.
            print("Maximum score: no scores available")
        else:
            print(
                f"Maximum score: {max_configuration[0]}, {max_configuration[1]}: {100 * max_score:.2f}%"
            )

        plt.ylim(top=self.YMAX[score_metric])
        # Ticks are shifted by one bar width from each group's first bar.
        plt.xticks(index + bar_width,
                   [self.CLASSIFIER_NAME[cl] for cl in classifier_order])
        plt.xlabel('Classifier')
        plt.ylabel(self.YLABEL[score_metric])
        plt.legend(title='Multiclass strategy')
        plt.tight_layout()
        # NOTE(review): the other plotting methods build their path with
        # compose_filename; confirm output_filename is still the intended
        # helper here.
        filename = output_filename(self.config['output_bar_plot_directory'],
                                   self.config['filter_latent'],
                                   self.config['standardization'], n_pca,
                                   self.SAVE_TITLE[score_metric], self.name,
                                   '')
        plt.savefig(filename)
        plt.close()
        print()