def _generate(self) -> ReportResult:
    """Plot ROC and precision-recall overviews for the optimal models of all instruction states.

    The directory ``<result_path><name>/`` is built first and stored back on
    ``self.result_path``. Three preconditions are then asserted, in this order:
    every state shares the label names (and the values of the first label) of
    the first state, exactly one label is configured, and no state refitted its
    optimal model.

    Returns:
        ReportResult with the ROC and precision-recall figures and the
        concatenation of the tables returned by ``plot_roc`` and
        ``plot_precision_recall``.

    Raises:
        AssertionError: if any of the preconditions above does not hold.
    """
    self.result_path = PathBuilder.build(self.result_path + f'{self.name}/')

    states = self.instruction_states
    reference_config = states[0].label_configuration

    def _labels_match_reference(state):
        # label names must agree first; only then are the values of the first label compared
        config = state.label_configuration
        names_agree = reference_config.get_labels_by_name() == config.get_labels_by_name()
        if not names_agree:
            return names_agree
        reference_values = reference_config.get_label_values(reference_config.get_labels_by_name()[0])
        return reference_values == config.get_label_values(config.get_labels_by_name()[0])

    assert all(_labels_match_reference(state) for state in states), \
        "PerformanceOverview: there is a difference in labels between instructions, the plots cannot be created."

    assert len(reference_config.get_labels_by_name()) == 1, \
        'PerformanceOverview: multiple labels were provided, but only one can be used in this report.'

    assert all(state.refit_optimal_model is False for state in states), \
        f"{PerformanceOverview.__name__}: no test datasets were available to assess the performance of optimal models as they were refitted on " \
        f"the full datasets. No reports will be generated."

    label = reference_config.get_label_objects()[0]

    # one item per instruction state: the first value stored in its optimal_hp_items
    optimal_hp_items = []
    for state in states:
        optimal_hp_items.append(list(state.optimal_hp_items.values())[0])

    # every second Viridis colour, in reversed order
    colors = px.colors.sequential.Viridis[::2][::-1]

    figure_auc, table_aucs = self.plot_roc(optimal_hp_items, label, colors)
    figure_pr, table_pr = self.plot_precision_recall(optimal_hp_items, label, colors)

    return ReportResult(output_figures=[figure_auc, figure_pr],
                        output_tables=table_aucs + table_pr)
def _generate(self) -> ReportResult:
    """Reshape the dataset, write it as a results table and plot it.

    Returns:
        ReportResult built from the report name, the list of figures (empty
        when ``_safe_plot`` returns None) and a one-element list holding the
        results table.
    """
    PathBuilder.build(self.result_path)

    data_long_format = DataReshaper.reshape(self.dataset)
    table_result = self._write_results_table(data_long_format)
    report_output_fig = self._safe_plot(data_long_format=data_long_format)

    # "no figure" is reported as an empty list (not None), matching the other
    # reports in this file, so consumers can always iterate over the figures
    output_figures = [] if report_output_fig is None else [report_output_fig]

    return ReportResult(self.name, output_figures, [table_result])
def _generate(self) -> ReportResult:
    """Run ``compute_contributions`` for the fitted model on the whole test dataset.

    The test metadata file is converted to HDF5 through the method, a data
    loader covering every test example is created, and the model stored under
    ``self.label`` is passed to ``compute_contributions`` together with the
    configured step count, threshold and output file names.

    Returns:
        ReportResult whose figures are ReportOutput objects for
        ``self.filename_inputs`` and ``self.filename_kernels``.
    """
    PathBuilder.build(self.result_path)

    encoded_test_data = self.test_dataset.encoded_data
    metadata_path = encoded_test_data.info['metadata_filepath']
    hdf5_path = self.method._metadata_to_hdf5(metadata_path, [self.label])

    # select every example of the test set: indices 0 .. n_examples - 1
    example_count = len(self.test_dataset.encoded_data.example_ids)
    all_indices = np.array(range(example_count))

    loader = self.method.make_data_loader(hdf5_path,
                                          pre_loaded_hdf5_file=None,
                                          indices=all_indices,
                                          label=self.label,
                                          eval_only=True,
                                          is_train=False)

    # get_model(label) is indexed by the label again to obtain the model itself
    deeprc_model = self.method.get_model(self.label)[self.label]

    compute_contributions(intgrds_set_loader=loader,
                          deeprc_model=deeprc_model,
                          n_steps=self.n_steps,
                          threshold=self.threshold,
                          resdir=self.result_path,
                          filename_inputs=self.filename_inputs,
                          filename_kernels=self.filename_kernels)

    contribution_outputs = [ReportOutput(self.filename_inputs),
                            ReportOutput(self.filename_kernels)]
    return ReportResult(self.name, output_figures=contribution_outputs)
def _generate(self) -> ReportResult:
    """Compute the overlap with the reference for the optimal model and for each assessment split.

    An overlap is computed only for encoders accepted by
    ``ReferenceSequenceOverlap._check_encoder_class``; other encoders are
    skipped without producing output.

    Returns:
        ReportResult with one figure and one table per computed overlap.
    """
    figures, tables = [], []
    PathBuilder.build(self.result_path)

    optimal_encoder = self.state.optimal_hp_items[self.label].encoder
    if ReferenceSequenceOverlap._check_encoder_class(optimal_encoder):
        optimal_figure, optimal_table = self._compute_optimal_model_overlap()
        figures.append(optimal_figure)
        tables.append(optimal_table)

    for assessment_state in self.state.assessment_states:
        encoder = assessment_state.label_states[self.label].optimal_assessment_item.encoder
        if not ReferenceSequenceOverlap._check_encoder_class(encoder):
            continue

        # split indices are shown 1-based in file names and in the description
        figure_filename = f"{self.result_path}assessment_split_{assessment_state.split_index + 1}_model_vs_reference_overlap_{self.label}.pdf"
        df_filename = f"{self.result_path}assessment_split_{assessment_state.split_index + 1}_overlap_sequences_{self.label}"
        description = f"overlap sequences between the model for assessment split " \
                      f"{assessment_state.split_index + 1} and reference list"

        split_figure, split_table = self._compute_model_overlap(figure_filename, df_filename, encoder, description)
        figures.append(split_figure)
        tables.append(split_table)

    return ReportResult(self.name, output_figures=figures, output_tables=tables)
def _generate(self) -> ReportResult:
    """Generate the report data and plot it.

    Returns:
        ReportResult named after the report; its figure list holds the plot
        returned by ``_safe_plot`` or is empty when that call returns None.
    """
    plot_input = self._generate_data()
    figure = self._safe_plot(data=plot_input, output_written=False)

    if figure is not None:
        output_figures = [figure]
    else:
        output_figures = []

    return ReportResult(self.name, output_figures=output_figures)
def _generate(self) -> ReportResult:
    """Export the matrix, its details and its labels.

    Returns:
        ReportResult with the matrix export as the only table and the details
        and labels exports (in that order) as text outputs.
    """
    PathBuilder.build(self.result_path)

    # exports run in the order matrix, details, labels
    tables = [self._export_matrix()]
    texts = [self._export_details(), self._export_labels()]

    return ReportResult(self.name, output_tables=tables, output_text=texts)
def _generate(self) -> ReportResult:
    """Retrieve the plotting data, write it as a table and plot it.

    Returns:
        ReportResult with the results table and, when ``_safe_plot`` returns a
        figure, that figure; otherwise the figure list is empty.
    """
    PathBuilder.build(self.result_path)

    data = self._retrieve_plotting_data()
    table = self._write_results_table(data)
    figure = self._safe_plot(plotting_data=data)

    figures = []
    if figure is not None:
        figures.append(figure)

    return ReportResult(self.name, output_tables=[table], output_figures=figures)
def _generate(self) -> ReportResult:
    """Plot the beta distribution between the limits given by ``get_distribution_limits``.

    Side effects:
        Sets ``self.result_name`` to ``"beta_distribution"`` before plotting.

    Returns:
        ReportResult with a fixed descriptive name and the plotted figure, or
        an empty figure list when ``_safe_plot`` returns None.
    """
    PathBuilder.build(self.result_path)

    # get_distribution_limits returns the upper limit first, then the lower one
    upper_limit, lower_limit = self.get_distribution_limits()
    self.result_name = "beta_distribution"

    figure = self._safe_plot(upper_limit=upper_limit, lower_limit=lower_limit, output_written=False)

    if figure is None:
        figures = []
    else:
        figures = [figure]

    return ReportResult(name="Beta distribution priors - probability that a sequence is disease-associated",
                        output_figures=figures)
def _generate(self) -> ReportResult:
    """Compute and export the overlap matrix between the optimal items of all instruction states.

    Side effects:
        ``self.result_path`` is replaced by the built ``<result_path>/<name>/``
        directory; ``_extract_label`` is called before ``self.label`` is read.

    Returns:
        ReportResult with the figure and the exported matrix, both described
        as sequence overlap across datasets.
    """
    self.result_path = PathBuilder.build(f"{self.result_path}/{self.name}/")
    self._extract_label()

    hp_items, dataset_names = [], []
    for state in self.instruction_states:
        hp_items.append(state.optimal_hp_items[self.label])
    for state in self.instruction_states:
        dataset_names.append(state.dataset.name)

    overlap_matrix = SequenceAnalysisHelper.compute_overlap_matrix(hp_items)

    figure_path = self._make_figure(overlap_matrix, dataset_names)
    data_path = self._export_matrix(overlap_matrix, dataset_names)

    figure_output = ReportOutput(figure_path, 'sequence overlap across datasets')
    table_output = ReportOutput(data_path, 'sequence overlap across datasets (csv)')
    return ReportResult(output_figures=[figure_output], output_tables=[table_output])
def _generate(self) -> ReportResult:
    """Copy the relevant-sequences CSV into the result folder with renamed columns.

    The input path is read from the ``"relevant_sequence_path"`` entry of the
    encoded data's info; columns are renamed with the mapping from
    ``_compute_column_mapping``.

    Returns:
        ReportResult with one table pointing at ``relevant_sequences.csv``
        under ``self.result_path``.
    """
    source_path = self.dataset.encoded_data.info["relevant_sequence_path"]
    sequences = pd.read_csv(source_path)

    renamed = sequences.rename(columns=self._compute_column_mapping(sequences))

    PathBuilder.build(self.result_path)
    filename = f"{self.result_path}relevant_sequences.csv"
    renamed.to_csv(filename, index=False)

    table = ReportOutput(filename, "relevant sequences")
    return ReportResult(self.name, output_tables=[table])
def _generate(self) -> ReportResult:
    """Plot every kernel of both convolution chains and the fully-connected layer.

    Returns:
        ReportResult (created without a name) whose figures and tables are
        the kernel outputs, in chain order, followed by the outputs of
        ``_plot_fc_layer``.
    """
    PathBuilder.build(self.result_path)
    report_result = ReportResult()

    alphabet = EnvironmentSettings.get_sequence_alphabet(self.method.sequence_type)

    # chain 1 kernels are handled first, then chain 2
    kernel_names = self.method.CNN.conv_chain_1 + self.method.CNN.conv_chain_2
    for name in kernel_names:
        kernel_figures, kernel_tables = self._plot_kernels(name, alphabet)
        report_result.output_figures.extend(kernel_figures)
        report_result.output_tables.extend(kernel_tables)

    fc_figure, fc_table = self._plot_fc_layer()
    report_result.output_figures.append(fc_figure)
    report_result.output_tables.append(fc_table)

    return report_result
def _generate(self) -> ReportResult:
    """Export the receptors of the dataset to a tab-separated file in GLIPH2 format.

    One row is written per receptor with the columns ``CDR3b``, ``TRBV``,
    ``TRBJ``, ``CDR3a``, ``subject:condition`` and ``count``. The subject part
    is the receptor's ``subject_id`` when its metadata provides one (as an
    attribute or as a mapping key) and the receptor's position in the dataset
    otherwise; the condition part is ``receptor.metadata[self.condition]``.

    Returns:
        ReportResult with one table pointing at ``exported_data.tsv`` under
        ``self.result_path``.
    """
    PathBuilder.build(self.result_path)

    alpha_chains, beta_chains, trbv, trbj, subject_condition, count = [], [], [], [], [], []

    for index, receptor in enumerate(self.dataset.get_data()):
        beta = receptor.get_chain("beta")
        beta_metadata = beta.metadata

        alpha_chains.append(receptor.get_chain("alpha").amino_acid_sequence)
        beta_chains.append(beta.amino_acid_sequence)

        # NOTE(review): genes are read without the None guard that the count
        # below applies to the beta chain metadata; kept as in the original.
        trbv.append(beta_metadata.v_gene)
        trbj.append(beta_metadata.j_gene)

        # receptor.metadata is subscripted below, i.e. used like a mapping;
        # getattr alone never sees mapping keys, so the key is looked up as well
        metadata = receptor.metadata
        subject = getattr(metadata, 'subject_id', None)
        if subject is None and hasattr(metadata, 'get'):
            subject = metadata.get('subject_id')
        if subject is None:
            subject = str(index)
        subject_condition.append(f"{subject}:{metadata[self.condition]}")

        # a beta chain without metadata counts as a single occurrence
        count.append(beta_metadata.count if beta_metadata is not None else 1)

    df = pd.DataFrame({"CDR3b": beta_chains, "TRBV": trbv, "TRBJ": trbj, "CDR3a": alpha_chains,
                       "subject:condition": subject_condition, "count": count})

    file_path = self.result_path + "exported_data.tsv"
    df.to_csv(file_path, sep="\t", index=False)

    return ReportResult(self.name, output_tables=[ReportOutput(file_path, "exported data in GLIPH2 format")])
def _generate(self) -> ReportResult:
    """Collect comparison tables and figures for every label of the state.

    For each label, the assessment comparison is generated when
    ``self.compare_in_assessment`` is set and the selection comparisons when
    ``self.compare_in_selection`` is set.

    Returns:
        ReportResult with all collected figures and tables; entries that are
        None are left out.
    """
    PathBuilder.build(self.result_path)

    collected_tables, collected_figures = [], []

    for label in self.state.label_configuration.get_labels_by_name():
        if self.compare_in_assessment:
            # the assessment comparison yields a single table and a single figure
            assessment_table, assessment_figure = self._generate_for_assessment(label)
            collected_tables.append(assessment_table)
            collected_figures.append(assessment_figure)

        if self.compare_in_selection:
            # the selection comparison yields collections of tables and figures
            selection_tables, selection_figures = self._generate_for_selection(label)
            collected_tables.extend(selection_tables)
            collected_figures.extend(selection_figures)

    present_figures = [figure for figure in collected_figures if figure is not None]
    present_tables = [table for table in collected_tables if table is not None]

    return ReportResult(self.name, present_figures, present_tables)
def _generate(self) -> ReportResult:
    """Store and plot the training and test performance dataframes for ``self.feature``.

    Side effects:
        Sets ``self.result_name`` to ``"<feature>_performance"``.

    Returns:
        ReportResult with the tables returned by ``_store_dataframes`` and the
        plotted figure; the figure list is empty when ``_plot`` returns None.
    """
    PathBuilder.build(self.result_path)
    self.result_name = f"{self.feature}_performance"

    training_dataframe, test_dataframe = self._make_plot_dataframes()
    table_results = self._store_dataframes(training_dataframe, test_dataframe)

    report_output_fig = self._plot(training_dataframe=training_dataframe, test_dataframe=test_dataframe)

    # "no figure" is reported as an empty list (not None), matching the other
    # reports in this file, so consumers can always iterate over the figures
    output_figures = [] if report_output_fig is None else [report_output_fig]

    return ReportResult(output_tables=table_results, output_figures=output_figures)
def _generate(self):
    """Export the receptors of the dataset, one result folder per repertoire or per receptor dataset.

    For a RepertoireDataset every repertoire is exported to
    ``<result_path>/<repertoire identifier>/`` and the tables of all
    repertoires are accumulated. For a ReceptorDataset all receptors go to
    ``<result_path>/<dataset identifier>/``. Any other dataset type yields an
    empty result.

    Returns:
        ReportResult whose tables are the outputs of ``export_receptorlist``.
    """
    report_output_tables = []

    if isinstance(self.dataset, RepertoireDataset):
        for repertoire in self.dataset.get_data():
            result_path = f"{self.result_path}/{repertoire.identifier}/"
            PathBuilder.build(result_path)
            # accumulate across repertoires; plain assignment kept only the
            # tables of the last repertoire
            # (assumes export_receptorlist returns a list, as its direct use as
            # output_tables in the receptor branch suggests)
            report_output_tables.extend(self.export_receptorlist(repertoire.receptors, result_path))
    elif isinstance(self.dataset, ReceptorDataset):
        receptors = self.dataset.get_data()
        result_path = f"{self.result_path}/{self.dataset.identifier}/"
        PathBuilder.build(result_path)
        report_output_tables.extend(self.export_receptorlist(receptors, result_path=result_path))

    return ReportResult(output_tables=report_output_tables)
def _generate(self) -> ReportResult:
    """Cluster the positive examples with TCRdist and discover motifs in sufficiently large clusters.

    The label is the first key of the training dataset's encoded labels.
    Distances are computed through ``TCRdistHelper.compute_tcr_dist``, clusters
    through ``hcluster_diff`` on the sum of alpha and beta pairwise distances,
    and motif discovery runs for every cluster with at least
    ``self.min_cluster_size`` neighbours. A cluster summary is written to
    ``tcrdist_summary.csv`` under ``self.result_path``.

    Returns:
        ReportResult named "TCRdist motif discovery" with the motif figures
        and tables, plus the summary table as the last table.
    """
    self.label = list(self.train_dataset.encoded_data.labels.keys())[0]

    # tcrdist names are imported lazily inside the method; member_summ is
    # imported here as well, since it is used below and was not among them
    from source.util.TCRdistHelper import TCRdistHelper
    from tcrdist.rep_diff import hcluster_diff
    from tcrdist.summarize import member_summ

    PathBuilder.build(self.result_path)

    subsampled_dataset = self._extract_positive_example_dataset()
    reference_sequences = self._extract_reference_sequences()

    tcr_rep = TCRdistHelper.compute_tcr_dist(subsampled_dataset, [self.label], self.cores)
    tcr_rep.hcluster_df, tcr_rep.Z = hcluster_diff(clone_df=tcr_rep.clone_df,
                                                   pwmat=tcr_rep.pw_alpha + tcr_rep.pw_beta,
                                                   x_cols=["epitope"],
                                                   count_col='count')

    figures, tables = [], []

    logging.info(f'{TCRdistMotifDiscovery.__name__}: created {tcr_rep.hcluster_df.shape[0]} clusters, now discovering motifs in clusters.')

    for index, row in tcr_rep.hcluster_df.iterrows():
        # clusters below the configured size are skipped
        if len(row['neighbors_i']) < self.min_cluster_size:
            continue
        figure_outputs, table_outputs = self._discover_motif_in_cluster(tcr_rep, index, row, reference_sequences)
        figures.extend(figure_outputs)
        tables.extend(table_outputs)

    res_summary = member_summ(res_df=tcr_rep.hcluster_df, clone_df=tcr_rep.clone_df, addl_cols=['epitope'])

    summary_path = self.result_path + "tcrdist_summary.csv"
    res_summary.to_csv(summary_path)
    tables.append(ReportOutput(path=summary_path, name="TCRdist summary (csv)"))

    return ReportResult("TCRdist motif discovery", figures, tables)
def _write_reports(self) -> ReportResult:
    """Write the match table, the repertoire sizes and the encoder-specific tables.

    The match table and repertoire sizes are always written. For the
    ``"MatchedSequencesEncoder"`` encoding the sequence info is added. For any
    other encoding, paired matches are added when the ``"chain"`` feature
    annotation has exactly two distinct values, and receptor info is added
    when the encoding is ``"MatchedReceptorsEncoder"``.

    Returns:
        ReportResult with all written tables.
    """
    output_tables = [self._write_match_table(), self._write_repertoire_sizes()]

    encoded_data = self.dataset.encoded_data

    if encoded_data.encoding == "MatchedSequencesEncoder":
        output_tables += self._write_sequence_info(self.result_path + "/sequence_info")
        return ReportResult(self.name, output_tables=output_tables)

    distinct_chains = self.dataset.encoded_data.feature_annotations["chain"].unique()
    if len(distinct_chains) == 2:
        output_tables += self._write_paired_matches(self.result_path + "/paired_matches")

    if self.dataset.encoded_data.encoding == "MatchedReceptorsEncoder":
        output_tables += self._write_receptor_info(self.result_path + "/receptor_info")

    return ReportResult(self.name, output_tables=output_tables)
def _generate(self):
    """Write the coefficient table and plot the coefficient selections requested in ``self._coefs_to_plot``.

    The plot data gets an ``"abs_coefficients"`` column and is sorted by it in
    descending order (both in place). Depending on the requested settings,
    plots are made for all coefficients, the nonzero ones, those whose
    absolute value reaches each cutoff in ``self._cutoff``, and the n largest
    by absolute value for each n in ``self._n_largest``.

    Returns:
        ReportResult with the features/coefficients table and every plot that
        is not None.
    """
    PathBuilder.build(self.result_path)
    self._set_plotting_parameters()

    plot_data = self._retrieve_plot_data()
    plot_data["abs_coefficients"] = abs(plot_data["coefficients"])
    plot_data.sort_values(by="abs_coefficients", inplace=True, ascending=False)

    result_table_path = self._write_results_table(plot_data[["features", "coefficients"]])
    self._write_settings()

    figures = []

    if CoefficientPlottingSetting.ALL in self._coefs_to_plot:
        figures.append(self._plot(plotting_data=plot_data, output_name="all_coefficients"))

    if CoefficientPlottingSetting.NONZERO in self._coefs_to_plot:
        is_nonzero = plot_data["coefficients"] != 0
        figures.append(self._plot(plotting_data=plot_data[is_nonzero], output_name="nonzero_coefficients"))

    if CoefficientPlottingSetting.CUTOFF in self._coefs_to_plot:
        for cutoff_val in self._cutoff:
            reaches_cutoff = plot_data["abs_coefficients"] >= cutoff_val
            figures.append(self._plot(plotting_data=plot_data[reaches_cutoff],
                                      output_name="cutoff_{}_coefficients".format(cutoff_val)))

    if CoefficientPlottingSetting.N_LARGEST in self._coefs_to_plot:
        for n_val in self._n_largest:
            top_n = plot_data.nlargest(n=n_val, columns=["abs_coefficients"])
            figures.append(self._plot(plotting_data=top_n,
                                      output_name="largest_{}_coefficients".format(n_val)))

    table_output = ReportOutput(result_table_path, "features and coefficients csv")
    produced_figures = [figure for figure in figures if figure is not None]

    return ReportResult(self.name, output_tables=[table_output], output_figures=produced_figures)
def _generate(self) -> ReportResult:
    """Plot the sequence lengths returned by ``_get_sequence_lengths``.

    Returns:
        ReportResult named after the report class, holding the plotted
        figure; the figure list is empty when ``_plot`` returns None.
    """
    sequence_lengths = self._get_sequence_lengths()
    report_output_fig = self._plot(sequence_lengths=sequence_lengths)

    # "no figure" is reported as an empty list (not None), matching the other
    # reports in this file, so consumers can always iterate over the figures
    output_figures = [] if report_output_fig is None else [report_output_fig]

    return ReportResult(type(self).__name__, output_figures=output_figures)
def _generate(self) -> ReportResult:
    """Create the sequence length distribution plot and its tables.

    Returns:
        ReportResult named after the report, with the single figure and the
        tables returned by ``_generate_sequence_length_distribution_plots``.
    """
    PathBuilder.build(self.result_path)

    plot_outputs = self._generate_sequence_length_distribution_plots()
    # the helper's result unpacks into exactly a figure and its tables
    distribution_figure, distribution_tables = plot_outputs

    return ReportResult(name=self.name,
                        output_figures=[distribution_figure],
                        output_tables=distribution_tables)