def _export_labels(self) -> ReportOutput:
    """Write the encoded dataset's labels to ``labels.csv``.

    The file name is appended directly to ``self.result_path``.

    Returns:
        A ReportOutput pointing at the written CSV file, or None when the
        encoded data carries no labels (nothing is written in that case).
    """
    labels = self.dataset.encoded_data.labels
    if labels is None:
        # Nothing to export; callers receive None instead of a ReportOutput.
        return None

    frame = pd.DataFrame(labels)
    labels_path = f"{self.result_path}labels.csv"
    frame.to_csv(labels_path, sep=",", index=False)
    return ReportOutput(labels_path, "exported labels")
def _write_results_table(self, data):
    """Save ``data`` as a CSV file named after ``self.result_name``.

    The target is ``self.result_path`` followed directly by
    ``self.result_name`` and the ``.csv`` suffix; the index is not written.

    Args:
        data: object exposing ``to_csv(path, index=...)``
            (presumably a pandas DataFrame -- confirm against callers).

    Returns:
        A ReportOutput describing the written file.
    """
    csv_path = f"{self.result_path}{self.result_name}.csv"
    data.to_csv(csv_path, index=False)
    return ReportOutput(csv_path, "feature values")
def _export_matrix(self) -> ReportOutput:
    """Export the design matrix and describe the resulting file.

    The data comes from ``self._get_data()`` and is handed to
    ``self._save_to_file`` together with a target prefix built from
    ``self.result_path``; whatever path that helper returns is reported.

    Returns:
        A ReportOutput pointing at the file produced by ``_save_to_file``.
    """
    matrix = self._get_data()
    # No extension here: the prefix is passed as-is to _save_to_file.
    target_prefix = f"{self.result_path}design_matrix"
    written_path = self._save_to_file(matrix, target_prefix)
    return ReportOutput(written_path, "design matrix")
def export_receptorlist(self, receptors, result_path):
    """Export paired receptor chains as SIF network files plus metadata tables.

    For every receptor the two chains named by ``self.chains[0]`` and
    ``self.chains[1]`` are looked up and recorded as one "pair" edge.
    Four files are written; each file name is appended directly to
    ``result_path`` (no separator is inserted):

    * ``node_metadata.tsv`` -- one row per distinct node metadata row, with
      an ``n_duplicates`` column counting how often that row occurred.
    * ``edge_metadata.tsv`` -- one row per distinct edge metadata row.
    * ``all_chains.sif`` -- every chain pair (de-duplicated when
      ``self.drop_duplicates`` is truthy), written without a header.
    * ``shared_chains.sif`` -- only the pairs whose first or second chain
      name occurs in more than one pair, written without a header.

    Args:
        receptors: iterable of objects exposing ``get_chain(chain_name)``.
        result_path: prefix prepended to each output file name.

    Returns:
        A list with one ReportOutput per written file, in the order
        node metadata, edge metadata, all chains, shared chains.
    """
    first_chain_type = self.chains[0]
    second_chain_type = self.chains[1]

    pair_rows = []
    node_rows = []
    edge_rows = []

    for receptor in receptors:
        first_chain = receptor.get_chain(first_chain_type)
        second_chain = receptor.get_chain(second_chain_type)
        first_name = self.get_shared_name(first_chain)
        second_name = self.get_shared_name(second_chain)

        pair_rows.append([first_name, "pair", second_name])
        node_rows.append(
            [first_name, first_chain_type]
            + self.get_formatted_node_metadata(first_chain))
        node_rows.append(
            [second_name, second_chain_type]
            + self.get_formatted_node_metadata(second_chain))
        edge_rows.append(
            [f"{first_name} (pair) {second_name}"]
            + self.get_formatted_edge_metadata(first_chain, second_chain))

    full_df = pd.DataFrame(
        pair_rows,
        columns=[first_chain_type, "relationship", second_chain_type])
    node_meta_df = pd.DataFrame(
        node_rows,
        columns=[
            "shared_name", "chain", "sequence", "v_subgroup", "v_gene",
            "j_subgroup", "j_gene"
        ] + self.additional_node_attributes)
    edge_meta_df = pd.DataFrame(
        edge_rows,
        columns=["shared_name"] + self.additional_edge_attributes)

    # Collapse identical node rows into one, counting the occurrences.
    # NOTE(review): groupby excludes rows with NA in any key column by
    # default, so nodes with missing metadata values would be left out of
    # node_metadata.tsv -- confirm the metadata formatters never emit
    # None/NaN.
    node_cols = list(node_meta_df.columns)
    node_meta_df["n_duplicates"] = 1
    node_meta_df = node_meta_df.groupby(
        node_cols, as_index=False)["n_duplicates"].sum()

    edge_meta_df = edge_meta_df.drop_duplicates()

    node_path = f"{result_path}node_metadata.tsv"
    edge_path = f"{result_path}edge_metadata.tsv"
    all_chains_path = f"{result_path}all_chains.sif"
    shared_chains_path = f"{result_path}shared_chains.sif"

    node_meta_df.to_csv(node_path, sep="\t", index=False, header=True)
    edge_meta_df.to_csv(edge_path, sep="\t", index=False, header=True)

    if self.drop_duplicates:
        full_df = full_df.drop_duplicates()

    full_df.to_csv(all_chains_path, sep="\t", index=False, header=False)

    # A pair is "shared" when either of its chains also appears in another
    # pair. The columns are named after the configured chain types, so look
    # them up by those names rather than assuming "alpha"/"beta".
    first_is_shared = full_df.duplicated([first_chain_type], keep=False)
    second_is_shared = full_df.duplicated([second_chain_type], keep=False)
    shared_df = full_df[first_is_shared | second_is_shared]

    shared_df.to_csv(shared_chains_path, sep="\t", index=False, header=False)

    return [
        ReportOutput(path=node_path),
        ReportOutput(path=edge_path),
        ReportOutput(path=all_chains_path),
        ReportOutput(path=shared_chains_path),
    ]