def get_gene_to_haplotypes_call(
        cls, full_call_data: FullCallData, panel: Panel) -> Dict[str, Set[HaplotypeCall]]:
    """Determine the haplotype calls for every gene in the panel.

    Walks over the gene infos supplied by ``panel.get_gene_infos()``, logs
    which gene is being processed, and stores the result of
    ``cls.__get_haplotypes_call`` for that gene.

    Returns:
        A dict keyed by gene (``gene_info.gene``) whose values are whatever
        ``cls.__get_haplotypes_call(full_call_data, gene_info)`` returned.
    """
    calls_per_gene = {}
    for info in panel.get_gene_infos():
        gene = info.gene
        logging.info(f"Calling haplotypes for {gene}")
        calls_per_gene[gene] = cls.__get_haplotypes_call(full_call_data, info)
    return calls_per_gene
def get_genotype_tsv_text(cls, pgx_analysis: PgxAnalysis, panel: Panel, version: str) -> str:
    """Render the haplotype calls of an analysis as TSV text.

    The first line is the header built from ``cls.GENOTYPE_TSV_COLUMNS``.
    Genes follow in sorted order. A gene with haplotype calls produces one
    row per call, ordered by haplotype name; a gene without any produces a
    single row filled with the unresolved-haplotype placeholders. Every row
    carries the gene's drug names and prescription URLs (drugs sorted by name
    and then URL, joined with ``cls.DRUG_SEPARATOR``), the panel id and
    ``version``. The returned text ends with a newline.

    Raises:
        AssertionError: if the genes in the analysis differ from
            ``panel.get_genes()`` (only when assertions are enabled).
    """
    gene_to_haplotype_calls = pgx_analysis.get_gene_to_haplotype_calls()

    # The analysis and the panel must describe exactly the same genes.
    genes_in_analysis = set(gene_to_haplotype_calls)
    assert genes_in_analysis == panel.get_genes(), (
        f"Gene lists inconsistent.\n"
        f"From analysis={sorted(genes_in_analysis)}\n"
        f"From panel={sorted(panel.get_genes())}")

    # Per gene: (joined drug names, joined prescription URLs), in the same drug order.
    drug_columns = {}
    for gene_info in panel.get_gene_infos():
        ordered_drugs = sorted(
            gene_info.drugs,
            key=lambda drug: (drug.name, drug.url_prescription_info),
        )
        joined_names = cls.DRUG_SEPARATOR.join(drug.name for drug in ordered_drugs)
        joined_urls = cls.DRUG_SEPARATOR.join(
            drug.url_prescription_info for drug in ordered_drugs)
        drug_columns[gene_info.gene] = (joined_names, joined_urls)

    rows = [cls.TSV_SEPARATOR.join(cls.GENOTYPE_TSV_COLUMNS)]
    for gene in sorted(gene_to_haplotype_calls):
        drug_names, drug_urls = drug_columns[gene]
        calls = gene_to_haplotype_calls[gene]

        if not calls:
            # No haplotype could be resolved for this gene: emit one placeholder row.
            fields = [
                gene,
                cls.UNRESOLVED_HAPLOTYPE_STRING,
                cls.NOT_APPLICABLE_ZYGOSITY_STRING,
                UNKNOWN_FUNCTION_STRING,
                drug_names,
                drug_urls,
                panel.get_id(),
                version,
            ]
            rows.append(cls.TSV_SEPARATOR.join(fields))
            continue

        for call in sorted(calls, key=lambda item: item.haplotype_name):
            haplotype_name = call.haplotype_name
            fields = [
                gene,
                haplotype_name,
                cls.__get_zygosity(call),
                panel.get_haplotype_function(gene, haplotype_name),
                drug_names,
                drug_urls,
                panel.get_id(),
                version,
            ]
            rows.append(cls.TSV_SEPARATOR.join(fields))

    return "\n".join(rows) + "\n"
def __get_calls_for_panel_variants_without_calls(
        cls, simple_call_data: SimpleCallData, panel: Panel) -> FrozenSet[SimpleCall]:
    """Create reference calls for panel variants that have no call in the data.

    A panel rs id is treated as uncalled when it is not among the rs ids of
    the existing calls (the placeholder "." is ignored) and none of its
    relevant coordinates overlaps a coordinate covered by an existing call.
    For each such rs id a REF/REF call relative to the reference assembly of
    ``simple_call_data`` is built, with its filter set to
    ``SimpleCallFilter.NO_CALL``.

    Returns:
        A frozenset with the generated calls; empty when nothing is missing.
    """
    assembly = simple_call_data.reference_assembly

    known_rs_ids = set()
    for call in simple_call_data.calls:
        known_rs_ids.update(rs_id for rs_id in call.rs_ids if rs_id != ".")

    covered_coordinates = set()
    for call in simple_call_data.calls:
        covered_coordinates.update(call.get_relevant_coordinates())

    inferred_calls = set()
    for gene_info in panel.get_gene_infos():
        for rs_id_info in gene_info.rs_id_infos:
            overlap = rs_id_info.get_relevant_coordinates(assembly).intersection(
                covered_coordinates)
            # Skip variants that were already called, fully or partially.
            if rs_id_info.rs_id in known_rs_ids or overlap:
                continue

            # Assuming REF/REF relative to reference assembly
            start = rs_id_info.get_start_coordinate(assembly)
            ref_allele = rs_id_info.get_reference_allele(assembly)
            inferred_calls.add(
                SimpleCall(
                    start,
                    ref_allele,
                    (ref_allele, ref_allele),
                    gene_info.gene,
                    (rs_id_info.rs_id, ),
                    REF_CALL_ANNOTATION_STRING,
                    SimpleCallFilter.NO_CALL,
                ))
    return frozenset(inferred_calls)