def _add_genotype_calls(self, variant_obj, variant_line, case_obj):
    """Attach a genotype call for every individual of the case.

    Args:
        variant_obj (puzzle.models.Variant): Receives the calls through
            ``add_individual``.
        variant_line (str): A tab separated variant line.
        case_obj (puzzle.models.Case): Supplies the individuals and the
            case name.
    """
    columns = variant_line.split('\t')

    # The genotype format is read from column index 8; a line that stops
    # before it carries no genotype calls, so there is nothing to add.
    if len(columns) <= 8:
        return

    format_keys = columns[8].split(':')

    for individual in case_obj.individuals:
        sample_id = individual.ind_id
        # Call columns start at index 9 and are offset by ind_index.
        call_values = columns[9 + individual.ind_index].split(':')
        genotype = Genotype(**dict(zip(format_keys, call_values)))

        call = puzzle_genotype(
            sample_id=sample_id,
            genotype=genotype.genotype,
            case_id=case_obj.name,
            phenotype=individual.phenotype,
            ref_depth=genotype.ref_depth,
            alt_depth=genotype.alt_depth,
            genotype_quality=genotype.genotype_quality,
            depth=genotype.depth_of_coverage,
            supporting_evidence=genotype.supporting_evidence,
            pe_support=genotype.pe_support,
            sr_support=genotype.sr_support,
        )
        variant_obj.add_individual(call)
def _add_genotype_calls(self, variant_obj, variant_line, case_obj):
    """Add one genotype call per case individual to the variant.

    Args:
        variant_obj (puzzle.models.Variant): The variant that collects
            the calls via ``add_individual``.
        variant_line (str): A tab separated variant line.
        case_obj (puzzle.models.Case): Provides ``individuals`` and the
            case ``name``.
    """
    fields = variant_line.split('\t')
    has_genotype_columns = len(fields) > 8

    # Lines without a format column (index 8) have no calls to add.
    if not has_genotype_columns:
        return

    gt_keys = fields[8].split(':')

    for individual in case_obj.individuals:
        sample_id = individual.ind_id
        # The individual's call sits ind_index columns after index 9.
        column = 9 + individual.ind_index
        gt_values = fields[column].split(':')
        parsed = Genotype(**dict(zip(gt_keys, gt_values)))

        variant_obj.add_individual(
            puzzle_genotype(
                sample_id=sample_id,
                genotype=parsed.genotype,
                case_id=case_obj.name,
                phenotype=individual.phenotype,
                ref_depth=parsed.ref_depth,
                alt_depth=parsed.alt_depth,
                genotype_quality=parsed.genotype_quality,
                depth=parsed.depth_of_coverage,
                supporting_evidence=parsed.supporting_evidence,
                pe_support=parsed.pe_support,
                sr_support=parsed.sr_support,
            )
        )
def _format_variant(self, variant_line, index, case_obj, head):
    """Build a variant object from one variant line.

    Args:
        variant_line (str): The variant line; parsed together with
            ``head.header`` by ``get_variant_dict``.
        index: Value stored on the variant under ``'index'``.
        case_obj (puzzle.models.Case): A case object. Each of its
            individuals that is listed in ``head.individuals`` gets a
            genotype call on the variant.
        head: Header object providing ``header``, ``individuals``,
            ``vep_columns`` and ``snpeff_columns``.

    Returns:
        Variant: The populated variant object.
    """
    def strip_chr_prefix(chrom):
        # Remove a single leading 'chr' in any casing. str.lstrip('chrCHR')
        # strips a *set of characters*, which also eats the first letters
        # of contig names such as 'hs37d5' or 'HLA-...'.
        if chrom[:3].lower() == 'chr':
            return chrom[3:]
        return chrom

    header_line = head.header

    # Ids of the individuals that are present in the vcf file
    vcf_individuals = set(head.individuals)

    variant_columns = ['CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER']

    vep_header = head.vep_columns
    snpeff_header = head.snpeff_columns

    # Create a variant dict:
    variant_dict = get_variant_dict(
        variant_line=variant_line,
        header_line=header_line
    )
    variant_dict['CHROM'] = strip_chr_prefix(variant_dict['CHROM'])

    # Create an info dict:
    info_dict = get_info_dict(info_line=variant_dict['INFO'])

    # VEP annotation ('CSQ') is preferred; snpEff ('ANN') is only parsed
    # when there is no VEP annotation.
    vep_string = info_dict.get('CSQ')
    snpeff_string = info_dict.get('ANN')

    if vep_string:
        vep_info = get_vep_info(
            vep_string=vep_string,
            vep_header=vep_header
        )
    elif snpeff_string:
        snpeff_info = get_snpeff_info(
            snpeff_string=snpeff_string,
            snpeff_header=snpeff_header
        )

    variant = Variant(
        **{column: variant_dict.get(column, '.')
           for column in variant_columns}
    )
    logger.debug("Creating a variant object of variant %s",
                 variant.get('variant_id'))

    variant['index'] = index
    logger.debug("Updating index to: %s", index)

    variant['start'] = int(variant_dict['POS'])

    alt = variant_dict['ALT']
    if self.variant_type == 'sv':
        other_chrom = variant['CHROM']
        # If we have a translocation:
        if ':' in alt and '<' not in alt:
            other_coordinates = alt.strip('ACGTN[]').split(':')
            other_chrom = strip_chr_prefix(other_coordinates[0])
            other_position = other_coordinates[1]
            # Every other branch stores 'stop' as an int; do the same
            # here. If the mate position is not a plain number the raw
            # text is kept, as before.
            try:
                variant['stop'] = int(other_position)
            except ValueError:
                variant['stop'] = other_position
            # Set 'infinity' to length if translocation
            variant['sv_len'] = float('inf')
        else:
            variant['stop'] = int(info_dict.get('END', variant_dict['POS']))
            variant['sv_len'] = variant['stop'] - variant['start']

        variant['stop_chrom'] = other_chrom
    else:
        variant['stop'] = int(variant_dict['POS']) + \
            (len(variant_dict['REF']) - len(alt))

    variant['sv_type'] = info_dict.get('SVTYPE')
    variant['cytoband_start'] = get_cytoband_coord(
        chrom=variant['CHROM'],
        pos=variant['start'])
    if variant.get('stop_chrom'):
        variant['cytoband_stop'] = get_cytoband_coord(
            chrom=variant['stop_chrom'],
            pos=variant['stop'])

    # It would be easy to update these keys...
    thousand_g = info_dict.get('1000GAF')
    if thousand_g:
        logger.debug("Updating thousand_g to: %s", thousand_g)
        variant['thousand_g'] = float(thousand_g)
        variant.add_frequency('1000GAF', variant.get('thousand_g'))

    # SV specific tag for number of occurances
    occurances = info_dict.get('OCC')
    if occurances:
        logger.debug("Updating occurances to: %s", occurances)
        variant['occurances'] = float(occurances)
        variant.add_frequency('OCC', occurances)

    cadd_score = info_dict.get('CADD')
    if cadd_score:
        logger.debug("Updating cadd_score to: %s", cadd_score)
        variant['cadd_score'] = float(cadd_score)

    rank_score_entry = info_dict.get('RankScore')
    if rank_score_entry:
        # Entries are comma separated with the score after the last ':';
        # the last entry wins.
        rank_score = rank_score_entry.split(',')[-1].split(':')[-1]
        logger.debug("Updating rank_score to: %s", rank_score)
        variant['rank_score'] = float(rank_score)

    genetic_models_entry = info_dict.get('GeneticModels')
    if genetic_models_entry:
        genetic_models = []
        for family_annotation in genetic_models_entry.split(','):
            # Models are '|' separated after the last ':'
            genetic_models.extend(
                family_annotation.split(':')[-1].split('|'))
        # Log the models themselves; the previous message formatted
        # rank_score, which is unbound when 'RankScore' is absent.
        logger.debug("Updating genetic_models to: %s", genetic_models)
        variant['genetic_models'] = genetic_models

    # Add genotype calls:
    for individual in case_obj.individuals:
        sample_id = individual.ind_id
        if sample_id not in vcf_individuals:
            continue

        raw_call = dict(zip(
            variant_dict['FORMAT'].split(':'),
            variant_dict[sample_id].split(':')
        ))
        genotype = Genotype(**raw_call)

        variant.add_individual(puzzle_genotype(
            sample_id=sample_id,
            genotype=genotype.genotype,
            case_id=individual.case_name,
            phenotype=individual.phenotype,
            ref_depth=genotype.ref_depth,
            alt_depth=genotype.alt_depth,
            genotype_quality=genotype.genotype_quality,
            depth=genotype.depth_of_coverage,
            supporting_evidence=genotype.supporting_evidence,
            pe_support=genotype.pe_support,
            sr_support=genotype.sr_support,
        ))

    # Add transcript information:
    gmaf = None
    if vep_string:
        for transcript_info in vep_info:
            transcript = self._get_vep_transcripts(transcript_info)
            gmaf_raw = transcript_info.get('GMAF')
            if gmaf_raw:
                gmaf = float(gmaf_raw.split(':')[-1])
            variant.add_transcript(transcript)

        if gmaf:
            variant.add_frequency('GMAF', gmaf)
            # Use GMAF as the 1000G frequency when none was annotated
            if not variant.thousand_g:
                variant.thousand_g = gmaf

    elif snpeff_string:
        for transcript_info in snpeff_info:
            transcript = self._get_snpeff_transcripts(transcript_info)
            variant.add_transcript(transcript)

    most_severe_consequence = get_most_severe_consequence(
        variant['transcripts']
    )
    if most_severe_consequence:
        variant['most_severe_consequence'] = most_severe_consequence
        variant['impact_severity'] = IMPACT_SEVERITIES.get(
            most_severe_consequence)

    for gene in self._get_genes(variant):
        variant.add_gene(gene)

    self._add_compounds(variant=variant, info_dict=info_dict)

    return variant