def variant():
    """Return a ``Variant`` built from a fixed set of example VCF fields."""
    fields = {
        'CHROM': '1',
        'POS': '100',
        'ID': 'rs01',
        'REF': 'A',
        'ALT': 'T',
        'QUAL': '100',
        'FILTER': 'PASS',
    }
    return Variant(**fields)
def _format_variants(self, variant, index, case_obj, add_all_info=False):
    """Build a puzzle ``Variant`` from one parsed VCF record.

    The returned object carries the information used by the variant view.
    When ``self.variant_type == 'sv'`` the SV coordinates, SV type and the
    optional ``OCC`` tag are added; otherwise the 1000G, CADD, genetic
    model, transcript and ExAC helpers are called.

    Args:
        variant (cython2.Variant): A variant object. Must expose CHROM,
            POS, ID, REF, ALT, QUAL, FILTER, INFO, start and end.
        index (int): The index of the variant
        case_obj (puzzle.models.Case): A case object, forwarded to
            ``_add_genotype_calls``
        add_all_info (bool): If true, also add genotype calls, compounds,
            GMAF and genes.

    Returns:
        Variant: The populated variant object
    """
    # Create an info dict:
    info_dict = dict(variant.INFO)

    # Strip a leading 'chr'/'CHR' prefix from the chromosome name
    chrom = variant.CHROM
    if chrom.startswith(('chr', 'CHR')):
        chrom = chrom[3:]

    # NOTE(review): only the first ALT allele is used; this assumes the
    # record has at least one ALT entry -- confirm against callers.
    variant_obj = Variant(
        CHROM=chrom,
        POS=variant.POS,
        ID=variant.ID,
        REF=variant.REF,
        ALT=variant.ALT[0],
        QUAL=variant.QUAL,
        FILTER=variant.FILTER,
    )
    variant_obj._set_variant_id()

    logger.debug("Creating a variant object of variant {0}".format(
        variant_obj.variant_id))

    variant_obj.index = index
    logger.debug("Updating index to: {0}".format(index))

    ########### Get the coordinates for the variant ##############
    variant_obj.start = variant.start
    variant_obj.stop = variant.end

    # SV variants need to be handled a bit differently since they can be
    # huge; it would take too much power to parse all vep/snpeff entries
    # for these.
    if self.variant_type == 'sv':
        # Prefer the INFO 'END' value; fall back to POS when it is absent
        variant_obj.stop = int(info_dict.get('END', variant_obj.POS))
        self._add_sv_coordinates(variant_obj)
        variant_obj.sv_type = info_dict.get('SVTYPE')

        # Special for FindSV software:
        # SV specific tag for number of occurrences
        occurances = info_dict.get('OCC')
        if occurances:
            logger.debug("Updating occurances to: {0}".format(occurances))
            variant_obj['occurances'] = float(occurances)
            variant_obj.add_frequency('OCC', occurances)
    else:
        self._add_thousand_g(variant_obj, info_dict)
        self._add_cadd_score(variant_obj, info_dict)
        self._add_genetic_models(variant_obj, info_dict)
        self._add_transcripts(variant_obj, info_dict)
        self._add_exac(variant_obj, info_dict)

    self._add_hgnc_symbols(variant_obj)

    if add_all_info:
        self._add_genotype_calls(variant_obj, str(variant), case_obj)
        self._add_compounds(variant_obj, info_dict)
        self._add_gmaf(variant_obj, info_dict)
        self._add_genes(variant_obj)

    ##### Add consequences ####
    self._add_consequences(variant_obj, str(variant))
    self._add_most_severe_consequence(variant_obj)
    self._add_impact_severity(variant_obj)
    self._add_rank_score(variant_obj, info_dict)
    variant_obj.set_max_freq()

    return variant_obj
def _format_variant(self, case_id, gemini_variant, individual_objs,
                    index=0, add_all_info=False):
    """Make a puzzle variant from a gemini variant

    Args:
        case_id (str): related case id
        gemini_variant (GeminiQueryRow): The gemini variant
        individual_objs (list(dict)): A list of Individuals
        index (int): The index of the variant
        add_all_info (bool): If true, also add the genotypes and, for
            SV variants, the genes.

    Returns:
        variant (Variant): A Variant object
    """
    # Strip a leading 'chr'/'CHR' prefix from the chromosome name
    chrom = gemini_variant['chrom']
    if chrom.startswith(('chr', 'CHR')):
        chrom = chrom[3:]

    variant_dict = {
        'CHROM': chrom,
        'POS': str(gemini_variant['start']),
        'ID': gemini_variant['rs_ids'],
        'REF': gemini_variant['ref'],
        'ALT': gemini_variant['alt'],
        'QUAL': gemini_variant['qual'],
        'FILTER': gemini_variant['filter']
    }

    variant = Variant(**variant_dict)

    # Use the gemini id for fast search
    variant.update_variant_id(gemini_variant['variant_id'])
    logger.debug("Creating a variant object of variant {0}".format(
        variant.variant_id))

    variant['index'] = index

    # Add the most severe consequence
    self._add_most_severe_consequence(variant, gemini_variant)

    # Add the impact severity
    self._add_impact_severity(variant, gemini_variant)

    ### POSITION ANNOTATIONS ###
    variant.start = int(gemini_variant['start'])
    variant.stop = int(gemini_variant['end'])

    if self.variant_type == 'sv':
        # Add the sv specific coordinates (start/stop are already set above)
        variant.sv_type = gemini_variant['sub_type']
        self._add_sv_coordinates(variant)
    else:
        ### Consequence and region annotations
        # Add the transcript information
        self._add_transcripts(variant, gemini_variant)
        self._add_thousand_g(variant, gemini_variant)
        self._add_exac(variant, gemini_variant)
        self._add_gmaf(variant, gemini_variant)

        #### Check the impact annotations ####
        if gemini_variant['cadd_scaled']:
            variant.cadd_score = gemini_variant['cadd_scaled']

        # We use the prediction in text
        polyphen = gemini_variant['polyphen_pred']
        if polyphen:
            variant.add_severity('Polyphen', polyphen)

        # We use the prediction in text
        sift = gemini_variant['sift_pred']
        if sift:
            variant.add_severity('SIFT', sift)

    # Add the genes based on the hgnc symbols
    self._add_hgnc_symbols(variant)
    if self.variant_type == 'snv':
        self._add_genes(variant)

    self._add_consequences(variant)

    ### GENOTYPE ANNOTATIONS ###
    # Get the genotype info
    if add_all_info:
        self._add_genotypes(variant, gemini_variant, case_id, individual_objs)
        # For SV variants the genes are only added when all info is requested
        if self.variant_type == 'sv':
            self._add_genes(variant)

    return variant