def variant(self, case_id, variant_id):
    """Look up one variant of a case by its id.

    The VCF that belongs to the case is read from the top and the first
    record whose id equals ``variant_id`` is formatted and returned.

    As a side effect the parsed header is stored on the instance as
    ``self.head``, together with ``self.vep_header`` and
    ``self.snpeff_header``.

    Args:
        case_id (str): Id of the case, handed to ``self.case`` to find the
            VCF file
        variant_id (str): Id of the wanted variant

    Returns:
        The value produced by ``self._format_variants`` for the matching
        record, or None when no record matches.
    """
    case_obj = self.case(case_id=case_id)
    vcf_file_path = case_obj.variant_source

    # Header information is kept on the instance for later use
    self.head = get_header(vcf_file_path)
    self.vep_header = self.head.vep_columns
    self.snpeff_header = self.head.snpeff_columns

    # Records are numbered from 1 in file order
    for position, record in enumerate(VCF(vcf_file_path), 1):
        raw_id = get_variant_id(variant_line=str(record))
        # NOTE(review): lstrip('chrCHR') removes any leading run of the
        # characters c/h/r/C/H/R, not only a literal 'chr' prefix -- confirm
        # that ids are normalised the same way wherever they are created.
        record_id = raw_id.lstrip('chrCHR')
        if record_id != variant_id:
            continue
        return self._format_variants(
            variant=record,
            index=position,
            case_obj=case_obj,
            add_all_info=True,
        )

    return None
def variant(self, case_id, variant_id):
    """Look up one variant of a case by its id.

    The VCF that belongs to the case is read line by line; header lines
    (those starting with '#') are skipped and the first variant line whose
    id equals ``variant_id`` is formatted and returned.

    Args:
        case_id (str): Id of the case, handed to ``self.case`` to find the
            VCF file
        variant_id (str): Id of the wanted variant

    Returns:
        The value produced by ``self._format_variant`` for the matching
        line, or None when no line matches.
    """
    case_obj = self.case(case_id=case_id)
    vcf_file_path = case_obj.variant_source
    head = self._get_header(vcf_file_path)

    # Only variant lines are counted, starting from 1
    position = 0
    for variant_line in get_vcf_handle(infile=vcf_file_path):
        if variant_line.startswith('#'):
            continue
        position += 1

        raw_id = get_variant_id(variant_line=variant_line)
        # NOTE(review): lstrip('chrCHR') removes any leading run of the
        # characters c/h/r/C/H/R, not only a literal 'chr' prefix -- confirm
        # that ids are normalised the same way wherever they are created.
        if raw_id.lstrip('chrCHR') != variant_id:
            continue

        return self._format_variant(
            variant_line=variant_line,
            index=position,
            case_obj=case_obj,
            head=head,
        )

    return None
def test_get_variant_id_dict():
    """get_variant_id builds '1_10_A_T' from a variant dictionary."""
    variant = dict(CHROM='1', POS='10', REF='A', ALT='T')
    expected_id = '1_10_A_T'

    assert get_variant_id(variant) == expected_id
def father(ctx):
    """Check number of variants in common between each child and its father.

    Every individual in ``ctx.parent.individuals`` whose ``father`` is not
    '0' forms a child/father duo. For each line in
    ``ctx.parent.variant_file`` and each duo, the variant is counted as a
    call for the child when the child has the variant and its genotype
    passes ``check_high_quality`` with ``ctx.parent.gq_treshold``. A counted
    call is also counted as common when ``check_common_variant`` accepts
    the child/father genotype pair.

    One result per child is written, with the keys/columns ``ind_id``,
    ``fraction_of_common_variants``, ``common_variants`` and
    ``number_calls``. Output is json when ``ctx.parent.to_json`` is set,
    otherwise a tab separated table with a '#' prefixed header. It goes to
    ``ctx.parent.outfile`` when that is set, otherwise to stdout.

    A child without any counted calls gets the fraction 0.0 instead of
    triggering a division by zero.

    Args:
        ctx: Command context; only attributes of ``ctx.parent`` are read.
    """
    logger.info("Running variant_integrity father version {0}".format(
        variant_integrity.__version__))

    print_columns = ['ind_id', 'fraction_of_common_variants',
                     'common_variants', 'number_calls']

    duos = []
    # Maps child id -> result row holding the counters for that child
    children = {}
    analysis_individuals = set()

    for ind_id in ctx.parent.individuals:
        individual_object = ctx.parent.individuals[ind_id]
        # A father id of '0' means that no father is registered
        if individual_object.father != '0':
            duo = {
                'child': ind_id,
                'father': individual_object.father
            }
            analysis_individuals.add(ind_id)
            analysis_individuals.add(individual_object.father)
            duos.append(duo)
            logger.info("Duo found: {0}".format(
                ', '.join(duo.values())
            ))

    logger.info("Individuals included in analysis: {0}".format(
        ','.join(analysis_individuals)))

    for duo in duos:
        children[duo['child']] = dict(zip(
            print_columns, [duo['child'], 0, 0, 0]))

    for line in ctx.parent.variant_file:
        variant_dict = get_variant_dict(
            variant_line=line,
            header_line=ctx.parent.header_line
        )
        logger.debug("Checking genotype calls for variant {0}".format(
            get_variant_id(variant_dict=variant_dict)
        ))

        genotypes = get_genotypes(variant_dict, analysis_individuals)

        for duo in duos:
            child_id = duo['child']
            child_genotype = genotypes[child_id]
            father_genotype = genotypes[duo['father']]
            duo_genotypes = [child_genotype, father_genotype]

            # First check if the child has the variant:
            if child_genotype.has_variant:
                # If child have high quality we count the variant
                if check_high_quality([child_genotype],
                                      ctx.parent.gq_treshold):
                    children[child_id]['number_calls'] += 1
                    if check_common_variant(duo_genotypes):
                        children[child_id]['common_variants'] += 1

    results = []
    for child_info in children.values():
        common = child_info['common_variants']
        variants = child_info['number_calls']
        # A child may have no counted calls at all; report 0.0 rather than
        # crash on a division by zero. float() keeps true division
        # regardless of how '/' behaves for integers.
        fraction = float(common) / variants if variants else 0.0
        child_info['fraction_of_common_variants'] = round(fraction, 3)
        results.append(child_info)

    to_json = ctx.parent.to_json
    outfile = ctx.parent.outfile

    if to_json:
        if outfile:
            json.dump(results, outfile)
        else:
            print(json.dumps(results))
    else:
        if outfile:
            outfile.write("#{0}\n".format('\t'.join(print_columns)))
        else:
            print("#{0}".format('\t'.join(print_columns)))

        for result in results:
            print_line = "{0}\t{1}\t{2}\t{3}".format(
                result['ind_id'],
                result['fraction_of_common_variants'],
                result['common_variants'],
                result['number_calls']
            )
            if outfile:
                outfile.write("{0}\n".format(print_line))
            else:
                print(print_line)
def mendel(ctx):
    """Check mendelian errors in all trios.

    Trios are taken from ``family_object.trios`` for every family in
    ``ctx.parent.families``. Within a trio, the individual whose ``mother``
    is part of the trio is the child; of the others, the one with
    ``sex == 1`` is the father and the remaining one the mother.

    For each line in ``ctx.parent.variant_file`` and each trio, the variant
    is counted as a call for the child when the child has the variant and
    all three genotypes pass ``check_high_quality`` with
    ``ctx.parent.gq_treshold``. A counted call is also counted as an error
    when ``check_mendelian_error`` flags it.

    One result per child is written, with the keys/columns ``ind_id``,
    ``fraction_of_errors``, ``mendelian_errors`` and ``number_calls``.
    Output is json when ``ctx.parent.to_json`` is set, otherwise a tab
    separated table with a '#' prefixed header. It goes to
    ``ctx.parent.outfile`` when that is set, otherwise to stdout.

    A child without any counted calls gets the fraction 0.0 instead of
    triggering a division by zero.

    Args:
        ctx: Command context; only attributes of ``ctx.parent`` are read.
    """
    logger.info("Running variant_integrity mendel {0}".format(
        variant_integrity.__version__))

    print_columns = ['ind_id', 'fraction_of_errors', 'mendelian_errors',
                     'number_calls']

    trios = []
    # Maps child id -> result row holding the counters for that child
    children = {}
    analysis_individuals = set()

    for family in ctx.parent.families:
        family_object = ctx.parent.families[family]
        for trio in family_object.trios:
            trio_individuals = {
                'mother': None,
                'father': None,
                'child': None,
            }
            for ind_id in trio:
                analysis_individuals.add(ind_id)
                individual_object = ctx.parent.individuals[ind_id]
                # The child is the member whose mother is in the same trio
                if individual_object.mother in trio:
                    trio_individuals['child'] = ind_id
                elif individual_object.sex == 1:
                    trio_individuals['father'] = ind_id
                else:
                    trio_individuals['mother'] = ind_id
            trios.append(trio_individuals)
            logger.info("Trio found: {0}".format(
                ', '.join(list(trio_individuals.values()))
            ))

    logger.info("Individuals included in analysis: {0}".format(
        ','.join(analysis_individuals)))

    for trio in trios:
        children[trio['child']] = dict(zip(
            print_columns, [trio['child'], 0, 0, 0]))

    for line in ctx.parent.variant_file:
        variant_dict = get_variant_dict(
            variant_line=line,
            header_line=ctx.parent.header_line
        )
        logger.debug("Checking genotype calls for variant {0}".format(
            get_variant_id(variant_dict=variant_dict)
        ))

        genotypes = get_genotypes(variant_dict, analysis_individuals)

        for trio in trios:
            child_id = trio['child']
            child_genotype = genotypes[child_id]
            mother_genotype = genotypes[trio['mother']]
            father_genotype = genotypes[trio['father']]
            trio_genotypes = [
                child_genotype,
                mother_genotype,
                father_genotype
            ]

            # First check if the child has the variant:
            if child_genotype.has_variant:
                # If all individuals are high quality we count the variant
                if check_high_quality(trio_genotypes,
                                      ctx.parent.gq_treshold):
                    children[child_id]['number_calls'] += 1
                    if check_mendelian_error(child_genotype,
                                             mother_genotype,
                                             father_genotype):
                        children[child_id]['mendelian_errors'] += 1

    results = []
    for child_info in children.values():
        errors = child_info['mendelian_errors']
        variants = child_info['number_calls']
        # A child may have no counted calls at all; report 0.0 rather than
        # crash on a division by zero. float() keeps true division
        # regardless of how '/' behaves for integers.
        fraction = float(errors) / variants if variants else 0.0
        child_info['fraction_of_errors'] = round(fraction, 3)
        results.append(child_info)

    to_json = ctx.parent.to_json
    outfile = ctx.parent.outfile

    if to_json:
        if outfile:
            json.dump(results, outfile)
        else:
            print(json.dumps(results))
    else:
        if outfile:
            outfile.write("#{0}\n".format('\t'.join(print_columns)))
        else:
            print("#{0}".format('\t'.join(print_columns)))

        for result in results:
            print_line = "{0}\t{1}\t{2}\t{3}".format(
                result['ind_id'],
                result['fraction_of_errors'],
                result['mendelian_errors'],
                result['number_calls']
            )
            if outfile:
                outfile.write("{0}\n".format(print_line))
            else:
                print(print_line)
def test_send_wrong():
    """Calling get_variant_id without any arguments raises an exception."""
    with pytest.raises(Exception):
        get_variant_id()
def test_get_variant_id_line():
    """get_variant_id builds '1_10_A_T' from a tab separated variant line."""
    variant_line = '1\t10\t.\tA\tT'
    expected_id = '1_10_A_T'

    variant_id = get_variant_id(variant_line=variant_line)

    assert variant_id == expected_id
def test_get_variant_id_dict():
    """A dict with CHROM, POS, REF and ALT gives the id '1_10_A_T'."""
    variant_dict = {
        'CHROM': '1',
        'POS': '10',
        'REF': 'A',
        'ALT': 'T',
    }

    variant_id = get_variant_id(variant_dict)

    assert variant_id == '1_10_A_T'