def test_check_coding(self):
    ''' check that check_coding separates coding from non-coding sites
    '''
    
    consequences = ['synonymous_variant', 'intergenic_variant']
    expected = Series([True, False])
    
    # with the default column name, the synonymous site should be flagged
    # as coding and the intergenic site should not
    self.variants['consequence'] = consequences
    observed = check_coding(self.variants)
    self.assertTrue(all(observed == expected))
    
    # the consequence column can be given under a different name
    renamed = self.variants.drop('consequence', axis=1)
    renamed['cq'] = consequences
    self.variants = renamed
    observed = check_coding(self.variants, cq_name='cq')
    self.assertTrue(all(observed == expected))
    
    # asking for a column that is not in the table raises a KeyError
    with self.assertRaises(KeyError):
        check_coding(self.variants, cq_name='UNKNOWN')
def main():
    """ screen candidate de novo SNVs and indels, then write them to a table
    
    Options come from get_options(). SNV candidates are screened with
    filter_denovogear_sites (maf=0.01) and indel candidates with
    filter_missing_indels (maf=0.0001). The combined table is then optionally
    restricted to coding sites, has last-base consequences adjusted if
    args.last_base_sites is given, is annotated with sex, and is optionally
    checked for independence within families. A fixed set of samples is
    dropped, and the result is written tab-separated to args.output, with
    missing values shown as 'NA'.
    """
    
    args = get_options()
    
    # start from a blank dataframe that carries the expected columns
    de_novos = pandas.DataFrame(columns=["person_stable_id", "chrom", "pos",
        "ref", "alt", "symbol", "consequence", "max_af", "pp_dnm"])
    de_novos['pos'] = de_novos['pos'].astype(int)
    
    denovogear = screen_candidates(args.de_novos, args.sample_fails,
        filter_denovogear_sites, maf=0.01, fix_symbols=args.fix_missing_genes,
        annotate_only=args.annotate_only, build=args.build)
    
    indels = screen_candidates(args.de_novos_indels, args.sample_fails_indels,
        filter_missing_indels, maf=0.0001, fix_symbols=args.fix_missing_genes,
        annotate_only=args.annotate_only, build=args.build)
    
    # DataFrame.append was removed in pandas 2.0, so stack the tables with
    # pandas.concat instead (assumes screen_candidates returns DataFrames)
    de_novos = pandas.concat([de_novos, denovogear, indels], ignore_index=True)
    
    # in annotate-only mode every site is kept, so the coding filter is skipped
    if not args.include_noncoding and not args.annotate_only:
        de_novos = de_novos[check_coding(de_novos)]
    
    if args.last_base_sites is not None:
        de_novos = change_conserved_last_base_consequence(de_novos,
            args.last_base_sites)
    
    # include sex, to later check if chrX candidates are likely pathogenic.
    families = pandas.read_table(args.families, sep='\t')
    sex = dict(zip(families['individual_id'], families['sex']))
    de_novos['sex'] = de_novos['person_stable_id'].map(sex)
    
    if not args.include_recurrent:
        family_ids = dict(zip(families['individual_id'], families['family_id']))
        independent = check_independence(de_novos, family_ids)
        
        if args.annotate_only:
            # flag rather than drop the non-independent sites. NOTE(review):
            # relies on screen_candidates having added a 'pass' column in
            # annotate-only mode - confirm.
            de_novos['pass'] = de_novos['pass'] & independent
        else:
            de_novos = de_novos[independent]
    
    # hard-coded sample exclusions. NOTE(review): the reason for excluding
    # these individuals is not recorded here.
    ids = ['DDDP123847', 'DDDP138759', 'DDDP135949', 'DDDP100238',
        'DDDP125725', 'DDDP118316']
    de_novos = de_novos[~de_novos.person_stable_id.isin(ids)]
    
    de_novos.to_csv(args.output, sep= "\t", index=False, na_rep='NA')
def main():
    """ screen candidate de novo SNVs and indels, then write them to a table
    
    Options come from get_options(). SNV candidates are screened with
    filter_denovogear_sites (maf=0.01) and indel candidates with
    filter_missing_indels (maf=0.0). The combined table is then optionally
    restricted to coding sites, has last-base consequences adjusted if
    args.last_base_sites is given, is annotated with sex, and is optionally
    checked for independence within families. The result is written
    tab-separated to args.output, with missing values shown as 'NA'.
    """
    
    args = get_options()
    
    # start from a blank dataframe that carries the expected columns
    de_novos = pandas.DataFrame(columns=["person_stable_id", "chrom", "pos",
        "ref", "alt", "symbol", "consequence", "max_af", "pp_dnm"])
    
    denovogear = screen_candidates(args.de_novos, args.sample_fails,
        filter_denovogear_sites, maf=0.01, fix_symbols=args.fix_missing_genes,
        annotate_only=args.annotate_only)
    
    indels = screen_candidates(args.de_novos_indels, args.sample_fails_indels,
        filter_missing_indels, maf=0.0, fix_symbols=args.fix_missing_genes,
        annotate_only=args.annotate_only)
    
    # DataFrame.append was removed in pandas 2.0, so stack the tables with
    # pandas.concat instead (assumes screen_candidates returns DataFrames)
    de_novos = pandas.concat([de_novos, denovogear, indels], ignore_index=True)
    
    # in annotate-only mode every site is kept, so the coding filter is skipped
    if not args.include_noncoding and not args.annotate_only:
        de_novos = de_novos[check_coding(de_novos)]
    
    if args.last_base_sites is not None:
        de_novos = change_conserved_last_base_consequence(de_novos,
            args.last_base_sites)
    
    # include sex, to later check if chrX candidates are likely pathogenic.
    families = pandas.read_table(args.families, sep='\t')
    sex = dict(zip(families['individual_id'], families['sex']))
    de_novos['sex'] = de_novos['person_stable_id'].map(sex)
    
    if not args.include_recurrent:
        family_ids = dict(zip(families['individual_id'], families['family_id']))
        independent = check_independence(de_novos, family_ids)
        
        if args.annotate_only:
            # flag rather than drop the non-independent sites. NOTE(review):
            # relies on screen_candidates having added a 'pass' column in
            # annotate-only mode - confirm.
            de_novos['pass'] = de_novos['pass'] & independent
        else:
            de_novos = de_novos[independent]
    
    # to_csv's keyword for the missing-value string is na_rep; the previous
    # na_value keyword is not accepted and raised a TypeError
    de_novos.to_csv(args.output, sep= "\t", index=False, na_rep='NA')