def test_open_cnv_regions(self):
    """Check open_cnv_regions() against a two-line tab-separated fixture.

    The fixture is a header line plus one data row. The expected result is
    a dict keyed by the (chr, chr_start, chr_end) strings of that row.
    """
    header = 'id_syndrome_feature\tid_syndrome\tcopy_number\tchr_start\tchr_end\tchr\n'
    record = '20\t1\t1\t1569197\t2110236\t4\tNA\t2650330\t149066\t1t\n'

    # the fixture is written as utf8-encoded bytes, then flushed so the
    # function under test can read it back by file name
    payload = [header.encode('utf8'), record.encode('utf8')]
    self.temp.writelines(payload)
    self.temp.flush()

    # value '1' is presumably the copy_number column (id_syndrome holds the
    # same value in this fixture) -- TODO confirm against open_cnv_regions
    expected = {('4', '1569197', '2110236'): '1'}
    observed = open_cnv_regions(self.temp.name)
    self.assertEqual(observed, expected)
def test_open_cnv_regions(self):
    """Verify the dict that open_cnv_regions() builds from a small table.

    A header line and a single data row are written to the temporary file;
    the parsed result should hold one entry keyed by the row's
    (chr, chr_start, chr_end) values, all kept as strings.
    """
    table = (
        'id_syndrome_feature\tid_syndrome\tcopy_number\tchr_start\tchr_end\tchr\n'
        '20\t1\t1\t1569197\t2110236\t4\tNA\t2650330\t149066\t1t\n'
    )

    # split back into individual lines (keeping the newlines) and encode
    # each one, since the fixture is written to the file as bytes
    encoded = []
    for text in table.splitlines(True):
        encoded.append(text.encode('utf8'))

    self.temp.writelines(encoded)
    self.temp.flush()

    self.assertEqual(
        open_cnv_regions(self.temp.name),
        {('4', '1569197', '2110236'): '1'},
    )
def __init__(self, population_tags=None, count=0, known_genes=None,
        date=None, regions=None, lof_sites=None, pp_filter=0.0,
        sum_x_lr2_file=None, output_path=None, export_vcf=None,
        debug_chrom=None, debug_pos=None):
    """ set up the object: store settings, load reference data, open report

    Args:
        population_tags: list of population ID tags that could exist
            within the INFO field, or None.
        count: number of probands to analyse, helpful for tracking
            progress in output logs.
        known_genes: path to table of genes known to be associated with
            genetic disorders, or None.
        date: date of the known_genes file, or None if not using/unknown.
        regions: path to a table of regions for DECIPHER CNV syndromes.
        lof_sites: path to json file of [chrom, position] coordinates in
            the genome, for modifying to a loss-of-function consequence
            if required. Can be None if unneeded.
        pp_filter: threshold from 0 to 1 for the pp_dnm value; candidate
            DNMs which fall below this value are filtered out.
        sum_x_lr2_file: file containing the sum of l2r values on the X
            chromosome for each person.
        output_path: path to write the output tab-separated file to.
        export_vcf: path to file or folder to write VCFs to.
        debug_chrom: chromosome for debugging purposes.
        debug_pos: position for debugging variant filtering at.
    """

    # progress tracking: 'total' is the number expected, 'count' starts at 0
    self.count = 0
    self.total = count

    # filtering settings
    self.populations = population_tags
    self.pp_filter = pp_filter

    # site to debug variant filtering at
    self.debug_chrom, self.debug_pos = debug_chrom, debug_pos

    # load the reference datasets; per the original note, these loaders
    # return None when the given path is None
    self.known_genes = open_known_genes(known_genes)
    self.cnv_regions = open_cnv_regions(regions)
    self.last_base = open_last_base_sites(lof_sites)

    # sum of mean log 2 ratios on X per person; per the original note, the
    # loader returns an empty dict when the path is None
    self.sum_x_lr2 = open_x_lr2_file(sum_x_lr2_file)

    self.reporter = Report(output_path, export_vcf, date)
def load_definitions_files(self):
    """Load the optional config files named in self.options.

    Sets three attributes, each left as None when its option is None:
        known_genes: from open_known_genes(options.genes)
        ID_mapper: from create_person_ID_mapper(options.alternate_ids)
        cnv_regions: from open_cnv_regions(options.regions)
    """

    # genes known to be involved with disorders, so that variants can be
    # screened for being in those genes
    self.known_genes = None
    genes_path = self.options.genes
    if genes_path is not None:
        self.known_genes = open_known_genes(genes_path)

    # mapping of IDs to alternate IDs, for converting between different
    # ID schemes
    self.ID_mapper = None
    ids_path = self.options.alternate_ids
    if ids_path is not None:
        self.ID_mapper = create_person_ID_mapper(ids_path)

    # regions associated with DECIPHER syndromes
    self.cnv_regions = None
    regions_path = self.options.regions
    if regions_path is not None:
        self.cnv_regions = open_cnv_regions(regions_path)
def load_definitions_files(self):
    """Populate the reference-data attributes from the script options.

    For each (attribute, option, loader) entry below, the attribute is
    first reset to None and then, only if the option is not None, replaced
    with the loader's result for that option value.
    """
    definitions = (
        # genes known to be involved with disorders, used to screen
        # variants for being in those genes
        ('known_genes', 'genes', open_known_genes),
        # IDs and alternate IDs, for converting between ID schemes
        ('ID_mapper', 'alternate_ids', create_person_ID_mapper),
        # regions associated with DECIPHER syndromes
        ('cnv_regions', 'regions', open_cnv_regions),
    )

    for attribute, option, loader in definitions:
        # reset before loading, so the attribute exists even when the
        # option was not given
        setattr(self, attribute, None)
        path = getattr(self.options, option)
        if path is not None:
            setattr(self, attribute, loader(path))