def test_labelledallele_delabeler():
    """
    Check that LabelledAlleles.delabel() resolves spans into real alleles.

    Two individuals are given constant haplotypes (1/2 and 3/4). A chromatid
    is described by two spans: the first half pointing at the first
    individual's haplotype 0 and the second half at the second individual's
    haplotype 1, so delabelling must give a run of 1s followed by a run of 4s.
    """
    ngenos = 10  # Number of genotypes per chromosome
    if ngenos % 2 == 1:
        raise ValueError('Even number of genotypes needed')

    p = Population()
    c = ChromosomeTemplate()
    for _ in range(ngenos):
        c.add_genotype()
    p.add_chromosome(c)

    def make_founder(label, hap0_allele, hap1_allele):
        # Build an individual whose two haplotypes are constant-valued
        founder = Individual(p, label)
        founder._init_genotypes(blankchroms=False)
        founder.genotypes[0][0] = Alleles([hap0_allele] * ngenos)
        founder.genotypes[0][1] = Alleles([hap1_allele] * ngenos)
        return founder

    a = make_founder(1, 1, 2)
    b = make_founder(2, 3, 4)

    midpoint = ngenos // 2
    first_half = InheritanceSpan(a, 0, 0, 0, midpoint)
    second_half = InheritanceSpan(b, 0, 1, midpoint, ngenos)
    chromatid = LabelledAlleles(spans=[first_half, second_half], chromobj=c)

    expected_value = Alleles([1] * midpoint + [4] * midpoint)
    actual_value = chromatid.delabel()
    assert all(actual_value == expected_value)
def test_labelledallele_delabeler():
    """
    Delabelling a two-span chromatid must reproduce the source alleles.

    The first span refers to haplotype 0 of one individual (all 1s) and the
    second span to haplotype 1 of another individual (all 4s); each span
    covers half of the markers.
    """
    ngenos = 10  # Number of genotypes per chromosome
    if ngenos % 2 != 0:
        raise ValueError('Even number of genotypes needed')
    half = ngenos // 2

    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(ngenos):
        template.add_genotype()
    pop.add_chromosome(template)

    # First individual: haplotypes of all 1s and all 2s
    ind_a = Individual(pop, 1)
    ind_a._init_genotypes(blankchroms=False)
    ind_a.genotypes[0][0] = Alleles([1] * ngenos)
    ind_a.genotypes[0][1] = Alleles([2] * ngenos)

    # Second individual: haplotypes of all 3s and all 4s
    ind_b = Individual(pop, 2)
    ind_b._init_genotypes(blankchroms=False)
    ind_b.genotypes[0][0] = Alleles([3] * ngenos)
    ind_b.genotypes[0][1] = Alleles([4] * ngenos)

    spans = [
        InheritanceSpan(ind_a, 0, 0, 0, half),
        InheritanceSpan(ind_b, 0, 1, half, ngenos),
    ]
    labelled = LabelledAlleles(spans=spans, chromobj=template)

    wanted = Alleles([1] * half + [4] * half)
    observed = labelled.delabel()
    assert all(observed == wanted)
def __init__(self, label=None):
    """
    Set up an empty pedigree.

    Runs the Population initialiser, stores the label and starts with
    empty ``kinmat`` and ``fratmat`` dictionaries.

    :param label: pedigree label
    """
    Population.__init__(self)
    self.label = label
    # Both start empty; presumably filled lazily by the kinship /
    # fraternity calculations -- confirm against the methods that use them.
    self.kinmat = dict()
    self.fratmat = dict()
def test_labelledalleles():
    """
    A founder chromosome must equal one span covering every marker.

    ``LabelledAlleles.founder_chromosome`` is compared against a
    LabelledAlleles built by hand from a single InheritanceSpan running
    from 0 to the number of markers on the chromosome.
    """
    n_markers = 50

    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(n_markers):
        template.add_genotype()
    pop.add_chromosome(template)

    founder = Individual(pop, 1)

    observed = LabelledAlleles.founder_chromosome(founder, 0, 0,
                                                  chromobj=template)
    whole_chromosome = InheritanceSpan(founder, 0, 0, 0, n_markers)
    wanted = LabelledAlleles(spans=[whole_chromosome], chromobj=template)

    assert observed == wanted
def _vcf_parseheader(fileobj):
    """
    Consume the header of a VCF stream and create the individuals it names.

    Lines starting with ``##`` are skipped. The first line starting with a
    single ``#`` is treated as the column header: every field after the
    ninth becomes an Individual registered with a new Population. Iteration
    stops there, so ``fileobj`` is left positioned at the first line after
    the header.

    :param fileobj: iterable of lines from a VCF file
    :returns: the new population and the individuals in column order
    :rtype: tuple of (Population, list of Individual)
    :raises FileFormatError: if a non-header line is met before the column
        header, or if the input ends without a column header
    """
    pop = Population()
    for line in fileobj:
        if line.startswith('##'):
            continue
        if not line.startswith('#'):
            raise FileFormatError("No header line in VCF")

        # The sample IDs follow the nine leading columns of the header line
        ind_ids = line.strip().split()[9:]
        inds = [Individual(pop, ind_id) for ind_id in ind_ids]
        for ind in inds:
            pop.register_individual(ind)
        return pop, inds

    # Input was empty or held only '##' lines. Without this the function
    # would return None and callers unpacking the result would fail with
    # an unrelated TypeError.
    raise FileFormatError("No header line in VCF")
def read_beagle(genofile, markerfile):
    """
    Read BEAGLE formatted genotype data into a new population.

    The marker file is read first and turned into a finalized chromosome,
    which is added to the population; the genotype file is then read into
    that population.

    :param genofile: Filename containing genotype information for
        individuals
    :param markerfile: Filename containing marker location and allele
        information corresponding to genofile
    :type genofile: string
    :type markerfile: string

    :rtype: Population
    """
    population = Population()

    marker_chrom = read_beagle_markerfile(markerfile)
    marker_chrom.finalize()
    population.chromosomes.add_chromosome(marker_chrom)

    read_beagle_genotypefile(genofile, population)
    return population
def read_ped(filename, population=None, delimiter=None, affected_labels=None,
             population_handler=None, data_handler=None, connect_inds=True,
             onlyinds=None):
    """
    Reads a plink format pedigree file, ie:

    ::
        familyid indid father mother sex whatever whatever whatever

    into a pydigree pedigree object, with optional population to
    assign to pedigree members. If you don't provide a population
    you can't simulate genotypes!

    :param filename: The file to be read
    :param population: The population to assign individuals to; a new
        Population is created when None
    :param delimiter: a string defining the field separator,
        default: any whitespace
    :param affected_labels: The labels that determine affection status.
        When empty or None the default mapping
        (1/U -> 0, 2/A -> 1, X/-9 -> None) is used; labels missing from
        the mapping give None
    :param population_handler: a function to set up the population;
        ignored if not callable
    :param data_handler: a function to turn the data into useful individual
        information; called with the individual and the record's data.
        Ignored if not callable
    :param connect_inds: build references between individuals. Requires all
        individuals be present in the file
    :param onlyinds: only include data for specified individuals

    :type filename: string
    :type population: Population
    :type delimiter: string
    :type affected_labels: dict (str -> value)
    :type population_handler: callable
    :type data_handler: callable
    :type connect_inds: bool
    :type onlyinds: iterable

    :returns: individuals contained in the pedigree file
    :rtype: PedigreeCollection
    """

    def _no_op(*args):
        # Stand-in used when the caller supplies no (usable) handler
        return None

    if not affected_labels:
        affected_labels = {'1': 0, '2': 1,
                           'A': 1, 'U': 0,
                           'X': None,
                           '-9': None}

    # The builtin callable() avoids relying on a Callable ABC import
    # (the collections.Callable alias no longer exists on Python >= 3.10).
    if not callable(data_handler):
        data_handler = _no_op

    if not callable(population_handler):
        population_handler = _no_op

    if population is None:
        population = Population()

    p = Pedigree()
    population_handler(p)

    # Step 1: Read the data and create the individuals
    with smartopen(filename) as f:
        # Parse the lines in the file
        for line in f:
            rec = PEDRecord(line, delimiter)

            if onlyinds and (rec.ind_id not in onlyinds):
                continue

            ind = rec.create_individual(population)
            ind.pedigree = p
            ind.phenotypes['affected'] = affected_labels.get(rec.aff, None)
            p[ind.label] = ind

            if rec.data:
                data_handler(p[ind.label], rec.data)

    # Step 2: Create the between-individual relationships

    # Fix the individual-level data: individuals currently only have
    # parent-ids in their parent fields and not references to actual
    # individuals
    if connect_inds:
        connect_individuals(p)

    # Step 3: Separate the individuals into pedigrees
    pc = sort_pedigrees(p.individuals, population_handler)

    return pc
def __init__(self, label=None):
    """
    Initialise the Population base and the pedigree's own attributes.

    :param label: label stored on the instance as ``self.label``
    """
    Population.__init__(self)
    self.label = label
    # Start with empty lookup tables; what populates them is not visible
    # from this constructor.
    self.kinmat = dict()
    self.fratmat = dict()
def read_vcf(filename, require_pass=False, freq_info=None, info_filters=None):
    """
    Reads a VCF file and returns a Population object with the
    individuals represented in the file

    A new ChromosomeTemplate is started every time the chromosome of a
    record differs from the previous record's. A file without a column
    header or without any retained record yields a population with no
    individuals or no chromosomes respectively, instead of failing on an
    unbound local variable.

    :param filename: path of the VCF file
    :param require_pass: skip records whose ``filter_passed`` is false
    :param freq_info: INFO key holding the allele frequency. For a comma
        separated value only the first entry is used; records lacking the
        key get a frequency of 0
    :param info_filters: callables taking a VCFRecord; a record is kept
        only when every one of them returns a true value
    :type filename: string
    :type require_pass: bool
    :type freq_info: string
    :type info_filters: iterable of callables

    :raises ValueError: if an entry of info_filters is not callable
    :rtype: Population
    """
    if not info_filters:
        info_filters = []

    for info_filter in info_filters:
        if not callable(info_filter):
            raise ValueError('Filter not callable')

    pop = Population()
    inds = []          # stays empty if the file has no '#' header line
    chromobj = None    # stays None if no record is retained
    last_chrom = None
    genotypes = []

    with open(filename) as f:
        # Header: skip everything up to and including the '#CHROM' line
        for line in f:
            if line.startswith('##'):
                continue
            elif line.startswith('#'):
                ind_ids = line.strip().split()[9:]
                inds = [Individual(pop, ind_id) for ind_id in ind_ids]
                for ind in inds:
                    pop.register_individual(ind)
                break

        # Body: the same file iterator continues after the header line
        for line in f:
            record = VCFRecord(line)

            if info_filters and not all(check(record)
                                        for check in info_filters):
                continue

            if require_pass and not record.filter_passed:
                continue

            if record.chrom != last_chrom:
                # Close the chromosome being built before starting another
                if last_chrom is not None:
                    chromobj.finalize()
                    pop.add_chromosome(chromobj)
                chromobj = ChromosomeTemplate(label=record.chrom)

            if freq_info is not None and freq_info in record.info:
                freq = record.info[freq_info]
                if ',' in freq:
                    freq = freq.split(',')[0]
                freq = float(freq)
            else:
                freq = 0

            genorow = record.genotypes()
            genotypes.append(genorow)

            chromobj.add_genotype(bp=record.pos,
                                  label=record.label,
                                  frequency=freq)

            last_chrom = record.chrom

    # The last chromosome is still open; there is none when no record
    # was retained.
    if chromobj is not None:
        chromobj.finalize()
        pop.add_chromosome(chromobj)

    for ind in inds:
        # Initialize new genotypes
        ind._init_genotypes(sparse=True)

    # Now actually sift through markers and assign them to individuals.
    # Rows were appended in file order, which is assumed to match the
    # (chromosome, marker) order obtained by walking pop.chromosomes.
    final_indices = []
    for chromidx, template in enumerate(pop.chromosomes):
        indices = zip([chromidx] * template.nmark(), range(template.nmark()))
        final_indices.extend(indices)

    raw_indices = range(len(genotypes))
    for raw, final in zip(raw_indices, final_indices):
        chromidx, markidx = final
        row = genotypes[raw]
        assign_genorow(row, inds, chromidx, markidx)

        # Kill the row so we don't end up with the whole dataset in
        # memory twice
        genotypes[raw] = None

    return pop