def test_labelledallele_delabeler():
    """
    Check that ``LabelledAlleles.delabel`` resolves spans to founder alleles.

    Two individuals are given constant haplotypes (1/2 and 3/4). A chromatid
    is described by two inheritance spans, one per individual, and the
    delabelled result is expected to be 1s for the first half and 4s for
    the second half.
    """
    ngenos = 10  # Number of genotypes per chromosome
    if ngenos % 2 == 1:
        raise ValueError('Even number of genotypes needed')
    half = ngenos // 2

    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(ngenos):
        template.add_genotype()
    pop.add_chromosome(template)

    def make_founder(label, first_allele, second_allele):
        # Build an individual whose two copies of chromosome 0 are each
        # filled with a single repeated allele value.
        ind = Individual(pop, label)
        ind._init_genotypes(blankchroms=False)
        ind.genotypes[0][0] = Alleles([first_allele] * ngenos)
        ind.genotypes[0][1] = Alleles([second_allele] * ngenos)
        return ind

    a = make_founder(1, 1, 2)
    b = make_founder(2, 3, 4)

    spans = [
        InheritanceSpan(a, 0, 0, 0, half),
        InheritanceSpan(b, 0, 1, half, ngenos),
    ]
    chromatid = LabelledAlleles(spans=spans, chromobj=template)

    expected_value = Alleles([1] * half + [4] * half)
    actual_value = chromatid.delabel()
    assert all(actual_value == expected_value)
def mate(self, pop=None, sex=None, label=None):
    """
    Produce one offspring from this clique.

    The father and mother are whatever ``self.get_male()`` and
    ``self.get_female()`` return; how a parent is picked when several are
    available is decided by those methods.

    :param pop: Population for the new individual
    :param sex: Sex of the offspring; drawn at random (0 or 1) when None
    :param label: Label for the offspring individual
    :type pop: Population
    :type sex: 0,1

    :returns: offspring individual
    :rtype: Individual
    :raises ValueError: if ``self.children_possible()`` is false
    """
    if not self.children_possible():
        raise ValueError("Children not possible from this clique")

    # Father is fetched before mother, then the sex is drawn if needed.
    father, mother = self.get_male(), self.get_female()
    offspring_sex = np.random.randint(0, 2) if sex is None else sex

    return Individual(pop, label, father, mother, offspring_sex)
def founder_individual(self, register=True, sex=None):
    """
    Create a parentless individual labelled with the current ``self.size()``.

    :param register: when true, the new individual is passed to
        ``self.register_individual``
    :param sex: 'm' or 'f' (case-insensitive); a random sex is used when None
    :returns: the new individual
    :raises KeyError: if ``sex`` is a string other than 'm'/'f'
    """
    key = None if sex is None else sex.lower()

    # The random sex is drawn on every call, even when ``sex`` is given,
    # so the random stream advances identically in both cases.
    random_sex = np.random.choice([0, 1])
    codes = {'m': 0, 'f': 1, None: random_sex}

    founder = Individual(self, self.size(), None, None, codes[key])
    if register:
        self.register_individual(founder)
    return founder
def test_labelledalleles():
    """
    Check that ``LabelledAlleles.founder_chromosome`` yields a single span.

    The founder chromosome for an individual should compare equal to a
    ``LabelledAlleles`` holding one ``InheritanceSpan`` that runs from 0 to
    the number of genotypes on the chromosome template.
    """
    ngenos = 50

    pop = Population()
    template = ChromosomeTemplate()
    for _ in range(ngenos):
        template.add_genotype()
    pop.add_chromosome(template)

    founder = Individual(pop, 1)

    actual = LabelledAlleles.founder_chromosome(founder, 0, 0,
                                                chromobj=template)
    whole_chromosome = InheritanceSpan(founder, 0, 0, 0, ngenos)
    expected = LabelledAlleles(spans=[whole_chromosome], chromobj=template)

    assert actual == expected
def _vcf_parseheader(fileobj):
    """
    Consume the header of a VCF stream and create its individuals.

    Lines starting with ``##`` are skipped. The first line starting with a
    single ``#`` is treated as the column header: every whitespace-separated
    field after the ninth becomes the label of a new Individual, which is
    registered with a freshly created Population. Iteration stops there, so
    ``fileobj`` is left positioned at the first line after the header.

    :param fileobj: iterable of text lines (e.g. an open VCF file)
    :returns: tuple ``(pop, inds)`` of the new Population and the list of
        Individuals in column order
    :raises FileFormatError: if a non-``#`` line is met before the column
        header, or if the input ends without one
    """
    pop = Population()
    for line in fileobj:
        if line.startswith('##'):
            continue
        elif line.startswith('#'):
            ind_ids = line.strip().split()[9:]
            inds = [Individual(pop, ind_id) for ind_id in ind_ids]
            for ind in inds:
                pop.register_individual(ind)
            return pop, inds
        else:
            raise FileFormatError("No header line in VCF")

    # Reached only for empty input or input made up solely of '##' lines.
    # Without this the function would fall through and return None, and
    # callers unpacking the result would fail with an unrelated TypeError.
    raise FileFormatError("No header line in VCF")
def mate(self, ind1, ind2, indlab, sex=None):
    """
    Build a child of two given individuals within this population.

    :param ind1: The first parent
    :param ind2: The second parent
    :param indlab: ID label for the child
    :param sex: Sex of child; chosen at random from {0, 1} when None
    :type ind1: Individual
    :type ind2: Individual
    :type sex: {0,1}

    :return: An individual with ind1 and ind2 as parents
    :rtype: Individual
    """
    child_sex = sex if sex is not None else np.random.choice([0, 1])
    return Individual(self, indlab, ind1, ind2, child_sex)
def read_beagle_genotypefile(filename, pop, missingcode='0'):
    '''
    Read a BEAGLE formatted genotype file.

    The file is scanned twice. The first pass reads the leading records:
    the 'I' record supplies individual labels and any phenotype records
    fill each individual's ``phenotypes`` dict. It stops at the first
    record that is neither. The second pass collects every line starting
    with 'M' and assigns genotypes to the individuals.

    Individuals are constructed with ``pop`` as their population; this
    function does not itself call any registration method on ``pop``.

    :param filename: Filename of BEAGLE genotype file
    :param pop: the population the individuals are created with
    :param missingcode: The value that indicates a missing genotype
    :type missingcode: string

    :rtype: void
    '''
    with smartopen(filename) as f:
        # Pass 1: individual identifiers and phenotype records.
        for line in f:
            rec = BeagleGenotypeRecord(line)

            if rec.identifier == 'I':
                # Every second data field is used as a label
                # (presumably each individual occupies two columns).
                inds = [Individual(pop, label) for label in rec.data[::2]]
                continue

            if not rec.is_phenotype_record:
                # We've reached the genotypes, and we're skipping out
                break

            for ind, raw_status in zip(inds, rec.data[::2]):
                if rec.identifier == 'A':
                    # Affection records become booleans: True only for '2'.
                    value = raw_status == '2'
                else:
                    # Other phenotypes are numeric when they parse as a
                    # float, otherwise the raw string is kept.
                    try:
                        value = float(raw_status)
                    except ValueError:
                        value = raw_status
                ind.phenotypes[rec.label] = value

        # Pass 2: rewind and gather the marker rows.
        f.seek(0)
        gtrows = []
        for row in f:
            if row.startswith('M'):
                allele_pairs = grouper(BeagleGenotypeRecord(row).data, 2)
                gtrows.append(list(allele_pairs))

        # Transpose marker-major rows into one allele sequence per individual.
        per_individual = zip(*gtrows)
        for ind, sequentialalleles in zip(inds, per_individual):
            ind.genotypes = gt_from_seq(ind.chromosomes,
                                        sequentialalleles,
                                        missing_code=missingcode)
def create_individual(self, population=None):
    """
    Build an Individual from this pedigree record.

    The individual's label is the tuple ``(fam, ind_id)``. Father and
    mother are passed through exactly as stored on the record, and the sex
    is translated through ``sex_codes``.

    :param population: Population for the individual to belong to
    :type population: Population

    :rtype: Individual
    """
    # Family-qualified label, so that the same ind_id appearing in two
    # different families does not overwrite an earlier individual.
    family_scoped_id = (self.fam, self.ind_id)

    return Individual(population,
                      family_scoped_id,
                      self.fa,
                      self.mo,
                      sex_codes[self.sex])
def next_generation(self, pop, gensize):
    """
    Create individuals for the next generation by random mating.

    A father index, a mother index and a sex are drawn independently and
    uniformly for each offspring. Offspring are labelled 0..gensize-1.

    :param pop: Parent population
    :param gensize: Size of next generation
    :type pop: Population
    :type gensize: int

    :returns: list of the new individuals
    """
    males = pop.males()
    females = pop.females()

    # Draw order (fathers, mothers, sexes) determines how the random
    # stream is consumed, so it is kept fixed.
    father_idx = np.random.randint(0, len(males), gensize)
    mother_idx = np.random.randint(0, len(females), gensize)
    sexes = np.random.randint(0, 2, gensize)

    progeny = []
    draws = zip(father_idx, mother_idx, sexes)
    for label, (fa_i, mo_i, sex) in enumerate(draws):
        child = Individual(pop, label, males[fa_i], females[mo_i], sex)
        progeny.append(child)
    return progeny
def test_labelledallele_delabeler():
    """
    Verify that delabelling a two-span chromatid yields the founder alleles.

    The first founder carries haplotypes of all 1s and all 2s, the second
    all 3s and all 4s. The chromatid's first half is a span on the first
    founder and its second half a span on the second; the expected
    delabelled alleles are 1s followed by 4s.
    """
    ngenos = 10  # Number of genotypes per chromosome
    if ngenos % 2 == 1:
        raise ValueError('Even number of genotypes needed')
    midpoint = ngenos // 2

    pop = Population()
    chrom = ChromosomeTemplate()
    for _ in range(ngenos):
        chrom.add_genotype()
    pop.add_chromosome(chrom)

    # (label, (allele on copy 0, allele on copy 1)) for each founder
    founder_specs = ((1, (1, 2)), (2, (3, 4)))
    founders = []
    for label, (allele_0, allele_1) in founder_specs:
        founder = Individual(pop, label)
        founder._init_genotypes(blankchroms=False)
        founder.genotypes[0][0] = Alleles([allele_0] * ngenos)
        founder.genotypes[0][1] = Alleles([allele_1] * ngenos)
        founders.append(founder)
    first, second = founders

    chromatid = LabelledAlleles(
        spans=[
            InheritanceSpan(first, 0, 0, 0, midpoint),
            InheritanceSpan(second, 0, 1, midpoint, ngenos),
        ],
        chromobj=chrom,
    )

    expected_value = Alleles([1] * midpoint + [4] * midpoint)
    actual_value = chromatid.delabel()
    assert all(actual_value == expected_value)
def read_ped(filename, population=None, delimiter=None, affected_labels=None,
             population_handler=None, data_handler=None, connect_inds=True,
             onlyinds=None):
    """
    Reads a plink format pedigree file, ie:

        familyid indid father mother sex whatever whatever whatever

    into a PedigreeCollection holding one Pedigree per family id.

    Individuals are constructed with ``population`` (a new Population is
    created when none is given). Once individuals are sorted into their
    family pedigrees, each individual's ``population`` and ``pedigree``
    attributes are both set to that family's Pedigree.

    Arguments
    -----
    filename: The file to be read
    population: The population individuals are constructed with
    delimiter: a string defining the field separator, default: any whitespace
    affected_labels: Mapping from the affection column's text to a phenotype
        value. Labels missing from the mapping give None.
    population_handler: a function called on each Pedigree created here
        to set up the population
    data_handler: a function called as data_handler(individual, fields)
        with the fields after the sixth column, for lines that have any
    connect_inds: build references between individuals. Requires all
        individuals be present in the file
    onlyinds: a collection of (family id, individual id) tuples; when
        non-empty, lines for other individuals are skipped

    Returns: An object of class PedigreeCollection

    Raises: ValueError if a non-blank line has fewer than five fields;
        KeyError if the sex column holds an unrecognised code
    """
    sex_codes = {'1': 0, '2': 1, 'M': 0, 'F': 1, '0': None, '-9': None}
    if not affected_labels:
        affected_labels = {'1': 0, '2': 1,
                           'A': 1, 'U': 0,
                           'X': None,
                           '-9': None}

    # Tries to get a phenotype and returns unknown on failure
    def getph(ph):
        try:
            return affected_labels[ph]
        except KeyError:
            return None

    # Honor a caller-supplied population instead of always replacing it.
    if population is None:
        population = Population()

    p = Pedigree()
    if callable(population_handler):
        population_handler(p)

    pc = PedigreeCollection()

    with open(filename) as f:
        # Parse the lines in the file
        for lineno, line in enumerate(f, start=1):
            if not line.strip():
                # Blank lines carry no record.
                continue

            fields = line.strip().split(delimiter)
            if len(fields) < 5:
                # Without this check a short line would silently reuse the
                # values parsed from the previous line.
                raise ValueError(
                    '{}:{}: expected at least 5 fields, found {}'.format(
                        filename, lineno, len(fields)))

            fam, ind_label, fa, mo, sex = fields[:5]
            aff = fields[5] if len(fields) > 5 else None

            # Give a special id for now, to prevent overwriting duplicated
            # ids between families
            ind_id = (fam, ind_label)

            if onlyinds and (ind_id not in onlyinds):
                continue

            p[ind_id] = Individual(population, ind_id, fa, mo, sex)
            ind = p[ind_id]
            ind.phenotypes['affected'] = getph(aff)
            ind.pedigree = p
            ind.sex = sex_codes[ind.sex]

            if callable(data_handler) and len(fields) > 6:
                data_handler(ind, fields[6:])

    # Fix the individual-level data
    if connect_inds:
        for ind in p.individuals:
            fam = ind.label[0]
            # Actually make the references instead of just pointing at
            # strings; '0' marks an unknown parent.
            ind.father = p[(fam, ind.father)] if ind.father != '0' else None
            ind.mother = p[(fam, ind.mother)] if ind.mother != '0' else None
            ind.register_with_parents()

    # Place individuals into pedigrees
    pedigrees = {}
    for ind in p.individuals:
        pedigrees.setdefault(ind.label[0], []).append(ind)

    for pedigree_label, ped_inds in pedigrees.items():
        ped = Pedigree(label=pedigree_label)

        if callable(population_handler):
            population_handler(ped)

        for ind in ped_inds:
            # Drop the temporary family prefix from the label.
            ind.label = ind.label[1]
            ped[ind.label] = ind
            ind.population = ped
            ind.pedigree = ped
        pc[pedigree_label] = ped

    return pc