Exemplo n.º 1
0
def test_chromosometemplate():
    """Exercise ChromosomeTemplate.closest_marker on evenly spaced markers.

    Markers are added for i = 1..99 with map_position=i and bp=i * 1e6,
    then closest_marker is queried below, between and above those
    base-pair positions.
    """
    template = ChromosomeTemplate()
    for step in range(1, 100):
        template.add_genotype(map_position=step, bp=(step * 1e6))

    # Query below the smallest bp value that was added.
    below_all = template.closest_marker(0)
    assert below_all == 0

    # Queries just above 5e6 and just below 6e6.
    just_past = template.closest_marker(5000001)
    assert just_past == 4

    just_before = template.closest_marker(5999999)
    assert just_before == 5

    # Query far above the largest bp value that was added.
    beyond_all = template.closest_marker(1e10)
    final_marker = template.nmark() - 1
    assert beyond_all == final_marker
def test_chromosometemplate():
    """Test the marker finder (ChromosomeTemplate.closest_marker).

    The template is filled with 99 genotypes whose bp values are
    1e6, 2e6, ..., 99e6 before the lookups are checked.
    """
    chrom = ChromosomeTemplate()
    for n in range(1, 100):
        bp_value = n * 1e6
        chrom.add_genotype(map_position=n, bp=bp_value)

    # Lookup at position 0, below every bp value added above.
    assert 0 == chrom.closest_marker(0)

    # Lookups on either side of the gap between 5e6 and 6e6.
    assert 4 == chrom.closest_marker(5000001)
    assert 5 == chrom.closest_marker(5999999)

    # A lookup far above every bp value must equal nmark() - 1.
    far_result = chrom.closest_marker(1e10)
    assert far_result == chrom.nmark() - 1
Exemplo n.º 3
0
def read_vcf(filename, require_pass=False, freq_info=None):
    """
    Reads a VCF file and returns a Population object with the
    individuals represented in the file

    Genotypes generated by this function will be sparse

    :param filename: file to read; it is opened with ``smartopen``
    :param require_pass: only allow variants with PASS under FILTER
    :type require_pass: bool
    :param freq_info: INFO field to get allele frequency from. When None,
        every marker is registered with frequency 0
    :type freq_info: string

    :returns: Individuals in the VCF
    :rtype: Population
    """
    with smartopen(filename) as f:

        genotypes = []

        pop, inds = _vcf_parseheader(f)

        last_chrom = None
        chromobj = None

        for line in f:
            record = VCFRecord(line)

            if require_pass and not record.filter_passed:
                continue

            if record.chrom != last_chrom:
                # A new chromosome label starts: store the template built so
                # far (if any) and begin a fresh one.
                if chromobj is not None:
                    pop.add_chromosome(chromobj)
                chromobj = ChromosomeTemplate(label=record.chrom)
                last_chrom = record.chrom

            if freq_info is not None:
                freq = _vcf_get_infofreq(record.info, freq_info)
            else:
                freq = 0

            # Rows are kept in file order; they are matched to
            # (chromosome, marker) indices after all templates are complete.
            genotypes.append(record.genotypes())

            chromobj.add_genotype(bp=record.pos,
                                  label=record.label,
                                  frequency=freq)

        # chromobj is still None when the file had no records, or none
        # passed the filter; in that case there is nothing to add.
        if chromobj is not None:
            pop.add_chromosome(chromobj)
        pop.chromosomes.finalize()

    for ind in inds:
        # Initialize new genotypes
        ind._init_genotypes(sparse=True)

    # Now actually sift through markers and assign them to individuals.
    # One (chromosome index, marker index) pair per marker, in the order the
    # chromosomes are stored in the population.
    final_indices = [(chromidx, markidx)
                     for chromidx, chromobj in enumerate(pop.chromosomes)
                     for markidx in range(chromobj.nmark())]

    for raw, (chromidx, markidx) in zip(range(len(genotypes)), final_indices):
        assign_genorow(genotypes[raw], inds, chromidx, markidx)

        # Kill the row so we don't end up with the whole dataset in memory twice
        genotypes[raw] = None

    return pop
Exemplo n.º 4
0
def read_vcf(filename, require_pass=False, freq_info=None, info_filters=None):
    """
    Reads a VCF file and returns a Population object with the
    individuals represented in the file

    Genotypes of the individuals are initialized as sparse.

    :param filename: path of the VCF file to read
    :param require_pass: skip records whose ``filter_passed`` is false
    :type require_pass: bool
    :param freq_info: INFO key to read the allele frequency from. If the
        value is comma-separated only the first entry is used. Records
        without the key (or ``freq_info=None``) get frequency 0
    :type freq_info: string
    :param info_filters: callables that are each given the VCFRecord; a
        record is kept only if every callable returns a true value
    :type info_filters: iterable of callables

    :returns: Individuals in the VCF
    :rtype: Population
    :raises ValueError: if an entry of info_filters is not callable, or if
        the file contains no single-``#`` header line
    """
    # Materialize once so that a one-shot iterable (e.g. a generator) is
    # still usable for every record after the validation pass below.
    info_filters = list(info_filters) if info_filters else []

    if not all(callable(check) for check in info_filters):
        raise ValueError('Filter not callable')

    with open(filename) as f:
        pop = Population()

        # Header section: '##' lines are skipped; the first line starting
        # with a single '#' supplies the individual IDs (columns 10 onward).
        inds = None
        for line in f:
            if line.startswith('##'):
                continue

            if line.startswith('#'):
                ind_ids = line.strip().split()[9:]
                inds = [Individual(pop, ind_id) for ind_id in ind_ids]
                for ind in inds:
                    pop.register_individual(ind)
                break

        if inds is None:
            # Without the header line there are no individuals to assign
            # genotypes to; fail clearly rather than with an unbound name.
            raise ValueError('VCF file has no header line')

        genotypes = []
        last_chrom = None
        chromobj = None

        # The file iterator continues right after the header line.
        for line in f:
            record = VCFRecord(line)

            if not all(check(record) for check in info_filters):
                continue

            if require_pass and not record.filter_passed:
                continue

            if record.chrom != last_chrom:
                # A new chromosome label starts: close and store the
                # template built so far (if any) and begin a fresh one.
                if chromobj is not None:
                    chromobj.finalize()
                    pop.add_chromosome(chromobj)
                chromobj = ChromosomeTemplate(label=record.chrom)
                last_chrom = record.chrom

            freq = 0
            if freq_info is not None and freq_info in record.info:
                # Comma-separated values: keep only the first entry.
                freq = float(record.info[freq_info].split(',')[0])

            # Rows are kept in file order; they are matched to
            # (chromosome, marker) indices after all templates are complete.
            genotypes.append(record.genotypes())

            chromobj.add_genotype(bp=record.pos,
                                  label=record.label,
                                  frequency=freq)

        # chromobj is still None when no record survived the filters.
        if chromobj is not None:
            chromobj.finalize()
            pop.add_chromosome(chromobj)

    for ind in inds:
        # Initialize new genotypes
        ind._init_genotypes(sparse=True)

    # Now actually sift through markers and assign them to individuals.
    # One (chromosome index, marker index) pair per marker, in the order the
    # chromosomes are stored in the population.
    final_indices = [(chromidx, markidx)
                     for chromidx, chromobj in enumerate(pop.chromosomes)
                     for markidx in range(chromobj.nmark())]

    for raw, (chromidx, markidx) in zip(range(len(genotypes)), final_indices):
        assign_genorow(genotypes[raw], inds, chromidx, markidx)

        # Kill the row so we don't end up with the whole dataset in memory twice
        genotypes[raw] = None

    return pop