示例#1
0
    def test_roh_mhmm_100pct(self):
        """Check roh_mhmm on an all-zero genotype vector over a 100 bp contig.

        Every call in the input is 0/0, and the test expects a single run
        spanning positions 1-100 together with a reported fraction of 1.0.
        """
        positions = [1, 10, 50, 100]
        genotypes = np.zeros((len(positions), 2), dtype=np.int16)

        # Expected row layout: start, stop, length, is_marginal.
        expected_runs = np.array([[1, 100, 100, True]], dtype=object)
        expected_fraction = 1.0

        roh, fraction = allel.roh_mhmm(genotypes, positions, contig_size=100)

        aeq(roh.values, expected_runs)
        assert fraction == expected_fraction
示例#2
0
    def test_roh_mhmm_0pct(self):
        """Check roh_mhmm when one of the four calls is not 0/0.

        The test expects an empty result table and a reported fraction
        of 0.0 for this input.
        """
        positions = [1, 10, 50, 100]

        genotypes = np.zeros((4, 2), dtype=np.int16)
        # Flip the first allele of the third row so that row is no longer 0/0.
        genotypes[2, 0] = 1

        roh, fraction = allel.roh_mhmm(genotypes, positions, contig_size=100)

        n_runs = roh.shape[0]
        assert n_runs == 0

        expected_fraction = 0.0
        assert fraction == expected_fraction
示例#3
0
def main():
    """Find runs of homozygosity for every sample in a VCF and save them as JSON.

    Command line: ``<vcf-file-name> <output-file>``. When the argument count
    is wrong, a usage hint is printed and the process exits with status 1.

    The JSON written to the output file maps each sample index (only samples
    with at least one detected run are included) to a dict containing:

    * ``'confidence'`` -- the second value returned by ``allel.roh_mhmm``
      converted to ``float``.  NOTE(review): scikit-allel documents that value
      as the fraction of the genome in runs of homozygosity, not a
      confidence; the key name is kept so existing consumers of the file
      keep working.
    * one entry per run index holding ``'start'``, ``'stop'`` and
      ``'is_marginal'``.

    ``json.dump`` serialises the integer sample/run indices as string keys.
    """
    if len(sys.argv) != 3:
        print('vcf-file-name output-file')
        sys.exit(1)
    vcfFileName = sys.argv[1]
    outputFile = sys.argv[2]

    logger.debug('reading ' + vcfFileName)
    vcf = allel.read_vcf(
        vcfFileName,
        fields=[
            'samples', 'calldata/GT', 'variants/ALT',
            'variants/CHROM', 'variants/FILTER_PASS',
            'variants/ID', 'variants/POS', 'variants/QUAL',
            'variants/REF', 'variants/INFO',
        ],
    )

    logger.debug('reading data into genotype array')
    genoArray = allel.GenotypeArray(vcf['calldata/GT'])

    logger.debug('finding runs of homozygosity')
    numVariants = len(genoArray)
    numSamples = len(vcf['samples'])
    # NOTE(review): positions are the variant indices 0..numVariants-1, not
    # vcf['variants/POS'], so the reported start/stop values are in index
    # space rather than genomic coordinates -- confirm this is intended.
    posArray = np.arange(numVariants)
    # Every index is treated as accessible.
    isAccessible = np.ones(numVariants, dtype=bool)

    runsOfHomozygosity = {}
    for i in range(numSamples):
        genoVector = genoArray[:, i]
        roh = allel.roh_mhmm(genoVector, posArray, is_accessible=isAccessible)
        print('roh = ' + str(roh))

        rohFrame, rohFraction = roh
        if rohFrame.empty:
            # Samples without any detected run are left out of the output.
            continue

        sampleRuns = {'confidence': float(rohFraction)}
        # Walk the three needed columns in lockstep instead of materialising
        # the same row with .iloc[j] once per field.
        runColumns = zip(rohFrame['start'], rohFrame['stop'],
                         rohFrame['is_marginal'])
        for j, (start, stop, isMarginal) in enumerate(runColumns):
            # Cast to built-in types so json can serialise the values.
            sampleRuns[j] = {
                'start': int(start),
                'stop': int(stop),
                'is_marginal': bool(isMarginal),
            }
        runsOfHomozygosity[i] = sampleRuns

    logger.debug('writing to ' + outputFile)
    # The context manager closes the file; no explicit close() is needed.
    with open(outputFile, 'w') as f:
        json.dump(runsOfHomozygosity, f)