示例#1
0
#  where
#    - N is [0-45]
#    - a "SNP with Complete Trio" = SNP has all 6 alleles for a given family
#  IOW: histogram with # of trios on the X-axis (0 to 45) and # of SNPs on the Y-axis
#
# Sanity Checks:
#   - there should be 158 +  12 SNPs with 0 complete trios
#   - running this with all data sets should give same results  (including both the 45-family sets and the 94-family sets)

# Input data file handed to pedparse.load_file() below.
DATA_FILENAME = "Data_ALL_0s.ped"
import logging
# %(message)s is the documented attribute that carries the final log text
# (format string merged with its arguments).  The previous %(msg)s is the raw,
# un-merged format string, so any call passing arguments separately
# (logging.info("x=%d", x)) would have been printed without them.
logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s",
                    level=logging.INFO)

import pedparse
everybody = pedparse.load_file(DATA_FILENAME)

# Initialize a list with one bucket per possible number of complete trios,
# 0 to N, where N is the total number of trios ...
# OR
#complete_trio_buckets = [0] * (len(everybody) + 1)
# N is the total number of trios minus the number of known incomplete trios.
complete_trio_buckets = [0] * (
    len(everybody) - len(pedparse.FAMILIES_WITH_NO_COMPLETE_TRIOS) + 1)
# "+ 1" is for the 0-bucket
try:
    for snp in xrange(pedparse.TOTAL_SNPS):
        complete_trios = 0
        for fam in everybody.values():
            if fam.count_alleles(snp) == 6:
#!/usr/bin/env python

# Input data file handed to pedparse.load_file() below.
DATA_FILENAME = "Data_ALL_0s.ped"

import logging
# %(message)s is the documented attribute that carries the final log text
# (format string merged with its arguments).  The previous %(msg)s is the raw,
# un-merged format string and only worked because every call here formats its
# message eagerly with the % operator.
logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s",
                    level=logging.INFO)
logging.info("Hi there.")

import pedparse

# NOTE(review): limit_trios=1 presumably restricts how many trios are loaded;
# confirm against pedparse.load_file().
everybody = pedparse.load_file(DATA_FILENAME, limit_trios=1)

logging.info("There are %d trios" % len(everybody))

# One summary line per family: raw counts of complete / empty / partial SNPs,
# each followed by its share of pedparse.TOTAL_SNPS as a percentage.
report_template = "Family ID: %(family_id)s -- Complete SNP data for %(complete)7d SNPs (%(complete_pct)6.2f%%) - Empty SNP data for %(empty)7d SNPs (%(empty_pct)6.2f%%) - Partial SNP data for %(partial)7d SNPs (%(partial_pct)6.2f%%)"

for family in everybody.values():
    summary = family.do_counts()
    summary['family_id'] = family.family_id

    # Add a "<category>_pct" entry next to each raw count.
    for category in ('complete', 'empty', 'partial'):
        share = 100.0 * summary[category] / pedparse.TOTAL_SNPS
        summary[category + '_pct'] = share

    logging.info(report_template % summary)
#!/usr/bin/env python

# Input data file handed to pedparse.load_file() below.
DATA_FILENAME = "Data_ALL_0s.ped"
import logging
# %(message)s is the documented attribute that carries the final log text
# (format string merged with its arguments).  The previous %(msg)s is the raw,
# un-merged format string and only worked because every call in this script
# formats its message eagerly with the % operator.
logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)

import pedparse
everybody = pedparse.load_file(DATA_FILENAME)

# Collect every SNP for which no family has all 6 alleles (no complete trio).
snps_with_no_complete_trios = []

# Here a SNP is just a line number; its name can be found later in the
# .map file.
for snp in xrange(pedparse.TOTAL_SNPS):
    if not snp % 500:
        # Progress marker every 500 SNPs (emitted at DEBUG level).
        logging.debug("Processing SNP # %d" % snp)

    # Stop at the first family that has the full set of 6 alleles.
    found_complete_trio = False
    for fam in everybody.values():
        if fam.count_alleles(snp) == 6:
            found_complete_trio = True
            break

    if not found_complete_trio:
        # Every family was checked and none was complete for this SNP.
        snps_with_no_complete_trios.append(snp)

# Report how many SNPs have no complete trio, followed by the SNPs themselves.
logging.info("Found %d SNPs with NO complete trios"
             % (len(snps_with_no_complete_trios),))
logging.info("List of SNPs with NO complete trios: %s"
             % (snps_with_no_complete_trios,))

# Work on an independent copy so the original list stays intact.
# NOTE(review): presumably the loop that follows prunes this copy -- its body
# is not visible here, confirm.
snps_with_no_complete_trios_but_some_data = list(snps_with_no_complete_trios)

for snp in snps_with_no_complete_trios:
#!/usr/bin/env python

# Input data file handed to pedparse.load_file() below.
DATA_FILENAME = "Data_ALL_0s.ped"

import logging
# %(message)s is the documented attribute that carries the final log text
# (format string merged with its arguments).  The previous %(msg)s is the raw,
# un-merged format string and only worked because every call here formats its
# message eagerly with the % operator.
logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
logging.info("Hi there.")

import pedparse

# NOTE(review): limit_trios=1 presumably restricts how many trios are loaded;
# confirm against pedparse.load_file().
everybody = pedparse.load_file(DATA_FILENAME, limit_trios=1)

logging.info( "There are %d trios" % len(everybody))

# For each family, log the number of SNPs whose data is complete, empty or
# partial, together with each count's percentage of pedparse.TOTAL_SNPS.
pct_categories = ('complete', 'empty', 'partial')

for family in everybody.values():
    tally = family.do_counts()
    tally['family_id'] = family.family_id

    # Derive the "<name>_pct" entries that the message below refers to.
    for name in pct_categories:
        tally['%s_pct' % name] = 100.0 * tally[name] / pedparse.TOTAL_SNPS

    message = "Family ID: %(family_id)s -- Complete SNP data for %(complete)7d SNPs (%(complete_pct)6.2f%%) - Empty SNP data for %(empty)7d SNPs (%(empty_pct)6.2f%%) - Partial SNP data for %(partial)7d SNPs (%(partial_pct)6.2f%%)" % tally
    logging.info(message)