# NOTE(review): this span appears to have lost its original line breaks and
# indentation. As stored on one physical line, everything after the first '#'
# is parsed as a comment, and the leading statements are not valid Python when
# written back to back on a single line. Restore the original layout before
# running. The code is left untouched here because the span starts inside a
# block whose header is not visible and the loop body may continue past it.
#
# Intended flow, as far as the visible text shows:
#   * The leading print / sys.exit(1) pair reports that the pedigree file must
#     contain only case pedigrees for TDT and exits with status 1. It is
#     presumably guarded by a condition above this span -- TODO confirm.
#   * When TEST == "fbat" and no value in pedPhenoDict equals 1, the FBAT
#     message is printed and the script exits with status 1. (Presumably 1
#     encodes a control pedigree -- verify against where pedPhenoDict is built.)
#   * fmFile is then read line by line. Lines starting with chrM, chr25, chrY
#     or chr24 are skipped; chrX / chr23 lines get a ChrXMarker; every other
#     line gets an AutosomalMarker.
#   * Each kept line is stripped and split on tabs. Field 0 becomes
#     thisMarker.markerID. The remaining fields must each be all digits or the
#     literal "NA"; this is checked with `assert`, so the check disappears
#     under `python -O`. They are then passed to
#     thisMarker.getSampleGenotypes(vcfValues[1:], pedMemberIndices).
print 'Pedigree file must contain only case pedigrees for TDT.' sys.exit(1) if TEST == "fbat" and 1 not in pedPhenoDict.values(): print 'Pedigree file must contain control pedigrees for FBAT.' sys.exit(1) #read feature matrix one line at a time. First two lines have been read above for pedigree ids and member type for line in fmFile: #create new marker object #chrM and chrY are not tested if line.startswith('chrM') or line.startswith('chr25') or line.startswith('chrY') or line.startswith('chr24'): continue elif line.startswith('chrX') or line.startswith('chr23'): thisMarker = ChrXMarker() else: #TODO: more thorough check for validity of data format, like chr numbers?? thisMarker = AutosomalMarker() #get vcf columns vcfValues = line.strip().split('\t') #assert that all genotype values are numeric #TODO: verify that this assert works assert(all(v.isdigit() or v=="NA" for v in vcfValues[1:])) #1st column is variant id, 2nd onwards are sample genotypes #set this marker object's sample values thisMarker.markerID = vcfValues[0] thisMarker.getSampleGenotypes(vcfValues[1:],pedMemberIndices) #Note: thought of checking if chrX marker has heterozygous males, but it's not possible since the feature matrix comes in with encoded genotypes.
# NOTE(review): this span appears to have lost its original line breaks and
# indentation. As stored on one physical line, everything after the first '#'
# is parsed as a comment, and the leading statements are not valid Python when
# written back to back on a single line. Restore the original layout before
# running. The code is left untouched here because the span starts inside a
# block whose header is not visible and the loop body may continue past it.
#
# Intended flow, as far as the visible text shows:
#   * The leading print / sys.exit(1) pair reports that the pedigree file must
#     contain only case pedigrees for TDT and exits with status 1. It is
#     presumably guarded by a condition above this span -- TODO confirm.
#   * When TEST == "fbat" and no value in NBPhenoDict equals 1, the FBAT
#     message is printed and the script exits with status 1. (Presumably 1
#     encodes a control pedigree -- verify against where NBPhenoDict is built.)
#   * fmFile is then read line by line. Lines starting with chrM, chr25, chrY
#     or chr24 are skipped; chrX / chr23 lines get a ChrXMarker; every other
#     line gets an AutosomalMarker.
#   * Each kept line is stripped and split on tabs. Field 0 becomes
#     thisMarker.markerID. The remaining fields must each be all digits or the
#     literal "NA"; this is checked with `assert`, so the check disappears
#     under `python -O`. They are then passed to
#     thisMarker.getPedGenotypes(vcfValues[1:], sampleIDs, fIDs, mIDs).
print 'Pedigree file must contain only case pedigrees for TDT.' sys.exit(1) if TEST == "fbat" and 1 not in NBPhenoDict.values(): print 'Pedigree file must contain control pedigrees for FBAT.' sys.exit(1) #read feature matrix one line at a time. First two lines have been read above for pedigree ids and member type for line in fmFile: #create new marker object #chrM and chrY are not tested if line.startswith('chrM') or line.startswith('chr25') or line.startswith('chrY') or line.startswith('chr24'): continue elif line.startswith('chrX') or line.startswith('chr23'): thisMarker = ChrXMarker() else: #TODO: more thorough check for validity of data format, like chr numbers?? thisMarker = AutosomalMarker() #get vcf columns vcfValues = line.strip().split('\t') #assert that all genotype values are numeric #TODO: verify that this assert works assert(all(v.isdigit() or v=="NA" for v in vcfValues[1:])) #1st column is variant id, 2nd onwards are sample genotypes #set this marker object's sample values thisMarker.markerID = vcfValues[0] thisMarker.getPedGenotypes(vcfValues[1:],sampleIDs,fIDs,mIDs) #Note: thought of checking if chrX marker has heterozygous males, but it's not possible since the feature matrix comes in with encoded genotypes.