# NOTE(review): the statements below share one physical line, so their block
# nesting is not shown; this summary follows statement order only. As laid out,
# Python treats everything after the first '#' on the line as a comment --
# confirm the line breaks against the original script.
#
# Leading statements (tail of a definition whose start is outside this view):
#   write tab-prefixed str(used), str(skipped) and str(split) plus a newline to
#   `savesort`, call writeTrees(trees, label), then close `savesort`.
#
# Script portion, in order:
#   1. codeToSampleMap <- getCodeToSampleMap()
#   2. allsets, labelSamples <- readOptimInputs(codeToSampleMap);
#      inputSplits <- getOptimInputSplits(allsets)
#   3. (patientSampleMap, samplePatientMap) <- lps.getPatientSampleMap();
#      deletions, CNVs <- lps.loadDeletionsAndCNVs(samplePatientMap)
#   4. mutations <- readMutations(); sortMutations(...) is then called with the
#      values above and its return value is not used. The writeAllSampleVAFs
#      call is commented out.
#   5. count and countByCall are initialised, then a patient -> chrom -> pos
#      walk over `mutations` tests whether an entry holds more than one element.
#      The body of that test lies outside this view.
savesort.write("\t" + str(used)) savesort.write("\t" + str(skipped)) savesort.write("\t" + str(split)) savesort.write("\n") writeTrees(trees, label) savesort.close() codeToSampleMap = getCodeToSampleMap() #Read in the 'optim' input files, and parse them to figure out where the splits are. allsets, labelSamples = readOptimInputs(codeToSampleMap) inputSplits = getOptimInputSplits(allsets) #Read in the deletions/CNVs (patientSampleMap, samplePatientMap) = lps.getPatientSampleMap() deletions, CNVs = lps.loadDeletionsAndCNVs(samplePatientMap) #Read in the mutations mutations = readMutations() sortMutations(mutations, allsets, inputSplits, deletions, CNVs, labelSamples) #writeAllSampleVAFs(mutations, patientSampleMap, deletions) count = 0 countByCall = {} for patient in mutations: for chrom in mutations[patient]: for pos in mutations[patient][chrom]: if len(list(mutations[patient][chrom][pos])) > 1:
# NOTE(review): the statements below share one physical line, so their block
# nesting is not shown; this summary follows statement order only. As laid out,
# Python treats everything after the first '#' on the line as a comment --
# confirm the line breaks against the original script.
#
# Leading statements (tail of a definition whose start is outside this view):
#   under `direction == "down"`, a `val` below `localmin` becomes the new
#   `localmin` and `distance` is reset to 0; otherwise `distance` is
#   incremented. The test `distance >= 40 and val - localmin > 0.25` precedes
#   the assignments direction = "up", localmax = val, maxkey = key (presumably
#   guarding them -- TODO confirm nesting). `ret` is returned.
#
# Script portion, in order:
#   1. (patientSampleMap, samplePatientMap) <- lsl.getPatientSampleMap(
#      dipvtet_file="calling_evidence_odds.tsv");
#      deletions, CNVs <- lsl.loadDeletionsAndCNVs(samplePatientMap)
#   2. "summary_smoothed_and_fit.tsv" is opened for writing as `summary`.
#   3. Tab-separated header cells are written one call at a time: Patient,
#      Sample, nPoints, Call, Group, HistMax x2, FitNormal x2,
#      FitNormal weight, HistMax x2. The "Mean x2", "Stdev x2" and
#      "HistMax height" cells are commented out.
# NOTE(review): "HistMax x2" is written twice in the visible part of the header;
# the rest of the header is outside this view -- confirm the repeat is intended.
localmin = val elif direction == "down": if val < localmin: localmin = val distance = 0 else: distance += 1 if distance >= 40 and val - localmin > 0.25: #print("Switched directions: going up at", key, distance, localmax, val) direction = "up" localmax = val maxkey = key return ret (patientSampleMap, samplePatientMap) = lsl.getPatientSampleMap( dipvtet_file="calling_evidence_odds.tsv") deletions, CNVs = lsl.loadDeletionsAndCNVs(samplePatientMap) summary = open("summary_smoothed_and_fit.tsv", "w") summary.write("Patient") summary.write("\tSample") summary.write("\tnPoints") summary.write("\tCall") summary.write("\tGroup") #summary.write("\tMean x2") #summary.write("\tStdev x2") summary.write("\tHistMax x2") #summary.write("\tHistMax height") summary.write("\tFitNormal x2") summary.write("\tFitNormal weight") summary.write("\tHistMax x2")
# NOTE(review): the statements below share one physical line, so their block
# nesting is not shown; this summary follows statement order only. The line
# opens with the closing words of a docstring whose start is outside this view.
#
# Leading statements (tail of a definition whose start is outside this view):
#   return False when `patient` is not a key of `deletions`, when `sample` is
#   not a key of deletions[patient], or when `chrom` is not a key of
#   deletions[patient][sample]. Otherwise each (start, end) pair stored for
#   that chromosome is checked, with True returned for a pair where
#   start <= pos <= end (both ends inclusive); a final `return False` follows.
#
# Script portion, in order:
#   1. `mutations` and `patientSampleMap` start as empty dicts; only the second
#      value returned by lsl.getPatientSampleMap() is kept (samplePatientMap).
#   2. `mutation_file` is opened for reading and iterated with csv.reader.
#   3. A row whose first field contains "DNANum" is skipped.
#   4. The first nine fields are unpacked into sample, chr, pos, ref, alt,
#      is_snv and is_2p; the second and third fields are discarded.
#   5. Rows with is_snv == "f" or is_2p == "f" are skipped. A filter on "N" in
#      the sample name is commented out.
#   6. refcnt and bafcnt are the last two fields of the row, converted to int.
# NOTE(review): the local name `chr` shadows the builtin chr() from here on.
False if not. """ if patient not in deletions: return False if sample not in deletions[patient]: return False if chrom not in deletions[patient][sample]: return False for (start, end) in deletions[patient][sample][chrom]: if start <= pos and end >= pos: return True return False mutations = {} (__, samplePatientMap) = lsl.getPatientSampleMap() patientSampleMap = {} with open(mutation_file, 'r') as csvfile: for lvec in csv.reader(csvfile): if "DNANum" in lvec[0]: continue (sample, __, __, chr, pos, ref, alt, is_snv, is_2p) = lvec[0:9] if (is_snv == "f"): continue if (is_2p == "f"): continue # if ("N" in sample): # continue refcnt = int(lvec[-2]) bafcnt = int(lvec[-1])
# NOTE(review): the statements below share one physical line, so their block
# nesting is not shown; this summary follows statement order only. As laid out,
# Python treats everything after the first '#' on the line as a comment --
# confirm the line breaks against the original script.
#
# Leading statements: the opening ')' closes a call that starts outside this
# view. Then, for every patient in `groupdata` and every `samples` key under
# it, these tab-separated cells are written to `outfile`: the patient, the
# "matches_tree" value, then str() of "count", "percentage", "cnv_count" and
# "cnv_percentage", then each member of `samples`, then a newline. `outfile`
# is closed afterwards.
#
# Script portion, in order:
#   1. patientSampleMap <- first value returned by lsl.getPatientSampleMap().
#   2. types = ["Singleton", "Root", "Grouped", "Ungrouped"].
#   3. groupdir + outfilename is opened for writing and rebound to `outfile`.
#   4. Header: "Patient", "Subclone SNV Threshhold", "nSNVmax", "GD Samples",
#      then "<type> counts" and "<type> total" for each entry of `types`, then
#      the two "Ungrouped potential subclone" columns and a newline.
#   5. Per patient in `groupdata`: smallestSNVcount = 100000, maxSNVcount = 0,
#      GDsamples = set(), then a loop over patientSampleMap[patient] whose body
#      lies outside this view.
# NOTE(review): the loop variable `type` shadows the builtin type() and stays
# bound after the loop.
) for patient in groupdata: for samples in groupdata[patient]: outfile.write(patient) outfile.write("\t" + groupdata[patient][samples]["matches_tree"]) outfile.write("\t" + str(groupdata[patient][samples]["count"])) outfile.write("\t" + str(groupdata[patient][samples]["percentage"])) outfile.write("\t" + str(groupdata[patient][samples]["cnv_count"])) outfile.write("\t" + str(groupdata[patient][samples]["cnv_percentage"])) for sample in samples: outfile.write("\t" + sample) outfile.write("\n") outfile.close() patientSampleMap, __ = lsl.getPatientSampleMap() #Now do some analysis types = ["Singleton", "Root", "Grouped", "Ungrouped"] outfile = open(groupdir + outfilename, "w") outfile.write("Patient\tSubclone SNV Threshhold\tnSNVmax\tGD Samples") for type in types: outfile.write("\t" + type + " counts") outfile.write("\t" + type + " total") outfile.write( "\tUngrouped potential subclone counts\tUngrouped potential subclone total\n" ) for patient in groupdata: smallestSNVcount = 100000 maxSNVcount = 0 GDsamples = set() for sample in patientSampleMap[patient]:
# NOTE(review): the statements below share one physical line, so their block
# nesting is not shown; this summary follows statement order only. As laid out,
# Python treats everything after the first '#' on the line as a comment --
# confirm the line breaks against the original script.
#
# Leading statements (tail of a definition whose start is outside this view):
#   print an "Invalid match grid" diagnostic naming the patient, samples, chr
#   and segpair, then return the tuple (Nvec, Svec, allBalanced).
#
# Script portion ("Main routine"), in order:
#   1. CNlist accumulates the third element of every tuple yielded by
#      walk(CN_input); BAFlist does the same for each entry of BAF_input.
#      (`walk` is presumably os.walk -- TODO confirm against the imports.)
#   2. (s2p, p2s) <- lsl.getPatientSampleMap(dipvtet_file).
#   3. For each patient in p2s: skipped when `somepatientsonly` is set and the
#      patient is not in `somepatients`.
#   4. samples = p2s[patient] is sorted in place, so the object stored in p2s
#      is reordered too.
#   5. segments <- getSegmentCalls(patient, samples, s2p, CNlist).
#   6. BEAST_output + patient + "_allA.txt" and "_allB.txt" are opened for
#      writing as allA / allB, and writeHeader(..., samples) is called on each.
#   7. The keys of `segments` are sorted into `chrs`; for each one
#      shouldSwitch = True and prevN = [-1] * len(samples). The rest of the
#      loop lies outside this view.
# NOTE(review): the loop variable `chr` shadows the builtin chr().
print("Invalid match grid for patient", patient, "samples", str(samples), "at chr", str(chr), str(segpair)) return (Nvec, Svec, allBalanced) #Main routine: CNlist = [] for (_, _, f) in walk(CN_input): CNlist += f BAFlist = [] for bafin in BAF_input: for (_, _, f) in walk(bafin): BAFlist += f (s2p, p2s) = lsl.getPatientSampleMap(dipvtet_file) for patient in p2s: if somepatientsonly and patient not in somepatients: continue samples = p2s[patient] samples.sort() segments = getSegmentCalls(patient, samples, s2p, CNlist) allA = open(BEAST_output + patient + "_allA.txt", "w") allB = open(BEAST_output + patient + "_allB.txt", "w") writeHeader(allA, samples) writeHeader(allB, samples) chrs = list(segments.keys()) chrs.sort() for chr in chrs: shouldSwitch = True prevN = [-1] * len(samples)