def main():
    """Fit the haploid and diploid components, then the hap-dip model.

    Takes exactly one command-line argument, the sample ID.  Under the
    "kmer_histograms" directory it reads
    <sampleId>.haploid_from_mixed.kmer_dist,
    <sampleId>.diploid_from_mixed.kmer_dist and <sampleId>.mixed.kmer_dist.
    The component fits seed the initial parameter vector of the hap-dip
    fit.  When that fit succeeds its parameters are written to
    <sampleId>.mixed.fit; otherwise that file is left empty.

    Raises:
        AssertionError: on a wrong argument count, or when a component fit
            or the hap-dip default parameters are unavailable.
    """
    assert (len(argv) == 2), "need the sampleID and nothing else"
    sampleId = argv[1]

    saveConvergence = True
    explainFailure = True
    path = "kmer_histograms"
    samplePrefix = path + "/" + sampleId
    fitFilename = samplePrefix + ".mixed.fit"

    def explain(fitter):
        """Print the fitter's retCode, stdout and stderr when enabled."""
        if not explainFailure:
            return
        print("... return code ...")
        print(fitter.retCode)
        print("... stdout ...")
        print(fitter.stdout)
        print("... stderr ...")
        print(fitter.stderr)

    # clear the convergence file, in case we have a failure (we don't want
    # previous results to leak through)
    if saveConvergence:
        with open(fitFilename, "wt"):
            pass

    # perform haploid fit to the haploid component
    hFitter = HaploidFitter(samplePrefix + ".haploid_from_mixed.kmer_dist")
    hParamNames = hFitter.paramNames
    hFitParams = hFitter.fit()
    if hFitParams is None:
        print("(haploid: failure or non-convergence)")
        explain(hFitter)
    else:
        print(params_to_text(hParamNames, hFitParams, prefix="cvrg.haploid:"))

    # perform diploid fit to the diploid component
    dFitter = DiploidFitter(samplePrefix + ".diploid_from_mixed.kmer_dist")
    dParamNames = dFitter.paramNames
    dFitParams = dFitter.fit()
    if dFitParams is None:
        print("(diploid: failure or non-convergence)")
        explain(dFitter)
    else:
        print(params_to_text(dParamNames, dFitParams, prefix="cvrg.diploid:"))

    # ask the hap-dip fitter for its default parameters
    hdFitter = EnrichedHapDipFitter(samplePrefix + ".mixed.kmer_dist")
    hdParamNames = hdFitter.paramNames
    hdDefaultParams = hdFitter.default_params()
    if hdDefaultParams is None:
        print("(hap-dip: failed to get default params)")
        explain(hdFitter)
    else:
        print(params_to_text(hdParamNames, hdDefaultParams,
                             prefix="dflt.hapdip:"))

    assert ((hFitParams is not None)
            and (dFitParams is not None)
            and (hdDefaultParams is not None)), \
        "(no point in trying to fit the hap-dip model)"

    # create an initial vector for the enrichment model, combining elements
    # from the component fits with the usual defaults
    hdInitParams = {
        "zp.copy.y":   hFitParams["zp.copy"],
        "zp.copy.hom": dFitParams["zp.copy"],
        "zp.copy.het": dFitParams["zp.copy.het"],
        "p.e":         hFitParams["p.e"],
        "shape.e":     hFitParams["shape.e"],
        "scale.e":     hFitParams["scale.e"],
        "p.y":         hdDefaultParams["p.y"],
        "u.y":         hFitParams["u.v"],
        "sd.y":        hFitParams["sd.v"],
        "shape.y":     hFitParams["shape.v"],
        "p.hom":       1 - float(dFitParams["p.d"]),
        "u.hom":       dFitParams["u.v"],
        "sd.hom":      dFitParams["sd.v"],
        "var.het":     dFitParams["var.w"],
    }

    # perform hap-dip fit to the mixed components
    hdFitParams = hdFitter.fit(hdInitParams)
    if hdFitParams is None:
        print("(hap-dip: failure or non-convergence)")
        print(params_to_text(hdParamNames, hdInitParams,
                             prefix="init.hapdip:"))
        explain(hdFitter)
        # nothing to save; the convergence file stays empty
        return

    print(params_to_text(hdParamNames, hdInitParams, hdFitParams,
                         prefix="init.hapdip:", prefix2="cvrg.hapdip:"))

    # write the convergence file
    if saveConvergence:
        with open(fitFilename, "wt") as f:
            f.write(str(params_to_text(hdParamNames, hdFitParams)) + "\n")
def main():
    """Run random-start convergence trials of the hap-dip fit on a fixed sample.

    Takes no command-line arguments.  Each of numTrials trials starts the
    fitter from a random point between the hard-coded "good" and "default"
    parameter vectors, prints the initial (and, on success, converged)
    parameters, and reports the distance of the converged vector from the
    good one.  A count of converged trials is printed at the end.
    """
    assert (len(argv) == 1), "give me no arguments"

    numTrials = 1000
    random_seed("acorn")
    explainFailure = False
    path = "kmer_histograms"

    # alternative data set, kept for reference:
    #   sampleId = "mixedB"
    #   name            default     good
    #   "zp.copy.y"       3.000    2.042
    #   "zp.copy.hom"     3.000    3.157
    #   "zp.copy.het"     3.000   17.795
    #   "p.e"             0.942    0.935
    #   "shape.e"         3.000    0.096
    #   "scale.e"         1.000    0.465
    #   "p.y"             0.900    0.621
    #   "u.y"            64.000   68.084
    #   "sd.y"           14.826    8.626
    #   "shape.y"         0.000    0.057
    #   "p.hom"           0.800    0.853
    #   "u.hom"           5.120   11.101
    #   "sd.hom"          1.186    3.600
    #   "var.het"         1.407   10.916

    sampleId = "apple_E12_L150_D80_K25"
    paramTable = [
        # name           default     good
        ("zp.copy.y",     3.000,    2.047),
        ("zp.copy.hom",   3.000,    3.390),
        ("zp.copy.het",   3.000,    1.137),
        ("p.e",           0.940,    0.937),
        ("shape.e",       3.000,    0.114),
        ("scale.e",       1.000,    0.452),
        ("p.y",           0.900,    0.630),
        ("u.y",          62.000,   65.974),
        ("sd.y",         16.309,    8.666),
        ("shape.y",       0.000,    0.228),
        ("p.hom",         0.800,    0.818),
        ("u.hom",         4.960,   13.622),
        ("sd.hom",        1.305,    4.086),
        ("var.het",       1.702,   15.274),
    ]
    defaultParams = dict((key, dflt) for (key, dflt, _) in paramTable)
    goodParams = dict((key, good) for (key, _, good) in paramTable)

    fitter = EnrichedHapDipFitter(path + "/" + sampleId + ".mixed.kmer_dist")
    paramNames = fitter.paramNames

    def random_start():
        """Pick a random point in the hypercube between "good" and "bad".

        Returns the starting parameters and the norm of the per-parameter
        random fractions divided by the number of parameters.
        """
        start = dict(goodParams)
        sumSquares = 0.0
        for key in paramNames:
            fraction = unit_random()
            start[key] += fraction * (defaultParams[key] - goodParams[key])
            sumSquares += fraction * fraction
        return (start, sqrt(sumSquares) / len(paramNames))

    convergenceCount = 0
    for trialNumber in range(numTrials):
        print("=== trial %d of %d ===" % (1 + trialNumber, numTrials))

        (initParams, normInit) = random_start()
        fitter.set_params(initParams)
        fitParams = fitter.fit()

        if fitParams is not None:
            print(params_to_text(paramNames, initParams, fitParams,
                                 prefix="init+[%d]:" % trialNumber,
                                 prefix2="cvrg[%d]:" % trialNumber))
            fitParams = params_to_float(fitParams)
            dGood = vector_distance(fitParams, goodParams)
            print("normInit: %.8f" % normInit)
            print("dGood: %.8f" % dGood)
            convergenceCount += 1
        else:
            print(params_to_text(paramNames, initParams,
                                 prefix="init-[%d]:" % trialNumber))
            print("normInit: %.8f" % normInit)
            print("(failure or non-convergence)")
            if explainFailure:
                print("... return code ...")
                print(fitter.retCode)
                print("... stdout ...")
                print(fitter.stdout)
                print("... stderr ...")
                print(fitter.stderr)

    print("%d of %d trials converged" % (convergenceCount, numTrials))
def main():
    """Run random-start convergence trials of the hap-dip fit for one sample.

    Takes two command-line arguments: the sample ID and the number of
    trials.  The "default" parameters come from the fitter itself; the
    "good" parameters are read from kmer_histograms/<sampleId>.mixed.fit
    (usually produced by explore3_hap_dip).  Each trial starts the fitter
    from a random point between the two vectors and reports whether it
    converged and how far the result is from the good vector.

    Raises:
        AssertionError: on a wrong argument count, when the fitter cannot
            supply default parameters, or when the fit file's parameter
            names do not match the fitter's defaults.
    """
    assert (len(argv) == 3), \
        "need the sampleID and number of trials, and nothing else"
    sampleId = argv[1]
    numTrials = int(argv[2])

    random_seed("acorn")
    explainFailure = False
    path = "kmer_histograms"

    def explain(failedFitter):
        """Print the fitter's retCode, stdout and stderr when enabled."""
        if not explainFailure:
            return
        print("... return code ...")
        print(failedFitter.retCode)
        print("... stdout ...")
        print(failedFitter.stdout)
        print("... stderr ...")
        print(failedFitter.stderr)

    # ask the curve fitter what the default parameters are
    fitter = EnrichedHapDipFitter(path + "/" + sampleId + ".mixed.kmer_dist")
    paramNames = fitter.paramNames
    defaultParams = fitter.default_params()
    if defaultParams is None:
        print("(failed to get default params)")
        # the original dumped an undefined name (hdFitter) here
        explain(fitter)
        # raised explicitly so the abort still happens under python -O
        raise AssertionError("failed to get default params")
    defaultParams = params_to_float(defaultParams)

    # read the "good" parameters (usually produced by explore3_hap_dip)
    fitFilename = path + "/" + sampleId + ".mixed.fit"
    with open(fitFilename, "rt") as f:
        goodParams = params_from_text(list(f))

    for name in defaultParams:
        assert (name in goodParams), \
            "parameter \"%s\" missing from %s" % (name, fitFilename)
    for name in goodParams:
        assert (name in defaultParams), \
            "extra parameter \"%s\" in %s" % (name, fitFilename)
    goodParams = params_to_float(goodParams)

    print(params_to_text(paramNames, goodParams, defaultParams,
                         prefix="good:", prefix2="dflt:"))

    # run the convergence trials
    convergenceCount = 0
    for trialNumber in range(numTrials):
        print("=== trial %d of %d ===" % (1 + trialNumber, numTrials))

        # choose initial params as a random point in hypercube between "good"
        # and "bad"
        initParams = dict(goodParams)
        norm2Init = 0.0
        for name in paramNames:
            step = unit_random()
            initParams[name] += step * (defaultParams[name] - goodParams[name])
            norm2Init += step * step
        normInit = sqrt(norm2Init) / len(paramNames)

        fitter.set_params(initParams)
        fitParams = fitter.fit()
        if fitParams is None:
            print(params_to_text(paramNames, initParams,
                                 prefix="init-[%d]:" % trialNumber))
            print("normInit: %.8f" % normInit)
            print("(failure or non-convergence)")
            explain(fitter)
            continue

        print(params_to_text(paramNames, initParams, fitParams,
                             prefix="init+[%d]:" % trialNumber,
                             prefix2="cvrg[%d]:" % trialNumber))
        fitParams = params_to_float(fitParams)
        dGood = vector_distance(fitParams, goodParams)
        print("normInit: %.8f" % normInit)
        print("dGood: %.8f" % dGood)
        convergenceCount += 1

    print("%d of %d trials converged" % (convergenceCount, numTrials))
def main():
    """Probe hap-dip fit convergence one parameter at a time on sample mixedB.

    Takes no command-line arguments.  For each fitter parameter in turn,
    the starting vector equals the hard-coded "good" vector except that the
    one parameter is moved toward its "default" value in numSteps equal
    steps.  Each fit's outcome is printed; on success the distance of the
    converged vector from the good vector is printed as well.
    """
    assert len(argv) == 1, "give me no arguments"

    path = "kmer_histograms"
    sampleId = "mixedB"
    fitter = EnrichedHapDipFitter(path + "/" + sampleId + ".mixed.kmer_dist")
    paramNames = fitter.paramNames

    paramTable = [
        # name           default     good
        ("zp.copy.y",     3.000,    2.042),
        ("zp.copy.hom",   3.000,    3.157),
        ("zp.copy.het",   3.000,   17.795),
        ("p.e",           0.942,    0.935),
        ("shape.e",       3.000,    0.096),
        ("scale.e",       1.000,    0.465),
        ("p.y",           0.900,    0.621),
        ("u.y",          64.000,   68.084),
        ("sd.y",         14.826,    8.626),
        ("shape.y",       0.000,    0.057),
        ("p.hom",         0.800,    0.853),
        ("u.hom",         5.120,   11.101),
        ("sd.hom",        1.186,    3.600),
        ("var.het",       1.407,   10.916),
    ]
    defaultParams = dict((key, dflt) for (key, dflt, _) in paramTable)
    goodParams = dict((key, good) for (key, _, good) in paramTable)

    numSteps = 10
    numParams = len(paramNames)

    def try_fit(paramNumber, name, step):
        """Fit from the good vector with one parameter moved by `step` steps."""
        print('=== param %d of %s ("%s") step %d of %s ==='
              % (paramNumber, numParams, name, step, numSteps))

        initParams = dict(goodParams)
        initParams[name] += \
            step * (defaultParams[name] - goodParams[name]) / numSteps

        fitter.set_params(initParams)
        fitParams = fitter.fit()
        if fitParams is None:
            print(params_to_text(paramNames, initParams, prefix="init:"))
            print("(failure or non-convergence)")
            print("... return code ...")
            print(fitter.retCode)
            print("... stdout ...")
            print(fitter.stdout)
            print("... stderr ...")
            print(fitter.stderr)
            return

        print(params_to_text(paramNames, initParams, fitParams,
                             prefix="init:", prefix2="cvrg:"))
        distance = vector_distance(params_to_float(fitParams), goodParams)
        print("dGood: %.8f" % distance)

    for (paramIx, name) in enumerate(paramNames):
        if paramIx > 0:
            # blank line between one parameter's steps and the next
            print("")
        for step in range(1, numSteps + 1):
            try_fit(1 + paramIx, name, step)
def main():
    """Chain haploid -> hap-hap -> hap-dip fits on one sample's mixed histogram.

    Takes exactly one command-line argument, the sample ID.  Every fit is
    run against kmer_histograms/<sampleId>.mixed.kmer_dist:

      1. a haploid fit (ignoring the diploid component);
      2. a hap-hap enrichment fit, seeded from the haploid fit;
      3. a hap-dip enrichment fit, seeded from the hap-hap fit.

    The sample's "cheat" parameters are read from
    kmer_histograms/<sampleId>.mixed.fit (usually produced by
    explore3_hap_dip) and printed for comparison.  If the hap-dip fit does
    not converge, the initial vector is moved toward the cheat parameters
    in small steps until a fit converges or the steps run out.

    Failure notices go to both stderr and stdout; parameter listings go to
    stdout.

    Raises:
        AssertionError: on a wrong argument count, when a prerequisite fit
            or default-parameter query fails, or when the fit file's
            parameter names do not match the hap-dip defaults.
    """
    assert (len(argv) == 2), "need the sampleID and nothing else"
    sampleId = argv[1]

    explainFailure = True
    path = "kmer_histograms"
    mixedFilename = path + "/" + sampleId + ".mixed.kmer_dist"

    def announce(message):
        """Report a failure on stderr and, parenthesized, on stdout."""
        stderr.write(message + "\n")
        print("(" + message + ")")

    def explain(fitter):
        """Print the fitter's retCode, stdout and stderr when enabled."""
        if not explainFailure:
            return
        print("... return code ...")
        print(fitter.retCode)
        print("... stdout ...")
        print(fitter.stdout)
        print("... stderr ...")
        print(fitter.stderr)

    def derive_hom_het(params):
        """Set u.hom, sd.hom and var.het from p.y, p.hom, u.y and sd.y."""
        pAuto = 1 - float(params["p.y"])
        pHom = float(params["p.hom"])
        sdHom = pAuto * pHom * float(params["sd.y"])
        params["u.hom"] = pAuto * pHom * float(params["u.y"])
        params["sd.hom"] = sdHom
        params["var.het"] = sdHom * sdHom

    print(sampleId)

    # perform haploid fit to the sample (ignoring the diploid component)
    hFitter = HaploidFitter(mixedFilename)
    hParamNames = hFitter.paramNames
    hFitParams = hFitter.fit()
    if hFitParams is None:
        announce("haploid: failure or non-convergence")
        explain(hFitter)
    else:
        print(params_to_text(hParamNames, hFitParams, prefix="cvrg.haploid:"))

    # ask for default values for the hap-hap enrichment model
    hhFitter = EnrichedHapHapFitter(mixedFilename)
    hhParamNames = hhFitter.paramNames
    hhDefaultParams = hhFitter.default_params()
    if hhDefaultParams is None:
        announce("hap-hap: failed to get default params")
        explain(hhFitter)
    else:
        print(params_to_text(hhParamNames, hhDefaultParams,
                             prefix="dflt.haphap:"))

    assert (hFitParams is not None) and (hhDefaultParams is not None), \
        "(no point in trying to fit the hap-hap model)"

    # create an initial vector for the enrichment model, borrowing some
    # elements from the haploid model fit
    hhInitParams = dict(hhDefaultParams)
    hhInitParams["zp.copy.y"] = hFitParams["zp.copy"]
    hhInitParams["p.e"] = hFitParams["p.e"]
    hhInitParams["shape.e"] = hFitParams["shape.e"]
    hhInitParams["scale.e"] = hFitParams["scale.e"]
    hhInitParams["u.y"] = hFitParams["u.v"]
    hhInitParams["sd.y"] = hFitParams["sd.v"]
    hhInitParams["shape.y"] = hFitParams["shape.v"]
    pAuto = 1 - float(hhInitParams["p.y"])
    hhInitParams["u.auto"] = pAuto * float(hhInitParams["u.y"])
    hhInitParams["sd.auto"] = pAuto * float(hhInitParams["sd.y"])

    # perform hap-hap fit to the mixed components
    hhFitParams = hhFitter.fit(hhInitParams)
    if hhFitParams is None:
        announce("hap-hap: failure or non-convergence")
        print(params_to_text(hhParamNames, hhInitParams,
                             prefix="smart.haphap:"))
        explain(hhFitter)
    else:
        print(params_to_text(hhParamNames, hhInitParams, hhFitParams,
                             prefix="smart.haphap:", prefix2="cvrg.haphap:"))

    assert (hhFitParams is not None), \
        "(no point in trying to fit the hap-dip model)"

    # ask for default values for the hap-dip enrichment model
    hdFitter = EnrichedHapDipFitter(mixedFilename)
    hdParamNames = hdFitter.paramNames
    hdDefaultParams = hdFitter.default_params()
    if hdDefaultParams is None:
        announce("hap-dip: failed to get default params")
        explain(hdFitter)
    else:
        print(params_to_text(hdParamNames, hdDefaultParams,
                             prefix="dflt.hapdip:"))

    assert (hdDefaultParams is not None), \
        "(no point in trying to fit the hap-dip model)"

    # read the sample's "cheat" parameters for comparison (usually produced
    # by explore3_hap_dip)
    fitFilename = path + "/" + sampleId + ".mixed.fit"
    with open(fitFilename, "rt") as f:
        hdCheatParams = params_from_text(list(f))

    for name in hdDefaultParams:
        assert (name in hdCheatParams), \
            "parameter \"%s\" missing from %s" % (name, fitFilename)
    for name in hdCheatParams:
        assert (name in hdDefaultParams), \
            "extra parameter \"%s\" in %s" % (name, fitFilename)

    # create an initial vector for the hap-dip enrichment model, borrowing
    # some elements from the hap-hap model fit
    hdInitParams = dict(hdDefaultParams)
    for name in ("zp.copy.y", "p.e", "shape.e", "scale.e",
                 "p.y", "u.y", "sd.y", "shape.y"):
        hdInitParams[name] = hhFitParams[name]
    derive_hom_het(hdInitParams)

    # perform hap-dip fit to the mixed components
    hdFitParams = hdFitter.fit(hdInitParams)
    if hdFitParams is None:
        announce("hap-dip: failure or non-convergence")
        print(params_to_text(hdParamNames, hdInitParams, hdCheatParams,
                             prefix="smart.hapdip:", prefix2="cheat.hapdip:"))
        explain(hdFitter)
    else:
        print(params_to_text(hdParamNames, hdInitParams, hdFitParams,
                             prefix="smart.hapdip:", prefix2="cvrg.hapdip:"))
        print(params_to_text(hdParamNames, hdCheatParams,
                             prefix="cheat.hapdip:"))

    # if convergence failed, try moving the initial parameters toward the
    # cheat parameters in small steps until we get convergence
    # $$$ a binary search would be "better"
    # NOTE(review): as in the original, the last step (numSteps itself,
    # i.e. the cheat values) is never tried -- confirm that is intended.
    numSteps = 100
    derivedNames = ("u.hom", "sd.hom", "var.het")
    if hdFitParams is None:
        for step in range(1, numSteps):
            stderr.write("step %d\n" % step)

            # interpolate every directly-fitted parameter; the derived ones
            # are recomputed from the interpolated values below
            hdStepParams = {}
            for name in hdInitParams:
                if name in derivedNames:
                    continue
                param = float(hdInitParams[name])
                param += (step * (float(hdCheatParams[name]) - param)) / numSteps
                hdStepParams[name] = param
            derive_hom_het(hdStepParams)

            hdFitParams = hdFitter.fit(hdStepParams)
            if hdFitParams is not None:
                print(params_to_text(hdParamNames, hdStepParams, hdFitParams,
                                     prefix="step[%d].hapdip:" % step,
                                     prefix2="cvrg.hapdip:"))
                break
            print(params_to_text(hdParamNames, hdStepParams,
                                 prefix="step[%d].hapdip:" % step))