Python EnrichedHapDipFitter.fit示例，kmervature.EnrichedHapDipFitter.fit Python示例

示例#1

0

显示文件

文件： explore3_hap_dip.py 项目： rsharris/kmervature

def main():
    """Fit the hap-dip enrichment model for one sample, seeded by component fits.

    Expects exactly one command-line argument, the sample ID.  A haploid fit
    and a diploid fit are run on the sample's ".haploid_from_mixed.kmer_dist"
    and ".diploid_from_mixed.kmer_dist" files; values from those fits, plus
    the hap-dip default for "p.y", form the initial vector for the hap-dip
    fit of the ".mixed.kmer_dist" file.  Every result is printed.  When the
    hap-dip fit succeeds its parameters are also written to
    "<sampleId>.mixed.fit"; when it fails that file is left empty.

    Raises:
        AssertionError: if the argument count is wrong, or if the haploid fit,
            the diploid fit, or the hap-dip default-parameter query failed.
    """

    def explain(fitter):
        # dump the fitter's retCode, stdout and stderr attributes
        print("... return code ...")
        print(fitter.retCode)
        print("... stdout ...")
        print(fitter.stdout)
        print("... stderr ...")
        print(fitter.stderr)

    assert (len(argv) == 2), "need the sampleID and nothing else"
    sampleId = argv[1]
    saveConvergence = True
    explainFailure = True
    path = "kmer_histograms"
    fitFilename = path + "/" + sampleId + ".mixed.fit"

    # clear the convergence file up front, so that results from a previous
    # run cannot leak through if anything below fails

    if saveConvergence:
        with open(fitFilename, "wt"):
            pass

    # perform haploid fit to the haploid component

    hFitter = HaploidFitter(path + "/" + sampleId + ".haploid_from_mixed.kmer_dist")
    hParamNames = hFitter.paramNames

    hFitParams = hFitter.fit()
    if hFitParams is None:
        print("(haploid: failure or non-convergence)")
        if explainFailure:
            explain(hFitter)
    else:
        print(params_to_text(hParamNames, hFitParams, prefix="cvrg.haploid:"))

    # perform diploid fit to the diploid component

    dFitter = DiploidFitter(path + "/" + sampleId + ".diploid_from_mixed.kmer_dist")
    dParamNames = dFitter.paramNames

    dFitParams = dFitter.fit()
    if dFitParams is None:
        print("(diploid: failure or non-convergence)")
        if explainFailure:
            explain(dFitter)
    else:
        print(params_to_text(dParamNames, dFitParams, prefix="cvrg.diploid:"))

    # ask for the default values of the hap-dip enrichment model

    hdFitter = EnrichedHapDipFitter(path + "/" + sampleId + ".mixed.kmer_dist")
    hdParamNames = hdFitter.paramNames

    hdDefaultParams = hdFitter.default_params()
    if hdDefaultParams is None:
        print("(hap-dip: failed to get default params)")
        if explainFailure:
            explain(hdFitter)
    else:
        print(params_to_text(hdParamNames, hdDefaultParams, prefix="dflt.hapdip:"))

    # all three results feed the initial vector, so stop here if any failed

    assert (hFitParams is not None) and (dFitParams is not None) \
       and (hdDefaultParams is not None), \
           "(no point in trying to fit the hap-dip model)"

    # create an initial vector for the enrichment model, combining elements
    # from the component fits with the usual defaults

    hdInitParams = {}
    hdInitParams["zp.copy.y"] = hFitParams["zp.copy"]
    hdInitParams["zp.copy.hom"] = dFitParams["zp.copy"]
    hdInitParams["zp.copy.het"] = dFitParams["zp.copy.het"]
    hdInitParams["p.e"] = hFitParams["p.e"]
    hdInitParams["shape.e"] = hFitParams["shape.e"]
    hdInitParams["scale.e"] = hFitParams["scale.e"]
    hdInitParams["p.y"] = hdDefaultParams["p.y"]
    hdInitParams["u.y"] = hFitParams["u.v"]
    hdInitParams["sd.y"] = hFitParams["sd.v"]
    hdInitParams["shape.y"] = hFitParams["shape.v"]
    hdInitParams["p.hom"] = 1 - float(dFitParams["p.d"])
    hdInitParams["u.hom"] = dFitParams["u.v"]
    hdInitParams["sd.hom"] = dFitParams["sd.v"]
    hdInitParams["var.het"] = dFitParams["var.w"]

    # perform hap-dip fit to the mixed components

    hdFitParams = hdFitter.fit(hdInitParams)
    if hdFitParams is None:
        print("(hap-dip: failure or non-convergence)")
        print(params_to_text(hdParamNames, hdInitParams, prefix="init.hapdip:"))
        if explainFailure:
            explain(hdFitter)
    else:
        print(params_to_text(hdParamNames, hdInitParams, hdFitParams,
                             prefix="init.hapdip:", prefix2="cvrg.hapdip:"))

    # write the convergence file; after a failed fit there is nothing to
    # save, so the file stays empty (as cleared above) rather than handing
    # None to params_to_text

    if saveConvergence and (hdFitParams is not None):
        with open(fitFilename, "wt") as f:
            f.write("%s\n" % params_to_text(hdParamNames, hdFitParams))

示例#2

0

显示文件

文件： explore2_hap_dip.py 项目： rsharris/kmervature

def main():
    """Count how often the hap-dip fit converges from random starting points.

    Takes no command-line arguments.  For each of a fixed number of trials, a
    starting vector is formed by moving every parameter from its hard-coded
    "good" value toward its hard-coded "default" value by an independent
    unit_random() fraction, and the hap-dip model is fitted from there.  Each
    trial's outcome is printed, followed by a final tally of converged trials.

    Raises:
        AssertionError: if any command-line argument is given.
    """
    assert (len(argv) == 1), "give me no arguments"

    random_seed("acorn")
    numTrials = 1000
    explainFailure = False
    histogramDir = "kmer_histograms"

    # alternative sample, kept for reference:
    #sampleId = "mixedB"
    #defaultParams = {"zp.copy.y"   :  3.000,
    #                 "zp.copy.hom" :  3.000,
    #                 "zp.copy.het" :  3.000,
    #                 "p.e"         :  0.942,
    #                 "shape.e"     :  3.000,
    #                 "scale.e"     :  1.000,
    #                 "p.y"         :  0.900,
    #                 "u.y"         : 64.000,
    #                 "sd.y"        : 14.826,
    #                 "shape.y"     :  0.000,
    #                 "p.hom"       :  0.800,
    #                 "u.hom"       :  5.120,
    #                 "sd.hom"      :  1.186,
    #                 "var.het"     :  1.407}
    #goodParams    = {"zp.copy.y"   :  2.042,
    #                 "zp.copy.hom" :  3.157,
    #                 "zp.copy.het" : 17.795,
    #                 "p.e"         :  0.935,
    #                 "shape.e"     :  0.096,
    #                 "scale.e"     :  0.465,
    #                 "p.y"         :  0.621,
    #                 "u.y"         : 68.084,
    #                 "sd.y"        :  8.626,
    #                 "shape.y"     :  0.057,
    #                 "p.hom"       :  0.853,
    #                 "u.hom"       : 11.101,
    #                 "sd.hom"      :  3.600,
    #                 "var.het"     : 10.916}

    sampleId = "apple_E12_L150_D80_K25"
    defaultParams = {
        "zp.copy.y": 3.000,
        "zp.copy.hom": 3.000,
        "zp.copy.het": 3.000,
        "p.e": 0.940,
        "shape.e": 3.000,
        "scale.e": 1.000,
        "p.y": 0.900,
        "u.y": 62.000,
        "sd.y": 16.309,
        "shape.y": 0.000,
        "p.hom": 0.800,
        "u.hom": 4.960,
        "sd.hom": 1.305,
        "var.het": 1.702,
    }
    goodParams = {
        "zp.copy.y": 2.047,
        "zp.copy.hom": 3.390,
        "zp.copy.het": 1.137,
        "p.e": 0.937,
        "shape.e": 0.114,
        "scale.e": 0.452,
        "p.y": 0.630,
        "u.y": 65.974,
        "sd.y": 8.666,
        "shape.y": 0.228,
        "p.hom": 0.818,
        "u.hom": 13.622,
        "sd.hom": 4.086,
        "var.het": 15.274,
    }

    hapDip = EnrichedHapDipFitter(histogramDir + "/" + sampleId + ".mixed.kmer_dist")
    names = hapDip.paramNames

    numConverged = 0
    for trial in range(numTrials):
        print("=== trial %d of %d ===" % (1 + trial, numTrials))

        # choose the starting point: one random fraction per parameter,
        # drawn in paramNames order, of the way from "good" to "default"

        fractions = [unit_random() for _ in names]
        start = dict(goodParams)
        for (name, fraction) in zip(names, fractions):
            start[name] += fraction * (defaultParams[name] - goodParams[name])
        startNorm = sqrt(sum(fraction * fraction for fraction in fractions)) / len(names)

        hapDip.set_params(start)
        fitted = hapDip.fit()

        if fitted is not None:
            print(params_to_text(names, start, fitted,
                                 prefix="init+[%d]:" % trial,
                                 prefix2="cvrg[%d]:" % trial))
            fitted = params_to_float(fitted)
            distanceToGood = vector_distance(fitted, goodParams)
            print("normInit: %.8f" % startNorm)
            print("dGood: %.8f" % distanceToGood)
            numConverged += 1
        else:
            print(params_to_text(names, start, prefix="init-[%d]:" % trial))
            print("normInit: %.8f" % startNorm)
            print("(failure or non-convergence)")
            if explainFailure:
                for (label, value) in (("return code", hapDip.retCode),
                                       ("stdout", hapDip.stdout),
                                       ("stderr", hapDip.stderr)):
                    print("... %s ..." % label)
                    print(value)

    print("%d of %d trials converged" % (numConverged, numTrials))

示例#3

0

显示文件

文件： explore4_hap_dip.py 项目： rsharris/kmervature

def main():
    """Run hap-dip convergence trials between a sample's saved and default params.

    Expects two command-line arguments: the sample ID and the number of
    trials.  The "default" vector is obtained from the hap-dip fitter and the
    "good" vector is read from "<sampleId>.mixed.fit"; both must name exactly
    the same parameters.  Each trial starts from a point formed by moving
    every parameter from its "good" value toward its "default" value by an
    independent unit_random() fraction.  Each trial's outcome is printed,
    followed by a final tally of converged trials.

    Raises:
        AssertionError: if the argument count is wrong, if the default
            parameters cannot be obtained, or if the fit file's parameter
            names do not match the default parameter names.
    """
    assert (len(argv) == 3), "need the sampleID and number of trials, and nothing else"
    sampleId = argv[1]
    numTrials = int(argv[2])

    random_seed("acorn")
    explainFailure = False
    path = "kmer_histograms"

    # ask the curve fitter what the default parameters are

    fitter = EnrichedHapDipFitter(path + "/" + sampleId + ".mixed.kmer_dist")
    paramNames = fitter.paramNames

    defaultParams = fitter.default_params()
    if defaultParams is None:
        print("(failed to get default params)")
        if explainFailure:
            # report on the fitter that was actually queried; there is no
            # other fitter in this function
            print("... return code ...")
            print(fitter.retCode)
            print("... stdout ...")
            print(fitter.stdout)
            print("... stderr ...")
            print(fitter.stderr)
        # raised explicitly, so the stop still happens when asserts are
        # disabled
        raise AssertionError("failed to get default params")

    defaultParams = params_to_float(defaultParams)

    # read the "good" parameters (usually produced by explore3_hap_dip)

    fitFilename = path + "/" + sampleId + ".mixed.fit"

    with open(fitFilename, "rt") as f:
        goodParams = params_from_text([line for line in f])

    for name in defaultParams:
        assert (name in goodParams), \
               "parameter \"%s\" missing from %s" % (name, fitFilename)

    for name in goodParams:
        assert (name in defaultParams), \
               "extra parameter \"%s\" in %s" % (name, fitFilename)

    goodParams = params_to_float(goodParams)

    print(params_to_text(paramNames, goodParams, defaultParams,
                         prefix="good:", prefix2="dflt:"))

    # run the convergence trials

    convergenceCount = 0
    for trialNumber in range(numTrials):
        print("=== trial %d of %d ===" % (1 + trialNumber, numTrials))

        # choose initial params as a random point in hypercube between "good"
        # and "bad"

        initParams = dict(goodParams)
        norm2Init = 0.0
        for name in paramNames:
            step = unit_random()
            initParams[name] += step * (defaultParams[name] - goodParams[name])
            norm2Init += step * step
        normInit = sqrt(norm2Init) / len(paramNames)

        fitter.set_params(initParams)
        fitParams = fitter.fit()
        if fitParams is None:
            print(params_to_text(paramNames, initParams, prefix="init-[%d]:" % trialNumber))
            print("normInit: %.8f" % normInit)
            print("(failure or non-convergence)")
            if explainFailure:
                print("... return code ...")
                print(fitter.retCode)
                print("... stdout ...")
                print(fitter.stdout)
                print("... stderr ...")
                print(fitter.stderr)
            continue

        print(params_to_text(paramNames, initParams, fitParams,
                             prefix="init+[%d]:" % trialNumber,
                             prefix2="cvrg[%d]:" % trialNumber))
        fitParams = params_to_float(fitParams)
        dGood = vector_distance(fitParams, goodParams)
        print("normInit: %.8f" % normInit)
        print("dGood: %.8f" % dGood)
        convergenceCount += 1

    print("%d of %d trials converged" % (convergenceCount, numTrials))

示例#4

0

显示文件

文件： explore_hap_dip.py 项目： rsharris/kmervature

def main():
    """Probe hap-dip convergence while moving one parameter at a time.

    Takes no command-line arguments.  For every parameter name reported by
    the fitter, the starting vector is the hard-coded "good" vector with that
    single parameter moved toward its hard-coded "default" value in
    numSteps equal increments; the model is fitted from each such start and
    the outcome is printed.

    Raises:
        AssertionError: if any command-line argument is given.
    """
    assert len(argv) == 1, "give me no arguments"

    histogramDir = "kmer_histograms"
    sampleId = "mixedB"

    defaultParams = {
        "zp.copy.y": 3.000,
        "zp.copy.hom": 3.000,
        "zp.copy.het": 3.000,
        "p.e": 0.942,
        "shape.e": 3.000,
        "scale.e": 1.000,
        "p.y": 0.900,
        "u.y": 64.000,
        "sd.y": 14.826,
        "shape.y": 0.000,
        "p.hom": 0.800,
        "u.hom": 5.120,
        "sd.hom": 1.186,
        "var.het": 1.407,
    }

    goodParams = {
        "zp.copy.y": 2.042,
        "zp.copy.hom": 3.157,
        "zp.copy.het": 17.795,
        "p.e": 0.935,
        "shape.e": 0.096,
        "scale.e": 0.465,
        "p.y": 0.621,
        "u.y": 68.084,
        "sd.y": 8.626,
        "shape.y": 0.057,
        "p.hom": 0.853,
        "u.hom": 11.101,
        "sd.hom": 3.600,
        "var.het": 10.916,
    }

    hapDip = EnrichedHapDipFitter(histogramDir + "/" + sampleId + ".mixed.kmer_dist")
    names = hapDip.paramNames

    numSteps = 10
    for (nameIx, name) in enumerate(names):
        # blank line between the output of consecutive parameters
        if nameIx > 0:
            print("")

        for step in range(1, numSteps + 1):
            print('=== param %d of %s ("%s") step %d of %s ===' % (1 + nameIx, len(names), name, step, numSteps))

            # start from "good", with only this parameter moved
            start = dict(goodParams)
            start[name] += step * (defaultParams[name] - goodParams[name]) / numSteps

            hapDip.set_params(start)
            fitted = hapDip.fit()

            if fitted is not None:
                print(params_to_text(names, start, fitted, prefix="init:", prefix2="cvrg:"))
                fitted = params_to_float(fitted)
                print("dGood: %.8f" % vector_distance(fitted, goodParams))
            else:
                print(params_to_text(names, start, prefix="init:"))
                print("(failure or non-convergence)")
                for (label, value) in (("return code", hapDip.retCode),
                                       ("stdout", hapDip.stdout),
                                       ("stderr", hapDip.stderr)):
                    print("... %s ..." % label)
                    print(value)

示例#5

0

显示文件

文件： explore6_hap_dip.py 项目： rsharris/kmervature

def main():
    """Fit the hap-dip model for one sample by way of haploid and hap-hap fits.

    Expects exactly one command-line argument, the sample ID.  The stages,
    all run against the sample's ".mixed.kmer_dist" file, are:

      1. a haploid fit;
      2. a hap-hap enrichment fit whose initial vector borrows values from
         the haploid fit;
      3. a hap-dip enrichment fit whose initial vector borrows values from
         the hap-hap fit;
      4. if the hap-dip fit fails, repeated hap-dip fits from initial vectors
         moved step by step toward the "cheat" parameters read from
         "<sampleId>.mixed.fit", stopping at the first success or after
         numSteps-1 attempts.

    Results are printed to stdout; short failure and progress notes also go
    to stderr.

    Raises:
        AssertionError: if the argument count is wrong, if a stage that later
            stages depend on fails, or if the fit file's parameter names do
            not match the hap-dip default parameter names.
    """

    def explain(fitter):
        # dump the fitter's retCode, stdout and stderr attributes
        print("... return code ...")
        print(fitter.retCode)
        print("... stdout ...")
        print(fitter.stdout)
        print("... stderr ...")
        print(fitter.stderr)

    def set_hom_het(params):
        # derive "u.hom", "sd.hom" and "var.het" in place from the "p.y",
        # "p.hom", "u.y" and "sd.y" entries already present in params
        pAuto = 1 - float(params["p.y"])
        pHom = float(params["p.hom"])
        params["u.hom"] = pAuto * pHom * float(params["u.y"])
        sdHom = pAuto * pHom * float(params["sd.y"])
        params["sd.hom"] = sdHom
        params["var.het"] = sdHom * sdHom

    assert (len(argv) == 2), "need the sampleID and nothing else"
    sampleId = argv[1]
    explainFailure = True
    path = "kmer_histograms"
    mixedFilename = path + "/" + sampleId + ".mixed.kmer_dist"

    print(sampleId)

    # perform haploid fit to the sample (ignoring the diploid component)

    hFitter = HaploidFitter(mixedFilename)
    hParamNames = hFitter.paramNames

    hFitParams = hFitter.fit()
    if hFitParams is None:
        stderr.write("haploid: failure or non-convergence\n")
        print("(haploid: failure or non-convergence)")
        if explainFailure:
            explain(hFitter)
    else:
        print(params_to_text(hParamNames, hFitParams, prefix="cvrg.haploid:"))

    # ask for default values for the hap-hap enrichment model

    hhFitter = EnrichedHapHapFitter(mixedFilename)
    hhParamNames = hhFitter.paramNames

    hhDefaultParams = hhFitter.default_params()
    if hhDefaultParams is None:
        stderr.write("hap-hap: failed to get default params\n")
        print("(hap-hap: failed to get default params)")
        if explainFailure:
            explain(hhFitter)
    else:
        print(params_to_text(hhParamNames, hhDefaultParams, prefix="dflt.haphap:"))

    assert (hFitParams is not None) and (hhDefaultParams is not None), \
           "(no point in trying to fit the hap-hap model)"

    # create an initial vector for the enrichment model, borrowing some
    # elements from the haploid model fit

    hhInitParams = dict(hhDefaultParams)
    hhInitParams["zp.copy.y"] = hFitParams["zp.copy"]
    hhInitParams["p.e"] = hFitParams["p.e"]
    hhInitParams["shape.e"] = hFitParams["shape.e"]
    hhInitParams["scale.e"] = hFitParams["scale.e"]
    hhInitParams["u.y"] = hFitParams["u.v"]
    hhInitParams["sd.y"] = hFitParams["sd.v"]
    hhInitParams["shape.y"] = hFitParams["shape.v"]

    pAuto = 1 - float(hhInitParams["p.y"])
    hhInitParams["u.auto"] = pAuto * float(hhInitParams["u.y"])
    hhInitParams["sd.auto"] = pAuto * float(hhInitParams["sd.y"])

    # perform hap-hap fit to the mixed components

    hhFitParams = hhFitter.fit(hhInitParams)
    if hhFitParams is None:
        stderr.write("hap-hap: failure or non-convergence\n")
        print("(hap-hap: failure or non-convergence)")
        print(params_to_text(hhParamNames, hhInitParams, prefix="smart.haphap:"))
        if explainFailure:
            explain(hhFitter)
    else:
        print(params_to_text(hhParamNames, hhInitParams, hhFitParams,
                             prefix="smart.haphap:", prefix2="cvrg.haphap:"))

    assert (hhFitParams is not None), \
           "(no point in trying to fit the hap-dip model)"

    # ask for default values for the hap-dip enrichment model

    hdFitter = EnrichedHapDipFitter(mixedFilename)
    hdParamNames = hdFitter.paramNames

    hdDefaultParams = hdFitter.default_params()
    if hdDefaultParams is None:
        stderr.write("hap-dip: failed to get default params\n")
        print("(hap-dip: failed to get default params)")
        if explainFailure:
            explain(hdFitter)
    else:
        print(params_to_text(hdParamNames, hdDefaultParams, prefix="dflt.hapdip:"))

    assert (hdDefaultParams is not None), \
           "(no point in trying to fit the hap-dip model)"

    # read the sample's "cheat" parameters for comparison (usually produced by
    # explore3_hap_dip)

    fitFilename = path + "/" + sampleId + ".mixed.fit"

    with open(fitFilename, "rt") as f:
        hdCheatParams = params_from_text([line for line in f])

    for name in hdDefaultParams:
        assert (name in hdCheatParams), \
               "parameter \"%s\" missing from %s" % (name, fitFilename)

    for name in hdCheatParams:
        assert (name in hdDefaultParams), \
               "extra parameter \"%s\" in %s" % (name, fitFilename)

    # create an initial vector for the hap-dip enrichment model, borrowing some
    # elements from the hap-hap model fit

    hdInitParams = dict(hdDefaultParams)
    for name in ("zp.copy.y", "p.e", "shape.e", "scale.e",
                 "p.y", "u.y", "sd.y", "shape.y"):
        hdInitParams[name] = hhFitParams[name]
    set_hom_het(hdInitParams)

    # perform hap-dip fit to the mixed components

    hdFitParams = hdFitter.fit(hdInitParams)
    if hdFitParams is None:
        stderr.write("hap-dip: failure or non-convergence\n")
        print("(hap-dip: failure or non-convergence)")
        print(params_to_text(hdParamNames, hdInitParams, hdCheatParams,
                             prefix="smart.hapdip:", prefix2="cheat.hapdip:"))
        if explainFailure:
            explain(hdFitter)
    else:
        print(params_to_text(hdParamNames, hdInitParams, hdFitParams,
                             prefix="smart.hapdip:", prefix2="cvrg.hapdip:"))
        print(params_to_text(hdParamNames, hdCheatParams, prefix="cheat.hapdip:"))

    # if convergence failed, try moving the initial parameters toward the
    # cheat parameters in small steps until we get convergence
    # $$$ a binary search would be "better"

    derivedNames = ("u.hom", "sd.hom", "var.het")
    numSteps = 100
    step = 0

    while hdFitParams is None:
        step += 1
        if step == numSteps:
            # give up; the vector equal to the cheat parameters themselves
            # (step == numSteps) is never tried
            break
        stderr.write("step %d\n" % step)

        # interpolate every non-derived parameter, then recompute the derived
        # ones from the interpolated values
        hdStepParams = {}
        for name in hdInitParams:
            if name in derivedNames:
                continue
            param = float(hdInitParams[name])
            param += (step * (float(hdCheatParams[name]) - param)) / numSteps
            hdStepParams[name] = param
        set_hom_het(hdStepParams)

        hdFitParams = hdFitter.fit(hdStepParams)
        if hdFitParams is None:
            print(params_to_text(hdParamNames, hdStepParams,
                                 prefix="step[%d].hapdip:" % step))
        else:
            print(params_to_text(hdParamNames, hdStepParams, hdFitParams,
                                 prefix="step[%d].hapdip:" % step, prefix2="cvrg.hapdip:"))