Exemplo n.º 1
0
def lognormal_adsolver(pts):
    """
    Fit a lognormal to pts by solving solve_admin with scipy's fsolve.

    pts: Sample points (converted to an ndarray; need not be sorted)

    The starting point is the (mu, sigma) of logn.Lognormal.fromFit(pts).
    Returns logn.Lognormal(mu, sigma) for the solution found.
    Raises logn.LognormalConvergenceError when fsolve reports ier != 1.
    """
    cdf = util.ecdf(np.array(pts), issorted=False)
    values = cdf[:, 0]
    values_rev = util.reverse(values)
    count = float(len(values))
    index = np.arange(len(values), dtype=float)

    seed = logn.Lognormal.fromFit(pts)
    start = [seed.mu(), seed.sigma()]
    extra_args = (index, values, values_rev, count)

    # Debug dump of the extra arguments handed to solve_admin.
    print(extra_args)

    fvals, infodict, ier, mesg = opt.fsolve(solve_admin,
                                            start,
                                            args=extra_args,
                                            fprime=None,
                                            full_output=1,
                                            col_deriv=0)
    f_mu, f_sigma = fvals[0], fvals[1]

    # fsolve sets ier to 1 only when a solution was found.
    if ier == 1:
        return logn.Lognormal(f_mu, f_sigma)
    raise logn.LognormalConvergenceError(mesg, (f_mu, f_sigma))
Exemplo n.º 2
0
def lognormal_adsolver(pts):
    """
    Fit a lognormal to pts by solving solve_admin with scipy's fsolve.

    pts: Sample points (converted to an ndarray; need not be sorted)

    The starting point is the (mu, sigma) of logn.Lognormal.fromFit(pts).
    Returns logn.Lognormal(mu, sigma) for the solution found.
    Raises logn.LognormalConvergenceError when fsolve reports ier != 1.
    """
    cdf = util.ecdf(np.array(pts), issorted=False)
    values = cdf[:, 0]
    values_rev = util.reverse(values)
    count = float(len(values))
    index = np.arange(len(values), dtype=float)

    seed = logn.Lognormal.fromFit(pts)
    start = [seed.mu(), seed.sigma()]
    extra_args = (index, values, values_rev, count)

    # Debug dump of the extra arguments handed to solve_admin.
    print(extra_args)

    fvals, infodict, ier, mesg = opt.fsolve(solve_admin,
                                            start,
                                            args=extra_args,
                                            fprime=None,
                                            full_output=1,
                                            col_deriv=0)
    f_mu, f_sigma = fvals[0], fvals[1]

    # fsolve sets ier to 1 only when a solution was found.
    if ier == 1:
        return logn.Lognormal(f_mu, f_sigma)
    raise logn.LognormalConvergenceError(mesg, (f_mu, f_sigma))
Exemplo n.º 3
0
def paroptfit(x1, hi, lo, n, fit1, mt1):
    """
    Run optfit over a range of candidate xmax values in a process pool.

    x1: Sorted array of points
    hi: Max. xmax to estimate to
    lo: Min. xmax to estimate to
    n: #. of points from hi to lo for the estimation
    fit1: Type of fit - mlefit or mmefit
    mt1: Mirror xform - True or False

    Returns best_fit(result, 2), where result is the list produced by
    mapping optfit over the candidate xmax values.
    """

    global x, fit, mt, c, max_of_x

    ## Initialize globals prior to the parallel run
    ## (presumably optfit reads them inside the workers -- confirm)
    x = x1
    max_of_x = max(x)
    fit = fit1
    mt = mt1
    c = util.ecdf(x)

    # Candidate xmax values: the generated points plus the sample maximum,
    # kept in ascending order.
    candidates = util.gen_points(lo, hi, n).tolist()
    candidates.append(max_of_x)
    candidates.sort()
    xmax_pts = np.array(candidates)

    proc_pool = Pool(mp.cpu_count())
    try:
        result = proc_pool.map(optfit, xmax_pts)
    finally:
        # Release the worker processes; without this every call leaks a
        # full pool of workers.
        proc_pool.terminate()
        proc_pool.join()

    FIT_COMP_IDX = 2

    return best_fit(result, FIT_COMP_IDX)
Exemplo n.º 4
0
def catm_cdf():
    """
    Plot the empirical CDF of the catm session data with two ModLav fits.

    Reads the points from a fixed data file, evaluates two ModLav models
    with hard-coded parameters at the ECDF points and draws the data, the
    MLE fit and the MME fit on the current matplotlib axes. The function
    does not call plt.show() after plotting.
    """
    try:
        plt.show()
    except Exception:
        # Best effort only: a failing show() must not stop the plot from
        # being built, but Ctrl-C / SystemExit are no longer swallowed.
        pass

    # These are already computed values.
    # The script is only to plot the data.
    # Please refer to the notes on how to re-compute.
    # xmax from the optimization run for mme was 17123.2972 (not needed
    # for plotting).
    x = util.read_data("/home/gautam/research/modlav-plots/seslen/catm_ses")
    mle = ml.ModLav(0.9398, 3003.4797, 0.1488)
    mme = ml.ModLav(1.0, 3243.2566, 0.1894)

    ec = util.ecdf(x)
    pts = ec[:, 0]
    mleec = mle.cdf(pts)
    mmeec = mme.cdf(pts)

    # NOTE(review): newer matplotlib releases may reject linestyle='steps'
    # (drawstyle is the documented keyword) -- confirm against the
    # installed version before changing.
    plt.plot(pts, ec[:, 1], 'k-', label='Data', linestyle='steps')
    plt.plot(pts, mleec, 'k--', label='MLE fit')
    plt.plot(pts, mmeec, 'k-.', label='MME fit')
    plt.grid()
    plt.xlabel("Session length [seconds]")
    plt.ylabel("P(X <= x)")
    plt.ylim((0.0, 1.0))
    plt.legend(loc=4)
Exemplo n.º 5
0
def main():
    """
    Write time/data utilization ECDF files and print statistics per category.

    For the overall data (category None) and each of the categories
    'catm', 'catd', 'cath', acct_util is queried; elements 1 and 2 of its
    result are treated as time and data utilization. Their ECDFs are
    written to tu_<name>.dat and du_<name>.dat ("all" for the overall
    run), and util.pstats is printed for both. For the overall run the
    unsorted pairs are also written to tudu.dat.
    """
    rule = "------------------------------------------------"

    for cat in [None, 'catm\n', 'catd\n', 'cath\n']:
        r = acct_util(cat)

        if cat is None:
            file_tag, pcat = "all", "ALL"
        else:
            file_tag = pcat = cat.strip()
        tuf = "tu_" + file_tag + ".dat"
        duf = "du_" + file_tag + ".dat"

        tu, du = r[1], r[2]

        # Need unsorted values, so this has to happen before the sorts.
        if cat is None:
            tudu = np.zeros((len(tu), 2))
            tudu[:, 0] = tu
            tudu[:, 1] = du
            util.write_data("tudu.dat", tudu)

        tu.sort()
        du.sort()

        ctu = util.ecdf(tu, zdisp=True)
        cdu = util.ecdf(du, zdisp=True)

        util.write_data(tuf, ctu)
        util.write_data(duf, cdu)

        print("%s %s" % ("CATEGORY: ", pcat))
        for title, values in (("Time utilization", tu),
                              ("Data utilization", du)):
            print(title)
            print(rule)
            util.pstats(values)
            print("")
        print("")
Exemplo n.º 6
0
def process_datasets(tag_file_map):
    """
    Fit every data set, store the fits and write CDF/CCDF files.

    tag_file_map: Mapping of tag -> data file path

    For each file the sorted data's ECDF/CCDF are written next to it
    (suffixes _ecdf / _ccdf), compute_fits is run and recorded with
    insert_db_record, and the "MME", "MLE" and "FITMIN" fits are evaluated
    and written with the suffixes .lognmme, .lognmle and .lognfitmin.

    Returns a dict of tag -> (best body fit, best tail fit), which is also
    printed.
    """
    report_map = {}

    for tag, data_file in tag_file_map.items():
        x = util.read_data(data_file)
        x.sort()
        ec = util.ecdf(x)
        cc = util.ccdf(x)

        fit_map = compute_fits(x)
        insert_db_record(tag, fit_map)

        ## Figure out best fit
        bfit = best_fits(fit_map)
        report_map[tag] = (bfit["best_body"], bfit["best_tail"])

        ## Write files out to the directory
        util.write_data(data_file + "_ecdf", ec)
        util.write_data(data_file + "_ccdf", cc)

        # CCDF curves use 2000 generated points between the decimal logs
        # of the data minimum and maximum; CDF curves use the ECDF points.
        log_lo = math.log10(min(x))
        log_hi = math.log10(max(x))
        ccpts = np.power(10, util.gen_points(log_lo, log_hi, 2000))
        ecpts = ec[:, 0]

        # All three fits are looked up before anything is written.
        fits = [("lognmme", fit_map["MME"][0]),
                ("lognmle", fit_map["MLE"][0]),
                ("lognfitmin", fit_map["FITMIN"][0])]

        for ext, dist in fits:
            fitted_ec = np.array([ecpts, dist.cdf(ecpts)]).T
            fitted_cc = np.array([ccpts, dist.ccdf(ccpts)]).T
            util.write_data(data_file + "_ecdf." + ext, fitted_ec)
            util.write_data(data_file + "_ccdf." + ext, fitted_cc)

    for tag, (body, tail) in report_map.items():
        print(tag + " BODY: " + body + " TAIL: " + tail)

    return report_map
Exemplo n.º 7
0
def process_datasets(tag_file_map):
    """
    Fit every data set, store the fits and write CDF/CCDF files.

    tag_file_map: Mapping of tag -> data file path

    For each file the sorted data's ECDF/CCDF are written next to it
    (suffixes _ecdf / _ccdf), compute_fits is run and recorded with
    insert_db_record, and the "MME", "MLE" and "FITMIN" fits are evaluated
    and written with the suffixes .lognmme, .lognmle and .lognfitmin.

    Returns a dict of tag -> (best body fit, best tail fit), which is also
    printed.
    """
    report_map = {}

    for tag, data_file in tag_file_map.items():
        x = util.read_data(data_file)
        x.sort()
        ec = util.ecdf(x)
        cc = util.ccdf(x)

        fit_map = compute_fits(x)
        insert_db_record(tag, fit_map)

        ## Figure out best fit
        bfit = best_fits(fit_map)
        report_map[tag] = (bfit["best_body"], bfit["best_tail"])

        ## Write files out to the directory
        util.write_data(data_file + "_ecdf", ec)
        util.write_data(data_file + "_ccdf", cc)

        # CCDF curves use 2000 generated points between the decimal logs
        # of the data minimum and maximum; CDF curves use the ECDF points.
        log_lo = math.log10(min(x))
        log_hi = math.log10(max(x))
        ccpts = np.power(10, util.gen_points(log_lo, log_hi, 2000))
        ecpts = ec[:, 0]

        # All three fits are looked up before anything is written.
        fits = [("lognmme", fit_map["MME"][0]),
                ("lognmle", fit_map["MLE"][0]),
                ("lognfitmin", fit_map["FITMIN"][0])]

        for ext, dist in fits:
            fitted_ec = np.array([ecpts, dist.cdf(ecpts)]).T
            fitted_cc = np.array([ccpts, dist.ccdf(ccpts)]).T
            util.write_data(data_file + "_ecdf." + ext, fitted_ec)
            util.write_data(data_file + "_ccdf." + ext, fitted_cc)

    for tag, (body, tail) in report_map.items():
        print(tag + " BODY: " + body + " TAIL: " + tail)

    return report_map
Exemplo n.º 8
0
def lognormal_nrsolver(pts):
    """
    Solve solve_admin for lognormal (mu, sigma) with scipy.optimize.root.

    pts: Sample points (converted to an ndarray; need not be sorted)

    The starting point is the (mu, sigma) of logn.Lognormal.fromFit(pts).
    Returns [mu, sigma] for the root found.
    Raises logn.LognormalConvergenceError when the solver reports failure.
    """
    ec = util.ecdf(np.array(pts), issorted=False)
    x = ec[:, 0]
    xrev = util.reverse(x)
    n = float(len(x))
    i = np.arange(len(x), dtype=float)

    l1 = logn.Lognormal.fromFit(pts)
    ivs = [l1.mu(), l1.sigma()]
    ovs = (i, x, xrev, n)

    # opt.root returns an OptimizeResult, not the root itself: the solution
    # vector lives in .x. Unpacking the result object directly would iterate
    # over its keys instead of the two parameters.
    sol = opt.root(solve_admin, ivs, args=ovs)
    mu, sigma = sol.x

    if not sol.success:
        raise logn.LognormalConvergenceError(sol.message, (mu, sigma))

    return [mu, sigma]
Exemplo n.º 9
0
def lognormal_nrsolver(pts):
    """
    Solve solve_admin for lognormal (mu, sigma) with scipy.optimize.root.

    pts: Sample points (converted to an ndarray; need not be sorted)

    The starting point is the (mu, sigma) of logn.Lognormal.fromFit(pts).
    Returns [mu, sigma] for the root found.
    Raises logn.LognormalConvergenceError when the solver reports failure.
    """
    ec = util.ecdf(np.array(pts), issorted=False)
    x = ec[:, 0]
    xrev = util.reverse(x)
    n = float(len(x))
    i = np.arange(len(x), dtype=float)

    l1 = logn.Lognormal.fromFit(pts)
    ivs = [l1.mu(), l1.sigma()]
    ovs = (i, x, xrev, n)

    # opt.root returns an OptimizeResult, not the root itself: the solution
    # vector lives in .x. Unpacking the result object directly would iterate
    # over its keys instead of the two parameters.
    sol = opt.root(solve_admin, ivs, args=ovs)
    mu, sigma = sol.x

    if not sol.success:
        raise logn.LognormalConvergenceError(sol.message, (mu, sigma))

    return [mu, sigma]
Exemplo n.º 10
0
def paroptfit(x1, hi, lo, n, fit1, mt1):
    """
    Run optfit over a range of candidate xmax values in a process pool.

    x1: Sorted array of points
    hi: Max. xmax to estimate to
    lo: Min. xmax to estimate to
    n: #. of points from hi to lo for the estimation
    fit1: Type of fit - mlefit or mmefit
    mt1: Mirror xform - True or False

    Returns a dict with two entries:
    "fit": best_fit over element 2 of each optfit result
    "diff": best_fit over element 3 of each optfit result
    """

    global x, fit, mt, c, max_of_x

    ## Initialize globals prior to the parallel run
    ## (presumably optfit reads them inside the workers -- confirm)
    x = x1
    max_of_x = max(x)
    fit = fit1
    mt = mt1
    c = util.ecdf(x)

    # Candidate xmax values: the generated points plus the sample maximum,
    # kept in ascending order.
    candidates = util.gen_points(lo, hi, n).tolist()
    candidates.append(max_of_x)
    candidates.sort()
    xmax_pts = np.array(candidates)

    proc_pool = Pool(mp.cpu_count())
    try:
        result = proc_pool.map(optfit, xmax_pts)
    finally:
        # Release the worker processes; without this every call leaks a
        # full pool of workers.
        proc_pool.terminate()
        proc_pool.join()

    # Positions, within each optfit result, of the metrics compared below.
    # The K-S comparison has been removed; only fit and diff remain.
    FIT_COMP_IDX = 2
    DIFF_COMP_IDX = 3

    return {
        "fit": best_fit(result, FIT_COMP_IDX),
        "diff": best_fit(result, DIFF_COMP_IDX)
    }
Exemplo n.º 11
0
def paroptfit(x1, hi, lo, n, fit1, mt1):
    """
    Run optfit over a range of candidate xmax values in a process pool.

    x1: Sorted array of points
    hi: Max. xmax to estimate to
    lo: Min. xmax to estimate to
    n: #. of points from hi to lo for the estimation
    fit1: Type of fit - mlefit or mmefit
    mt1: Mirror xform - True or False

    Returns a dict with two entries:
    "fit": best_fit over element 2 of each optfit result
    "diff": best_fit over element 3 of each optfit result
    """

    global x, fit, mt, c, max_of_x

    ## Initialize globals prior to the parallel run
    ## (presumably optfit reads them inside the workers -- confirm)
    x = x1
    max_of_x = max(x)
    fit = fit1
    mt = mt1
    c = util.ecdf(x)

    # Candidate xmax values: the generated points plus the sample maximum,
    # kept in ascending order.
    candidates = util.gen_points(lo, hi, n).tolist()
    candidates.append(max_of_x)
    candidates.sort()
    xmax_pts = np.array(candidates)

    proc_pool = Pool(mp.cpu_count())
    try:
        result = proc_pool.map(optfit, xmax_pts)
    finally:
        # Release the worker processes; without this every call leaks a
        # full pool of workers.
        proc_pool.terminate()
        proc_pool.join()

    # Positions, within each optfit result, of the metrics compared below.
    # The K-S comparison has been removed; only fit and diff remain.
    FIT_COMP_IDX = 2
    DIFF_COMP_IDX = 3

    return {
        "fit": best_fit(result, FIT_COMP_IDX),
        "diff": best_fit(result, DIFF_COMP_IDX)
    }
Exemplo n.º 12
0
def solver_main(data_pts):
    """
    Run fsolve on logn_solver for the positive data points and print u, s.

    data_pts: Sample points (any sequence; converted to an ndarray)

    Values <= 0 are dropped before the ECDF is built. The initial guess is
    Lognormal.mmefit of the remaining points. Nothing is returned; the
    solution (and a message if fsolve did not report ier == 1) is printed.
    """
    ## Ideally we should get an np array, but convert in case we did not,
    ## then scrub the data points and remove 0 (and negative) values.
    samples = np.array(data_pts)
    positive = samples[np.where(samples > 0.0)]

    cdf_i = ecdf(positive, issorted=False)
    x_i = cdf_i[:, 0]
    a_i = cdf_i[:, 1]

    i_u, i_s = Lognormal.mmefit(positive)

    field_names = ['x', 'a', 'x2', 'a2', 'n', 'k',
                   'F', 'dF_du', 'dF_ds',
                   'ddF_dsdu', 'd2F_du2', 'd2F_ds2']
    svals = namedtuple('svals', field_names)
    # The derivative slots start out empty.
    cvals = svals(x=x_i,
                  a=a_i,
                  x2=np.power(x_i, 2.0),
                  a2=np.power(a_i, 2.0),
                  n=float(len(x_i)),
                  k=k(a_i),
                  F=None,
                  dF_du=None,
                  dF_ds=None,
                  ddF_dsdu=None,
                  d2F_du2=None,
                  d2F_ds2=None)

    # NOTE(review): cvals is handed over as the args value itself, not
    # wrapped in a 1-tuple. Since a namedtuple is a tuple, fsolve will
    # presumably spread its twelve fields as separate extra arguments --
    # confirm this matches logn_solver/jacobian.
    fval, infodict, ier, msg = opt.fsolve(logn_solver,
                                          [i_u, i_s],
                                          args=cvals,
                                          fprime=jacobian,
                                          full_output=1,
                                          col_deriv=0)

    if ier != 1:
        print("%s %s" % ("Failed to converge: ", msg))
    print("%s %s" % ("u: ", fval[0]))
    print("%s %s" % ("s: ", fval[1]))
Exemplo n.º 13
0
def solver(pts):
    """
    Iteratively solve for the (beta, c, d) parameters from the ECDF of pts.

    pts: ndarray of sample points; it is sorted in place

    Each pass updates beta, c and d in turn (solve_beta, solve_c, solve_d)
    and evaluates |dqdc|, |dqdd| and |dqdb|. Iteration stops as soon as
    none of the three exceeds 1e-8. Progress is printed on every pass.

    Returns the tuple (beta, c, d).
    Raises RuntimeError if 2000 passes do not reach the tolerance.
    """
    npts = np.array(pts)
    pts.sort()

    cdf = ecdf(pts)
    x_i = cdf[:, 0]
    a_i = cdf[:, 1]

    # Starting values: c is the sample median, d its ratio to the maximum.
    # beta needs no seed because it is the first parameter solved for.
    c = np.median(npts)
    d = c / float(pts.max())

    N = 2000
    tol = 1e-8

    # The pass counter is advanced by the loop itself, so the iteration
    # cap is honoured and the failure below is reachable.
    for n in range(N):
        b = solve_beta(x_i, a_i, d)
        c = solve_c(x_i, a_i, b, d)
        d = solve_d(x_i, a_i, b, c)

        tolc = abs(dqdc(x_i, a_i, b, c, d))
        told = abs(dqdd(x_i, a_i, b, c, d))
        tolb = abs(dqdb(x_i, a_i, b, c, d))

        print("%s %s" % ("n: ", n))
        print("%s %s %s %s %s %s" % ("beta: ", b, " c: ", c, "d: ", d))
        print("%s %s %s %s %s %s" % ("tolc: ", tolc, " told: ", told,
                                     "tolb: ", tolb))

        # Same stopping rule as the original loop condition, negated.
        if not (tolc > tol or told > tol or tolb > tol):
            return (b, c, d)

    params = {"beta": b, "c": c, "d": d}
    raise RuntimeError("Cannot converge: " + str(params))
Exemplo n.º 14
0
def tlladmin_solver(pts):
    """
    Fit a ModLav model to pts by solving tll_admin with scipy's fsolve.

    pts: Sample points, passed to util.ecdf as given

    Starting values are b = 1.0, c = median and d = median / maximum of
    the ECDF column used below.
    Returns ml.ModLav(b, c, d) for the solution found.
    Raises ml.ModLavConvergenceError when fsolve reports ier != 1.
    """
    cdf = util.ecdf(pts)
    # NOTE(review): this takes column 1 of the ECDF, while the lognormal
    # solvers in this code base take column 0 -- confirm which column is
    # intended here.
    col = cdf[:, 1]
    col_rev = util.reverse(col)
    index = np.arange(len(col), dtype=float)
    count = float(len(col))

    b0 = 1.0
    c0 = float(np.median(col))
    d0 = c0 / float(col.max())

    fvals, infodict, ier, mesg = opt.fsolve(tll_admin,
                                            [b0, c0, d0],
                                            args=(index, col, col_rev, count),
                                            fprime=None,
                                            full_output=1,
                                            col_deriv=0)
    f_b, f_c, f_d = fvals[0], fvals[1], fvals[2]

    # fsolve sets ier to 1 only when a solution was found.
    if ier == 1:
        return ml.ModLav(f_b, f_c, f_d)
    raise ml.ModLavConvergenceError(mesg, (f_b, f_c, f_d))
Exemplo n.º 15
0
def tlladmin_solver(pts):
    """
    Fit a ModLav model to pts by solving tll_admin with scipy's fsolve.

    pts: Sample points, passed to util.ecdf as given

    Starting values are b = 1.0, c = median and d = median / maximum of
    the ECDF column used below.
    Returns ml.ModLav(b, c, d) for the solution found.
    Raises ml.ModLavConvergenceError when fsolve reports ier != 1.
    """
    cdf = util.ecdf(pts)
    # NOTE(review): this takes column 1 of the ECDF, while the lognormal
    # solvers in this code base take column 0 -- confirm which column is
    # intended here.
    col = cdf[:, 1]
    col_rev = util.reverse(col)
    index = np.arange(len(col), dtype=float)
    count = float(len(col))

    b0 = 1.0
    c0 = float(np.median(col))
    d0 = c0 / float(col.max())

    fvals, infodict, ier, mesg = opt.fsolve(tll_admin,
                                            [b0, c0, d0],
                                            args=(index, col, col_rev, count),
                                            fprime=None,
                                            full_output=1,
                                            col_deriv=0)
    f_b, f_c, f_d = fvals[0], fvals[1], fvals[2]

    # fsolve sets ier to 1 only when a solution was found.
    if ier == 1:
        return ml.ModLav(f_b, f_c, f_d)
    raise ml.ModLavConvergenceError(mesg, (f_b, f_c, f_d))
Exemplo n.º 16
0
def gen_cdf_ccdf():
    """
    Write fitted CDF and CCDF curves for every data set in the database.

    Data sets (unique_id, filename) are read from files_and_analysis.db.
    For each one the sorted data's ECDF points and 2000 generated points
    between the decimal logs of the data minimum and maximum are fed to
    the distribution returned by get_dist, and the resulting
    (point, value) columns are written next to the data file as
    <filename>_ecdf.<ext> and <filename>_ccdf.<ext>.
    """
    # dist_list = ["LOGLOGISTIC", "LOGN", "TPARETO", "TRUNCLL"]
    dist_list = ["LOGN"]
    fext_map = {"LOGLOGISTIC": "ll", "LOGN": "lgn", "TPARETO": "tp",
                "TRUNCLL": "tll"}

    r = RunSQL("files_and_analysis.db")
    dsets = r.sqlq("select unique_id, filename from datasets")

    for dpair in dsets:
        dset_id = dpair[0]
        dset_file = dpair[1]

        print("%s %s" % ("Processing data set - ", dset_id))

        x = read_data(dset_file)
        x.sort()
        ec = ecdf(x, issorted=True)
        ecpts = ec[:, 0]
        pts = np.power(
            10, gen_points(math.log10(min(x)), math.log10(max(x)), 2000))

        # os.path.join keeps a bare filename relative to the working
        # directory; gluing dirname + "/" would turn it into a path under
        # the filesystem root when the directory part is empty.
        op_base = os.path.join(os.path.dirname(dset_file),
                               os.path.basename(dset_file))

        for distname in dist_list:
            print("%s %s" % ("\t Getting distribution - ", distname))
            dist = get_dist(dset_id, distname)
            dec = dist.cdf(ecpts)
            dcc = dist.ccdf(pts)

            fdec = np.array([ecpts, dec]).transpose()
            fdcc = np.array([pts, dcc]).transpose()

            op_ec_file = op_base + "_ecdf" + "." + fext_map[distname]
            op_cc_file = op_base + "_ccdf" + "." + fext_map[distname]

            print("%s %s" % ("\t Writing CDF - ", op_ec_file))
            write_data(op_ec_file, fdec)

            print("%s %s" % ("\t Writing CCF - ", op_cc_file))
            write_data(op_cc_file, fdcc)
Exemplo n.º 17
0
def fitlogn(dataf):
    """
    Fit a lognormal to the data in dataf and print the better of two fits.

    dataf: Path of the data file

    Two fits are made with Lognormal.fromFit: the default one (reported as
    MME) and one with mmefit=False (reported as MLE). The fit whose
    fitmetric against the ECDF is smaller is printed together with its
    fit and K-S metrics; on a tie the MME fit is reported.
    """
    data = np.array(read_data(dataf))
    data.sort()

    mme_fit = Lognormal.fromFit(data)
    mle_fit = Lognormal.fromFit(data, mmefit=False)

    ec = ecdf(data)
    # The CCDF is computed as in the original flow; its value is not used.
    ccdf(data)

    mme_q = mme_fit.fitmetric(cdf=ec)
    mle_q = mle_fit.fitmetric(cdf=ec)

    print("File: " + dataf)

    if mme_q <= mle_q:
        kind, chosen, quality = "MME", mme_fit, mme_q
    else:
        kind, chosen, quality = "MLE", mle_fit, mle_q

    print("Type: " + kind)
    print("Lognormal: " + str(chosen))
    print("%s %s" % ("FIT: ", quality))
    print("%s %s" % ("K-S: ", chosen.ksmetric(cdf=ec)))
Exemplo n.º 18
0
def fitlogn(dataf):
    """
    Fit a lognormal to the data in dataf and print the better of two fits.

    dataf: Path of the data file

    Two fits are made with Lognormal.fromFit: the default one (reported as
    MME) and one with mmefit=False (reported as MLE). The fit whose
    fitmetric against the ECDF is smaller is printed together with its
    fit and K-S metrics; on a tie the MME fit is reported.
    """
    data = np.array(read_data(dataf))
    data.sort()

    mme_fit = Lognormal.fromFit(data)
    mle_fit = Lognormal.fromFit(data, mmefit=False)

    ec = ecdf(data)
    # The CCDF is computed as in the original flow; its value is not used.
    ccdf(data)

    mme_q = mme_fit.fitmetric(cdf=ec)
    mle_q = mle_fit.fitmetric(cdf=ec)

    print("File: " + dataf)

    if mme_q <= mle_q:
        kind, chosen, quality = "MME", mme_fit, mme_q
    else:
        kind, chosen, quality = "MLE", mle_fit, mle_q

    print("Type: " + kind)
    print("Lognormal: " + str(chosen))
    print("%s %s" % ("FIT: ", quality))
    print("%s %s" % ("K-S: ", chosen.ksmetric(cdf=ec)))
Exemplo n.º 19
0
def pardd(fname):
    """
    Fit ModLav models to the data in fname and write curves and a report.

    fname: Path of the data file; every output path is this name plus a
           suffix

    The points are read with util.read_data and sorted in place, and
    their CCDF/ECDF are written to <fname>_ccdf and <fname>_ecdf. Four
    plain fits (mlefit/mmefit, each with and without mt=True) are
    measured at the sample maximum, and parmlfit.paroptfit is run for the
    same four combinations over 500 xmax candidates between 0.1 and 10
    times the sample maximum. For every entry a <fname>_<label>_ccdf /
    <fname>_<label>_ecdf pair is written. A text report with basic
    statistics, the per-fit metrics and the best_fit recommendation is
    written to <fname>_metric.
    """
    inpf = fname
    x1 = util.read_data(inpf)
    x1.sort()
    xmx = x1.max()

    n_xmax = 500
    lo = 0.1 * xmx
    hi = 10 * xmx

    cc = util.ccdf(x1)
    ec = util.ecdf(x1)
    util.write_data(inpf + "_ccdf", cc)
    util.write_data(inpf + "_ecdf", ec)

    # Plain fits: all four models are built first, then measured.
    plain_specs = [("MLE", {"fit": "mlefit"}),
                   ("MME", {"fit": "mmefit"}),
                   ("MLE-MT", {"fit": "mlefit", "mt": True}),
                   ("MME-MT", {"fit": "mmefit", "mt": True})]
    plain_models = []
    for lbl, kwargs in plain_specs:
        plain_models.append((lbl, ml.ModLav.fromFit(x1, **kwargs)))

    fitlist = []
    for lbl, model in plain_models:
        fitlist.append((lbl, (model, xmx,
                              model.fitmetric(cdf=ec),
                              model.ksmetric(cdf=ec),
                              model.difference(cdf=ec))))

    # Fits with an optimized xmax.
    opt_specs = [("MLE-OPT", "mlefit", False),
                 ("MLE-OPT-MT", "mlefit", True),
                 ("MME-OPT", "mmefit", False),
                 ("MME-OPT-MT", "mmefit", True)]
    opt_runs = []
    for lbl, fit_name, mirror in opt_specs:
        opt_runs.append(
            (lbl, parmlfit.paroptfit(x1, hi, lo, n_xmax, fit_name, mirror)))

    # NOTE(review): this expects paroptfit to return "fit", "ks" and
    # "diff" entries, each a (model, xmax, fit, ks, diff) tuple. A
    # paroptfit that no longer reports "ks" makes the lookup below raise
    # KeyError -- confirm against the parmlfit version in use.
    for prefix, key in (("", "fit"), ("KS-", "ks"), ("D-", "diff")):
        for lbl, res in opt_runs:
            fitlist.append((prefix + lbl, res[key]))

    size, amin, amax, mu, sigma = (len(x1), x1.min(), xmx, x1.mean(),
                                   x1.std())
    cv = sigma / mu
    q = ms.mquantiles(x1, [0.1, 0.5, 0.9])

    rule = "--------------------------------------------------------------------------"
    op_str = [
        "BASIC STATISTICS",
        rule,
        "Size: " + str(size),
        "Range: " + str(amin) + " - " + str(amax),
        "Quantiles: 10% - " + str(q[0]) + " 50% - " + str(q[1]) +
        " 90% - " + str(q[2]),
        "Mean: " + str(mu),
        "Sigma: " + str(sigma),
        "CV: " + str(cv),
        "\n",
    ]

    best_fit_map = dict()

    for lbl, entry in fitlist:
        m = entry[0]
        mx = entry[1]
        fitm = entry[2]
        ksm = entry[3]
        diffm = entry[4]

        best_fit_map[lbl] = (m, mx, fitm, ksm, diffm)

        op_str.extend([
            lbl,
            rule,
            "Modlav params: " + str(m),
            "Xmax: " + str(mx),
            "Xmax/Max: " + str(mx / xmx),
            "FIT Metric: " + str(fitm),
            "K-S Metric: " + str(ksm),
            "DIFF Metric: " + str(diffm),
            rule,
            "\n",
        ])

        fname_pfx = inpf + "_" + lbl.lower().replace("-", "_")

        # The CCDF curve uses 2000 generated points between the decimal
        # logs of the data minimum and this entry's xmax; the CDF curve
        # uses the ECDF points.
        lx = util.gen_points(math.log10(x1.min()), math.log10(mx), 2000)
        ex = np.power(10, lx)

        mcc = m.ccdf(ex)
        mec = m.cdf(ec[:, 0])

        util.write_data(fname_pfx + "_ccdf", np.array([ex, mcc]).transpose())
        util.write_data(fname_pfx + "_ecdf",
                        np.array([ec[:, 0], mec]).transpose())

    recom = best_fit(best_fit_map, xmx)

    report = []
    for text in op_str:
        report.append(text + "\n")
    report.append("RECOMMENDATIONS: " + str(recom) + "\n")

    # The with-block closes the report even if writing fails.
    with open(inpf + "_metric", "w+") as txf:
        txf.writelines(report)
Exemplo n.º 20
0
def pardd(fname):
    """
    Fit ModLav models to the data in fname and write curves and a report.

    fname: Path of the data file; every output path is this name plus a
           suffix

    The points are read with util.read_data and sorted in place, and
    their CCDF/ECDF are written to <fname>_ccdf and <fname>_ecdf. Four
    plain fits (mlefit/mmefit, each with and without mt=True) are
    measured at the sample maximum, and parmlfit.paroptfit is run for the
    same four combinations over 500 xmax candidates between 0.1 and 10
    times the sample maximum. For every entry a <fname>_<label>_ccdf /
    <fname>_<label>_ecdf pair is written. A text report with basic
    statistics, the per-fit metrics and the best_fit recommendation is
    written to <fname>_metric.
    """
    inpf = fname
    x1 = util.read_data(inpf)
    x1.sort()
    xmx = x1.max()

    n_xmax = 500
    lo = 0.1 * xmx
    hi = 10 * xmx

    cc = util.ccdf(x1)
    ec = util.ecdf(x1)
    util.write_data(inpf + "_ccdf", cc)
    util.write_data(inpf + "_ecdf", ec)

    # Plain fits: all four models are built first, then measured.
    plain_specs = [("MLE", {"fit": "mlefit"}),
                   ("MME", {"fit": "mmefit"}),
                   ("MLE-MT", {"fit": "mlefit", "mt": True}),
                   ("MME-MT", {"fit": "mmefit", "mt": True})]
    plain_models = []
    for lbl, kwargs in plain_specs:
        plain_models.append((lbl, ml.ModLav.fromFit(x1, **kwargs)))

    fitlist = []
    for lbl, model in plain_models:
        fitlist.append((lbl, (model, xmx,
                              model.fitmetric(cdf=ec),
                              model.ksmetric(cdf=ec),
                              model.difference(cdf=ec))))

    # Fits with an optimized xmax.
    opt_specs = [("MLE-OPT", "mlefit", False),
                 ("MLE-OPT-MT", "mlefit", True),
                 ("MME-OPT", "mmefit", False),
                 ("MME-OPT-MT", "mmefit", True)]
    opt_runs = []
    for lbl, fit_name, mirror in opt_specs:
        opt_runs.append(
            (lbl, parmlfit.paroptfit(x1, hi, lo, n_xmax, fit_name, mirror)))

    # NOTE(review): this expects paroptfit to return "fit", "ks" and
    # "diff" entries, each a (model, xmax, fit, ks, diff) tuple. A
    # paroptfit that no longer reports "ks" makes the lookup below raise
    # KeyError -- confirm against the parmlfit version in use.
    for prefix, key in (("", "fit"), ("KS-", "ks"), ("D-", "diff")):
        for lbl, res in opt_runs:
            fitlist.append((prefix + lbl, res[key]))

    size, amin, amax, mu, sigma = (len(x1), x1.min(), xmx, x1.mean(),
                                   x1.std())
    cv = sigma / mu
    q = ms.mquantiles(x1, [0.1, 0.5, 0.9])

    rule = "--------------------------------------------------------------------------"
    op_str = [
        "BASIC STATISTICS",
        rule,
        "Size: " + str(size),
        "Range: " + str(amin) + " - " + str(amax),
        "Quantiles: 10% - " + str(q[0]) + " 50% - " + str(q[1]) +
        " 90% - " + str(q[2]),
        "Mean: " + str(mu),
        "Sigma: " + str(sigma),
        "CV: " + str(cv),
        "\n",
    ]

    best_fit_map = dict()

    for lbl, entry in fitlist:
        m = entry[0]
        mx = entry[1]
        fitm = entry[2]
        ksm = entry[3]
        diffm = entry[4]

        best_fit_map[lbl] = (m, mx, fitm, ksm, diffm)

        op_str.extend([
            lbl,
            rule,
            "Modlav params: " + str(m),
            "Xmax: " + str(mx),
            "Xmax/Max: " + str(mx / xmx),
            "FIT Metric: " + str(fitm),
            "K-S Metric: " + str(ksm),
            "DIFF Metric: " + str(diffm),
            rule,
            "\n",
        ])

        fname_pfx = inpf + "_" + lbl.lower().replace("-", "_")

        # The CCDF curve uses 2000 generated points between the decimal
        # logs of the data minimum and this entry's xmax; the CDF curve
        # uses the ECDF points.
        lx = util.gen_points(math.log10(x1.min()), math.log10(mx), 2000)
        ex = np.power(10, lx)

        mcc = m.ccdf(ex)
        mec = m.cdf(ec[:, 0])

        util.write_data(fname_pfx + "_ccdf", np.array([ex, mcc]).transpose())
        util.write_data(fname_pfx + "_ecdf",
                        np.array([ec[:, 0], mec]).transpose())

    recom = best_fit(best_fit_map, xmx)

    report = []
    for text in op_str:
        report.append(text + "\n")
    report.append("RECOMMENDATIONS: " + str(recom) + "\n")

    # The with-block closes the report even if writing fails.
    with open(inpf + "_metric", "w+") as txf:
        txf.writelines(report)
Exemplo n.º 21
0
def _category_queries(select, cond, tail):
    """Build the "catm", "catd", "cath" and "all" queries for one data type."""
    head = "select " + select + " from data_log where "
    queries = {}
    for cat in ("catm", "catd", "cath"):
        # The category is matched including a trailing newline.
        queries[cat] = (head + "ucat_term='" + cat + "\n' and " + cond +
                        " " + tail)
    queries["all"] = head + cond + " " + tail
    return queries


def main(dt):
    """
    Query one data type from azure.db and fit ModLav models per category.

    dt: One of "slen", "inb", "outb", "to_inb", "to_outb", "tslen",
        "tinb", "toutb"

    For every category of the chosen type the query result is written to
    <category>_<dt>, its CCDF/ECDF to <category>_ccdf / <category>_ecdf,
    and eight fits are produced (MLE/MME, with and without mt, plain and
    via ml.optfit over 500 xmax candidates between 0.1 and 10 times the
    maximum). Each fit's curves go to <category>_<label>_ccdf / _ecdf and
    a text report goes to <category>_metric.

    Raises NotImplementedError when dt is not a known data type.
    """
    window = "seslen>0 and seslen<18000"
    per_user = "group by user order by t"

    dtmap = {
        "slen": _category_queries("seslen", window, "order by seslen"),
        "inb": _category_queries("bin", "bin>0 and " + window,
                                 "order by bin"),
        "outb": _category_queries("bout", "bout>0 and " + window,
                                  "order by bout"),
        "to_inb": {"all": "select bin from data_log where seslen >= 18000 and bin>0 order by bin"},
        "to_outb": {"all": "select bout from data_log where seslen >= 18000 and bout>0 order by bout"},
        "tslen": _category_queries("sum(seslen) t", window, per_user),
        "tinb": _category_queries("sum(bin) t", "bin > 0 and " + window,
                                  per_user),
        "toutb": _category_queries("sum(bout) t", "bout>0 and " + window,
                                   per_user),
    }

    if dt not in dtmap:
        raise NotImplementedError("Type - " + dt + " - is not implemented")

    qmap = dtmap[dt]

    plain_specs = [("MLE", {"fit": "mlefit"}),
                   ("MME", {"fit": "mmefit"}),
                   ("MLE-MT", {"fit": "mlefit", "mt": True}),
                   ("MME-MT", {"fit": "mmefit", "mt": True})]
    opt_specs = [("MLE-OPT", True, False),
                 ("MLE-OPT-MT", True, True),
                 ("MME-OPT", False, False),
                 ("MME-OPT-MT", False, True)]
    rule = "----------------------------------------------------------------------"

    s = sql.RunSQL("azure.db")
    for typ, q in qmap.items():
        x = np.array(s.sqlq(q))
        x.sort()  # just making sure

        cc = util.ccdf(x)
        ec = util.ecdf(x)

        util.write_data(typ + "_" + dt, x)
        util.write_data(typ + "_ccdf", cc)
        util.write_data(typ + "_ecdf", ec)

        # Plain fits are reported at the sample maximum.
        yyy = []
        for lbl, kwargs in plain_specs:
            yyy.append((lbl, ml.ModLav.fromFit(x, **kwargs), x.max()))

        # Optimized fits carry their own xmax: the "fit" entry of each
        # ml.optfit result is read as (model, xmax).
        opt_runs = []
        for lbl, use_mle, mirror in opt_specs:
            opt_runs.append((lbl, ml.optfit(x, 0.1 * x.max(), 10 * x.max(),
                                            500, mlefit=use_mle, mt=mirror)))
        for lbl, res in opt_runs:
            yyy.append((lbl, res["fit"][0], res["fit"][1]))

        n, amin, amax, mu, sigma = (len(x), x.min(), x.max(), x.mean(),
                                    x.std())
        cv = sigma / mu
        quant = ms.mquantiles(x, [0.1, 0.5, 0.9])

        op_str = [
            "BASIC STATISTICS",
            rule,
            "Size: " + str(n),
            "Range: " + str(amin) + " - " + str(amax),
            "Quantiles: 10% - " + str(quant[0]) + " 50% - " +
            str(quant[1]) + " 90% - " + str(quant[2]),
            "Mean: " + str(mu),
            "Sigma: " + str(sigma),
            "CV: " + str(cv),
            "\n",
        ]

        for lbl, m, xmx in yyy:
            op_str.extend([
                lbl,
                rule,
                "Modlav params: " + str(m),
                "Xmax: " + str(xmx),
                "Xmax/Max: " + str(xmx / amax),
                "FIT metric: " + str(m.fitmetric(points=x)),
                "K-S metric: " + str(m.ksmetric(points=x)),
                rule,
                "\n",
            ])

            fname_pfx = typ + "_" + lbl.lower().replace("-", "_")

            lx = util.gen_points(math.log10(x.min()), math.log10(xmx), 2000)
            ex = np.power(10, lx)

            mcc = m.ccdf(ex)
            mec = m.cdf(ec[:, 0])

            util.write_data(fname_pfx + "_ccdf",
                            np.array([ex, mcc]).transpose())
            util.write_data(fname_pfx + "_ecdf",
                            np.array([ec[:, 0], mec]).transpose())

        report = []
        for text in op_str:
            report.append(text + "\n")

        # The with-block closes the report even if writing fails.
        with open(typ + "_metric", "w+") as txf:
            txf.writelines(report)