def catm_ccdf():
    """Plot the empirical CCDF of the "catm" session lengths with two fits.

    The data are read from a fixed file and drawn on log-log axes together
    with the CCDFs of two pre-computed ModLav models (labelled MLE and MME).
    Nothing is returned; the curves are added to the current pyplot figure.
    """
    # Best-effort call kept from the original; presumably it flushes a
    # previously pending figure.  Failures are deliberately ignored, but
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        plt.show()
    except Exception:
        pass

    # These are already computed values; this function only plots the data.
    # Please refer to the notes on how to re-compute them.
    # xmax is the value from the optimization run for the MME fit.
    x = util.read_data("/home/gautam/research/modlav-plots/seslen/catm_ses")
    mle = ml.ModLav(0.9398, 3003.4797, 0.1488)
    mme = ml.ModLav(1.0, 3243.2566, 0.1894)
    xmax = 17123.2972

    cc = util.ccdf(x)

    # The evaluation grid is generated in log10 space, so mapping it back
    # needs 10**lx.  The previous np.power(lx, 10) computed lx**10 instead.
    log_min = math.log10(x.min())
    lx = util.gen_points(log_min, math.log10(x.max()), 2000)
    ex = np.power(10, lx)
    mlecc = mle.ccdf(ex)

    # The MME curve is drawn up to the optimized xmax rather than max(x).
    mme_lx = util.gen_points(log_min, math.log10(xmax), 2000)
    mme_ex = np.power(10, mme_lx)
    mmecc = mme.ccdf(mme_ex)

    # 'steps' is a draw style; newer Matplotlib no longer accepts it as a
    # linestyle value, so it is passed through drawstyle.
    plt.loglog(cc[:, 0], cc[:, 1], 'k-', label='Data', drawstyle='steps')
    plt.loglog(ex, mlecc, 'k--', label='MLE fit')
    plt.loglog(mme_ex, mmecc, 'k-.', label='MME fit')
    plt.xlim((x.min(), x.max() * 10))
    plt.ylim((1e-4, 1.1))
    plt.grid()
    plt.ylabel("P(X > x)")
    plt.xlabel("Session length [seconds]")
    plt.legend(loc=3)
def paroptfit(x1, hi, lo, n, fit1, mt1):
    """
    Evaluate candidate xmax values in parallel and return the best one.

    x1: Sorted array of points
    hi: Max. xmax to estimate to
    lo: Min. xmax to estimate to
    n: #. of points from hi to lo for the estimation
    fit1: Type of fit - mlefit or mmefit
    mt1: Mirror xform - True or False

    Returns whatever best_fit() selects from the per-xmax results when
    comparing on result index 2 (FIT_COMP_IDX).
    """
    global x, fit, mt, c, max_of_x

    ## Initialize globals prior to the parallel run ##
    # optfit only receives an xmax value through map(), so it presumably
    # reads these module globals in the workers -- confirm against optfit.
    x = x1
    max_of_x = max(x)
    fit = fit1
    mt = mt1
    c = util.ecdf(x)

    # Candidate grid between lo and hi, plus the sample maximum itself.
    candidates = util.gen_points(lo, hi, n).tolist()
    candidates.append(max_of_x)
    candidates.sort()
    xmax_pts = np.array(candidates)

    # The pool is created only after the globals are set, as before.
    proc_pool = Pool(mp.cpu_count())
    try:
        result = proc_pool.map(optfit, xmax_pts)
    finally:
        # Previously the pool was never shut down, leaking one set of
        # worker processes per call.
        proc_pool.close()
        proc_pool.join()

    FIT_COMP_IDX = 2
    return best_fit(result, FIT_COMP_IDX)
def cath_ccdf():
    """Plot the stored "cath" session-length CCDF with two ModLav fits.

    The empirical CCDF is read from a pre-computed file and drawn on log-log
    axes together with the CCDFs of two fixed ModLav models.  Nothing is
    returned; the curves are added to the current pyplot figure.
    """
    # Best-effort call kept from the original; presumably it flushes a
    # previously pending figure.  Failures are deliberately ignored, but
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        plt.show()
    except Exception:
        pass

    # These are already computed values. Please look at the notes
    # on discussion as to why and how they were chosen. The CDF
    # and CCDF values for the fitted distributions are different.
    # The reasoning for this is also in the notes.
    m2 = ml.ModLav(0.7438, 657.1194, 0.0191)
    m3 = ml.ModLav(0.7294, 644.6747, 0.0104)

    x = util.read_data("/home/gautam/research/modlav-plots/seslen/cath_ses")
    cc = util.read_data("/home/gautam/research/modlav-plots/seslen/cath_ccdf")

    # 'steps' is a draw style; newer Matplotlib no longer accepts it as a
    # linestyle value, so it is passed through drawstyle.
    plt.loglog(cc[:, 0], cc[:, 1], 'k-', label='Data', drawstyle='steps')

    # The evaluation grid is generated in log10 space, so mapping it back
    # needs 10**lx.  The previous np.power(lx, 10) computed lx**10 instead.
    lx = util.gen_points(math.log10(x.min()), math.log10(x.max()), 2000)
    ex = np.power(10, lx)
    m2cc = m2.ccdf(ex)
    m3cc = m3.ccdf(ex)

    plt.loglog(ex, m3cc, 'k--', label='MLE,MT, No OPT')
    plt.loglog(ex, m2cc, 'k-.', label='MLE, MT, OPT')
    plt.grid()
    plt.xlim((1.0, 1e5))
    plt.ylim((1e-4, 1.1))
    plt.ylabel("P(X > x)")
    plt.xlabel("Session length [seconds]")
    plt.legend(loc=3)
def process_datasets(tag_file_map):
    """Fit every dataset in *tag_file_map* and write its CDF/CCDF files.

    tag_file_map maps a tag to a data file path.  For each entry the data
    are read and sorted, fits are computed and recorded via
    insert_db_record(), and the empirical and fitted CDF/CCDF tables are
    written next to the data file.

    Returns a dict mapping each tag to (best_body, best_tail) as reported by
    best_fits(); the same report is also printed, one line per tag.
    """
    # (fit_map key, ECDF file suffix, CCDF file suffix) for each fitted model.
    outputs = (
        ("MME", "_ecdf.lognmme", "_ccdf.lognmme"),
        ("MLE", "_ecdf.lognmle", "_ccdf.lognmle"),
        ("FITMIN", "_ecdf.lognfitmin", "_ccdf.lognfitmin"),
    )

    report_map = {}
    for tag, data_file in tag_file_map.items():
        x = util.read_data(data_file)
        x.sort()
        ec = util.ecdf(x)
        cc = util.ccdf(x)

        fit_map = compute_fits(x)
        insert_db_record(tag, fit_map)

        ## Figure out best fit
        bfit = best_fits(fit_map)
        report_map[tag] = (bfit["best_body"], bfit["best_tail"])

        ## Write files out to the directory
        util.write_data(data_file + "_ecdf", ec)
        util.write_data(data_file + "_ccdf", cc)

        # CCDFs are evaluated on a log-spaced grid over the data range,
        # CDFs at the x-values of the empirical CDF.
        log_grid = util.gen_points(math.log10(min(x)), math.log10(max(x)), 2000)
        ccpts = np.power(10, log_grid)
        ecpts = ec[:, 0]

        # Look up all three models before writing any of their files.
        models = [(fit_map[key][0], ec_sfx, cc_sfx)
                  for key, ec_sfx, cc_sfx in outputs]
        for model, ec_sfx, cc_sfx in models:
            fitted_ec = np.array([ecpts, model.cdf(ecpts)]).transpose()
            fitted_cc = np.array([ccpts, model.ccdf(ccpts)]).transpose()
            util.write_data(data_file + ec_sfx, fitted_ec)
            util.write_data(data_file + cc_sfx, fitted_cc)

    for k in report_map:
        body, tail = report_map[k][0], report_map[k][1]
        print(k + " BODY: " + body + " TAIL: " + tail)

    return report_map
def process_datasets(tag_file_map):
    """Run the fitting pipeline over each tagged data file.

    For every (tag, file) pair the data are read and sorted, the fits are
    computed and stored with insert_db_record(), and the empirical plus
    fitted CDF/CCDF tables are written alongside the data file.

    Returns {tag: (best_body, best_tail)} taken from best_fits(); the same
    information is printed once all datasets have been processed.
    """
    # (fit_map key, ECDF file suffix, CCDF file suffix) for each fitted model.
    model_files = (
        ("MME", "_ecdf.lognmme", "_ccdf.lognmme"),
        ("MLE", "_ecdf.lognmle", "_ccdf.lognmle"),
        ("FITMIN", "_ecdf.lognfitmin", "_ccdf.lognfitmin"),
    )

    report_map = {}
    for tag, data_file in tag_file_map.items():
        x = util.read_data(data_file)
        x.sort()
        ec = util.ecdf(x)
        cc = util.ccdf(x)

        fit_map = compute_fits(x)
        insert_db_record(tag, fit_map)

        ## Figure out best fit
        bfit = best_fits(fit_map)
        report_map[tag] = (bfit["best_body"], bfit["best_tail"])

        ## Write files out to the directory
        util.write_data(data_file + "_ecdf", ec)
        util.write_data(data_file + "_ccdf", cc)

        # Log-spaced abscissae for the CCDF; empirical x-values for the CDF.
        exponents = util.gen_points(math.log10(min(x)), math.log10(max(x)), 2000)
        ccpts = np.power(10, exponents)
        ecpts = ec[:, 0]

        # Resolve every model first so a missing key fails before any write.
        resolved = [(fit_map[key][0], ec_name, cc_name)
                    for key, ec_name, cc_name in model_files]
        for dist, ec_name, cc_name in resolved:
            cdf_table = np.array([ecpts, dist.cdf(ecpts)]).transpose()
            ccdf_table = np.array([ccpts, dist.ccdf(ccpts)]).transpose()
            util.write_data(data_file + ec_name, cdf_table)
            util.write_data(data_file + cc_name, ccdf_table)

    for k in report_map:
        best = report_map[k]
        print(k + " BODY: " + best[0] + " TAIL: " + best[1])

    return report_map
def paroptfit(x1, hi, lo, n, fit1, mt1):
    """
    Evaluate candidate xmax values in parallel and pick the best ones.

    x1: Sorted array of points
    hi: Max. xmax to estimate to
    lo: Min. xmax to estimate to
    n: #. of points from hi to lo for the estimation
    fit1: Type of fit - mlefit or mmefit
    mt1: Mirror xform - True or False

    Returns a dict with two entries:
      "fit":  best_fit() selection comparing on result index 2
      "diff": best_fit() selection comparing on result index 3
    """
    global x, fit, mt, c, max_of_x

    ## Initialize globals prior to the parallel run ##
    # optfit only receives an xmax value through map(), so it presumably
    # reads these module globals in the workers -- confirm against optfit.
    x = x1
    max_of_x = max(x)
    fit = fit1
    mt = mt1
    c = util.ecdf(x)

    # Candidate grid between lo and hi, plus the sample maximum itself.
    candidates = util.gen_points(lo, hi, n).tolist()
    candidates.append(max_of_x)
    candidates.sort()
    xmax_pts = np.array(candidates)

    # The pool is created only after the globals are set, as before.
    proc_pool = Pool(mp.cpu_count())
    try:
        result = proc_pool.map(optfit, xmax_pts)
    finally:
        # Previously the pool was never shut down, leaking one set of
        # worker processes per call.
        proc_pool.close()
        proc_pool.join()

    # Indices used to compare the tuples returned by optfit:
    #   2 = fit metric
    #   3 = difference metric
    # The K-S comparison (formerly KS_COMP_IDX = 3) has been removed.
    # NOTE(review): the old comment labelled index 2 as K-S and index 3 as
    # fit; the labels above follow the constant names -- confirm against
    # the tuple layout produced by optfit.
    FIT_COMP_IDX = 2
    DIFF_COMP_IDX = 3

    return {
        "fit": best_fit(result, FIT_COMP_IDX),
        "diff": best_fit(result, DIFF_COMP_IDX),
    }
def paroptfit(x1, hi, lo, n, fit1, mt1):
    """
    Search a grid of xmax values in parallel for the best-scoring fits.

    x1: Sorted array of points
    hi: Max. xmax to estimate to
    lo: Min. xmax to estimate to
    n: #. of points from hi to lo for the estimation
    fit1: Type of fit - mlefit or mmefit
    mt1: Mirror xform - True or False

    Returns {"fit": ..., "diff": ...}, where each value is what best_fit()
    selects from the per-xmax results when comparing on result index 2
    ("fit") or result index 3 ("diff").
    """
    global x, fit, mt, c, max_of_x

    ## Initialize globals prior to the parallel run ##
    # optfit is handed only an xmax value by map(); these module globals
    # are presumably how it gets the remaining inputs -- confirm.
    x = x1
    max_of_x = max(x)
    fit = fit1
    mt = mt1
    c = util.ecdf(x)

    # Grid of xmax candidates, with the sample maximum always included.
    grid = util.gen_points(lo, hi, n).tolist()
    grid.append(max_of_x)
    grid.sort()
    xmax_pts = np.array(grid)

    # Pool creation stays after the global setup, as in the original.
    proc_pool = Pool(mp.cpu_count())
    try:
        result = proc_pool.map(optfit, xmax_pts)
    finally:
        # Shut the workers down; the pool used to be left open, leaking
        # processes on every call.
        proc_pool.close()
        proc_pool.join()

    # Comparison indices into each optfit result tuple:
    #   2 = fit metric
    #   3 = difference metric
    # The K-S comparison (formerly KS_COMP_IDX = 3) has been removed.
    # NOTE(review): the earlier comment had the labels for indices 2 and 3
    # the other way round (K-S / fit); verify against optfit's return value.
    FIT_COMP_IDX = 2
    DIFF_COMP_IDX = 3

    best_by_fit = best_fit(result, FIT_COMP_IDX)
    best_by_diff = best_fit(result, DIFF_COMP_IDX)
    return {"fit": best_by_fit, "diff": best_by_diff}
def gen_cdf_ccdf():
    """Write fitted CDF/CCDF tables for every dataset listed in the database.

    Datasets (unique_id, filename) are read from "files_and_analysis.db".
    For each dataset and each distribution in dist_list, the distribution
    returned by get_dist() is evaluated at the empirical CDF x-values (CDF)
    and on a 2000-point log-spaced grid over the data range (CCDF).  The two
    tables are written next to the data file as "<name>_ecdf.<ext>" and
    "<name>_ccdf.<ext>".  Progress is printed; nothing is returned.
    """
    # Loop-invariant configuration, hoisted out of the per-dataset loop.
    # dist_list = ["LOGLOGISTIC", "LOGN", "TPARETO", "TRUNCLL"]
    dist_list = ["LOGN"]
    fext_map = {
        "LOGLOGISTIC": "ll",
        "LOGN": "lgn",
        "TPARETO": "tp",
        "TRUNCLL": "tll",
    }

    r = RunSQL("files_and_analysis.db")
    dsets = r.sqlq("select unique_id, filename from datasets")

    for dpair in dsets:
        dset_id = dpair[0]
        dset_file = dpair[1]

        # Single-argument print calls produce the same text as the former
        # "print a, b" statements (hence the doubled space) and also run
        # under Python 3.
        print("Processing data set -  " + str(dset_id))

        x = read_data(dset_file)
        x.sort()
        ec = ecdf(x, issorted=True)
        pts = np.power(10, gen_points(math.log10(min(x)),
                                      math.log10(max(x)), 2000))

        op_dir = os.path.dirname(dset_file)
        base = os.path.basename(dset_file)

        for distname in dist_list:
            print("\t Getting distribution -  " + str(distname))
            dist = get_dist(dset_id, distname)

            dec = dist.cdf(ec[:, 0])
            dcc = dist.ccdf(pts)
            fdec = np.array([ec[:, 0], dec]).transpose()
            fdcc = np.array([pts, dcc]).transpose()

            # os.path.join keeps a bare filename relative to the working
            # directory; op_dir + "/" + name turned an empty op_dir into a
            # path at the filesystem root.
            ext = fext_map[distname]
            op_ec_file = os.path.join(op_dir, base + "_ecdf" + "." + ext)
            op_cc_file = os.path.join(op_dir, base + "_ccdf" + "." + ext)

            print("\t Writing CDF -  " + str(op_ec_file))
            write_data(op_ec_file, fdec)
            print("\t Writing CCF -  " + str(op_cc_file))
            write_data(op_cc_file, fdcc)
def main(dt):
    """Extract one data type from azure.db, fit it, and write report files.

    dt selects a query set: one of "slen", "inb", "outb", "to_inb",
    "to_outb", "tslen", "tinb" or "toutb".  For every (category, query) pair
    in that set the function:

      * runs the query and writes the sorted data, its CCDF and its ECDF;
      * builds eight ModLav fits (MLE/MME, with and without the mt flag,
        with and without the xmax optimization);
      * writes each fit's CCDF/CDF tables and a "<category>_metric" text
        report with basic statistics and per-fit metrics.

    Raises NotImplementedError if dt is not a known data type.
    """
    # NOTE(review): the category literals embed a newline ('catm\n');
    # presumably that is how ucat_term is stored in the table -- confirm.
    slen = {
        "catm": "select seslen from data_log where ucat_term='catm\n' and seslen>0 and seslen<18000 order by seslen",
        "catd": "select seslen from data_log where ucat_term='catd\n' and seslen>0 and seslen<18000 order by seslen",
        "cath": "select seslen from data_log where ucat_term='cath\n' and seslen>0 and seslen<18000 order by seslen",
        "all": "select seslen from data_log where seslen>0 and seslen<18000 order by seslen",
    }
    inb = {
        "catm": "select bin from data_log where ucat_term='catm\n' and bin>0 and seslen>0 and seslen<18000 order by bin",
        "catd": "select bin from data_log where ucat_term='catd\n' and bin>0 and seslen>0 and seslen<18000 order by bin",
        "cath": "select bin from data_log where ucat_term='cath\n' and bin>0 and seslen>0 and seslen<18000 order by bin",
        "all": "select bin from data_log where bin>0 and seslen>0 and seslen<18000 order by bin",
    }
    outb = {
        "catm": "select bout from data_log where ucat_term='catm\n' and bout>0 and seslen>0 and seslen<18000 order by bout",
        "catd": "select bout from data_log where ucat_term='catd\n' and bout>0 and seslen>0 and seslen<18000 order by bout",
        "cath": "select bout from data_log where ucat_term='cath\n' and bout>0 and seslen>0 and seslen<18000 order by bout",
        "all": "select bout from data_log where bout>0 and seslen>0 and seslen<18000 order by bout",
    }
    to_inb = {
        "all": "select bin from data_log where seslen >= 18000 and bin>0 order by bin",
    }
    to_outb = {
        "all": "select bout from data_log where seslen >= 18000 and bout>0 order by bout",
    }
    tslen = {
        "catm": "select sum(seslen) t from data_log where ucat_term='catm\n' and seslen>0 and seslen<18000 group by user order by t",
        "catd": "select sum(seslen) t from data_log where ucat_term='catd\n' and seslen>0 and seslen<18000 group by user order by t",
        "cath": "select sum(seslen) t from data_log where ucat_term='cath\n' and seslen>0 and seslen<18000 group by user order by t",
        "all": "select sum(seslen) t from data_log where seslen>0 and seslen<18000 group by user order by t",
    }
    tinb = {
        "catm": "select sum(bin) t from data_log where ucat_term='catm\n' and bin > 0 and seslen>0 and seslen<18000 group by user order by t",
        "catd": "select sum(bin) t from data_log where ucat_term='catd\n' and bin > 0 and seslen>0 and seslen<18000 group by user order by t",
        "cath": "select sum(bin) t from data_log where ucat_term='cath\n' and bin > 0 and seslen>0 and seslen<18000 group by user order by t",
        "all": "select sum(bin) t from data_log where bin > 0 and seslen>0 and seslen<18000 group by user order by t",
    }
    toutb = {
        "catm": "select sum(bout) t from data_log where ucat_term='catm\n' and bout>0 and seslen>0 and seslen<18000 group by user order by t",
        "catd": "select sum(bout) t from data_log where ucat_term='catd\n' and bout>0 and seslen>0 and seslen<18000 group by user order by t",
        "cath": "select sum(bout) t from data_log where ucat_term='cath\n' and bout>0 and seslen>0 and seslen<18000 group by user order by t",
        "all": "select sum(bout) t from data_log where bout>0 and seslen>0 and seslen<18000 group by user order by t",
    }

    dtmap = {
        "slen": slen,
        "inb": inb,
        "outb": outb,
        "to_inb": to_inb,
        "to_outb": to_outb,
        "tslen": tslen,
        "tinb": tinb,
        "toutb": toutb,
    }

    if dt not in dtmap:
        raise NotImplementedError("Type - " + dt + " - is not implemented")

    rule = "----------------------------------------------------------------------"

    qmap = dtmap[dt]
    s = sql.RunSQL("azure.db")
    for typ, q in qmap.items():
        y = s.sqlq(q)
        x = np.array(y)
        x.sort()  # just making sure

        # Raw data plus empirical CCDF/ECDF.  Only the data file name
        # carries dt; the CCDF/ECDF names depend on the category alone.
        cc = util.ccdf(x)
        ec = util.ecdf(x)
        util.write_data(typ + "_" + dt, x)
        util.write_data(typ + "_ccdf", cc)
        util.write_data(typ + "_ecdf", ec)

        # Plain fits over the full sample.
        mle = ml.ModLav.fromFit(x, fit="mlefit")
        mme = ml.ModLav.fromFit(x, fit="mmefit")
        mle_mt = ml.ModLav.fromFit(x, fit="mlefit", mt=True)
        mme_mt = ml.ModLav.fromFit(x, fit="mmefit", mt=True)

        # Optimized fits: 500 points with bounds 0.1*max and 10*max.
        amax = x.max()
        omle = ml.optfit(x, 0.1 * amax, 10 * amax, 500, mlefit=True, mt=False)
        omle_mt = ml.optfit(x, 0.1 * amax, 10 * amax, 500, mlefit=True, mt=True)
        omme = ml.optfit(x, 0.1 * amax, 10 * amax, 500, mlefit=False, mt=False)
        omme_mt = ml.optfit(x, 0.1 * amax, 10 * amax, 500, mlefit=False, mt=True)

        # Each entry is (label, model, xmax used for that model).
        yyy = [
            ("MLE", mle, amax),
            ("MME", mme, amax),
            ("MLE-MT", mle_mt, amax),
            ("MME-MT", mme_mt, amax),
            ("MLE-OPT", omle["fit"][0], omle["fit"][1]),
            ("MLE-OPT-MT", omle_mt["fit"][0], omle_mt["fit"][1]),
            ("MME-OPT", omme["fit"][0], omme["fit"][1]),
            ("MME-OPT-MT", omme_mt["fit"][0], omme_mt["fit"][1]),
        ]

        n, amin, mu, sigma = len(x), x.min(), x.mean(), x.std()
        cv = sigma / mu
        q = ms.mquantiles(x, [0.1, 0.5, 0.9])

        op_str = [
            "BASIC STATISTICS",
            rule,
            "Size: " + str(n),
            "Range: " + str(amin) + " - " + str(amax),
            "Quantiles: 10% - " + str(q[0]) + " 50% - " + str(q[1])
            + " 90% - " + str(q[2]),
            "Mean: " + str(mu),
            "Sigma: " + str(sigma),
            "CV: " + str(cv),
            "\n",
        ]

        for lbl, m, xmx in yyy:
            op_str.append(lbl)
            op_str.append(rule)
            op_str.append("Modlav params: " + str(m))
            op_str.append("Xmax: " + str(xmx))
            op_str.append("Xmax/Max: " + str(xmx / amax))
            op_str.append("FIT metric: " + str(m.fitmetric(points=x)))
            op_str.append("K-S metric: " + str(m.ksmetric(points=x)))
            op_str.append(rule)
            op_str.append("\n")

            # Per-fit tables: CCDF on a log grid up to this fit's xmax,
            # CDF at the x-values of the empirical CDF.
            fname_pfx = typ + "_" + lbl.lower().replace("-", "_")
            lx = util.gen_points(math.log10(x.min()), math.log10(xmx), 2000)
            ex = np.power(10, lx)
            fmcc = np.array([ex, m.ccdf(ex)]).transpose()
            fmec = np.array([ec[:, 0], m.cdf(ec[:, 0])]).transpose()
            util.write_data(fname_pfx + "_ccdf", fmcc)
            util.write_data(fname_pfx + "_ecdf", fmec)

        # The context manager closes the report even if the write fails.
        with open(typ + "_metric", "w+") as txf:
            txf.writelines([s1 + "\n" for s1 in op_str])
def pardd(fname):
    """Fit the data in *fname* and write the CDF/CCDF tables and a report.

    The data file is read and sorted, and its empirical CCDF/ECDF are
    written to "<fname>_ccdf" / "<fname>_ecdf".  Four plain ModLav fits and
    four parallel xmax-optimized runs (parmlfit.paroptfit) are evaluated;
    every resulting fit gets its own "<fname>_<label>_ccdf/_ecdf" table, and
    a text report with basic statistics, per-fit metrics and the
    recommendation from best_fit() is written to "<fname>_metric".

    Each fit record is indexed as (model, xmax, fit, K-S, diff).  The
    optimized runs are expected to expose "fit", "ks" and "diff" entries.
    """
    inpf = fname
    x1 = util.read_data(inpf)
    x1.sort()
    xmx = x1.max()

    # Search settings for the optimized runs.
    npts = 500
    lo = 0.1 * xmx
    hi = 10 * xmx

    cc = util.ccdf(x1)
    ec = util.ecdf(x1)
    util.write_data(inpf + "_ccdf", cc)
    util.write_data(inpf + "_ecdf", ec)

    mle = ml.ModLav.fromFit(x1, fit="mlefit")
    mme = ml.ModLav.fromFit(x1, fit="mmefit")
    mle_mt = ml.ModLav.fromFit(x1, fit="mlefit", mt=True)
    mme_mt = ml.ModLav.fromFit(x1, fit="mmefit", mt=True)

    def _record(model):
        # Non-optimized fits use the sample maximum as their xmax.
        return (model, xmx, model.fitmetric(cdf=ec),
                model.ksmetric(cdf=ec), model.difference(cdf=ec))

    fitlist = [
        ("MLE", _record(mle)),
        ("MME", _record(mme)),
        ("MLE-MT", _record(mle_mt)),
        ("MME-MT", _record(mme_mt)),
    ]

    # Note the argument order: hi comes before lo.
    optimized = [
        ("MLE-OPT", parmlfit.paroptfit(x1, hi, lo, npts, "mlefit", False)),
        ("MLE-OPT-MT", parmlfit.paroptfit(x1, hi, lo, npts, "mlefit", True)),
        ("MME-OPT", parmlfit.paroptfit(x1, hi, lo, npts, "mmefit", False)),
        ("MME-OPT-MT", parmlfit.paroptfit(x1, hi, lo, npts, "mmefit", True)),
    ]
    # One record per selection criterion, in the original report order.
    for prefix, key in (("", "fit"), ("KS-", "ks"), ("D-", "diff")):
        for base_lbl, res in optimized:
            fitlist.append((prefix + base_lbl, res[key]))

    size, amin, amax = len(x1), x1.min(), xmx
    mu, sigma = x1.mean(), x1.std()
    cv = sigma / mu
    q = ms.mquantiles(x1, [0.1, 0.5, 0.9])

    rule = "--------------------------------------------------------------------------"
    op_str = [
        "BASIC STATISTICS",
        rule,
        "Size: " + str(size),
        "Range: " + str(amin) + " - " + str(amax),
        "Quantiles: 10% - " + str(q[0]) + " 50% - " + str(q[1])
        + " 90% - " + str(q[2]),
        "Mean: " + str(mu),
        "Sigma: " + str(sigma),
        "CV: " + str(cv),
        "\n",
    ]

    best_fit_map = dict()
    for lbl, rec in fitlist:
        m, mx, fitm, ksm, diffm = rec[0], rec[1], rec[2], rec[3], rec[4]
        best_fit_map[lbl] = (m, mx, fitm, ksm, diffm)

        # The metrics come from the stored record rather than being
        # recomputed from the raw points.
        op_str.append(lbl)
        op_str.append(rule)
        op_str.append("Modlav params: " + str(m))
        op_str.append("Xmax: " + str(mx))
        op_str.append("Xmax/Max: " + str(mx / xmx))
        op_str.append("FIT Metric: " + str(fitm))
        op_str.append("K-S Metric: " + str(ksm))
        op_str.append("DIFF Metric: " + str(diffm))
        op_str.append(rule)
        op_str.append("\n")

        # Per-fit tables: CCDF on a log grid up to this fit's xmax, CDF at
        # the x-values of the empirical CDF.
        fname_pfx = inpf + "_" + lbl.lower().replace("-", "_")
        lx = util.gen_points(math.log10(x1.min()), math.log10(mx), 2000)
        ex = np.power(10, lx)
        fmcc = np.array([ex, m.ccdf(ex)]).transpose()
        fmec = np.array([ec[:, 0], m.cdf(ec[:, 0])]).transpose()
        util.write_data(fname_pfx + "_ccdf", fmcc)
        util.write_data(fname_pfx + "_ecdf", fmec)

    recom = best_fit(best_fit_map, xmx)

    op1_str = [s1 + "\n" for s1 in op_str]
    op1_str.append("RECOMMENDATIONS: " + str(recom) + "\n")

    # The context manager closes the report even if the write fails.
    with open(inpf + "_metric", "w+") as txf:
        txf.writelines(op1_str)
def pardd(fname):
    """Fit the data in *fname* and write the CDF/CCDF tables and a report.

    The data file is read and sorted, and its empirical CCDF/ECDF are
    written to "<fname>_ccdf" / "<fname>_ecdf".  Four plain ModLav fits and
    four parallel xmax-optimized runs (parmlfit.paroptfit) are evaluated;
    every resulting fit gets its own "<fname>_<label>_ccdf/_ecdf" table, and
    a text report with basic statistics, per-fit metrics and the
    recommendation from best_fit() is written to "<fname>_metric".

    Each fit record is indexed as (model, xmax, fit, K-S, diff).  The
    optimized runs are expected to expose "fit", "ks" and "diff" entries.
    """
    inpf = fname
    x1 = util.read_data(inpf)
    x1.sort()
    xmx = x1.max()

    # Search settings for the optimized runs.
    npts = 500
    lo = 0.1 * xmx
    hi = 10 * xmx

    cc = util.ccdf(x1)
    ec = util.ecdf(x1)
    util.write_data(inpf + "_ccdf", cc)
    util.write_data(inpf + "_ecdf", ec)

    mle = ml.ModLav.fromFit(x1, fit="mlefit")
    mme = ml.ModLav.fromFit(x1, fit="mmefit")
    mle_mt = ml.ModLav.fromFit(x1, fit="mlefit", mt=True)
    mme_mt = ml.ModLav.fromFit(x1, fit="mmefit", mt=True)

    def _record(model):
        # Non-optimized fits use the sample maximum as their xmax.
        return (model, xmx, model.fitmetric(cdf=ec),
                model.ksmetric(cdf=ec), model.difference(cdf=ec))

    fitlist = [
        ("MLE", _record(mle)),
        ("MME", _record(mme)),
        ("MLE-MT", _record(mle_mt)),
        ("MME-MT", _record(mme_mt)),
    ]

    # Note the argument order: hi comes before lo.
    optimized = [
        ("MLE-OPT", parmlfit.paroptfit(x1, hi, lo, npts, "mlefit", False)),
        ("MLE-OPT-MT", parmlfit.paroptfit(x1, hi, lo, npts, "mlefit", True)),
        ("MME-OPT", parmlfit.paroptfit(x1, hi, lo, npts, "mmefit", False)),
        ("MME-OPT-MT", parmlfit.paroptfit(x1, hi, lo, npts, "mmefit", True)),
    ]
    # One record per selection criterion, in the original report order.
    for prefix, key in (("", "fit"), ("KS-", "ks"), ("D-", "diff")):
        for base_lbl, res in optimized:
            fitlist.append((prefix + base_lbl, res[key]))

    size, amin, amax = len(x1), x1.min(), xmx
    mu, sigma = x1.mean(), x1.std()
    cv = sigma / mu
    q = ms.mquantiles(x1, [0.1, 0.5, 0.9])

    rule = "--------------------------------------------------------------------------"
    op_str = [
        "BASIC STATISTICS",
        rule,
        "Size: " + str(size),
        "Range: " + str(amin) + " - " + str(amax),
        "Quantiles: 10% - " + str(q[0]) + " 50% - " + str(q[1])
        + " 90% - " + str(q[2]),
        "Mean: " + str(mu),
        "Sigma: " + str(sigma),
        "CV: " + str(cv),
        "\n",
    ]

    best_fit_map = dict()
    for lbl, rec in fitlist:
        m, mx, fitm, ksm, diffm = rec[0], rec[1], rec[2], rec[3], rec[4]
        best_fit_map[lbl] = (m, mx, fitm, ksm, diffm)

        # The metrics come from the stored record rather than being
        # recomputed from the raw points.
        op_str.append(lbl)
        op_str.append(rule)
        op_str.append("Modlav params: " + str(m))
        op_str.append("Xmax: " + str(mx))
        op_str.append("Xmax/Max: " + str(mx / xmx))
        op_str.append("FIT Metric: " + str(fitm))
        op_str.append("K-S Metric: " + str(ksm))
        op_str.append("DIFF Metric: " + str(diffm))
        op_str.append(rule)
        op_str.append("\n")

        # Per-fit tables: CCDF on a log grid up to this fit's xmax, CDF at
        # the x-values of the empirical CDF.
        fname_pfx = inpf + "_" + lbl.lower().replace("-", "_")
        lx = util.gen_points(math.log10(x1.min()), math.log10(mx), 2000)
        ex = np.power(10, lx)
        fmcc = np.array([ex, m.ccdf(ex)]).transpose()
        fmec = np.array([ec[:, 0], m.cdf(ec[:, 0])]).transpose()
        util.write_data(fname_pfx + "_ccdf", fmcc)
        util.write_data(fname_pfx + "_ecdf", fmec)

    recom = best_fit(best_fit_map, xmx)

    op1_str = [s1 + "\n" for s1 in op_str]
    op1_str.append("RECOMMENDATIONS: " + str(recom) + "\n")

    # The context manager closes the report even if the write fails.
    with open(inpf + "_metric", "w+") as txf:
        txf.writelines(op1_str)