Example #1
def new():
    print("\033[0;31m What function does your problem require?")
    print(
        " Type one of these symbols:\n *  ---  Multiplication\n /  ---  Division\n +  ---  Addition\n -  ---  Subtraction\n h --- History\n n --- New Math Problem")
    functionType = input().lower()

    # Specific operator execution (match on symbol or keyword, case-insensitively)
    if "*" in functionType or "multiply" in functionType or "multiplication" in functionType:
        print("\033[0;34mMultiplication")
        from multiply import multiplySetup
        multiplySetup()

    elif "/" in functionType or "divide" in functionType or "division" in functionType:
        print("\033[0;35mDivision")
        from divide import divideSetup
        divideSetup()

    elif "+" in functionType or "add" in functionType:
        print("\033[0;32mAddition")
        from add import addSetup
        addSetup()

    elif "-" in functionType or "subtract" in functionType:
        print("\033[0;33mSubtraction")
        from subtract import subtractSetup
        subtractSetup()

    elif "h" in functionType or "history" in functionType:
        from hist import hist
        hist()

    elif "n" in functionType or "new" in functionType:
        from problem import new
        new()
Example #2
def getSignals(iHP, iHF, iBin, iBase):
    lPSigs = []
    lFSigs = []
    lPHists = []
    lFHists = []
    lVars = [125]  #50,75,100,125,150,200,250,300]
    for i0 in range(0, len(lVars)):
        lPHists.append(iHP[i0 + 4])
        lFHists.append(iHF[i0 + 4])
    print(str(lPHists))
    print("asdSig")
    lPHist = hist(lVars, lPHists)
    lFHist = hist(lVars, lFHists)
    masses = [
        125
    ]  #50,60,75,90,100,112,125,140,150,160,170,180,190,200,210,220,230,240,250,260,270,280,290]
    for i0 in range(0, len(masses)):
        if len(masses) > 1:
            pHP = lPHist.morph(masses[i0])
            pHF = lFHist.morph(masses[i0])
        else:
            pHP = lPHist
            pHF = lFHist
            for i1 in range(0, len(lVars)):
                if lVars[i1] == masses[i0]:
                    pHP = iHP[i1 + 4]
                    pHF = iHF[i1 + 4]
        lSig = histFunc([pHP, pHF], iBase, "hqq_" + str(masses[i0]), iBin)
        lPSigs.append(lSig[4])
        lFSigs.append(lSig[5])
    return (lPSigs, lFSigs)
Example #3
	def exportAsHist(self):
		## exports the event yields as a hist instance

		h = hist.hist(self.mypaf, self.name, clist.clist(1, 0, 1), [self.name, self.obj])
		h.build(self.sources, self.categs)

		for sidx in range(len(self.sources)):
			for cidx in range(len(self.categs)):
				h.setBinContent(sidx, cidx, 1, self.yields[sidx][cidx])

		return h
Example #4
	def exportAsHist(self):
		## exports the event yields as a hist instance

		self.vb.call("evyield", "exportAsHist", [self], "Exporting the evyield as a hist.")
		h = hist.hist(self.mypaf, self.name, clist.clist(1, 0, 1), [self.name, "events"])
		h.build(self.sources, self.categs)

		for sidx in range(len(self.sources)):
			for cidx in range(len(self.categs)):
				h.setBinContent(sidx, cidx, 1, self.yields[sidx][cidx])

		return h
Example #5
def proj(name, schemes, alist):

	dim = "x"
	if alist.has("dim"):
		dim = alist.get("dim")

	h1 = hist.hist(schemes[0].getHist().mypaf, name)
	h1.inject(schemes[0].getHist().getProj(dim))
	
	return h1
Example #6
import sys
import time


def choose():
    chooseInput = input(
        "Press:\n1 --- Exit the calculator\n2 --- Enter another math problem\n3 --- Check the history\n"
    )
    chooseInput = int(chooseInput)
    if chooseInput == 1:
        print("See you later!")
        print("Application closing in 3 seconds")
        time.sleep(1)
        print("Application closing in 2 seconds")
        time.sleep(1)
        print("Application closing in 1 second")
        time.sleep(1)
        sys.exit()
    elif chooseInput == 2:
        from problem import new
        new()
    elif chooseInput == 3:
        from hist import hist
        hist()
Example #7
def sub(name, schemes, alist):

	schemes[0].mypaf.divideCanv(1, 1, False)

	## need to fix the coeffs
	h1 = None
	if len(schemes) > 1:
		h1 = hist.hist(schemes[0].mypaf, name, schemes[0].getHist().getDim(), schemes[0].getHist().alist.argstring)
		h1.reinit(schemes[0].getHist())
		h1.injectHist(schemes[0].getHist())
		h1.setArgs(alist.argstring)
		for i in range(1,len(schemes)):
			h1.subHist(schemes[i].getHist())
	return h1
Example #8
def filter(name, schemes, alist):

	if len(schemes) == 1 and alist.has("cut"):
		h1 = hist.hist(schemes[0].mypaf, name, schemes[0].getHist().binargs, "")
		h1.reinit(schemes[0].getHist())
		h1.injectHist(schemes[0].getHist())
		h1.setArgs(alist.argstring)
		for sidx in range(len(h1.sources)):
			for cidx in range(len(h1.categs)):
				for i in range(1, h1.getH(sidx, cidx).GetNbinsX() + 1):
					if not eval("h1.getH(sidx, cidx).GetBinContent(" + str(i) + ") " + alist.get("cut")):
						h1.getH(sidx, cidx).SetBinContent(i, 0)
		return h1
Example #9
def getSignals(iHP,iHF,iBin,iBase):
    lPSigs  = []
    lFSigs  = []
    lPHists = [] 
    lFHists = [] 
    lVars=[50,75,100,125,150,200,250,300]
    for i0 in range(0,len(lVars)):
        lPHists.append(iHP[i0+3])
        lFHists.append(iHF[i0+3])
    lPHist = hist(lVars,lPHists)
    lFHist = hist(lVars,lFHists)
    masses=[50,60,75,90,100,112,125,140,150,160,170,180,190,200,210,220,230,240,250,260,270,280,290]
    for i0 in range(0,len(masses)):
        pHP   = lPHist.morph(masses[i0])
        pHF   = lFHist.morph(masses[i0])
        for i1 in range(0,len(lVars)):
            if lVars[i1] == masses[i0]:
                pHP=iHP[i1+3]
                pHF=iHF[i1+3]
        lSig = histFunc([pHP,pHF],iBase,"zqq"+str(masses[i0]),iBin)
        lPSigs.append(lSig[4])
        lFSigs.append(lSig[5])
    return (lPSigs,lFSigs)
Example #10
	def exportAsHist(self, var = "pt"):

		self.close()

		alist = args.args("var=" + var)
		i = lib.findElm(self.vars, var)
		binargs, names = lib.prepareHistInfo(self.db, alist)
		h = hist.hist(self.mypaf, self.name, binargs, names)
		h.build(self.sources, self.categs)
		for sidx in range(len(self.sources)):
			for cidx in range(len(self.categs)):
				f     = open(self.paths[sidx][cidx], "r")
				lines = f.readlines()
				for entry in lines:
					h.fill(sidx, cidx, float(entry.split(":=")[i].strip()))
				f.close()
		return h
Example #11
    img_matched = np.zeros((H, W)).astype(int)  # 0~255
    for i in range(H):
        for j in range(W):
            img_matched[i][j] = inv_correspond[img[i][j]]
    return img_matched


if __name__ == "__main__":
    import cv2
    from hist import hist, acc_hist
    import matplotlib.pyplot as plt

    img_gray = cv2.imread('./images/low_contrast.jpg', cv2.IMREAD_GRAYSCALE)
    H, W = img_gray.shape[0], img_gray.shape[1]

    hist = hist(img_gray)
    # plt.bar([i for i in range(256)], hist)
    # plt.show()
    # plt.savefig('./output_images/chap3_3_hist.jpg')
    # plt.close()
    hist_acc = acc_hist(hist)
    # plt.bar([i for i in range(256)], hist_acc)
    # plt.show()
    # plt.savefig('./output_images/chap3_3_acc_hist.jpg')
    # plt.close()
    pdf_acc = list(map(lambda x: x / (H * W), hist_acc))
    pdf_acc_reference = [(i + 1) / 256 for i in range(256)]
    correspond = match_hist(
        pdf_acc, pdf_acc_reference)  # use pdf_acc -> to fit pdf_acc_reference
    pdf_acc_match = list(map(lambda ind: pdf_acc[ind],
                             correspond))  # pdf_acc[correspond]
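The call to match_hist above refers to a helper defined elsewhere in that file. A minimal sketch of what such a helper could look like, assuming it maps each source gray level to the reference level with the nearest cumulative-distribution value (only the name and signature are taken from the call site; the body is an assumption):

import numpy as np

def match_hist(cdf_src, cdf_ref):
    # For each source gray level, pick the reference gray level whose
    # cumulative distribution value is closest (nearest-CDF matching).
    cdf_src = np.asarray(cdf_src)
    cdf_ref = np.asarray(cdf_ref)
    return [int(np.argmin(np.abs(cdf_ref - v))) for v in cdf_src]

Incidentally, the per-pixel double loop at the top of this example can be collapsed into a single NumPy indexing step, img_matched = np.asarray(inv_correspond)[img], with the same result.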
Example #12
bbj = s1.Get("no_cut_entries")
generatedEvents1 = bbj.GetBinContent(1)
tree1 = s1.Get("MyTree2")
writeplot(tree1, temp1, VAR, sigregcut, "trigWeight*puWeight*SF")
s2 = TFile("files_7_5_2016/presel_silver_noSyst_grav%s.root" % (sim_masses[m_index + 1]))
bbj = s2.Get("no_cut_entries")
generatedEvents2 = bbj.GetBinContent(1)
tree2 = s2.Get("MyTree2")
writeplot(tree2, temp2, VAR, sigregcut, "trigWeight*puWeight*SF")
temp1.Scale(lumi * SF_tau21 * SF_tau21 * 0.01 / generatedEvents1)
temp2.Scale(lumi * SF_tau21 * SF_tau21 * 0.01 / generatedEvents2)

# init interpolator
vals = [sim_masses[m_index + 1], sim_masses[m_index]]
hists = [temp1, temp2]
interpolation = hist.hist(vals, hists)

# histo
Signal_mX = interpolation.morph(m)
Signal_mX = Signal_mX.Rebin(len(binBoundaries) - 1, "Signal_mX_%s" % (m), array('d', binBoundaries))
Signal_mX.SetTitle(Signal_mX.GetName())
Signal_mX.Scale(f1(float(m)) / Signal_mX.Integral())
# repeat
writeplot(tree1, temp1, VAR, sigregcut, "trigWeight*puWeightUp*SF")
writeplot(tree2, temp2, VAR, sigregcut, "trigWeight*puWeightUp*SF")
temp1.Scale(lumi * SF_tau21 * SF_tau21 * 0.01 / generatedEvents1)
temp2.Scale(lumi * SF_tau21 * SF_tau21 * 0.01 / generatedEvents2)
vals = [sim_masses[m_index + 1], sim_masses[m_index]]
hists = [temp1, temp2]
interpolation = hist.hist(vals, hists)
Signal_mX_pu_up = interpolation.morph(m)
Example #13
import shelve

from hist import hist  # counting-hist container, as used elsewhere on this page

posts = []  # accumulated from the daily shelf files below
month = 4
for day in range(1, 27):
    filename = "posts-2018%02d%02d.shelf" % (month, day)
    print("Processing %s" % (filename))
    s = shelve.open(filename)
    posts.extend(s['posts'])
    s.close()

hf19_author_curator_shares = []
hf20_author_curator_shares = []
hf20v2_author_curator_shares = []
hf20v3_author_curator_shares = []
total_payouts = []
hf19_voting_times = []
hf19_rshares_age_hist = hist()

mismatches = 0
postcount = 0
hf19_total_author_tokens = 0
hf19_total_curator_tokens = 0
hf20_total_author_tokens = 0
hf20_total_curator_tokens = 0
hf20v2_total_author_tokens = 0
hf20v2_total_curator_tokens = 0
hf20v2_total_unclaimed_tokens = 0
hf20v3_total_author_tokens = 0
hf20v3_total_curator_tokens = 0

calc_ref_diffs = []
Example #14
#plot the loss history
fig = plt.figure()

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.xlabel('epoch')
plt.ylabel('MSE Loss')
plt.legend(['Train Loss', 'Validation Loss'], loc='upper right')
fig.savefig('loss_plot.png')
plt.show()

#test the network
output = model.predict(test_input)

#plot a histogram of the output
hist(output, 'Pt', title='Network output')

#save labels to csv files
root = 'ne_' + str(n_epochs) + '_lr_' + str(learning_rate) + '_nl_' + str(
    no_layers) + '_' + str(layer1) + '_' + str(layer2) + '_' + str(
        layer3) + '_'

# serialize model to JSON
model_json = model.to_json()
with open(root + "model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights(root + "model.h5")
print("Saved model to disk")

fname_train_lss = root + 'train_loss.csv'
Example #15
        if file.startswith("stats-") and file.endswith(".shelf"):
            inputfiles.append(file)

if len(sys.argv) > 1:
    inputfiles = sys.argv[1:]

total_votes = {}
dust_votes = {}
zero_votes = {}
vote_rshares_hf19 = []
vote_rshares_hf20 = []
vote_pct_hf19 = []
vote_pct_hf20 = []
zero_vote_pcts = []

zero_voters = hist()
zero_votees = hist()

for file in inputfiles:
    print("Processing %s" % (file))
    s = shelve.open(file)
    rshares_by_day = s['rshares']
    percent_by_day = s['percentages']
    zero_voters_by_day = s['zero_voters']
    zero_votees_by_day = s['zero_votees']
    s.close()

    for day in rshares_by_day:
        if day not in total_votes:
            total_votes[day] = 0
        if day not in dust_votes:
Example #16
    def morphSignal(self,
                    newname,
                    mass,
                    mass_shift,
                    mass_shift_unc,
                    mass_res,
                    mass_res_unc,
                    inputtedPeakCentral=None):

        DDTCUT = float(options.DDTcut)

        if not self._isMorphed:

            # -------------------------------------------------------------------------------------
            # make matched and unmatched shapes!
            self.h_jetmsd_passcut_unmatched = ROOT.TH1F(
                "h_jetmsd_passcut_unmatched" + self._name,
                "; soft drop mass (GeV);", self._nmassbins, 30, 330)
            self.h_jetmsd_passcut_matched = ROOT.TH1F(
                "h_jetmsd_passcut_matched" + self._name,
                "; soft drop mass (GeV);", self._nmassbins, 30, 330)
            # looping
            nent = self._tt.GetEntries()
            for i in range(self._tt.GetEntries()):

                # preamble
                self._tt.GetEntry(i)
                if (i % (1 * nent / 100) == 0):
                    sys.stdout.write("\r[" + "=" * int(20 * i / nent) + " " +
                                     str(round(100. * i / nent, 0)) + "% done")
                    sys.stdout.flush()

                jpt = getattr(self._tt, "bst8_PUPPIjet" + self._jetNum + "_pt")
                jmsd = getattr(self._tt,
                               "bst8_PUPPIjet" + self._jetNum + "_msd")
                if jmsd == 0.: jmsd = 0.01
                weight = self._scaleFactor * self._lumi * getattr(
                    self._tt, "scale1fb") * getattr(
                        self._tt, "kfactor") * getattr(self._tt, "puWeight")
                if self._isData: weight = 1

                if jpt < 500: continue
                jt21 = getattr(self._tt,
                               "bst8_PUPPIjet" + self._jetNum + "_tau21")
                rhP = math.log(jmsd * jmsd / jpt)
                jt21P = jt21 + 0.063 * rhP

                jphi = getattr(self._tt,
                               "bst8_PUPPIjet" + self._jetNum + "_phi")
                dphi = math.fabs(self._tt.genVPhi - jphi)
                dpt = math.fabs(self._tt.genVPt - jpt) / self._tt.genVPt
                dmass = math.fabs(mass - jmsd) / mass

                # print dphi,dpt
                if rhP > 0 and jt21P < DDTCUT and dphi < 0.8 and dpt < 0.5 and dmass < 0.35:
                    self.h_jetmsd_passcut_matched.Fill(jmsd, weight)
                elif rhP > 0 and jt21P < DDTCUT and (dphi > 0.8 or dpt > 0.5
                                                     or dmass > 0.35):
                    self.h_jetmsd_passcut_unmatched.Fill(jmsd, weight)
                else:
                    continue

            print "\n"
            # -------------------------------------------------------------------------------------

            # setattr(self,newname, getattr(self,"h_jetmsd_passcut").Clone());
            setattr(self, newname,
                    getattr(self, "h_jetmsd_passcut_matched").Clone())
            hist_container = hist([mass], [getattr(self, newname)])

        else:
            setattr(self, newname, inputtedPeakCentral)

        hist_container = hist([mass], [getattr(self, newname)])

        # get new central value
        shift_val = mass - mass * mass_shift
        tmp_shifted_h = hist_container.shift(getattr(self, newname), shift_val)
        # get new central value and new smeared value
        smear_val = mass_res - 1

        tmp_smeared_h = hist_container.smear(tmp_shifted_h[0], smear_val)
        if smear_val <= 0:
            setattr(self, newname + "_central", tmp_smeared_h[1])
        else:
            setattr(self, newname + "_central", tmp_smeared_h[0])

        # get shift up/down
        shift_unc = mass * mass_shift * mass_shift_unc
        hsys_shift = hist_container.shift(getattr(self, newname + "_central"),
                                          shift_unc)
        # get res up/down
        hsys_smear = hist_container.smear(getattr(self, newname + "_central"),
                                          mass_res_unc)

        # print shift_val, smear_val, shift_unc, mass_res_unc
        # print getattr(self,"h_jetmsd_passcut_unmatched").GetNbinsX(), hsys_shift[0].GetNbinsX(),hsys_shift[0].GetXaxis().GetBinCenter(1),hsys_shift[0].GetXaxis().GetBinCenter(60);

        setattr(self, newname + "_matched", getattr(self, newname).Clone())
        setattr(self, newname + "_shiftUp_matched", hsys_shift[0].Clone())
        setattr(self, newname + "_shiftDn_matched", hsys_shift[1].Clone())
        setattr(self, newname + "_smearUp_matched", hsys_smear[0].Clone())
        setattr(self, newname + "_smearDn_matched", hsys_smear[1].Clone())

        if not self._isMorphed:
            getattr(self,
                    newname).Add(getattr(self, "h_jetmsd_passcut_unmatched"))
            hsys_shift[0].Add(getattr(self, "h_jetmsd_passcut_unmatched"))
            hsys_shift[1].Add(getattr(self, "h_jetmsd_passcut_unmatched"))
            hsys_smear[0].Add(getattr(self, "h_jetmsd_passcut_unmatched"))
            hsys_smear[1].Add(getattr(self, "h_jetmsd_passcut_unmatched"))
        setattr(self, newname + "_shiftUp", hsys_shift[0])
        setattr(self, newname + "_shiftDn", hsys_shift[1])
        setattr(self, newname + "_smearUp", hsys_smear[0])
        setattr(self, newname + "_smearDn", hsys_smear[1])
Example #17
generatedEvents1 = bbj.GetBinContent(1)
tree1 = s1.Get("MyTree2")
writeplot(tree1, temp1, VAR, sigregcut, "trigWeight*puWeight*SF")
s2 = TFile("files_7_5_2016/presel_silver_noSyst_grav%s.root" % (sim_masses[m_index + 1]))
bbj = s2.Get("no_cut_entries")
generatedEvents2 = bbj.GetBinContent(1)
tree2 = s2.Get("MyTree2")
writeplot(tree2, temp2, VAR, sigregcut, "trigWeight*puWeight*SF")
temp1.Scale(lumi * SF_tau21 * SF_tau21 * 0.01 / generatedEvents1)
temp2.Scale(lumi * SF_tau21 * SF_tau21 * 0.01 / generatedEvents2)
print temp2.Integral(), temp1.Integral()

# init interpolator
vals = [sim_masses[m_index + 1], sim_masses[m_index]]
hists = [temp1, temp2]
interpolation = hist.hist(vals, hists)

# histo
Signal_mX = interpolation.morph(m)
print Signal_mX.GetBinCenter(Signal_mX.GetMaximumBin())

Signal_mX.SetTitle(Signal_mX.GetName())
s_int = Signal_mX.Integral()

s_scale = (temp2.Integral() - temp1.Integral()) * (m - sim_masses[m_index]) / (sim_masses[m_index + 1] - sim_masses[m_index]) + temp1.Integral()

m2 = sim_masses[m_index + 1] + sim_masses[m_index] - m
S2 = interpolation.morph(m2)
# s_scale=S2.Integral()
Signal_mX.Scale(s_scale / s_int)
Sig_fake = copy.copy(Signal_mX)
Example #18
#get reco pt hist

from hist import hist
from read_h5 import read_input, save_jets

#get recopt
dset = read_input('input.h5')
RecoPt = save_jets(dset, 0, dset.shape[0])

hist(RecoPt, 'recoPt', title='Reco Pt')
Example #19
inpu1 = np.concatenate((recopt2, recoeta2, recophi2, recom2), axis=1)
inpu1 = normalize(inpu1, axis=0)

inpu2 = np.concatenate((lep1pt2, lep1eta2, lep1phi2, lep1m2), axis=1)
inpu2 = normalize(inpu2, axis=0)

inpu3 = np.concatenate((lep2pt2, lep2eta2, lep2phi2, lep2m2), axis=1)
inpu3 = normalize(inpu3, axis=0)

args1 = (inp1, inp2, lep1ele1, lep1mu1, lep1pr1, inp3, lep2ele1, lep2mu1,
         lep2pr1)
args2 = (inpu1, inpu2, lep1ele2, lep1mu2, lep1pr2, inpu3, lep2ele2, lep2mu2,
         lep2pr2)

in1 = np.concatenate(args1, axis=1)
in2 = np.concatenate(args2, axis=1)

print(in1[:20])

#use model to predict output pt
out1 = model.predict(in1)
out2 = model.predict(in2)

print(out1[:20])

#write the output data back into the ROOT file
fout = np.savetxt("BQuark1JetOutPt.csv", out1, delimiter=',')
fout = np.savetxt("BQuark2JetOutPt.csv", out2, delimiter=',')

hist(out1, 'pt')
Example #20
def clean_base_age(base,
                   y_col,
                   X_col,
                   model,
                   params,
                   random_state=1234,
                   train_size=0.8,
                   test_size=0.2,
                   random_split=True,
                   scoring='neg_mean_squared_error',
                   refit=True,
                   verbose=3,
                   cv=10,
                   n_jobs=-1):
    """

    Clean Base Age Documentation

    Function Overview
    
    This function cleans and processes the missing age values.
    
    Defaults
    
    clean_base_age(base)
    
    Parameters
    
    base - DataFrame, the base data to fill age column
    
    Returns
    
    base - DataFrame, the base data with filled age column
    
    Example

    clean_base_age(base = base)

    """

    # split the training data on whether age is missing or not
    base_train = base[base[y_col[0]].notnull()]
    base_test = base[base[y_col[0]].isnull()]

    # plot age distribution before imputing
    hist(dataset=base,
         num_var=y_col,
         title='Histogram of {} - Pre Imputation'.format(y_col[0]))

    # run age na fill model
    base_out = fit_age_mod(base_train=base_train,
                           base_test=base_test,
                           y_col=y_col,
                           X_col=X_col,
                           model=model,
                           params=params,
                           random_state=random_state,
                           train_size=train_size,
                           test_size=test_size,
                           random_split=random_split,
                           scoring=scoring,
                           refit=refit,
                           cv=cv,
                           n_jobs=n_jobs,
                           verbose=verbose)

    # plot age distribution after imputing
    hist(dataset=base_out,
         num_var=y_col,
         title='Histogram of {} - Post Imputation'.format(y_col[0]))

    return base_out
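In this example and in Example #26 below, hist is a plotting helper rather than a histogram container: it receives a DataFrame, a one-element list with the column to plot, and optional title/output arguments. A minimal sketch consistent with those call sites (the real helper lives in the project's own plotting module, so every line below is an assumption):

import os
import matplotlib.pyplot as plt

def hist(dataset, num_var, title=None, output_dir=None, output_fname=None):
    # Plot a histogram of one numeric column, then save it if an output
    # location is given, otherwise display it interactively.
    fig, ax = plt.subplots()
    ax.hist(dataset[num_var[0]].dropna())
    ax.set_xlabel(num_var[0])
    if title is not None:
        ax.set_title(title)
    if output_dir is not None and output_fname is not None:
        fig.savefig(os.path.join(output_dir, output_fname))
        plt.close(fig)
    else:
        plt.show()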
Example #21
import pandas as pd
from pandas import DataFrame
from pandas import Series
import numpy as np
import matplotlib.pyplot as plt
from hist import hist

df = DataFrame
sr = Series

ifn = "svc.preprocessed"
data = pd.read_csv(ifn, sep=r"\s+")
for i in data.columns:
    ofn = i + ".png"
    hist(np.array(data[i]), ofn=ofn)
Example #22
            ctype = meta['tags'][1]
        elif len(meta['tags']) > 2 and \
           meta['tags'][2] in contribution_types:
            ctype = meta['tags'][2]
    # if repo and not ctype:
    #     print("***", meta['tags'], repo)
    if ctype and 'tutorial' in ctype:
        ctype = "tutorials"
    if ctype and 'bug' in ctype:
        ctype = "bug-hunting"
    return {'repo': repo, 'type': ctype}


repos = 0
failed = 0
repo_stats = hist()
report_type_stats = {}
contribs_per_repo = {}
repo_set = set()
dev_repo_set = set()
bug_repo_set = set()

first_contribs = hist()
first_dev_contribs = hist()
first_bug_contribs = hist()
num_contribs = hist()
start_date = graphics_tr
while start_date < date(2018, 6, 1):
    first_contribs.fill(start_date, 0)
    num_contribs.fill(start_date, 0)
    first_dev_contribs.fill(start_date, 0)
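Several examples on this page (#13, #22, #24, #25, #30) construct hist() with no arguments and then call .fill(key) or .fill(key, value), treating it as a keyed counter. A dict-backed sketch of that usage, assuming fill(key) increments the bin by one and fill(key, 0) merely creates an empty bin:

from collections import defaultdict

class hist:
    # Minimal keyed counting histogram: bins are created on first fill.
    def __init__(self):
        self.counts = defaultdict(int)

    def fill(self, key, n=1):
        # Accumulate n under key; fill(key, 0) registers the bin with count 0.
        self.counts[key] += n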
Example #23
    lH0 = makehist('z50', dir + 'ZPrimeToQQ_50GeV_v4_mc.root')
    lH1 = makehist('z100', dir + 'ZPrimeToQQ_100GeV_v4_mc.root')
    lH2 = makehist('z150', dir + 'ZPrimeToQQ_150GeV_v4_mc.root')
    lH3 = makehist('z200', dir + 'ZPrimeToQQ_200GeV_v4_mc.root')
    lH4 = makehist('z250', dir + 'ZPrimeToQQ_250GeV_v4_mc.root')
    lH5 = makehist('z300', dir + 'ZPrimeToQQ_300GeV_v4_mc.root')
    lH = [lH0, lH1, lH2, lH3, lH4, lH5]
    lVar = [50, 100, 150, 200, 250, 300]
    return (lVar, lH)


if __name__ == "__main__":
    options = parser()
    #print options
    lVars, lHists = load()
    lHist = hist(lVars, lHists)
    if options.morph:
        lMorph = lHist.morph(150)
        lMorphA = [lHists[1], lMorph, lHists[2]]
        draw("morph", lMorphA)

    if options.shift:
        lShifts = lHist.shift(lHists[2], 5.)
        lShiftA = [lHists[2], lShifts[0], lShifts[1]]
        draw("shift", lShiftA)

    if options.smear:
        lSmears = lHist.smear(lHists[2], 0.1)
        lSmearA = [lHists[2], lSmears[0], lSmears[1]]
        lHists[2].Fit("gaus")
        lSmears[0].Fit("gaus")
Example #24
from beem.account import Account, Accounts
from beem.utils import parse_time, addTzInfo
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import sys
import shelve
from datetime import datetime
from hist import hist  # counting-hist container, as used elsewhere on this page

created = []
reputation = []
effective_sp = []
own_sp = []
posts = []
creator = hist()
posting_rewards = []
curation_rewards = []

sp_limit = 50
post_limit = 200
created_limit = addTzInfo(datetime(2018, 1, 1))
reward_limit = 2

s = shelve.open("accounts.shelf")
accounts = s['accounts']
s.close()

print(len(accounts))

for a in accounts:
Example #25
s.close()

dolphin = 10e6
orca = 100e6
whale = 1000e6

accs_with_votes = 0
accs_with_proxy = 0
accs_without = 0

gests_with_votes = 0
gests_with_proxy = 0
gests_without = 0

no_vote_gests_hist = []
top_gests_holders = hist()
top_gests_holders_novote = hist()

minnow_gests = 0
dolphin_gests = 0
orca_gests = 0
whale_gests = 0

minnow_accs = 0
dolphin_accs = 0
orca_accs = 0
whale_accs = 0

excludes = ['golosio', 'golos', 'steemit',
            'ned']  #, 'ned', 'dan', 'smooth', 'val-a', 'dantheman']
Example #26
def fit_sur_mod(base_train,
                base_test,
                y_col,
                X_col,
                model_name,
                model,
                params,
                random_state=123,
                train_size=0.8,
                test_size=0.2,
                random_split=True,
                scoring='accuracy',
                cv=10,
                n_jobs=-1,
                refit=True,
                verbose=0):
    """
    
    Fit Survival Model Documentation
    
    Function Overview
    
    This function fits an sklearn model for the Titanic competition.
    The process includes splitting the training data into training and validation (holdout) sets.
    SMOTE is applied to synthetically upsample the minority class of survived.
    Grid search cross validation is then applied to find the optimal parameters for the model.
    Once the optimal model is found, the model is validated using the validation (holdout) set.
    Learning curves, performance metrics and ROC curves are all used to evaluate the final model.
    The final model is then refitted to the entire training set and predictions are made for the test set.
    The final model and its predictions are saved for reproducibility and ensemble modelling.
    
    Defaults
    
    fit_sur_mod(base_train,
                base_test,
                y_col,
                X_col,
                model_name,
                model,
                params,
                random_state = 123,
                train_size = 0.8,
                test_size = 0.2,
                random_split = True,
                scoring = 'neg_mean_squared_error',
                cv = 10,
                n_jobs = -1,
                refit = True,
                verbose = 0
                )
    
    Parameters
    
    base_train - DataFrame, the base training data
    base_test - DataFrame, the base testing data
    y_col - List of Strings, the target y column
    X_col - List of Strings, the predictor X columns
    model_name - String, the name of the model to run, see model parameters in cons.py
    model - Sklearn Model, the model to fit, see model definition in cons.py
    params - Dictionary, the gbm model parameters to tune
    random_state - Integer, the random seed to set, default is 123
    train_size - Float, the proportion of data to have in training set, default is 0.8
    test_size - Float, the proportion of data to have in the testing set, default is 0.2
    random_split - Boolean, whether to randomise the data before splitting, default is True
    scoring - String, the type of scoring to perform on gbm model, default is 'accuracy'
    cv - Integer, the number of folds to use for cross fold validation when training the model, default is 10
    n_jobs - Integer, the number of cores to use when processing data, default is -1 for all cores
    refit - Boolean, whether to refit the best model following grid search cross validation hyperparameter tuning, default is True
    verbose - Integer, whether to print verbose updates when tuning model, default is 0
    
    Returns
    
    base - DataFrame, the base data with filled age column
    
    Example
 
    fit_sur_mod(base_train = train,
                base_test = test,
                y_col = ['Survived'],
                X_col = ['Pclass', 'SibSp', 'Parch', 'FamSize', 'Fare', 'Alone', 'Mr', 'Mrs', 'Ms', 'Priv', 'male', 'Embarked'],
                params = cons.test_age_gbm_params,
                random_state = 123,
                train_size = 0.8,
                test_size = 0.2,
                random_split = True,
                scoring = 'accuracy',
                cv = 10,
                n_jobs = -1,
                refit = True,
                verbose = 0
                )
    
    """

    # extract out target
    tar_col = y_col[0]

    # create predicted column name
    pred_col = '{}_pred'.format(tar_col)

    # create count plot of classifications
    hist(dataset=base_train,
         num_var=[tar_col],
         output_dir=cons.model_results_dir.format(model_name),
         output_fname=cons.hist_train_tar_fname.format(model_name))

    print('splitting data into training and validation sets ...')

    # split the training data
    X_train, X_valid, y_train, y_valid = train_test_split(
        base_train[X_col],
        base_train[y_col],
        train_size=train_size,
        test_size=test_size,
        shuffle=random_split,
        random_state=cons.random_state)

    print('running hyperparameter tuning ...')

    # define a SMOTE object
    smote = SMOTE(random_state=cons.random_state)

    # create pipeline with smote and model
    imba_pipeline = make_pipeline(smote, model)

    # extract out the full model name from the pipeline
    full_name = imba_pipeline.steps[1][0]

    # update the parameter dictionary with the full model name
    for key in list(params.keys()):
        params['{}__{}'.format(full_name, key)] = params.pop(key)

    # create grid search cross validation object
    mod_tuning = GridSearchCV(estimator=imba_pipeline,
                              param_grid=params,
                              cv=cv,
                              scoring=scoring,
                              n_jobs=n_jobs,
                              refit=refit,
                              verbose=verbose)

    # tune model
    mod_tuning.fit(X_train, y_train[y_col[0]])

    # extract out the model of best fit
    best_estimator = mod_tuning.best_estimator_
    best_model = best_estimator.named_steps[full_name]
    best_params = mod_tuning.best_params_
    best_score = mod_tuning.best_score_

    # print best parameters and best score
    print(best_params)
    print(best_score)

    # create learning curve
    learning_curve(model=best_model,
                   X_train=X_train,
                   y_train=y_train,
                   scoring='accuracy',
                   title='Learning Curve: {}'.format(model_name.upper()),
                   output_dir=cons.model_results_dir.format(model_name),
                   output_fname=cons.learning_curve_fnamt.format(model_name))

    # if the model is tree based
    if model_name in ['rfc', 'abc', 'etc', 'gbc']:

        # plot feature importance
        feat_imp(name=model_name,
                 model=best_model,
                 X_train=X_train,
                 output_dir=cons.model_results_dir.format(model_name),
                 output_fname=cons.feat_imp.format(model_name))

    print('predicting for validation set ...')

    # classify the validation set
    y_valid[pred_col] = best_model.predict(X_valid)

    print('evaluating validation predictions ...')

    # create count plot of classifications
    hist(dataset=y_valid,
         num_var=[pred_col],
         output_dir=cons.model_results_dir.format(model_name),
         output_fname=cons.hist_valid_preds_fname.format(model_name))

    # generate the classification metrics
    val_metrics = perf_metrics(
        y_obs=y_valid[tar_col],
        y_pred=y_valid[pred_col],
        target_type='class',
        output_dir=cons.model_results_dir.format(model_name),
        output_fname=cons.metrics_fname.format(model_name))

    # print the validation metrics
    print(val_metrics)

    # create a ROC curve
    roc_curve(obs=tar_col,
              preds=pred_col,
              dataset=y_valid,
              output_dir=cons.model_results_dir.format(model_name),
              output_fname=cons.roc_fname.format(model_name))

    print('refitting to all training data ...')

    final_model = make_pipeline(smote, best_model)

    # refit model to all training data
    final_model.fit(base_train[X_col], base_train[y_col[0]])

    # pickle the best model
    joblib.dump(final_model, cons.best_model_fpath.format(model_name))

    # predict for the base_test set
    base_test[tar_col] = best_model.predict(base_test[X_col])

    # create count plot of classifications
    hist(dataset=base_test,
         num_var=[tar_col],
         output_dir=cons.model_results_dir.format(model_name),
         output_fname=cons.hist_test_preds_fname.format(model_name))

    return base_test
Example #27
def execute(algs):
    # fetch shape of images
    img = images.fetch_disp("bm", 1, __library__)
    sh = (int(np.shape(img)[0]), int(np.shape(img)[1]))
    print sh
    shape = (1080, 1920)

    # check if ground is available
    img = images.fetch_ground(1, __library__)
    ground_avail = img is not None

    if ground_avail:
        # shape = list(np.array(shape) * 2)
        shape = (shape[0], shape[1])
    else:
        shape = (shape[0], shape[1] * 2)

    # for last in np.concatenate((np.array(range(100, __end__, 100)), [__end__])):

    
    print "starting..."
    writer = dict()
    overalls = dict()
    completeness = dict()
    ymax = dict()
    
    for alg in algs:
        bins = range(-__bin__[alg],__bin__[alg])
    
        writer[alg] = cv2.VideoWriter()
        overalls[alg] = np.zeros((__bin__[alg]*2-1,), dtype='float32')
        completeness[alg] = 0.
        ymax[alg] = -1
        if __analysis__:
            shape = shape[1], shape[0]
            fn = __library__ + "/video/analysis %s.avi" % alg
            if True:#not os.path.exists(fn):
                writer[alg].open(fn, cv2.cv.CV_FOURCC('I', 'Y', 'U', 'V'), __fps__[__library__], shape)
            else:
                writer[alg] = None
        else:
            writer[alg].open(__library__ + "/video/output %s.avi" % alg, cv2.cv.CV_FOURCC('I', 'Y', 'U', 'V'), __fps__[__library__], shape)

    global __begin__
    global __end__

    tm = timer(__begin__, __end__)
    mx = 0

    for i in range(__begin__, __end__+1):
        left = images.fetch_orig(i, __library__)
        ground = images.fetch_ground(i, __library__)
        #if ground is not None:
        #    top = images.side_by_side(orig, ground)
        #else:
        #    top = orig
        for alg in algs:
            disp = images.fetch_disp(alg, i, __library__)
            bins = range(-__bin__[alg],__bin__[alg])
            
            #if writer[alg] is None:
            #    continue
        
            if disp is not None and ground_avail:
                if len(disp.shape) == 3:
                    disp = disp[:,:,0]
                if len(ground.shape) == 3:
                    ground = ground[:,:,0]
                diff = ((ground * 1.) - (disp * 1.))

                # exclude not calculated
                # diff[disp == 0]     = 0
                # diff[ground == 0]   = 0
                # exclude incalculable values
                diff_flat = diff[np.where(ground < __bin__[alg])]
                disp_flat = disp[np.where(ground < __bin__[alg])]
                flat_ground = ground[np.where(ground < __bin__[alg])]

                diff_flat = diff_flat[np.where(disp_flat != 0)]
                flat_ground = flat_ground[np.where(disp_flat != 0)]
                diff_flat = diff_flat[np.where(flat_ground != 0)]
                
                top = images.side_by_side(left, np.abs(disp).astype('uint8'))
                
                if __analysis__:
                    flat = disp.flatten()
                    
                    p = np.size(np.nonzero(flat)) / float(np.size(flat)) * 100.
                    pp.figure(figsize=(sh[1]/80, sh[0]/80), dpi=80)
                    pp.title('Histogram of errors (%d%% completeness)' % (p))
                    pp.xlabel('Error')
                    pp.ylabel('Norm occurrences')
                    # diff_flat = diff.flatten()
                    # count, bins = np.histogram(diff, bins=bins, normed=True)
                    # n, bins, patches = pp.hist(diff, bins=range(-96, 96, 10), normed=True)

                    neg_c, neg_b, pos_c, pos_b = hist(diff_flat.astype('int32'))
                    bns = np.concatenate((neg_b[::-1], pos_b))
                    cnt = np.concatenate((neg_c[::-1], pos_c))
                    cnt = (cnt / float(np.sum(cnt))).astype('float32')
                    
                    count = np.zeros(np.array(bins[:-1]).shape, dtype='float32')
                    offset = bns[0] - bins[0]
                    try:
                        if offset < 0:
                            cnt = cnt[-offset:]
                            bns = bns[-offset:]
                            count[:len(cnt)] = cnt
                        else:
                            if (len(bins) - len(bns) - offset -1) > 0:
                                count[offset:-(len(bins) - len(bns) - offset - 1)] = cnt
                            else:
                                count[offset:] = cnt
                        
                    except ValueError:
                        print 'alg', alg
                        print 'frame', i
                        print 'Skip frame'
                        print bins
                        print bns
                        raw_input()
                    

                    if ymax[alg] == -1: ymax[alg] = np.max(count) * 1.75

                    if np.average(count) == 0.0:
                        print "no count"
                        print cnt
                        print count
                        print bns
                        print bins
                        print offset
                        # raw_input()
                    # count[np.where(count == neg_b)] = neg_c
                    # count[np.where(count == pos_c)] = pos_c
                    pp.bar(bins[:-1], count, width=1.0)

                    # overalls[alg] = np.concatenate((overalls[alg], diff_flat))
                    overalls[alg] = (i * overalls[alg] + count) / (i+1)

                    pp.xlim([-__bin__[alg], __bin__[alg]])
                    pp.ylim([0, ymax[alg]])
                    pp.gray()
                    pp.savefig('tmp/tmp.png', bbox_inches=0)
                    pp.cla()
                    pp.close()
                    diff_img = cv2.imread('tmp/tmp.png')
                    diff_img = images.resize(diff_img, np.shape(diff))

                    completeness[alg] = (i * completeness[alg] + p) / (i+1)
                    
                    pp.figure(figsize=(sh[1]/80, sh[0]/80), dpi=80)
                    pp.title('Overall histogram of errors (%d%% completeness)' % int(completeness[alg]))
                    pp.xlabel('Error')
                    pp.ylabel('Norm occurrences')
                    
                    #neg_c, neg_b, pos_c, pos_b = hist(overalls[alg].astype('int32'))
                    #bins = np.concatenate((neg_b, pos_b))
                    #count = np.concatenate((neg_c, pos_c))
                    pp.bar(bins[:-1], overalls[alg], width=1.0)
                    # n, bins, patches = pp.hist(images.remove_zero(overalls[alg]), bins=range(-96, 96, 10), normed=True)
                    
                    pp.xlim([-__bin__[alg], __bin__[alg]])
                    pp.ylim([0, ymax[alg]])
                    pp.gray()
                    pp.savefig('tmp/tmp.png', bbox_inches=0)
                    pp.cla()
                    pp.close()
                    overall_diff_img = cv2.imread('tmp/tmp.png')
                    overall_diff_img = images.resize(overall_diff_img, np.shape(diff))
                    
                    bottom = images.side_by_side(diff_img, overall_diff_img)
                else:
                    bottom = images.side_by_side(disp, diff)

            else:
                bottom = disp

            
            '''
            top = images.side_by_side(left, disp)
        
            diff = np.array(np.abs(((ground * 1.) - (disp * 1.))), dtype=ground.dtype)

            # exclude not calculated
            diff[disp == 0] = 0
            diff[ground == 0] = 0
            # exclude incalculable values
            # diff[ground > 16*6] = 0

            bottom = images.side_by_side(ground, diff)
            '''
            
            output = images.top_and_bottom(top, bottom)
            newi = images.resize(output, (1080, 1920))
            writer[alg].write(images.drawable(newi, inv=False)) 
        tm.progress(i)

    pp.close('all')
    print "done"
Example #28
BACKGROUND_E = pygame.transform.scale(bg, (WIDTH, HEIGHT))
# Game title
pygame.display.set_caption(TITULO)

# Print instructions
print('*' * len(TITULO))
print(TITULO)
print('*' * len(TITULO))
print('Utilize as setas do teclado para andar e pular.')

# Guard against crashes.
try:
    while state != QUIT:
        state = intro(screen)
        if state == GAME:
            state = hist()
            if state == GAME:
                state, init_time = instru()
                if state == GAME:
                    sucesso, bank = game_screen(screen)
                    if sucesso == 1:
                        sucesso, bank, midtime1 = game_screen2(screen, bank)
                        if sucesso == 1:
                            state = hist2()
                            if state == GAME:
                                state, midtime2 = instru2()
                                if state == GAME:
                                    screen_s = pygame.display.set_mode(
                                        (WIDTH_S, HEIGHT_S))
                                    sucesso, bank, total_time = game_screen3(
                                        screen_s, bank)
Example #29
def execute(algs):
    # fetch shape of images
    img = images.fetch_disp("bm", 1, __library__)
    sh = (int(np.shape(img)[0]), int(np.shape(img)[1]))
    print sh
    shape = (1080, 1920)

    # check if ground is available
    img = images.fetch_ground(1, __library__)
    ground_avail = img is not None

    if ground_avail:
        # shape = list(np.array(shape) * 2)
        shape = (shape[0], shape[1])
    else:
        shape = (shape[0], shape[1] * 2)

    # for last in np.concatenate((np.array(range(100, __end__, 100)), [__end__])):

    print "starting..."
    writer = dict()
    overalls = dict()
    completeness = dict()
    ymax = dict()

    for alg in algs:
        bins = range(-__bin__[alg], __bin__[alg])

        writer[alg] = cv2.VideoWriter()
        overalls[alg] = np.zeros((__bin__[alg] * 2 - 1, ), dtype='float32')
        completeness[alg] = 0.
        ymax[alg] = -1
        if __analysis__:
            shape = shape[1], shape[0]
            fn = __library__ + "/video/analysis %s.avi" % alg
            if True:  #not os.path.exists(fn):
                writer[alg].open(fn, cv2.cv.CV_FOURCC('I', 'Y', 'U', 'V'),
                                 __fps__[__library__], shape)
            else:
                writer[alg] = None
        else:
            writer[alg].open(__library__ + "/video/output %s.avi" % alg,
                             cv2.cv.CV_FOURCC('I', 'Y', 'U', 'V'),
                             __fps__[__library__], shape)

    global __begin__
    global __end__

    tm = timer(__begin__, __end__)
    mx = 0

    for i in range(__begin__, __end__ + 1):
        left = images.fetch_orig(i, __library__)
        ground = images.fetch_ground(i, __library__)
        #if ground is not None:
        #    top = images.side_by_side(orig, ground)
        #else:
        #    top = orig
        for alg in algs:
            disp = images.fetch_disp(alg, i, __library__)
            bins = range(-__bin__[alg], __bin__[alg])

            #if writer[alg] is None:
            #    continue

            if disp is not None and ground_avail:
                if len(disp.shape) == 3:
                    disp = disp[:, :, 0]
                if len(ground.shape) == 3:
                    ground = ground[:, :, 0]
                diff = ((ground * 1.) - (disp * 1.))

                # exclude not calculated
                # diff[disp == 0]     = 0
                # diff[ground == 0]   = 0
                # exclude incalculable values
                diff_flat = diff[np.where(ground < __bin__[alg])]
                disp_flat = disp[np.where(ground < __bin__[alg])]
                flat_ground = ground[np.where(ground < __bin__[alg])]

                diff_flat = diff_flat[np.where(disp_flat != 0)]
                flat_ground = flat_ground[np.where(disp_flat != 0)]
                diff_flat = diff_flat[np.where(flat_ground != 0)]

                top = images.side_by_side(left, np.abs(disp).astype('uint8'))

                if __analysis__:
                    flat = disp.flatten()

                    p = np.size(np.nonzero(flat)) / float(np.size(flat)) * 100.
                    pp.figure(figsize=(sh[1] / 80, sh[0] / 80), dpi=80)
                    pp.title('Histogram of errors (%d%% completeness)' % (p))
                    pp.xlabel('Error')
                    pp.ylabel('Norm occurrences')
                    # diff_flat = diff.flatten()
                    # count, bins = np.histogram(diff, bins=bins, normed=True)
                    # n, bins, patches = pp.hist(diff, bins=range(-96, 96, 10), normed=True)

                    neg_c, neg_b, pos_c, pos_b = hist(
                        diff_flat.astype('int32'))
                    bns = np.concatenate((neg_b[::-1], pos_b))
                    cnt = np.concatenate((neg_c[::-1], pos_c))
                    cnt = (cnt / float(np.sum(cnt))).astype('float32')

                    count = np.zeros(np.array(bins[:-1]).shape,
                                     dtype='float32')
                    offset = bns[0] - bins[0]
                    try:
                        if offset < 0:
                            cnt = cnt[-offset:]
                            bns = bns[-offset:]
                            count[:len(cnt)] = cnt
                        else:
                            if (len(bins) - len(bns) - offset - 1) > 0:
                                count[offset:-(len(bins) - len(bns) - offset -
                                               1)] = cnt
                            else:
                                count[offset:] = cnt

                    except ValueError:
                        print 'alg', alg
                        print 'frame', i
                        print 'Skip frame'
                        print bins
                        print bns
                        raw_input()

                    if ymax[alg] == -1: ymax[alg] = np.max(count) * 1.75

                    if np.average(count) == 0.0:
                        print "no count"
                        print cnt
                        print count
                        print bns
                        print bins
                        print offset
                        # raw_input()
                    # count[np.where(count == neg_b)] = neg_c
                    # count[np.where(count == pos_c)] = pos_c
                    pp.bar(bins[:-1], count, width=1.0)

                    # overalls[alg] = np.concatenate((overalls[alg], diff_flat))
                    overalls[alg] = (i * overalls[alg] + count) / (i + 1)

                    pp.xlim([-__bin__[alg], __bin__[alg]])
                    pp.ylim([0, ymax[alg]])
                    pp.gray()
                    pp.savefig('tmp/tmp.png', bbox_inches=0)
                    pp.cla()
                    pp.close()
                    diff_img = cv2.imread('tmp/tmp.png')
                    diff_img = images.resize(diff_img, np.shape(diff))

                    completeness[alg] = (i * completeness[alg] + p) / (i + 1)

                    pp.figure(figsize=(sh[1] / 80, sh[0] / 80), dpi=80)
                    pp.title(
                        'Overall histogram of errors (%d%% completeness)' %
                        int(completeness[alg]))
                    pp.xlabel('Error')
                    pp.ylabel('Norm occurrences')

                    #neg_c, neg_b, pos_c, pos_b = hist(overalls[alg].astype('int32'))
                    #bins = np.concatenate((neg_b, pos_b))
                    #count = np.concatenate((neg_c, pos_c))
                    pp.bar(bins[:-1], overalls[alg], width=1.0)
                    # n, bins, patches = pp.hist(images.remove_zero(overalls[alg]), bins=range(-96, 96, 10), normed=True)

                    pp.xlim([-__bin__[alg], __bin__[alg]])
                    pp.ylim([0, ymax[alg]])
                    pp.gray()
                    pp.savefig('tmp/tmp.png', bbox_inches=0)
                    pp.cla()
                    pp.close()
                    overall_diff_img = cv2.imread('tmp/tmp.png')
                    overall_diff_img = images.resize(overall_diff_img,
                                                     np.shape(diff))

                    bottom = images.side_by_side(diff_img, overall_diff_img)
                else:
                    bottom = images.side_by_side(disp, diff)

            else:
                bottom = disp
            '''
            top = images.side_by_side(left, disp)
        
            diff = np.array(np.abs(((ground * 1.) - (disp * 1.))), dtype=ground.dtype)

            # exclude not calculated
            diff[disp == 0] = 0
            diff[ground == 0] = 0
            # exclude incalculable values
            # diff[ground > 16*6] = 0

            bottom = images.side_by_side(ground, diff)
            '''

            output = images.top_and_bottom(top, bottom)
            newi = images.resize(output, (1080, 1920))
            writer[alg].write(images.drawable(newi, inv=False))
        tm.progress(i)

    pp.close('all')
    print "done"
Example #30
                                                      fop_start,
                                                      len(follows)))
    while f['timestamp'] - p['created'] < AUTO_FOLLOW_TIME:
        if p['created'] > f['timestamp']:
            fop_start = fop_idx
        elif f['following'] == p['author']:
            # print(p['created'], p['author'], f)
            posts[post_idx]['fops'].append(f)

        fop_idx += 1
        f = follows[fop_idx]

    last_created = p['created']
    post_idx += 1

total_fops_per_day = hist()
for fop in follows:
    total_fops_per_day.fill(fop['timestamp'].date())

auto_fops_per_day = []
post_dates = []
mean_fops = []
median_fops = []
max_fops = []
last_date = None
posts_per_day = hist()
foplist = []
authorset = set()
unique_authors_per_day = []
all_authors = set()
unique_authors = []
Example #31
def comp(name, schemes, alist):
	## compares two or more histograms with each other

	## we need 1 plot pad + 1 ratio pad
	if alist.has("ratio") and alist.get("ratio") == "y":
		schemes[0].mypaf.divideCanv(1, 1, True)
	else:
		schemes[0].mypaf.divideCanv(1, 1, False)

	## what to compare? schemes (histograms between schemes), sources (inside every scheme), categs (inside every scheme)
	comp = lib.useVal("schemes", alist.get("comp"))

	## for the drawing stuff, legend, etc. we need an empty histogram
	## that we draw on top of everything 
	h1 = hist.hist(schemes[0].mypaf, name, schemes[0].getHist().getDim(), alist.argstring)
	h1.reinit(schemes[0].getHist())

	a = []
	for scheme in schemes:
		a.append(scheme.getHist().alist.argstring)
		#scheme.getHist().resetArgs(alist.argstring)
		#scheme.getHist().applyArgs()

	h = []
	l = []
	d = []

	al = [alist.get("name" + str(i+1)) for i in range(len(schemes))]

	## compare sources per scheme and categ
	if comp == "sources":
		for i, scheme in enumerate(schemes):
			h.append([])
			l.append([])
			d.append([])
			for cidx in range(len(scheme.getHist().categs)):
				h[i].append([scheme.getHist()                       for sidx in range(len(scheme.getHist().sources))])
				l[i].append([scheme.getHist().getLNames(sidx, cidx) for sidx in range(len(scheme.getHist().sources))])
				d[i].append([sidx, cidx]                                                                             )

	## compare categs per scheme and source
	elif comp == "categs":
		for i, scheme in enumerate(schemes):
			h.append([])
			l.append([])
			d.append([])
			for sidx in range(len(scheme.getHist().sources)):
				h[i].append([scheme.getHist()                       for cidx in range(len(scheme.getHist().categs))])
				l[i].append([scheme.getHist().categs[cidx]          for cidx in range(len(scheme.getHist().categs))])
				d[i].append([sidx, cidx]                                                                            )

	## compare schemes per source and categ
	else:
		for sidx in range(len(schemes[0].getHist().sources)):
			h.append([])
			l.append([])
			d.append([])
			for cidx in range(len(schemes[0].getHist().categs)):
				h[sidx].append([scheme.getHist()                    for scheme in schemes])
				l[sidx].append([al[i] if al[i] != "" else scheme.name for i, scheme in enumerate(schemes)])
				d[sidx].append([sidx, cidx]                                               )

	
	## do the loop
	for i, hrow in enumerate(h):
		for j, hline in enumerate(hrow):

			## main plot
			schemes[0].mypaf.pads[0].cd()
			hratio = rstuff.copyTH1F(hline[0].getH(d[i][j][0], d[i][j][1]))

			for k in range(len(hline)):
				hline[k].alist.reinit(alist)

			# draw scheme 2
			sa = styleargs.styleargs(alist.get("style"), "2", lib.useVal("ROOT.kBlack", alist.get("color2")))
			sa.set("labelsizex", "0")
			hline[1].alist.setArgs(sa.alist.argstring)
			hline[1].drawSingle(d[i][j][0], d[i][j][1], 0, "", False, False)

			# draw schemes > 2
			for k in range(2, len(hline)):
				sa.reinit(alist.get("style"), str(k+1), lib.useVal("ROOT.kBlack", alist.get("color" + str(k+1))))
				sa.set("labelsizex", "0")
				hline[k].alist.setArgs(sa.alist.argstring)
				hline[k].drawSingle(d[i][j][0], d[i][j][1], 0, "same", False, False)

			# draw scheme 1
			sa.reinit(alist.get("style"), "1", lib.useVal("ROOT.kBlack", alist.get("color1")))
			sa.set("labelsizex", "0" )
			sa.set("draw1mode" , "pe")
			hline[0].alist.setArgs(sa.alist.argstring)
			hline[0].runPreDraw(0)
			hline[0].drawH(d[i][j][0], d[i][j][1], 0, "same", False, False)
			# do not drawSingle as it resets global style


			sc = [0 for h in hline]
			if hline[0].getH(d[i][j][0], d[i][j][1]).Integral() > 0:
				sc = [round(h.getH(d[i][j][0], d[i][j][1]).Integral() / hline[0].getH(d[i][j][0], d[i][j][1]).Integral() * 100,1) for h in hline]
			lnames = l[i][j]
			if alist.has("sce2"):
				lnames = [l[i][j][0]]
				for li, ll in enumerate(l[i][j][1:]):
					if hline[li+1].alist.has("sce" + str(li+1)): 
						add = " (" + str(sc[li+1]) + "% +/- " + str(round(100 * float(hline[li+1].alist.get("sce" + str(li+1))),1) * sc[li+1]) + "%)"
					else: 
						add = ""
					lnames.append(ll + add)
			leg = rstuff.legend([h.getH(d[i][j][0], d[i][j][1]) for h in hline], lnames, [h.d for h in hline])
			leg.Draw("same")
			
			# ratio plot
			if alist.has("ratio") and alist.get("ratio") == "y":
				schemes[0].mypaf.pads[1].cd()
				schemes[0].mypaf.pads[1].SetLogy(0)

				hratio.Divide(hline[1].getH(d[i][j][0], d[i][j][1]))
				hratio = rstuff.setRatioStyle(hratio, l[i][j][0], l[i][j][1], hline[0].labels[0], alist)
				hratio.SetStats(0)
				hratio.Draw("pe")

				line = rstuff.line()
				line.DrawLine(hratio.GetXaxis().GetXmin(), 1.00, hratio.GetXaxis().GetXmax(), 1.00)
				#fit  = rstuff.fit(hratio, "line")
				#fit  = rstuff.lineStyle(fit, 2, 1, ROOT.kRed+1)
				#fit.Draw("l")

			## draw style stuff
			schemes[0].mypaf.saveCanv(name + "_" + schemes[0].getHist().sources[d[i][j][0]].name + "_" + schemes[0].getHist().categs[d[i][j][1]])
			del hratio	
	
	for i, scheme in enumerate(schemes):
		scheme.getHist().alist.resetArgs(a[i])

	return h1
Example #32
def create(options):
    # load histograms
    lFile  = r.TFile.Open(options.ifile);
    signal = "catp2"
    lHSig = lFile.Get(signal).Clone() # this is the matched signal
    lHSig.SetDirectory(0)
    lHOthers = []
    for key in lFile.GetListOfKeys():
        lh = key.ReadObj();
        if lh.GetName() == signal or lh.GetName() == "catp2_scaleUp" or lh.GetName() == "catp2_scaleDown":
            print "skipping ",lh.GetName()
            continue
        lh.SetDirectory(0)
        lHOthers.append(lh)
    for h in lHOthers:
        h.SetDirectory(0)
    lFile.Close()

    # hist container
    mass = 80.4
    hist_container = hist( [mass],[lHSig] );
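    # hist([mass], [lHSig]) wraps the template(s); the shift()/smear() calls below return [up, down] variations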

    # shift (to measure mass scale)
    mass_shift = 0.995
    shift_val = mass - mass*mass_shift;
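    # i.e. 80.4 - 80.4*0.995 = 0.402 GeV, a 0.5% shift of the 80.4 GeV peak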
    tmp_shifted_h = hist_container.shift( lHSig, shift_val);

    # smear (to measure mass resolution)
    #res_shift = 1.01
    res_shift = 1.1
    smear_val = res_shift - 1.;
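    # res_shift = 1.1 gives smear_val = 0.1, i.e. a 10% widening of the resolution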
    tmp_smeared_h =  hist_container.smear( tmp_shifted_h[0] , smear_val)

    # update central
    hmatched_new_central = tmp_smeared_h[0];
    hmatched_new_central.SetName("catp2"); hmatched_new_central.SetTitle("catp2");
    
    # get shift up and down variations
    #mass_sigma = 1000 # 1/mass_sigma goes in the datacard
    #mass_shift_unc = 0.01 * mass_sigma;
    scale_opt = options.scale
    hmatchedsys_shift = hist_container.shift(hmatched_new_central, scale_opt)
    hmatchedsys_shift[0].SetName("catp2_scaleUp"); hmatchedsys_shift[0].SetTitle("catp2_scaleUp")
    hmatchedsys_shift[1].SetName("catp2_scaleDown");  hmatchedsys_shift[1].SetTitle("catp2_scaleDown");

    # get res up/down
    #res_sigma = 10 # 1/res_sigma goes in the datacard
    #res_shift_unc = 0.05 * res_sigma; 
    smear_opt = options.smear
    hmatchedsys_smear = hist_container.smear(hmatched_new_central, smear_opt)
    hmatchedsys_smear[0].SetName("catp2_smearUp"); hmatchedsys_smear[0].SetTitle("catp2_smearUp");
    hmatchedsys_smear[1].SetName("catp2_smearDown"); hmatchedsys_smear[1].SetTitle("catp2_smearDown");

    # clone and save up and down variations in .root file
    lOutFile  = r.TFile.Open(options.ifile.replace('_pre.root','.root'),"RECREATE");
    hmatched_new_central.Write(); 
    hmatchedsys_shift[0].Write();
    hmatchedsys_shift[1].Write();
    hmatchedsys_smear[0].Write();
    hmatchedsys_smear[1].Write();
    #lHSig.Write();
    for val in lHOthers:
        val.Write();
    lOutFile.ls()
    lOutFile.Close()
Example #33
0
def fit_age_mod(base_train,
                base_test,
                y_col,
                X_col,
                model,
                params,
                random_state=123,
                train_size=0.8,
                test_size=0.2,
                random_split=True,
                scoring='neg_mean_squared_error',
                cv=10,
                n_jobs=-1,
                refit=True,
                verbose=0):
    """
    
    Fit Age Model Documentation
    
    Function Overview
    
    This function fits an age model for the Titanic competition in order to impute missing values.
    The process includes splitting the training data into training and validation (holdout) sets.
    Grid search cross validation is then applied to find the optimal parameters for the model.
    Once the optimal model is found, the model is validated using the validation (holdout) set.
    Performance metrics and residual plots are all used to evaluate the final model.
    The final model is then refitted to the entire training set and predictions are made for the test set.
    
    Defaults
    
    fit_age_mod(base_train,
                base_test,
                y_col,
                X_col,
                model,
                params,
                random_state = 123,
                train_size = 0.8,
                test_size = 0.2,
                random_split = True,
                scoring = 'neg_mean_squared_error',
                cv = 10,
                n_jobs = -1,
                refit = True,
                verbose = 0
                )
    
    Parameters
    
    base_train - DataFrame, the base training data
    base_test - DataFrame, the base testing data
    y_col - List of Strings, the target y column
    X_col - List of Strings, the predictor X columns
    model - Estimator, the model to tune via grid search cross validation
    params - Dictionary, the gbm model parameters to tune
    random_state - Integer, the random seed to set, default is 123
    train_size - Float, the proportion of data to have in training set, default is 0.8
    test_size - Float, the proportion of data to have in the testing set, default is 0.2
    random_split - Boolean, whether to randomise the data before splitting, default is True
    scoring - String, the type of scoring to perform on gbm model, default is 'neg_mean_squared_error'
    cv - Integer, the number of folds to use for cross fold validation when training the model, default is 10
    n_jobs - Integer, the number of cores to use when processing data, default is -1 for all cores
    refit - Boolean, whether to refit the best model following grid search cross validation hyperparameter tuning, default is True
    verbose - Integer, whether to print verbose updates when tuning model, default is 0
    
    Returns
    
    base - DataFrame, the base data with filled age column
    
    Example
 
    fit_age_mod(base_train = train,
                base_test = test,
                y_col = ['Age'],
                X_col = ['Pclass', 'SibSp', 'Parch', 'FamSize', 'Fare', 'Alone', 'Mr', 'Mrs', 'Ms', 'Priv', 'male', 'Embarked'],
                params = cons.test_age_gbm_params,
                random_state = 123,
                train_size = 0.8,
                test_size = 0.2,
                random_split = True,
                scoring = 'neg_mean_squared_error',
                cv = 10,
                n_jobs = -1,
                refit = True,
                verbose = 0
                )
    
    """

    # extract out target
    tar_col = y_col[0]

    # create predicted column name
    pred_col = '{}_pred'.format(tar_col)

    # split the training data
    X_train, X_valid, y_train, y_valid = train_test_split(
        base_train[X_col],
        base_train[y_col],
        train_size=train_size,
        test_size=test_size,
        shuffle=random_split,
        random_state=random_state)

    # create grid search cross validation object
    mod_tuning = GridSearchCV(estimator=model,
                              param_grid=params,
                              cv=cv,
                              scoring=scoring,
                              n_jobs=n_jobs,
                              refit=refit,
                              verbose=verbose)

    # tune model
    mod_tuning.fit(X_train, y_train[y_col[0]])

    # extract out the model of best fit
    model = mod_tuning.best_estimator_
    best_params = mod_tuning.best_params_
    best_score = mod_tuning.best_score_

    # print tuning results
    print(best_params)
    print(best_score)

    # classify the validation set
    y_valid[pred_col] = model.predict(X_valid)

    # generate the regression metrics
    reg_perf_metrics = perf_metrics(y_obs=y_valid[tar_col],
                                    y_pred=y_valid[pred_col],
                                    target_type='reg')

    # output performance metrics
    print(reg_perf_metrics)

    # refit model to all training data
    model.fit(base_train[X_col], base_train[y_col].values.ravel())

    # predict for the base_test set
    base_test[tar_col] = model.predict(base_test[X_col])

    # create prediction, observation and residual plots
    preds_obs_resids(obs=tar_col, preds=pred_col, dataset=y_valid)

    # plot predicted age
    hist(dataset=y_valid,
         num_var=[pred_col],
         title='Histogram of Predicted {} - Validation Set'.format(tar_col))

    # plot predicted age
    hist(dataset=base_test,
         num_var=[tar_col],
         title='Histogram of Predicted {} - Test Set'.format(tar_col))

    # re-concatenate the base training and base test to update base data
    base = pd.concat(objs=[base_train, base_test], axis=0)

    return base
Example #34
0
#plot the test and train resolution in one graph
fig = plt.figure()

plt.plot(test_res, '--', color='olive', linewidth=1)
#plt.scatter(test_losses[i], marker=marker[i], s=25, facecolors='none', edgecolors='olive')
plt.plot(train_res, color='darkblue', linewidth=1)
#plt.scatter(test_losses[i], marker=marker[i], s=25, facecolors='none', edgecolors='darkblue')
plt.legend(['Validation', 'Training'], loc='upper right')
plt.xlabel('Number of epochs')
plt.ylabel('Resolution')
fig.savefig('final_res.png')
plt.show()

#plot a histogram of the targets
hist(targets, 'Pt', title='Targets')

#plot a histogram of the output
hist(output, 'Pt', title='Network output')

#get the recopt in order to make the stacked histogram of all resolutions
dset = read_input('input.h5')
RecoPt = save_jets(dset, 0, dset.shape[0])

#get the reco resolutions
res = []
res1 = (targets - output) / targets
res.append(res1)
res2 = (RecoPt[:29164] - output) / RecoPt[:29164]
res.append(res2)
res3 = (RecoPt[:29164] - targets) / RecoPt[:29164]
Example #35
0
total_vote_count = m.Votes.count({'rshares': {"$exists": True}})
total_vote_rshares = 0
syv_count = {}
syv_rshares = {}
bot_count = 0
bot_rshares = 0
voters = {}
all_voters = set()

for signer in signers:
    syv_rshares[signer] = 0
    syv_count[signer] = 0
    voters[signer] = set()

keys = hist()
keys_val = hist()
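# hist() is used here as a counting container: fill(key) counts votes per signing key,
# fill(key, rshares) accumulates rshares per key (see the loop below)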

for op in m.Votes.find({'rshares': {"$exists": True}}):
    # print(op)
    total_vote_rshares += op['rshares']
    all_voters |= set([op['voter']])
    if not op['signer']:
        continue
    for key in op['signer']:
        keys.fill(key)
        keys_val.fill(key, op['rshares'])
    if op['voter'] in bots:
        bot_count += 1
        bot_rshares += op['rshares']
        continue
Example #36
0
def get_var_log_bins(nbins, minval, maxval):
    logmin = math.log10(minval)
    logmax = math.log10(maxval)
    binwidth = (logmax - logmin) / nbins
    return [
        float(math.pow(10, logmin + i * binwidth))
        for i in range(0, nbins + 1)
    ]
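# e.g. get_var_log_bins(3, 1.0, 1000.0) -> [1.0, 10.0, 100.0, 1000.0]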


vote_start = addTzInfo(datetime(2018, 8, 1))
vote_end = addTzInfo(datetime(2018, 8, 8))
date = datetime(2018, 7, 25)

rshare_distribution_hf19 = []
rshares_per_day_hf19 = hist()
rshare_distribution_hf20 = []
rshares_per_day_hf20 = hist()

while date < datetime(2018, 8, 8):
    try:
        print("processing ", date)
        s = shelve.open('posts-2018-%02d-%02d.shelf' % (date.month, date.day))
        posts = s['posts']
        s.close()
    except Exception:
        posts = []
    date += timedelta(days=1)

    for post in posts:
Example #37
0
def main(): 
	
	#ROOT::Math::MinimizerOptions::SetDefaultMinimizer(fitter)
	# ROOT.Math.MinimizerOptions.SetDefaultMinimizer("Minuit2");

	# output directories
	if not os.path.exists('plots'+str(options.jetNum)+'/results'): os.makedirs('plots'+str(options.jetNum)+'/results')
	if not os.path.exists('plots'+str(options.jetNum)+'/yields'): os.makedirs('plots'+str(options.jetNum)+'/yields')
	if not os.path.exists('plots'+str(options.jetNum)+'/shapes'): os.makedirs('plots'+str(options.jetNum)+'/shapes')
	if not os.path.exists('plots'+str(options.jetNum)+'/rhalphabet'): os.makedirs('plots'+str(options.jetNum)+'/rhalphabet')
	if not os.path.exists('plots'+str(options.jetNum)+'/datacards'): os.makedirs('plots'+str(options.jetNum)+'/datacards')

	idir = "../sklimming/sklim-v0-Jun16";
	# idir = "/tmp/cmantill/"

	####################################################################################
	# do mc looping - a class that holds histograms
	bkgContainers = None;
	sigContainers = None;

	### signal corrections and other directors
	## Mass shift [GeV]     : -0.590 +/- 0.872
	## Mass resolution SF:  1.094 +/- 0.123	
	# -1.6 GeV +/- 1.2 GeV
	sig_mass_shift = 0.99;
	sig_mass_shift_unc = 0.015;
	# sig_res_shift = 0.95;
	sig_res_shift = 1.094;
	sig_res_shift_unc = 0.123;
	qcdSF = 100;
	
	if options.doMCLooping: 

		sigContainers = [];
		sigNames = [];

		sigNames.append("VectorDiJet1Jet_M50_mc.root")
		sigNames.append("VectorDiJet1Jet_M75_mc.root")
		sigNames.append("VectorDiJet1Jet_M100_mc.root")		
		sigNames.append("VectorDiJet1Jet_M125_mc.root")		
		sigNames.append("VectorDiJet1Jet_M150_mc.root")		
		sigNames.append("VectorDiJet1Jet_M200_mc.root")	
		sigNames.append("VectorDiJet1Jet_M250_mc.root")		
		sigNames.append("VectorDiJet1Jet_M300_mc.root")	
		sigLabels = ["Z\'(50 GeV)","Z\'(75 GeV)","Z\'(100 GeV)","Z\'(125 GeV)","Z\'(150 GeV)","Z\'(200 GeV)","Z\'(250 GeV)","Z\'(300 GeV)"]
		sigTags = ["Zprime50","Zprime75","Zprime100","Zprime125","Zprime150","Zprime200","Zprime250","Zprime300"];

		for i in range(0,len(sigNames)):
			sigContainers.append( MCContainer( idir+"/"+sigNames[i], float(options.lumi), sigLabels[i], sigTags[i], 1, False, options.jetNum,NMassBins[i] ) );
			# k-factor is 1.2
			sigContainers[i].morphSignal("h_peakshape",sigmass[i],
										               sig_mass_shift,sig_mass_shift_unc,
										               sig_res_shift,sig_res_shift_unc);

			# hsig = [];
			# hsig.append( getattr( sigContainers[i], "h_peakshape" ) );
			# hsig.append( getattr( sigContainers[i], "h_peakshape_shiftUp" ) );
			# hsig.append( getattr( sigContainers[i], "h_peakshape_smearUp" ) );
			# hsig.append( getattr( sigContainers[i], "h_peakshape_shiftDn" ) );
			# hsig.append( getattr( sigContainers[i], "h_peakshape_smearDn" ) );

			# makeCanvasShapeComparison(hsig,["cen","shiftup","smearup","shiftdn","smeardn"],"mcsignalshapes_"+sigTags[i],"plots"+str(options.jetNum)+"/shapes/");

			# dummyaxis = sigContainers[i].h_peakshape_matched.GetXaxis();

		###### creating interpolated sig containers
		signalMorphersHists  = [];
		signalMorphersMasses = [];
		for i in range(6):
			signalMorphersHists.append( sigContainers[i].h_peakshape_matched );
			signalMorphersMasses.append( sigmass[i] );

		morphedHistContainer = hist(signalMorphersMasses,signalMorphersHists);
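		# hist(masses, hists) holds one template per generated mass point; morph(m) interpolates
		# a new shape at an intermediate mass m (165 and 180 GeV below lie between generated points)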

		ctmp = ROOT.TCanvas("ctmp","ctmp",1000,800);
		htmp = morphedHistContainer.morph(165.);
		htmp.SetLineColor(6);
		htmp.Draw();
		morphedHistContainer.morph(180.).Draw("sames");
		ctmp.SaveAs("mtmp.pdf");


		
		interpolatedMasses = [60.0,90.0,110.0,135.0,165.0,180.0];
		additionalSigContainers = [];
		sigLabels = ["Z\'(60 GeV)","Z\'(90 GeV)","Z\'(110 GeV)","Z\'(135 GeV)","Z\'(165 GeV)","Z\'(180 GeV)"];
		sigTags = ["Zprime60","Zprime90","Zprime110","Zprime135","Zprime165","Zprime180"];
		interpolatedMasses_nbins = [60,60,60,60,60,60];
		isMorphed=True;
		for i in range(len(interpolatedMasses)):
			additionalSigContainers.append( MCContainer( 'notapplicable', float(options.lumi), sigLabels[i], sigTags[i], 1, False, options.jetNum,interpolatedMasses_nbins[i],isMorphed ) );
			

			print "interpolating, ",sigTags[i],interpolatedMasses[i],sigLabels[i]
			additionalSigContainers[i].morphSignal( "h_peakshape",interpolatedMasses[i],
										               sig_mass_shift,sig_mass_shift_unc,
										               sig_res_shift,sig_res_shift_unc,
										               morphedHistContainer.morph(interpolatedMasses[i]));

			# hsig = [];
			# hsig.append( getattr( additionalSigContainers[i], "h_peakshape" ) );
			# hsig.append( getattr( additionalSigContainers[i], "h_peakshape_shiftUp" ) );
			# hsig.append( getattr( additionalSigContainers[i], "h_peakshape_smearUp" ) );
			# hsig.append( getattr( additionalSigContainers[i], "h_peakshape_shiftDn" ) );
			# hsig.append( getattr( additionalSigContainers[i], "h_peakshape_smearDn" ) );

			# makeCanvasShapeComparison(hsig,["cen","shiftup","smearup","shiftdn","smeardn"],"mcsignalshapes_"+sigTags[i],"plots"+str(options.jetNum)+"/shapes/");

		for i in range(len(additionalSigContainers)):
			for j in range(len(sigmass)-1):
				if interpolatedMasses[i] > sigmass[j] and interpolatedMasses[i] < sigmass[j+1]:
					sigContainers.insert(j+1,additionalSigContainers[i]);
					sigmass.insert(j+1,interpolatedMasses[i]);
					NMassBins.insert(j+1,interpolatedMasses_nbins[i]);
					print "newsigmass = ", sigmass
					break;

		for s in sigContainers:
			print s._tag,s._name;


		bkgContainers = [];
		bkgNames = ["QCD.root","W.root","DY.root","TTT.root"];
		bkgLabels = ["QCD","W(qq)","Z+jets","top"];
		bkgTags = ["QCD","Winc","Zinc","top"];
		bkgmass = [0.0,80.4,91.2,80.4];
		bkgsf = [1.,0.95,0.95,0.95]; # put in the W tag SF! 
		# bkgNames = ["QCD.root","W.root","DY.root"];
		# bkgLabels = ["QCD","W(qq)","Z+jets"];
		# bkgTags = ["QCD","Winc","Zinc"];
		# bkgmass = [0.0,80.4,91.2];
		# bkgsf = [1.,0.95,0.95]; # put in the W tag SF! 
		for i in range(0,len(bkgNames)):
			tmpsf = qcdSF;
			if i > 0: tmpsf = 1;
			bkgContainers.append( MCContainer( idir+"/"+bkgNames[i], float(options.lumi)*bkgsf[i], bkgLabels[i], bkgTags[i], tmpsf, False, options.jetNum,NMassBins[sigmass.index(options.ZPrimeMass)] ) );
			# bkgContainers.append( MCContainer( idir+"/"+bkgNames[i], float(options.lumi), bkgLabels[i], bkgTags[i], tmpsf, False, options.jetNum, 60 ) );
			if i == 1 or i == 2: 
				bkgContainers[i].morphSignal("h_peakshape",bkgmass[i],
											               sig_mass_shift,sig_mass_shift_unc,
											               sig_res_shift,sig_res_shift_unc);		

	####################################################################################
	# do background estimation
	theRhalphabet = None;
	if options.doRhalphabet: 
		print "Now doing the rhalphabet!"
		if not options.qcdClosure:		
			isData = True;
			extractTFs = options.extractTF;
			# for i in range(len(sigmass)):
			theRhalphabet = rhalphabet(idir+"/"+"JetHT350.root",1,"rhalphabet",1, extractTFs, options.jetNum,int(options.ZPrimeMass),isData,NMassBins[sigmass.index(options.ZPrimeMass)]) ;
			theRhalphabet.GetPredictedDistributions( idir+"/"+"JetHT.root", 1, 1, isData);
			# extractTFs = False;

			# theRhalphabet.append( rhalphabet(idir+"/"+"JetHT.root",1,"rhalphabet",1, extractTFs, options.jetNum, 85. ); #add a prediction without the W
			# theRhalphabet[len(sigmass)].GetPredictedDistributions( idir+"/"+"JetHT.root", 1, 5, isData);

		# there is a flag to do a closure test as well
		if options.qcdClosure:
			isData = False;
			extractTFs = options.extractTF;
			theRhalphabet = rhalphabet(idir+"/"+"QCD.root",options.lumi,"rhalphabetClosure",1, extractTFs, options.jetNum,int(options.ZPrimeMass),isData,NMassBins[sigmass.index(options.ZPrimeMass)]) ;
			theRhalphabet.GetPredictedDistributions( idir+"/"+"QCD.root", options.lumi, 100, isData );

	####################################################################################
	# do the loop on data
	theData = None;
	if options.doData:
		print "Now doing the data!...."
		isData = True;
		theData = MCContainer( idir+"/"+"JetHT.root", 1, "data" ,"data" , 1, isData, options.jetNum, NMassBins[sigmass.index(options.ZPrimeMass)], False );

	####################################################################################
	# do some plotting
	if options.doCards: 
		buildDatacards(bkgContainers,sigContainers[sigmass.index(options.ZPrimeMass)],theRhalphabet,theData,options.jetNum);

	####################################################################################
	# do some plotting
	if options.doPlots: 
		BuildPlots(bkgContainers,sigContainers,theRhalphabet,theData);
Example #38
0
def load(indir='signals/'):
    lH0 = makehist( 'z50',indir+'ZPrimeToQQ_50GeV_v4_mc.root')
    lH1 = makehist('z100',indir+'ZPrimeToQQ_100GeV_v4_mc.root')
    lH2 = makehist('z150',indir+'ZPrimeToQQ_150GeV_v4_mc.root')
    lH3 = makehist('z200',indir+'ZPrimeToQQ_200GeV_v4_mc.root')
    lH4 = makehist('z250',indir+'ZPrimeToQQ_250GeV_v4_mc.root')
    lH5 = makehist('z300',indir+'ZPrimeToQQ_300GeV_v4_mc.root')
    lH   = [lH0,lH1,lH2,lH3,lH4,lH5]
    lVar = [ 50,100,150,200,250,300]
    return (lVar,lH)

if __name__ == "__main__":
    options = parser()
    #print options
    lVars,lHists = load()
    lHist = hist(lVars,lHists)
    if options.morph:
        lMorph = lHist.morph(150)
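        # morph a 150 GeV shape from the other mass points and overlay it with the
        # true 100 and 150 GeV templates for comparison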
        lMorphA = [lHists[1],lMorph,lHists[2]]
        draw("morph",lMorphA)

    if options.shift:
        lShifts = lHist.shift(lHists[2],5.)
        lShiftA = [lHists[2],lShifts[0],lShifts[1]]
        draw("shift",lShiftA)

    if options.smear:
        lSmears = lHist.smear(lHists[2],0.1)
        lSmearA = [lHists[2],lSmears[0],lSmears[1]]
        lHists[2].Fit("gaus")
        lSmears[0].Fit("gaus")
Example #39
0
def tfit(name, schemes, alist):

	if schemes[0].getHist().getH(0, 0).Integral() == 0: return

	if len(schemes) > 10: maxs = 10
	else                : maxs = len(schemes)

	## init a trivial scheme for total fit
	h1 = hist.hist(schemes[0].mypaf, name + "_tfit", schemes[0].getHist().binargs, schemes[0].getHist().labels, schemes[1].getHist().alist.argstring)
	h1.alist.resetArgs(alist.argstring)
	setsources = [alist.get("source") for s in schemes[0].getHist().sources]
	alist.remove("source")
	if setsources=="": setsource = [s.name for s in schemes[0].getHist().sources]
	h1.build(setsources, schemes[0].getHist().categs)
	h1.applyArgs()
	h1.applySourceInfo()
	s1 = hscheme.hscheme(schemes[0].mypaf, "hist", name + "_tfit", "", alist.argstring)
	s1.setTrivial(h1)

	## do the fit for every sidx/cidx pair
	x = ROOT.RooRealVar("xx", "xx", schemes[0].getHist().getH(0,0).GetXaxis().GetXmin(), schemes[0].getHist().getH(0,0).GetXaxis().GetXmax())
	arglist = ROOT.RooArgList(x)
	argset  = ROOT.RooArgSet(x)
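	## x spans the histogram range; the RooDataHist / RooHistPdf objects below are all built in this single observable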

	for sidx in range(len(schemes[0].getHist().sources)):
		for cidx in range(len(schemes[0].getHist().categs)):

			data = ROOT.RooDataHist("data", "data", arglist, schemes[0].getHist().getH(sidx, cidx))
			dint = schemes[0].getHist().getH(sidx, cidx).Integral()

			mcint = sum([schemes[i].getHist().getH(sidx, cidx).Integral() for i in range(1,len(schemes))])
			if mcint > dint:
				for i in range(1,len(schemes)):
					schemes[i].getHist().getH(sidx, cidx).Scale(dint/mcint)

			dh = []
			hi = []
			mc = []
			sc = []
			for i in range(1,len(schemes)):
				dh.append(ROOT.RooDataHist("mc" + str(i), "mc" + str(i), arglist, schemes[i].getHist().getH(sidx, cidx)))
				hi.append(schemes[i].getHist().getH(sidx, cidx).Integral())
				mc.append(ROOT.RooHistPdf("mcpdf" + str(i), "mcpdf" + str(i), argset, dh[i-1]))
				sc.append(ROOT.RooRealVar("mcscale" + str(i), "mcscale" + str(i), hi[i-1]/dint, 0.0, 1.0))
			
			pdfs   = eval("ROOT.RooArgList(" + ", ".join(["mc[" + str(i) + "]" for i in range(len(mc))]) + ")")
			coeffs = eval("ROOT.RooArgList(" + ", ".join(["sc[" + str(i) + "]" for i in range(len(sc))]) + ")")

			totPdf = ROOT.RooAddPdf("totPdf", "totPdf", pdfs, coeffs)
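			## extended ML fit: each MC template pdf enters with a free coefficient,
			## so the fitted mcscale values are the template fractions of the data yield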

			result = totPdf.fitTo(data, ROOT.RooFit.SumW2Error(ROOT.kFALSE), ROOT.RooFit.Extended(), ROOT.RooFit.PrintLevel(-1)) 

			## scale mc hists accordingly
			value = []
			syerr = []
			alerr = []
			auerr = []
			for i in range(1,len(schemes)):
				value.append(sc[i-1].getVal())
				syerr.append(sc[i-1].getError())
				alerr.append(sc[i-1].getAsymErrorLo())
				auerr.append(sc[i-1].getAsymErrorHi())
				schemes[i].getHist().getH(sidx, cidx).Scale(value[i-1])
				alist.set("sce" + str(i+1), str(sc[i-1].getError() / sc[i-1].getVal()))

			## ignore asymm errors??

			## delete params
			for i in range(len(mc)):
				mc[i].Delete()
				sc[i].Delete()
			data.Delete(), pdfs.Delete(), coeffs.Delete()
			del data, mc, sc, hi, pdfs, coeffs

			## write total fit hist
			h1.getH(sidx, cidx).SetLineColor(ROOT.kRed)
			for i in range(1,len(schemes)):
				h1.inject(schemes[i].getHist().getH(sidx, cidx), sidx, cidx)

	## plot everything
	ns = [schemes[0], s1]
	ns.extend(schemes[1:])
	alist.set("ratio", "y")
	comp(name, ns, alist)