def plotImportanceInteraction(fileName, typeInfs, nbData):
    """Histogram of (H_ij - H_ii) / H_ii over the training interactions of 5 folds.

    For every fold in range(5) the data is loaded with getData and the training
    set is turned into a nested c -> c2 -> dt structure by getMatInter. Each
    (c, c2, dt) cell contributes its ratio once per observation it holds, i.e.
    sum(mat[c][c2][dt]) times. All folds are pooled into a single histogram
    with a logarithmic y axis, saved as PNG and as PDF (dpi=600) under
    "Misc/<fileName>/".

    Args:
        fileName: dataset name; used by getData and in the output path.
        typeInfs: passed to getData and concatenated into the output file name.
        nbData: passed to getData and stringified into the output file name.
    """
    pooledRatios = []
    for fold in range(5):
        print(fileName, typeInfs, nbData, fold)
        training, test, usrToInt, beta, betaIC, betaTrue = getData(fileName, typeInfs, nbData, fold)
        interMat = getMatInter(training, 1)

        foldRatios = []
        for c in interMat:
            for c2 in interMat[c]:
                nbDistSample = len(beta[c][c2])
                for dt in interMat[c][c2]:
                    selfValue = H(dt, 0, beta[c, c], nbDistSample=nbDistSample)
                    pairValue = H(dt, 0, beta[c, c2], nbDistSample=nbDistSample)
                    nbObs = sum(interMat[c][c2][dt])
                    # Lazy generator: the ratio is only evaluated when the cell
                    # actually holds observations.
                    foldRatios.extend(
                        (pairValue - selfValue) / selfValue for _ in range(nbObs)
                    )
        pooledRatios.extend(foldRatios)

    plt.hist(pooledRatios)
    plt.semilogy()
    plt.xlabel(r"$\frac{P_{ij}(t) - P_{ii}(t)}{P_{ii}(t)}$", fontsize=18)
    plt.ylabel("Density", fontsize=18)
    plt.tight_layout()

    outStem = "Misc/"+fileName+"/"+"ImportanceInteraction_"+fileName+"_"+typeInfs+"_"+str(nbData)
    plt.savefig(outStem+".png")
    plt.savefig(outStem+".pdf", dpi=600)
    plt.close()
def getTabsCalib(training, beta, betaIC, betaHawkes, betaIMMSBM, betaCoC, nbDistSample, cntSeq, cntFreq):
    """Build, per (c, c2, tdiff) training cell, the predictions of every model.

    The cells come from getMatInter(training, reduit=False). For each cell the
    observed frequency (count at index 1 over the cell total), the cell total
    and one prediction per model are collected. A model whose parameters are
    None gets a placeholder: -1 for the betaIC / betaHawkes columns, 0 for the
    betaIMMSBM / betaCoC columns.

    cntSeq and cntFreq are accepted but not used here.

    Returns:
        Eleven numpy arrays, in this order: tabP, tabPIC, tabPHawkes, tabPBL,
        tabPBLC, tabPIMMSBM, tabPCoC, tabPRand, tabF (frequencies), tabW (cell
        totals), tabKeys ((c, c2, tdiff) triples).
    """
    baseProbs = getProbsBL(training)
    pairProbs = getProbsBLC(training)

    colP, colIC, colHawkes, colBL, colBLC = [], [], [], [], []
    colIMMSBM, colCoC, colRand, colFreq, colWeight = [], [], [], [], []
    cellKeys = []

    interMat = getMatInter(training, reduit=False)
    for c in interMat:
        for c2 in interMat[c]:
            for tdiff in interMat[c][c2]:
                cell = interMat[c][c2][tdiff]
                total = sum(cell)
                freq = cell[1] / total

                pMain = H(tdiff, 0, beta[c,c2], nbDistSample)

                if betaIC is None:
                    pIC = -1
                elif c == c2:
                    pIC = H(tdiff, 0, betaIC[c, c2], nbDistSample)
                else:
                    pIC = pairProbs[c][c2]

                if betaHawkes is None:
                    pHawkes = -1
                else:
                    pHawkes = np.exp(-betaHawkes[c,c2,0] - betaHawkes[c,c2,1]*tdiff)

                pBL = baseProbs[c]

                # Any failure of the pair lookup falls back to 0.
                try:
                    pBLC = pairProbs[c][c2]
                except:
                    pBLC = 0

                pIMMSBM = 0 if betaIMMSBM is None else betaIMMSBM[c,c2,c]
                pCoC = 0 if betaCoC is None else betaCoC[c,c2,int(tdiff)]
                pRand = random.random()

                colP.append(pMain)
                colIC.append(pIC)
                colHawkes.append(pHawkes)
                colBL.append(pBL)
                colBLC.append(pBLC)
                colIMMSBM.append(pIMMSBM)
                colCoC.append(pCoC)
                colRand.append(pRand)
                colFreq.append(freq)
                colWeight.append(total)
                cellKeys.append((c, c2, tdiff))

    orderedColumns = (colP, colIC, colHawkes, colBL, colBLC, colIMMSBM,
                      colCoC, colRand, colFreq, colWeight, cellKeys)
    return tuple(np.array(column) for column in orderedColumns)
def likelihoodFromMatrix(obs, usri, alphai, lgStep, N, nbDistSample=1):
    """Accumulate a two-outcome log-likelihood over the cells of getMatInter.

    The matrix is built with getMatInter(obs, lgStep, usri, reduit=False) and
    is indexed as mat[c2][dt][k]. For each cell, log(H) is weighted by the
    count at index 1 and log(1 - H) by the count at index 0, where
    H = H(dt, 0, alphai[c2], nbDistSample).

    Args:
        obs, usri, lgStep: forwarded to getMatInter.
        alphai: parameters indexed by c2 and handed to H.
        N: accepted for interface compatibility; not used here.
        nbDistSample: forwarded to H.

    Returns:
        The accumulated sum L (0 when the matrix has no cell).
    """
    mat = getMatInter(obs, lgStep, usri, reduit=False)
    L = 0
    for c2 in mat:
        for dt in mat[c2]:
            # H depends only on (dt, alphai[c2]); evaluate it once and reuse it
            # for both outcomes instead of calling it twice per cell.
            prob = H(dt, 0, alphai[c2], nbDistSample)
            counts = mat[c2][dt]
            L += cp.log(prob) * counts[1]
            L += cp.log(1. - prob) * counts[0]
    return L
def plotDistanceInteraction(fileName, typeInfs, nbData):
    """Heatmap of H_ij(dt) - P0_i for every (i, j) pair, averaged over 5 folds.

    For each fold in range(5) the data is loaded with getData and, for every
    pair (c, c2) of beta, the row [H(dt, 0, beta[c, c2]) - P0[c] for dt in rng]
    is built, with P0 = getCntFreq(training). rng is range(1, len(beta[c, c2])),
    or range(1, 11) when fileName == "PD". Rows are averaged across folds and
    drawn with seaborn; the colour range is symmetric and capped at 0.2. The
    figure is saved as PNG and PDF (dpi=600) under "Misc/<fileName>/".

    Args:
        fileName: dataset name; used by getData and in the output path.
        typeInfs: passed to getData and concatenated into the output file name.
        nbData: passed to getData and stringified into the output file name.
    """
    tabHMGlob = []
    nameKeys = []
    for fold in range(5):
        print(fileName, typeInfs, nbData, fold)
        training, test, usrToInt, beta, betaIC, betaTrue = getData(fileName, typeInfs, nbData, fold)
        print(usrToInt)

        P0 = getCntFreq(training)
        nbDistSample = len(beta[0,0])

        tabHM = []
        nameKeys = []  # rebuilt every fold; the last fold's labels are plotted
        for c in range(len(beta)):
            for c2 in range(len(beta[c])):
                rng = range(1, len(beta[c,c2]))
                if fileName=="PD":
                    rng = range(1, 11)
                tabHM.append([H(dt, 0, beta[c,c2], nbDistSample=nbDistSample) - P0[c] for dt in rng])
                nameKeys.append(str(c)+"-"+str(c2))
        tabHMGlob.append(tabHM)

    tabHMGlob = np.array(tabHMGlob).mean(axis=0)

    # Columns hold dt = 1..nbColumns (rng always starts at 1). Derive the tick
    # labels from the plotted columns: range(1, nbDistSample + 1) does not match
    # the number of columns produced by rng.
    xtickslabels = list(range(1, tabHMGlob.shape[1] + 1))

    # Symmetric colour scale, capped at 0.2.
    maxAbsAmp = np.min([np.max([np.max(tabHMGlob), -np.min(tabHMGlob)]), 0.2])
    ax = sns.heatmap(tabHMGlob, xticklabels=xtickslabels, yticklabels=nameKeys, cmap="RdBu_r", linewidths=.5,
                     vmin=-maxAbsAmp, vmax=maxAbsAmp, cbar_kws={'label': r"$P_{ij}(\Delta t) - P_0$"})

    # Horizontal separators: outer borders, then one line per block of len(beta) rows.
    ax.hlines([0, len(beta)**2], *ax.get_xlim())
    ax.hlines([i*len(beta) for i in range(len(beta))], *ax.get_xlim())
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)

    plt.xlabel(r"$\Delta$t", fontsize=18)
    plt.ylabel("Information pairs", fontsize=18)
    plt.tight_layout()

    outStem = "Misc/"+fileName+"/"+"DistanceInteraction_"+fileName+"_"+typeInfs+"_"+str(nbData)
    plt.savefig(outStem+".png")
    plt.savefig(outStem+".pdf", dpi=600)
    plt.close()
def getGlobRes():
    """Collect the saved metrics of every fold and plot their mean/std per dataset.

    The (fileName -> typeInfs -> fold -> nbData) tree comes from getFileTree.
    For each leaf, results are optionally recomputed (runAll), then the metric
    files written under "Output/<fileName>/" are loaded with loadMetrics,
    loadCalib, loadMetricsDist and loadErrs. Per (fileName, typeInfs) the
    per-fold arrays are stacked, averaged across folds and passed to plotRecap
    together with their standard deviations.
    """
    runAll = True
    listTypeInfsInteressants = [
        ("Synth", "20", 20000),
        ("Synth", "5", 20000),
        ("Ads", "Ads2", 1e6),
        ("PD", "All", 300000),
        ("Twitter", "URL", 1e6),
    ]
    fileTree = getFileTree(listTypeInfsInteressants=listTypeInfsInteressants)

    for fileName in fileTree:
        for typeInfs in fileTree[fileName]:
            tabAllArr, tabAllGen = [], []
            for fold in fileTree[fileName][typeInfs]:
                # Reset every name list per fold so nothing leaks from the previous one.
                tabNamesMet, tabNamesCal, tabNamesDist, tabNamesErr = [], [], [], []
                tabNs, tabF1s, tabAUCROCs, tabAUCPRs, tabL1s, tabPearsons = [], [], [], [], [], []
                tabAccs, tabMSEs, tabJSs, tabBriers, tabCrossEntropys = [], [], [], [], []

                for nbData in fileTree[fileName][typeInfs][fold]:
                    nom = "Output/"+fileName+"/" + fileName+"_"+typeInfs+"_"+str(nbData)+"_"+str(fold)
                    nbDistSample = len(np.load(nom + "_Fit_beta.npy")[-1][-1])

                    if runAll:
                        GetResults.results(fileName, typeInfs, nbData, fold, nbDistSample)

                    training, test, usrToInt, beta, betaIC, betaHawkes, betaCoC, betaTrue = GetResults.getData(fileName, typeInfs, nbData, fold)
                    print(nom)
                    N_inter = len(training)

                    # Debug switch: plot observed frequencies against the fitted
                    # curves for one hard-coded pair (c == 0, c2 == 3).
                    seeDistrib = False
                    if seeDistrib:
                        plt.close()
                        from LogS import H, HGen
                        lgStep = 1
                        if fileName=="PD":
                            lgStep=1.01
                        obs = training
                        dicTemp = getMatInter(obs, lgStep, reduit=False)
                        cntFreq = getCntFreq(training)
                        betaMMSBM = getBetaIMMSBM(nom)
                        training, test, usrToInt, beta, betaIC, betaHawkes, betaCoC, betaTrue = getData(fileName, typeInfs, nbData, fold)
                        print(usrToInt)
                        nbInfs = len(beta)
                        for c in dicTemp:
                            for c2 in dicTemp[c]:
                                if c!=0 or c2 !=3:
                                    continue
                                s = 0
                                maxN = 0
                                for dt in dicTemp[c][c2]:
                                    if sum(dicTemp[c][c2][dt])>maxN:
                                        maxN = sum(dicTemp[c][c2][dt])
                                for dt in dicTemp[c][c2]:
                                    s += sum(dicTemp[c][c2][dt])
                                    # 1e-20 guards against empty cells.
                                    r = dicTemp[c][c2][dt][1] / (sum(dicTemp[c][c2][dt])+1e-20)
                                    plt.bar(dt, r, width=.5, color="orange")
                                print(c, c2, betaHawkes[c][c2], s)

                                a = np.linspace(1, max(dicTemp[c][c2]), 10000)
                                arrH = np.array([H(a_val, 0, beta[c, c2], nbDistSample=nbDistSample) for a_val in a])
                                arrHIC = np.array([H(a_val, 0, betaIC[c, c2], nbDistSample=nbDistSample) for a_val in a])
                                arrHHawkes = np.array([np.exp(-betaHawkes[c,c2,0] - betaHawkes[c,c2,1]*a_val) for a_val in a])
                                plt.plot(a, arrH, "b", label="IR-RBF")
                                # Alternative curves, currently disabled:
                                #plt.plot(a, arrHHawkes, "y", label="IR-EXP")
                                #plt.plot(a, [betaMMSBM[c][c2][nbInfs+1] for i in range(len(a))], "r", label="IMMSBM")
                                #plt.plot(a, [cntFreq[c] for i in range(len(a))], "g", label="Naive")
                                try:
                                    arrHTrue = np.array([HGen(a_val, 0, betaTrue[c, c2], nbDistSample=nbDistSample) for a_val in a])
                                    plt.plot(a, arrHTrue+cntFreq[c], "c", label="True")
                                except Exception:
                                    # Best effort: the "True" curve is skipped when it cannot be computed.
                                    pass

                                plt.ylim([0,1])
                                plt.legend()
                                plt.xlabel("Time separation", fontsize=18)
                                plt.ylabel("Probability of contamination", fontsize=18)
                                plt.rcParams['pdf.fonttype'] = 42
                                plt.rcParams['font.family'] = 'Calibri'
                                plt.tight_layout()
                                #plt.savefig("Misc/ExDist_SocialMedia_N=350000_Flickr-Flickr.pdf", dpi=600)
                                plt.show()
                                pause()

                    tabModelesMet, tabF1, tabROCAUC, tabPRAUC, tabAcc = loadMetrics(nom)
                    tabModelesCal, tabL1, tabPearson = loadCalib(nom)
                    tabModelesDist, tabJS, tabBrierScore, tabCrossEntropy = loadMetricsDist(nom)
                    try:
                        tabModelesErr, tabMAE, tabMSE = loadErrs(nom)
                    except Exception:
                        # When the error file cannot be loaded, fall back to zeros, one per model.
                        tabModelesErr = np.array(tabModelesMet)
                        tabMAE = np.array([0] * len(tabModelesMet))
                        tabMSE = np.array([0] * len(tabModelesMet))

                    tabNamesMet = tabModelesMet
                    tabNamesCal = tabModelesCal
                    tabNamesDist = tabModelesDist
                    tabNamesErr = tabModelesErr

                    tabF1s.append(tabF1)
                    tabL1s.append(tabL1)
                    tabAUCROCs.append(tabROCAUC)
                    tabAUCPRs.append(tabPRAUC)
                    tabPearsons.append(tabPearson)
                    tabAccs.append(tabAcc)
                    tabMSEs.append(tabMSE)
                    tabJSs.append(tabJS)
                    tabBriers.append(tabBrierScore)
                    tabCrossEntropys.append(tabCrossEntropy)
                    tabNs.append([N_inter for _ in range(len(tabF1))])

                tabNs, tabF1s, tabL1s, tabAUCROCs, tabAUCPRs, tabPearsons, tabAccs, tabMSEs, tabNamesMet, tabNamesCal, tabNamesErr = toNp(([tabNs, tabF1s, tabL1s, tabAUCROCs, tabAUCPRs, tabPearsons, tabAccs, tabMSEs, tabNamesMet, tabNamesCal, tabNamesErr]))
                tabJSs, tabBriers, tabCrossEntropys = toNp([tabJSs, tabBriers, tabCrossEntropys])

                tabArrs = [tabF1s, tabL1s, tabAUCROCs, tabAUCPRs, tabPearsons, tabAccs, tabMSEs, tabJSs, tabBriers, tabCrossEntropys, tabNs]
                tabGen = [tabNamesMet, tabNamesCal, tabNamesDist, tabNamesErr]
                #plotRecap(tabGen, tabArrs, fileName, typeInfs, fold, tabStds=None)
                tabAllArr.append(tabArrs)
                tabAllGen.append(tabGen)

            tabAllGen = np.array(tabAllGen)
            tabAllArr = np.array(tabAllArr)

            # tabGen is ordered [Met, Cal, Dist, Err]; unpack all four so the
            # Err names are not silently replaced by the Dist names (index 2).
            tabGen = tabAllGen[-1]
            tabNamesMet, tabNamesCal, tabNamesDist, tabNamesErr = tabGen[0], tabGen[1], tabGen[2], tabGen[3]

            # Metric order along axis 1 of tabAllArr:
            # F1, L1, AUCROC, AUCPR, Pearson, Acc, MSE, JS, Brier, CrossEntropy, N
            nbMetrics = 11
            tabArrs = [np.average(tabAllArr[:, k], axis=0) for k in range(nbMetrics)]
            tabStds = [np.std(tabAllArr[:, k], axis=0) for k in range(nbMetrics)]

            tabGen = [tabNamesMet, tabNamesCal, tabNamesDist, tabNamesErr]
            plotRecap(tabGen, tabArrs, fileName, typeInfs, fold="", tabStds=tabStds)

    print(fileTree)
def getMetricsDist(training, test, beta, betaIC, betaHawkes, betaIMMSBM, betaCoC, nbDistSample, lgStep, cntFreq, nom, save=False):
    """Score eight predictors against the test frequencies and save the scores.

    Test cells come from getMatInter(test, reduit=False); cells with dt == 1
    are skipped. For every remaining (c, c2, dt) cell the observed frequency
    (count at index 1 over the cell total) is compared with eight predictions,
    always in this order: beta via H, betaIC, betaHawkes, probsBL, probsBLC,
    betaIMMSBM, betaCoC and a random.random() draw. A prediction whose lookup
    raises is replaced by 0.

    Three scores are computed per predictor and handed to
    saveMetricsDist(nom, tabJS, tabBrierScore, tabCrossEntropy):
      * tabJS: mean of the element-wise Jensen-Shannon-style term in base 2;
      * tabBrierScore: sum of squared differences (a residual sum of squares);
      * tabCrossEntropy: count-weighted log2 terms divided by the number of cells.

    lgStep, cntFreq and save are accepted but not used here; saveMetricsDist is
    always called. Nothing is returned.
    """
    nbInfs = len(beta)
    distTest = getMatInter(test, reduit=False)
    probsBL = getProbsBL(training)
    probsBLC = getProbsBLC(training)

    def _clamp(value, inclusive):
        # Restrict a prediction to [0, 1] before taking logarithms.
        if (value >= 1) if inclusive else (value > 1):
            return 1
        if (value <= 0) if inclusive else (value < 0):
            return 0
        return value

    def _accumulate(total, counts, prob):
        # counts[1] weights log2(prob), counts[0] weights log2(1 - prob);
        # 1e-10 keeps the logarithm finite.
        total += counts[1] * np.log2(1e-10 + prob)
        total += counts[0] * np.log2(1e-10 + 1. - prob)
        return total

    tabFreqs = []
    colMain, colIC, colHawkes, colBL = [], [], [], []
    colBLC, colIMMSBM, colCoC, colRand = [], [], [], []
    ceMain = ceIC = ceHawkes = ceBL = ceBLC = ceIMMSBM = ceCoC = ceRand = 0
    div = 0

    for c in distTest:
        for c2 in distTest[c]:
            for dt in distTest[c][c2]:
                if dt == 1:
                    continue
                counts = distTest[c][c2][dt]

                vMain = H(dt, 0, beta[c][c2], nbDistSample=nbDistSample)

                try:
                    if c == c2:
                        vIC = H(dt, 0, betaIC[c][c2], nbDistSample=nbDistSample)
                    else:
                        vIC = probsBL[c]
                except:
                    vIC = 0

                try:
                    vHawkes = np.exp(-betaHawkes[c,c2,0] - betaHawkes[c,c2,1]*dt)
                except:
                    vHawkes = 0

                vBL = probsBL[c]

                try:
                    vBLC = probsBLC[c][c2]
                except:
                    vBLC = 0

                try:
                    vIMMSBM = betaIMMSBM[c,c2,nbInfs+1]
                except:
                    vIMMSBM = 0

                try:
                    vCoC = betaCoC[c,c2,int(dt)]
                except:
                    vCoC = 0

                vRand = random.random()

                ptest = counts[1] / (sum(counts))

                # The unclamped predictions feed the JS and squared-error scores.
                tabFreqs.append(ptest)
                colMain.append(vMain)
                colIC.append(vIC)
                colHawkes.append(vHawkes)
                colBL.append(vBL)
                colBLC.append(vBLC)
                colIMMSBM.append(vIMMSBM)
                colCoC.append(vCoC)
                colRand.append(vRand)

                div += 1
                # Only the first three predictors are clamped to [0, 1].
                ceMain = _accumulate(ceMain, counts, _clamp(vMain, True))
                ceIC = _accumulate(ceIC, counts, _clamp(vIC, False))
                ceHawkes = _accumulate(ceHawkes, counts, _clamp(vHawkes, False))
                ceBL = _accumulate(ceBL, counts, vBL)
                ceBLC = _accumulate(ceBLC, counts, vBLC)
                ceIMMSBM = _accumulate(ceIMMSBM, counts, vIMMSBM)
                ceCoC = _accumulate(ceCoC, counts, vCoC)
                ceRand = _accumulate(ceRand, counts, vRand)

    if div == 0:
        div = 1e-10
    tabCrossEntropy = [total / div for total in
                       (ceMain, ceIC, ceHawkes, ceBL, ceBLC, ceIMMSBM, ceCoC, ceRand)]

    tabFreqs, colMain, colIC, colHawkes, colBL, colBLC, colIMMSBM, colCoC, colRand = toNp(
        [tabFreqs, colMain, colIC, colHawkes, colBL, colBLC, colIMMSBM, colCoC, colRand])
    predictions = (colMain, colIC, colHawkes, colBL, colBLC, colIMMSBM, colCoC, colRand)

    def _jensenShannon(probs):
        mix = (tabFreqs + probs) / 2 + 1e-20
        return np.mean(0.5 * tabFreqs * np.log2(tabFreqs / mix + 1e-20)
                       + 0.5 * probs * np.log2(probs / mix + 1e-20))

    tabJS = [_jensenShannon(probs) for probs in predictions]
    # Sum (not mean) of squared differences.
    tabBrierScore = [np.sum((tabFreqs - probs) ** 2) for probs in predictions]

    saveMetricsDist(nom, tabJS, tabBrierScore, tabCrossEntropy)
def getTabsClassic(training, test, beta, betaIC, betaIMMSBM, nbDistSample, lgStep, cntFreq):
    """Build per-event prediction arrays from the raw test sequences.

    test maps each u to an iterable of (c, t, s) triples. Every triple with
    t >= 10 yields one row. Its predictions aggregate over all triples
    (c2, t2, s2) of the same u whose tdiff = t - t2 + lgStep lies in (0, 20]:

      * p / pIC: probsBL[c] plus the sum of max(H - probsBL[c], 0) computed
        with beta / betaIC, capped at 1 (pIC is probsBL[c] capped at 1 when
        betaIC is None);
      * pIMMSBM: 1 minus the product of the betaIMMSBM survival terms;
      * pBLC: 1 minus the product of (1 - probsBLC[c][c2]), skipping pairs
        whose lookup fails;
      * pBL: probsBL[c]; pRand: a random.random() draw.

    probsBL comes from getCntFreq(training) and probsBLC from
    getProbsBLC(training). cntFreq is accepted but not used here.

    Returns:
        tabTrue (the s values), tabInfs (the c values), tabP, tabPIC, tabPBL,
        tabPBLC, tabPIMMSBM, tabPRand, all as numpy arrays.
    """
    probsBL = getCntFreq(training)
    probsBLC = getProbsBLC(training)
    nbInfs = len(beta)
    tabP, tabPIC, tabPBL, tabPRand, tabPBLC, tabPIMMSBM, tabTrue, tabInfs = [], [], [], [], [], [], [], []

    for u in test:
        for (c, t, s) in test[u]:
            if t<10:
                continue
            pIMMSBM, pBLC = 1., 1.
            tabPtemp, tabPICtemp = [], []
            for (c2, t2, s2) in test[u]:
                tdiff = t - t2 + lgStep
                if tdiff <= 0 or tdiff>20:
                    continue

                # Only the excess over the baseline is accumulated. The former
                # running survival products for p and pIC were overwritten
                # before use, so they (and their extra H calls) are gone.
                tabPtemp.append(max([H(tdiff, 0, beta[c, c2], nbDistSample)-probsBL[c], 0]))
                if betaIC is not None:
                    tabPICtemp.append(max([H(tdiff, 0, betaIC[c, c2], nbDistSample)-probsBL[c], 0]))

                if betaIMMSBM is not None:
                    # Survival probability
                    pIMMSBM *= 1. - betaIMMSBM[c,c2,nbInfs]/(betaIMMSBM[c,c2,nbInfs]+betaIMMSBM[c,c2,nbInfs+1])
                else:
                    pIMMSBM = 1

                try:
                    pBLC *= 1-probsBLC[c][c2]
                except Exception:
                    # Best effort: pairs without a usable probsBLC entry are skipped.
                    pass

            p = min([np.sum(tabPtemp)+probsBL[c], 1])
            pIC = min([np.sum(tabPICtemp)+probsBL[c], 1])
            pBLC = 1. - pBLC
            pIMMSBM = 1 - pIMMSBM
            pBL = probsBL[c]
            pRand = random.random()

            tabInfs.append(c)
            tabTrue.append(s)
            tabP.append(p)
            tabPIC.append(pIC)
            tabPBL.append(pBL)
            tabPBLC.append(pBLC)
            tabPIMMSBM.append(pIMMSBM)
            tabPRand.append(pRand)

    tabP, tabPIC, tabPBL, tabPBLC, tabPIMMSBM, tabPRand, tabTrue, tabInfs = np.array(tabP), np.array(tabPIC), np.array(tabPBL), np.array(tabPBLC), np.array(tabPIMMSBM), np.array(tabPRand), np.array(tabTrue), np.array(tabInfs)
    return tabTrue, tabInfs, tabP, tabPIC, tabPBL, tabPBLC, tabPIMMSBM, tabPRand
def getTabs(training, test, beta, betaIC, betaHawkes, betaIMMSBM, betaCoC, nbDistSample, lgStep, cntFreq):
    """Build per-cell prediction arrays from the test interaction matrix.

    Cells come from getMatInter(test, reduit=False); only cells with
    0 < dt <= 20 are kept. Each kept cell yields the observed frequency
    (count at index 1 over the cell total, with 1e-20 added to the
    denominator), the cell total, and one prediction per model. A model whose
    parameters are None predicts 1, as does probsBLC when its lookup raises.
    Keys of the matrix missing from probsBL are added to it with value 0.

    lgStep and cntFreq are accepted but not used here.

    Returns:
        tabTrue (frequencies), tabInfs (cell totals), tabP, tabPIC, tabPBL,
        tabPBLC, tabPIMMSBM, tabPCoC, tabPHawkes, tabPRand, all as numpy arrays.
    """
    baseProbs = getCntFreq(training)
    pairProbs = getProbsBLC(training)
    nbInfs = len(beta)
    interMat = getMatInter(test, reduit=False)

    colP, colIC, colBL, colRand, colBLC = [], [], [], [], []
    colIMMSBM, colCoC, colHawkes, colTrue, colWeight = [], [], [], [], []

    for c in interMat:
        if c not in baseProbs:
            baseProbs[c] = 0
        for c2 in interMat[c]:
            for dt in interMat[c][c2]:
                if dt <= 0 or dt > 20:
                    continue
                cell = interMat[c][c2][dt]

                pMain = H(dt, 0, beta[c, c2], nbDistSample)

                if betaIC is None:
                    pIC = 1
                elif c == c2:
                    pIC = H(dt, 0, betaIC[c, c2], nbDistSample)
                else:
                    pIC = baseProbs[c]

                if betaHawkes is None:
                    pHawkes = 1
                else:
                    pHawkes = np.exp(-betaHawkes[c,c2,0] - betaHawkes[c,c2,1]*dt)

                pIMMSBM = 1 if betaIMMSBM is None else betaIMMSBM[c,c2,nbInfs+1]
                pCoC = 1 if betaCoC is None else betaCoC[c,c2,int(dt)]

                # Any failure of the pair lookup falls back to 1.
                try:
                    pBLC = pairProbs[c][c2]
                except:
                    pBLC = 1

                pBL = baseProbs[c]
                pRand = random.random()
                observed = cell[1]/(sum(cell)+1e-20)

                colTrue.append(observed)
                colWeight.append(sum(cell))
                colP.append(pMain)
                colIC.append(pIC)
                colHawkes.append(pHawkes)
                colBL.append(pBL)
                colBLC.append(pBLC)
                colIMMSBM.append(pIMMSBM)
                colCoC.append(pCoC)
                colRand.append(pRand)

    orderedColumns = (colTrue, colWeight, colP, colIC, colBL, colBLC,
                      colIMMSBM, colCoC, colHawkes, colRand)
    return tuple(np.array(column) for column in orderedColumns)