Exemplo n.º 1
0
def plotImportanceInteraction(fileName, typeInfs, nbData):
    """Plot a histogram of the relative change brought by interactions.

    For each of the 5 folds, the data returned by ``getData`` is summarised
    with ``getMatInter(training, 1)``. For every ``(c, c2, dt)`` entry of that
    summary the ratio ``(H(c, c2) - H(c, c)) / H(c, c)`` at ``dt`` is recorded
    once per observation counted in the entry (``sum(mat[c][c2][dt])``).
    The values pooled over all folds are drawn as a histogram with a
    logarithmic y axis and written as PNG and PDF under ``Misc/<fileName>/``.

    Args:
        fileName: dataset name; forwarded to ``getData`` and used in the
            output path.
        typeInfs: forwarded to ``getData`` and used in the output file name.
        nbData: forwarded to ``getData`` and used in the output file name.

    Returns:
        None. The figure is saved to disk and closed.
    """
    pooledRatios = []

    for fold in range(5):
        print(fileName, typeInfs, nbData, fold)
        training, test, usrToInt, beta, betaIC, betaTrue = getData(fileName, typeInfs, nbData, fold)
        interMat = getMatInter(training, 1)
        foldRatios = []
        for c in interMat:
            for c2 in interMat[c]:
                nbDistSample = len(beta[c][c2])

                for dt in interMat[c][c2]:
                    # Same evaluation with the (c, c) parameters serves as
                    # the "no interaction" reference for the pair (c, c2).
                    qteSsInter = H(dt, 0, beta[c, c], nbDistSample=nbDistSample)
                    qte = H(dt, 0, beta[c, c2], nbDistSample=nbDistSample)

                    # One copy of the ratio per observation; the generator
                    # keeps the division lazy, so nothing is computed when
                    # the entry holds no observation.
                    nbObs = sum(interMat[c][c2][dt])
                    foldRatios.extend((qte-qteSsInter)/qteSsInter for _ in range(nbObs))

        pooledRatios += foldRatios

    plt.hist(pooledRatios)
    plt.semilogy()
    plt.xlabel(r"$\frac{P_{ij}(t) - P_{ii}(t)}{P_{ii}(t)}$", fontsize=18)
    plt.ylabel("Density", fontsize=18)
    plt.tight_layout()
    outStem = "Misc/"+fileName+"/"+"ImportanceInteraction_"+fileName+"_"+typeInfs+"_"+str(nbData)
    plt.savefig(outStem+".png")
    plt.savefig(outStem+".pdf", dpi=600)
    plt.close()
Exemplo n.º 2
0
def getTabsCalib(training, beta, betaIC, betaHawkes, betaIMMSBM, betaCoC, nbDistSample, cntSeq, cntFreq):
    """Build aligned per-entry arrays of model outputs and observed frequencies.

    The training data is summarised with ``getMatInter(training, reduit=False)``
    and one row is produced per ``(c, c2, tdiff)`` entry of that summary.

    Args:
        training: data passed to ``getProbsBL``, ``getProbsBLC`` and
            ``getMatInter``.
        beta: parameters indexed as ``beta[c, c2]`` and passed to ``H``.
        betaIC: like ``beta``; may be None, in which case the IC column is -1.
        betaHawkes: indexed as ``betaHawkes[c, c2, 0/1]``; may be None, in
            which case the Hawkes column is -1.
        betaIMMSBM: indexed as ``betaIMMSBM[c, c2, c]``; may be None (column 0).
        betaCoC: indexed as ``betaCoC[c, c2, int(tdiff)]``; may be None
            (column 0).
        nbDistSample: forwarded to ``H``.
        cntSeq: not used by this function.
        cntFreq: not used by this function.

    Returns:
        A tuple of 11 numpy arrays, in this order: tabP, tabPIC, tabPHawkes,
        tabPBL, tabPBLC, tabPIMMSBM, tabPCoC, tabPRand, tabF (share of the
        entry's count stored at index 1), tabW (total count of the entry) and
        tabKeys (the ``(c, c2, tdiff)`` keys).
    """
    probsBL = getProbsBL(training)
    probsBLC = getProbsBLC(training)
    mat = getMatInter(training, reduit=False)

    # Column order: P, PIC, PHawkes, PBL, PBLC, PIMMSBM, PCoC, PRand, F, W.
    cols = [[] for _ in range(10)]
    tabKeys = []

    for c in mat:
        for c2 in mat[c]:
            for tdiff in mat[c][c2]:
                counts = mat[c][c2][tdiff]
                s = sum(counts)
                f = counts[1] / s

                p = H(tdiff, 0, beta[c,c2], nbDistSample)

                if betaIC is None:
                    pIC = -1
                elif c == c2:
                    pIC = H(tdiff, 0, betaIC[c, c2], nbDistSample)
                else:
                    # NOTE(review): unguarded lookup, unlike pBLC below; the
                    # sibling functions use probsBL[c] here - confirm intent.
                    pIC = probsBLC[c][c2]

                pHawkes = np.exp(-betaHawkes[c,c2,0] - betaHawkes[c,c2,1]*tdiff) if betaHawkes is not None else -1

                pBL = probsBL[c]
                try:
                    pBLC = probsBLC[c][c2]
                except:
                    # Any failure of the lookup falls back to 0.
                    pBLC = 0

                pIMMSBM = betaIMMSBM[c,c2,c] if betaIMMSBM is not None else 0
                pCoC = betaCoC[c,c2,int(tdiff)] if betaCoC is not None else 0
                pRand = random.random()

                row = (p, pIC, pHawkes, pBL, pBLC, pIMMSBM, pCoC, pRand, f, s)
                for col, val in zip(cols, row):
                    col.append(val)
                tabKeys.append((c,c2,tdiff))

    return (*[np.array(col) for col in cols], np.array(tabKeys))
Exemplo n.º 3
0
def likelihoodFromMatrix(obs, usri, alphai, lgStep, N, nbDistSample=1):
    """Accumulate a Bernoulli-style log-likelihood over a count summary.

    The summary comes from ``getMatInter(obs, lgStep, usri, reduit=False)``
    and is walked as ``mat[c2][dt]``. Each entry contributes
    ``log(H) * entry[1] + log(1 - H) * entry[0]`` where
    ``H = H(dt, 0, alphai[c2], nbDistSample)``.

    Args:
        obs: observations forwarded to ``getMatInter``.
        usri: forwarded to ``getMatInter``.
        alphai: parameters indexed by ``c2`` and passed to ``H``.
        lgStep: forwarded to ``getMatInter``.
        N: not used by this function.
        nbDistSample: forwarded to ``H``.

    Returns:
        The accumulated sum; 0 when the summary is empty. The logarithm is
        taken with ``cp.log``, so the result type is whatever that returns.
    """
    countsByInfo = getMatInter(obs, lgStep, usri, reduit=False)
    logLik = 0
    for c2 in countsByInfo:
        for dt in countsByInfo[c2]:
            entry = countsByInfo[c2][dt]

            # entry[1] weights log(H); entry[0] weights log(1 - H).
            termIdx1 = cp.log(H(dt, 0, alphai[c2], nbDistSample)) * entry[1]
            termIdx0 = cp.log(1. - H(dt, 0, alphai[c2], nbDistSample)) * entry[0]

            logLik += termIdx1
            logLik += termIdx0

    return logLik
Exemplo n.º 4
0
def plotDistanceInteraction(fileName, typeInfs, nbData):
    """Draw a heatmap of ``H(c, c2, dt) - P0[c]`` averaged over 5 folds.

    For every fold and every pair ``(c, c2)`` of ``beta``, one row is built
    with ``H(dt, 0, beta[c, c2]) - P0[c]`` for each ``dt``, where ``P0`` is
    ``getCntFreq(training)``. The ``dt`` range is ``1..10`` for the "PD"
    dataset and ``1..len(beta[c, c2]) - 1`` otherwise. The per-fold matrices
    are averaged, shown with a diverging colour map whose symmetric range is
    capped at 0.2, and saved as PNG and PDF under ``Misc/<fileName>/``.

    Args:
        fileName: dataset name; forwarded to ``getData``, selects the "PD"
            special cases, and is used in the output path.
        typeInfs: forwarded to ``getData`` and used in the output file name.
        nbData: forwarded to ``getData`` and used in the output file name.

    Returns:
        None. The figure is saved to disk and closed.
    """
    foldMatrices = []
    nameKeys = []
    nbDistSample = 0
    isPD = fileName == "PD"

    for fold in range(5):
        print(fileName, typeInfs, nbData, fold)
        training, test, usrToInt, beta, betaIC, betaTrue = getData(fileName, typeInfs, nbData, fold)
        print(usrToInt)
        lgStep = 1.01 if isPD else 1
        # The result is not used below; the call is kept as in the original.
        getMatInter(training, lgStep=lgStep)
        P0 = getCntFreq(training)
        nbDistSample = len(beta[0,0])

        foldRows = []
        nameKeys = []  # rebuilt each fold; the last fold's labels are plotted
        for c in range(len(beta)):
            for c2 in range(len(beta[c])):
                dtRange = range(1, 11) if isPD else range(1, len(beta[c,c2]))
                foldRows.append([H(dt, 0, beta[c,c2], nbDistSample=nbDistSample) - P0[c] for dt in dtRange])
                nameKeys.append(str(c)+"-"+str(c2))

        foldMatrices.append(foldRows)

    xtickslabels = list(range(1, nbDistSample+1))
    meanMatrix = np.array(foldMatrices).mean(axis=0)

    # Symmetric colour range: largest absolute value, but never above 0.2.
    maxAbsAmp = np.min([np.max([np.max(meanMatrix), -np.min(meanMatrix)]), 0.2])
    ax = sns.heatmap(meanMatrix, xticklabels=xtickslabels, yticklabels=nameKeys, cmap="RdBu_r", linewidths=.5, vmin=-maxAbsAmp, vmax=maxAbsAmp, cbar_kws={'label': r"$P_{ij}(\Delta t) - P_0$"})

    # Outer frame, plus one horizontal separator every len(beta) rows
    # (beta here is the one loaded for the last fold).
    nbInfs = len(beta)
    ax.vlines([0, nbDistSample], *ax.get_ylim())
    ax.hlines([0, nbInfs**2], *ax.get_xlim())
    ax.hlines([i*nbInfs for i in range(nbInfs)], *ax.get_xlim())
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
    plt.xlabel(r"$\Delta$t", fontsize=18)
    plt.ylabel("Information pairs", fontsize=18)
    plt.tight_layout()
    outStem = "Misc/"+fileName+"/"+"DistanceInteraction_"+fileName+"_"+typeInfs+"_"+str(nbData)
    plt.savefig(outStem+".png")
    plt.savefig(outStem+".pdf", dpi=600)
    plt.close()
Exemplo n.º 5
0
def getGlobRes():
    """Collect the saved metrics of every run and plot one recap per dataset.

    The runs to process come from ``getFileTree``, walked as
    ``fileTree[fileName][typeInfs][fold]`` -> iterable of ``nbData``. For each
    run the metrics are (re)computed with ``GetResults.results`` when
    ``runAll`` is set, then loaded back with ``loadMetrics``, ``loadCalib``,
    ``loadMetricsDist`` and ``loadErrs``. Per ``(fileName, typeInfs)`` the
    metrics are averaged over folds (with standard deviations) and handed to
    ``plotRecap``.

    Changes with respect to the previous version:
      * the error-model names are now taken from index 3 of the name list
        (index 2 holds the distance-metric names, which used to be passed to
        ``plotRecap`` in their place);
      * folds are stacked metric by metric instead of through one global
        ``np.array`` call, so metric tables of different widths no longer
        have to fit in a single rectangular array;
      * only ``Exception`` subclasses trigger the ``loadErrs`` fallback;
      * a ``(fileName, typeInfs)`` pair without any fold is skipped.

    Returns:
        None. Figures are produced by ``plotRecap``; ``fileTree`` is printed
        at the end.
    """
    runAll=True
    listTypeInfsInteressants = [
                                ("Synth", "20", 20000),
                                ("Synth", "5", 20000),
                                ("Ads", "Ads2", 1e6),
                                ("PD", "All", 300000),
                                ("Twitter", "URL", 1e6),]

    fileTree = getFileTree(listTypeInfsInteressants=listTypeInfsInteressants)

    for fileName in fileTree:
        for typeInfs in fileTree[fileName]:
            tabAllArr, tabAllGen = [], []
            for fold in fileTree[fileName][typeInfs]:
                # tabNamesErr is initialised too, so a fold without any data
                # size no longer hits an unbound name below.
                tabNamesMet, tabNamesCal, tabNamesDist, tabNamesErr = [], [], [], []
                tabNs, tabF1s, tabAUCROCs, tabAUCPRs, tabL1s, tabPearsons, tabAccs, tabMSEs, tabJSs, tabBriers, tabCrossEntropys = [], [], [], [], [], [], [], [], [], [], []

                for nbData in fileTree[fileName][typeInfs][fold]:
                    nbDistSample = len(np.load("Output/"+fileName+"/"+fileName+"_"+typeInfs+"_"+str(nbData)+"_"+str(fold) + "_Fit_beta.npy")[-1][-1])
                    if runAll:
                        GetResults.results(fileName, typeInfs, nbData, fold, nbDistSample)

                    training, test, usrToInt, beta, betaIC, betaHawkes, betaCoC, betaTrue = GetResults.getData(fileName, typeInfs, nbData, fold)
                    nom = "Output/"+fileName+"/" + fileName+"_"+typeInfs+"_"+str(nbData)+"_"+str(fold)
                    print(nom)
                    N_inter = len(training)

                    # Manual debugging switch: set to True to display, for the
                    # pair (0, 3), the observed frequencies against the fitted
                    # curve before going on.
                    seeDistrib=False
                    if seeDistrib:
                        plt.close()
                        from LogS import H, HGen
                        lgStep = 1
                        if fileName=="PD":
                            lgStep=1.01
                        obs = training
                        dicTemp = getMatInter(obs, lgStep, reduit=False)
                        cntFreq = getCntFreq(training)
                        betaMMSBM = getBetaIMMSBM(nom)
                        training, test, usrToInt, beta, betaIC, betaHawkes, betaCoC, betaTrue = getData(fileName, typeInfs, nbData, fold)

                        print(usrToInt)
                        nbInfs = len(beta)
                        for c in dicTemp:
                            for c2 in dicTemp[c]:
                                if c!=0 or c2 !=3:
                                    continue
                                s=0
                                for dt in dicTemp[c][c2]:
                                    s+=sum(dicTemp[c][c2][dt])
                                    r = dicTemp[c][c2][dt][1] / (sum(dicTemp[c][c2][dt])+1e-20)
                                    plt.bar(dt, r, width=.5, color="orange")

                                print(c, c2, betaHawkes[c][c2], s)

                                a=np.linspace(1, max(dicTemp[c][c2]), 10000)
                                arrH = np.array([H(a_val, 0, beta[c, c2], nbDistSample=nbDistSample) for a_val in a])
                                arrHIC = np.array([H(a_val, 0, betaIC[c, c2], nbDistSample=nbDistSample) for a_val in a])
                                arrHHawkes = np.array([np.exp(-betaHawkes[c,c2,0] - betaHawkes[c,c2,1]*a_val) for a_val in a])
                                plt.plot(a, arrH, "b", label="IR-RBF")

                                try:
                                    arrHTrue = np.array([HGen(a_val, 0, betaTrue[c, c2], nbDistSample=nbDistSample) for a_val in a])
                                    plt.plot(a, arrHTrue+cntFreq[c], "c", label="True")
                                except Exception:
                                    # Best effort: the "True" curve is simply
                                    # left out when it cannot be computed.
                                    pass

                                plt.ylim([0,1])
                                plt.legend()
                                plt.xlabel("Time separation", fontsize=18)
                                plt.ylabel("Probability of contamination", fontsize=18)
                                plt.rcParams['pdf.fonttype'] = 42
                                plt.rcParams['font.family'] = 'Calibri'
                                plt.tight_layout()
                                plt.show()

                        pause()

                    tabModelesMet, tabF1, tabROCAUC, tabPRAUC, tabAcc = loadMetrics(nom)
                    tabModelesCal, tabL1, tabPearson = loadCalib(nom)
                    tabModelesDist, tabJS, tabBrierScore, tabCrossEntropy = loadMetricsDist(nom)
                    try:
                        tabModelesErr, tabMAE, tabMSE = loadErrs(nom)
                    except Exception:
                        # When the error table cannot be loaded, fall back to
                        # zeros aligned with the models of loadMetrics.
                        tabModelesErr = np.array(tabModelesMet)
                        tabMAE = np.array([0] * len(tabModelesMet))
                        tabMSE = np.array([0] * len(tabModelesMet))

                    tabNamesMet=tabModelesMet
                    tabNamesCal=tabModelesCal
                    tabNamesDist=tabModelesDist
                    tabNamesErr=tabModelesErr

                    tabF1s.append(tabF1)
                    tabL1s.append(tabL1)
                    tabAUCROCs.append(tabROCAUC)
                    tabAUCPRs.append(tabPRAUC)
                    tabPearsons.append(tabPearson)
                    tabAccs.append(tabAcc)
                    tabMSEs.append(tabMSE)
                    tabJSs.append(tabJS)
                    tabBriers.append(tabBrierScore)
                    tabCrossEntropys.append(tabCrossEntropy)
                    tabNs.append([N_inter] * len(tabF1))

                tabNs, tabF1s, tabL1s, tabAUCROCs, tabAUCPRs, tabPearsons, tabAccs, tabMSEs, tabNamesMet, tabNamesCal, tabNamesErr = toNp(([tabNs, tabF1s, tabL1s, tabAUCROCs, tabAUCPRs, tabPearsons, tabAccs, tabMSEs, tabNamesMet, tabNamesCal, tabNamesErr]))
                tabJSs, tabBriers, tabCrossEntropys = toNp([tabJSs, tabBriers, tabCrossEntropys])

                # The positions in these two lists are relied upon by the
                # aggregation below and by plotRecap.
                tabArrs = [tabF1s, tabL1s, tabAUCROCs, tabAUCPRs, tabPearsons, tabAccs, tabMSEs, tabJSs, tabBriers, tabCrossEntropys, tabNs]
                tabGen = [tabNamesMet, tabNamesCal, tabNamesDist, tabNamesErr]
                tabAllArr.append(tabArrs)
                tabAllGen.append(tabGen)

            if not tabAllArr:
                # No fold for this (fileName, typeInfs): nothing to aggregate.
                continue

            # Names of the last fold; index 3 (not 2) holds the error names.
            tabNamesMet, tabNamesCal, tabNamesDist, tabNamesErr = tabAllGen[-1]

            # Average / standard deviation over folds, one metric at a time,
            # keeping the metric order of the per-fold tabArrs lists.
            tabArrs, tabStds = [], []
            for idxMetric in range(len(tabAllArr[-1])):
                perFold = np.array([foldArrs[idxMetric] for foldArrs in tabAllArr])
                tabArrs.append(np.average(perFold, axis=0))
                tabStds.append(np.std(perFold, axis=0))

            tabGen = [tabNamesMet, tabNamesCal, tabNamesDist, tabNamesErr]

            plotRecap(tabGen, tabArrs, fileName, typeInfs, fold="", tabStds=tabStds)

    print(fileTree)
Exemplo n.º 6
0
def getMetricsDist(training, test, beta, betaIC, betaHawkes, betaIMMSBM, betaCoC, nbDistSample, lgStep, cntFreq, nom, save=False):
    """Score eight predictors against test frequencies and save the results.

    The test data is summarised with ``getMatInter(test, reduit=False)``.
    Every ``(c, c2, dt)`` entry with ``dt != 1`` yields the observed frequency
    ``entry[1] / sum(entry)`` and one value per predictor, always in this
    order: fitted model (``H`` with ``beta``), IC, Hawkes, BL, BLC, IMMSBM,
    CoC, random. Three tables, each in that order, are computed and passed to
    ``saveMetricsDist(nom, tabJS, tabBrierScore, tabCrossEntropy)``:

      * tabJS: mean over entries of a Jensen-Shannon-style term between the
        frequency and the predicted value;
      * tabBrierScore: sum (not mean) of squared differences;
      * tabCrossEntropy: count-weighted sum of ``log2`` terms divided by the
        number of entries (no sign flip is applied).

    Args:
        training: data passed to ``getProbsBL`` and ``getProbsBLC``.
        test: data passed to ``getMatInter``.
        beta: indexed as ``beta[c][c2]`` and passed to ``H``.
        betaIC, betaHawkes, betaIMMSBM, betaCoC: parameters of the other
            predictors; any failure while reading them yields 0 for the entry.
        nbDistSample: forwarded to ``H``.
        lgStep: not used by this function.
        cntFreq: not used by this function.
        nom: forwarded to ``saveMetricsDist``.
        save: not used by this function; results are always saved.

    Returns:
        None.
    """
    nbInfs = len(beta)
    distTest = getMatInter(test, reduit=False)
    probsBL = getProbsBL(training)
    probsBLC = getProbsBLC(training)

    # Predictor order: 0 fitted, 1 IC, 2 Hawkes, 3 BL, 4 BLC, 5 IMMSBM,
    # 6 CoC, 7 random.
    nbModels = 8
    # Only the first three predictors are clipped to [0, 1] before the log2
    # terms; the tables used for JS / squared error keep the raw values.
    nbClipped = 3

    tabFreqs = []
    tabProbsAll = [[] for _ in range(nbModels)]
    logScores = [0] * nbModels
    div = 0

    for c in distTest:
        for c2 in distTest[c]:
            for dt in distTest[c][c2]:
                if dt==1:
                    continue
                cell = distTest[c][c2][dt]

                pFit = H(dt, 0, beta[c][c2], nbDistSample=nbDistSample)
                try:
                    pIC = H(dt, 0, betaIC[c][c2], nbDistSample=nbDistSample) if c==c2 else probsBL[c]
                except:
                    pIC = 0
                try:
                    pHawkes = np.exp(-betaHawkes[c,c2,0] - betaHawkes[c,c2,1]*dt)
                except:
                    pHawkes = 0
                pBL = probsBL[c]
                try:
                    pBLC = probsBLC[c][c2]
                except:
                    pBLC = 0
                try:
                    pIMMSBM = betaIMMSBM[c,c2,nbInfs+1]
                except:
                    pIMMSBM = 0
                try:
                    pCoC = betaCoC[c,c2,int(dt)]
                except:
                    pCoC = 0
                pRand = random.random()
                ptest = cell[1] / (sum(cell))

                probs = [pFit, pIC, pHawkes, pBL, pBLC, pIMMSBM, pCoC, pRand]
                tabFreqs.append(ptest)
                for tab, prob in zip(tabProbsAll, probs):
                    tab.append(prob)

                div += 1
                for k, prob in enumerate(probs):
                    if k < nbClipped:
                        if prob > 1:
                            prob = 1
                        elif prob < 0:
                            prob = 0
                    # 1e-10 keeps log2 finite at exactly 0 or 1.
                    logScores[k] += cell[1] * np.log2(1e-10+ prob)
                    logScores[k] += cell[0] * np.log2(1e-10+ 1. - prob)

    if div==0:
        div=1e-10
    tabCrossEntropy = [score/div for score in logScores]

    tabFreqs, *tabProbsAll = toNp([tabFreqs] + tabProbsAll)

    def _meanJS(tabProbs):
        # Jensen-Shannon-style term against the mixture M; the 1e-20 offsets
        # avoid division by zero and log2(0).
        M = (tabFreqs + tabProbs) / 2 +1e-20
        return np.mean(0.5 * tabFreqs*np.log2(tabFreqs/M + 1e-20) + 0.5 * tabProbs*np.log2(tabProbs/M + 1e-20))

    tabJS = [_meanJS(tabProbs) for tabProbs in tabProbsAll]

    # RSS
    tabBrierScore = [np.sum((tabFreqs-tabProbs)**2) for tabProbs in tabProbsAll]

    saveMetricsDist(nom, tabJS, tabBrierScore, tabCrossEntropy)
Exemplo n.º 7
0
def getTabsClassic(training, test, beta, betaIC, betaIMMSBM, nbDistSample, lgStep, cntFreq):
    """Build per-event prediction arrays for the test sequences.

    ``test`` is walked as ``test[u]`` -> iterable of ``(c, t, s)`` triples.
    Every triple with ``t >= 10`` produces one row. The other triples
    ``(c2, t2, s2)`` of the same ``u`` contribute when
    ``0 < t - t2 + lgStep <= 20``.

    Per row:
      * ``p``: ``probsBL[c]`` plus the sum over contributing triples of
        ``max(H(tdiff, 0, beta[c, c2]) - probsBL[c], 0)``, capped at 1;
      * ``pIC``: same with ``betaIC`` (just ``probsBL[c]`` capped at 1 when
        ``betaIC`` is None);
      * ``pBLC``: one minus the product of ``1 - probsBLC[c][c2]``, where a
        failing lookup leaves the product unchanged;
      * ``pIMMSBM``: one minus the product of
        ``1 - b[nbInfs] / (b[nbInfs] + b[nbInfs + 1])`` with
        ``b = betaIMMSBM[c, c2]``;
      * ``pBL``: ``probsBL[c]``; ``pRand``: a uniform random number.

    The previous version also accumulated survival-style products for ``p``
    and ``pIC`` that were overwritten before use, and evaluated ``H`` twice
    for each pair; both are removed here. The returned values are unchanged
    as long as ``H`` is deterministic.

    Args:
        training: data passed to ``getCntFreq`` and ``getProbsBLC``.
        test: mapping from ``u`` to an iterable of ``(c, t, s)`` triples.
        beta: indexed as ``beta[c, c2]`` and passed to ``H``.
        betaIC: like ``beta``; may be None.
        betaIMMSBM: indexed as ``betaIMMSBM[c, c2, k]``; may be None.
        nbDistSample: forwarded to ``H``.
        lgStep: offset added to every time difference.
        cntFreq: not used by this function.

    Returns:
        Tuple of numpy arrays ``(tabTrue, tabInfs, tabP, tabPIC, tabPBL,
        tabPBLC, tabPIMMSBM, tabPRand)`` with one element per kept triple;
        ``tabTrue`` holds the ``s`` values and ``tabInfs`` the ``c`` values.
    """
    probsBL = getCntFreq(training)
    probsBLC = getProbsBLC(training)
    nbInfs = len(beta)

    tabP, tabPIC, tabPBL, tabPRand, tabPBLC, tabPIMMSBM, tabTrue, tabInfs = [], [], [], [], [], [], [], []
    for u in test:
        for (c, t, s) in test[u]:
            if t<10:
                continue

            pIMMSBM, pBLC = 1., 1.
            tabPtemp, tabPICtemp = [], []

            for (c2, t2, s2) in test[u]:
                tdiff = t - t2 + lgStep
                if tdiff <= 0 or tdiff>20:
                    continue

                # Only the part of H above the base rate of c is kept.
                hVal = H(tdiff, 0, beta[c, c2], nbDistSample)
                tabPtemp.append(max([hVal-probsBL[c], 0]))

                if betaIC is not None:
                    hValIC = H(tdiff, 0, betaIC[c, c2], nbDistSample)
                    tabPICtemp.append(max([hValIC-probsBL[c], 0]))

                if betaIMMSBM is not None:
                    pIMMSBM *= 1. - betaIMMSBM[c,c2,nbInfs]/(betaIMMSBM[c,c2,nbInfs]+betaIMMSBM[c,c2,nbInfs+1])  # Survival probability
                else:
                    pIMMSBM = 1

                try:
                    pBLC *= 1-probsBLC[c][c2]
                except Exception:
                    # Best effort: a pair that cannot be looked up does not
                    # change the product.
                    pass

            p = min([np.sum(tabPtemp)+probsBL[c], 1])
            pIC = min([np.sum(tabPICtemp)+probsBL[c], 1])
            pBLC = 1. - pBLC
            pIMMSBM = 1 - pIMMSBM
            pBL = probsBL[c]
            pRand = random.random()

            tabInfs.append(c)
            tabTrue.append(s)
            tabP.append(p)
            tabPIC.append(pIC)
            tabPBL.append(pBL)
            tabPBLC.append(pBLC)
            tabPIMMSBM.append(pIMMSBM)
            tabPRand.append(pRand)

    tabP, tabPIC, tabPBL, tabPBLC, tabPIMMSBM, tabPRand, tabTrue, tabInfs = np.array(tabP), np.array(tabPIC), np.array(tabPBL), np.array(tabPBLC), np.array(tabPIMMSBM), np.array(tabPRand), np.array(tabTrue), np.array(tabInfs)

    return tabTrue, tabInfs, tabP, tabPIC, tabPBL, tabPBLC, tabPIMMSBM, tabPRand
Exemplo n.º 8
0
def getTabs(training, test, beta, betaIC, betaHawkes, betaIMMSBM, betaCoC, nbDistSample, lgStep, cntFreq):
    """Build aligned arrays of test frequencies and predictor outputs.

    The test data is summarised with ``getMatInter(test, reduit=False)`` and
    one row is produced for every ``(c, c2, dt)`` entry with ``0 < dt <= 20``.
    Any ``c`` missing from ``getCntFreq(training)`` gets the value 0 there.

    Args:
        training: data passed to ``getCntFreq`` and ``getProbsBLC``.
        test: data passed to ``getMatInter``.
        beta: indexed as ``beta[c, c2]`` and passed to ``H``.
        betaIC: like ``beta``, used only when ``c == c2`` (``probsBL[c]``
            otherwise); None gives 1.
        betaHawkes: indexed as ``betaHawkes[c, c2, 0/1]``; None gives 1.
        betaIMMSBM: indexed as ``betaIMMSBM[c, c2, nbInfs + 1]``; None gives 1.
        betaCoC: indexed as ``betaCoC[c, c2, int(dt)]``; None gives 1.
        nbDistSample: forwarded to ``H``.
        lgStep: not used by this function.
        cntFreq: not used by this function.

    Returns:
        Tuple of numpy arrays ``(tabTrue, tabInfs, tabP, tabPIC, tabPBL,
        tabPBLC, tabPIMMSBM, tabPCoC, tabPHawkes, tabPRand)``. ``tabTrue`` is
        the share of the entry's count stored at index 1 and ``tabInfs`` the
        total count of the entry.
    """
    probsBL = getCntFreq(training)
    probsBLC = getProbsBLC(training)
    nbInfs = len(beta)

    mat = getMatInter(test, reduit=False)

    # One tuple per kept entry, already in the order of the returned arrays.
    rows = []
    for c in mat:
        if c not in probsBL:
            probsBL[c]=0
        for c2 in mat[c]:
            for dt in mat[c][c2]:
                if dt <= 0 or dt>20:
                    continue
                cell = mat[c][c2][dt]

                p = H(dt, 0, beta[c, c2], nbDistSample)

                if betaIC is None:
                    pIC = 1
                elif c==c2:
                    pIC = H(dt, 0, betaIC[c, c2], nbDistSample)
                else:
                    pIC = probsBL[c]

                pHawkes = np.exp(-betaHawkes[c,c2,0] - betaHawkes[c,c2,1]*dt) if betaHawkes is not None else 1
                pIMMSBM = betaIMMSBM[c,c2,nbInfs+1] if betaIMMSBM is not None else 1  # Survival probability
                pCoC = betaCoC[c,c2,int(dt)] if betaCoC is not None else 1  # Survival probability

                try:
                    pBLC = probsBLC[c][c2]
                except:
                    # Any failure of the lookup falls back to 1.
                    pBLC = 1

                pBL = probsBL[c]
                pRand = random.random()
                nbObs = sum(cell)
                # 1e-20 guards against an entry whose counts sum to zero.
                ptest = cell[1]/(nbObs+1e-20)

                rows.append((ptest, nbObs, p, pIC, pBL, pBLC, pIMMSBM, pCoC, pHawkes, pRand))

    nbCols = 10
    return tuple(np.array([row[k] for row in rows]) for k in range(nbCols))