Example #1
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
import dS  # project-specific helper providing counts() and expSmooth(); import name assumed from usage


def agreementRates(clf, valLabels, uLabels, plot=False):
    '''
    Takes a classifier dictionary generated by clfsEval, calculates the
    agreement among the classifiers for each validation sample, and builds a
    list of f1_scores per agreement level so that the agreement threshold
    that maximizes performance can be identified.
    '''
    
    clfsLabels = np.array(clf['pred_val'])   # rows: classifiers, columns: samples
    valLabels = np.asarray(valLabels)        # ensure fancy indexing works below
    
    agmnt = []        # agreement rate per sample
    mvLabels = []     # majority-vote label per sample
    
    # Calculate the agreement level and the majority-vote label for each sample
    for i in range(clfsLabels.shape[1]):
        temp = clfsLabels[:, i]                  # votes of every classifier for sample i
        mx = len(temp)                           # number of classifiers voting
        cts = dS.counts(temp)                    # vote counts per unique label
        oCts = sorted(cts['counts'])
        agmnt.append(dS.expSmooth(oCts, 0.95) / mx)

        tmax = max(cts['counts'])
        tind = np.argwhere(np.array(cts['counts']) == tmax)

        # Check whether there is a tie in the vote counts;
        # if there is, break the tie at random
        if len(tind) == 1:
            tind = np.argmax(cts['counts'])
        else:
            tind = np.random.permutation(tind.flatten())[0]
        mvLabels.append(cts['vals'][tind])
            
        
    agmnt = np.array(agmnt)
    sAgmnt = np.unique(agmnt)          # unique agreement levels (sorted)
    clf['agmntLevels'] = sAgmnt
    
    # Compute the f1_score between the ground truth and each classifier's
    # predictions, keeping only the samples whose agreement rate reaches
    # each agreement level
    f1_scores = []
    for i in sAgmnt:
        inds2 = np.argwhere(agmnt >= i).flatten()
        temp = []
        for cnum in range(len(clfsLabels)):
            try:
                temp.append(f1_score(valLabels[inds2], clfsLabels[cnum][inds2], labels=uLabels))
            except Exception:
                print('problem computing f1_score in agreementRates')
        f1_scores.append(temp)
    if plot:
        plt.plot(sAgmnt, f1_scores)
        plt.show()
    clf['f1_score_val_predval_agmnt'] = f1_scores
    
    return mvLabels, agmnt
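
A minimal usage sketch for agreementRates, assuming a clf dictionary produced by clfsEval whose 'pred_val' entry holds each classifier's validation-set predictions, and assuming the project helper module dS is importable; the labels and predictions below are made up purely for illustration.

import numpy as np

# Hypothetical inputs: three classifiers' predictions on five validation samples
valLabels = np.array([0, 1, 1, 0, 1])       # assumed ground-truth labels
uLabels = list(np.unique(valLabels))
clf = {'pred_val': [np.array([0, 1, 1, 0, 0]),
                    np.array([0, 1, 0, 0, 1]),
                    np.array([0, 1, 1, 0, 1])]}

mvLabels, agmnt = agreementRates(clf, valLabels, uLabels, plot=False)
print(mvLabels)                             # majority-vote label per sample
print(agmnt)                                # agreement rate per sample
print(clf['f1_score_val_predval_agmnt'])    # f1_scores per agreement level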
Example #2
import numpy as np
import dS  # project-specific helper providing counts() and expSmooth(); import name assumed from usage


def majorityVote(clfs, testingData, ignore=[]):
    '''
    Takes a set of previously built classifiers and calculates the majority
    vote for a given testing data set, together with the per-sample agreement
    rate and the raw vote matrix.
    ignore is a list of the indexes of the classifiers to be ignored.
    '''
    votes = []        # predictions of each (non-ignored) classifier
    mV = []           # majority-vote label per sample
    agmnt = []        # agreement rate per sample
    
    # Collect the predictions of every classifier that is not ignored
    for i in range(len(clfs)):
        if i not in ignore:
            votes.append(clfs[i]['classifier'][0].predict(testingData))
    votes = np.array(votes)

    # Calculate the agreement level and the majority-vote label for each sample
    for i in range(votes.shape[1]):
        temp = votes[:, i]                   # votes of every classifier for sample i
        mx = len(temp)                       # number of classifiers voting
        cts = dS.counts(temp)                # vote counts per unique label
        oCts = sorted(cts['counts'])
        agmnt.append(dS.expSmooth(oCts, 0.95) / mx)
        
        tmax = max(cts['counts'])
        tind = np.argwhere(np.array(cts['counts']) == tmax)

        # Check whether there is a tie in the vote counts;
        # if there is, break the tie at random
        if len(tind) == 1:
            tind = np.argmax(cts['counts'])
        else:
            tind = np.random.permutation(tind.flatten())[0]
        mV.append(cts['vals'][tind])

    return mV, agmnt, votes
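
A minimal usage sketch for majorityVote, assuming each entry of clfs is a dictionary whose 'classifier' key holds a fitted scikit-learn estimator inside a list (as the predict call above implies), and that dS is importable; the classifiers and data below are hypothetical.

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# Hypothetical training data and two fitted classifiers wrapped in the
# dictionary structure that majorityVote expects
X = np.random.rand(100, 4)
y = (X[:, 0] > 0.5).astype(int)
clfs = [{'classifier': [LogisticRegression().fit(X, y)]},
        {'classifier': [DecisionTreeClassifier(max_depth=3).fit(X, y)]}]

Xtest = np.random.rand(10, 4)
mV, agmnt, votes = majorityVote(clfs, Xtest)
print(mV)       # majority-vote label per test sample
print(agmnt)    # agreement rate per test sample
print(votes)    # raw vote matrix: classifiers x samples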