Пример #1
0
def evaluateSpecificDataSet(eng):
    """
        Evaluate the "selected" data set with a fixed ecc.CODER_RS
        configuration.

        A file named "keys" is opened (mode 'w+') inside the data folder
        and its handle is handed to evaluateDataSet as hKeyOutput.

        Parameters:
        ----------
            eng:
                    forwarded unchanged to evaluateDataSet

        Returns:
            the (srMean, srStd, dfDetailed) triple produced by
            evaluateDataSet
    """
    # window settings
    dRectWnd, dSMWnd, dSCWnd = 2.0, 2.0, 0.15

    # coder settings
    strCoder = ecc.CODER_RS
    m = 4
    n = 2**m - 1
    k = 1
    r = (n - k) / 2
    nInterleaving = 25
    print("%s: n=%d, k=%d, m=%d, r=%d, interleave=%d" % (
        strCoder, n, k, m, r, nInterleaving))

    # input files and key output file
    strDataDir = "../../data/evaluation/selected_set/"
    lsDataFiles = cf.getFileList(strDataDir, None)
    strKeyFilePath = strDataDir + "keys"

    with open(strKeyFilePath, 'w+') as hKeyFile:
        srAvg, srDev, dfRecords = evaluateDataSet(
            'selected', strDataDir, lsDataFiles,
            dRectWnd, dSMWnd, dSCWnd,
            eng, strCoder, n, k, m, r, nInterleaving,
            bOutputData=False,
            bReconciliation=False,
            hKeyOutput=hKeyFile)

    return srAvg, srDev, dfRecords
Пример #2
0
def evaluateMutualInformation():
    """
        Collect the output data of the mutual-information data set
        (input for the scatter plot & mutual information analysis).

        Returns:
            dfData:
                    everything in lsOutputData concatenated along axis 0
            lsOutputData:
                    the list that was handed to evaluateDataSet as
                    lsOutputData (presumably filled by it -- confirm)
    """
    strDataDir = "../../data/evaluation/mutual_info/"
    lsDataFiles = cf.getFileList(strDataDir, None)

    lsCollected = []
    dcOptions = dict(bSourceEncoding=True,
                     bReconciliation=False,
                     bOutputData=True,
                     lsOutputData=lsCollected)

    # the three returned statistics are not needed here, only the
    # collected output data
    _, _, _ = evaluateDataSet('selected', strDataDir, lsDataFiles,
                              **dcOptions)

    return pd.concat(lsCollected, axis=0), lsCollected
Пример #3
0
def evaluateGesture(eng):
    """
        Evaluate the performance of the different gestures.

        For each label in ('g1', 'g2', 'g3') the matching files of the
        gesture data folder are evaluated with a fixed ecc.CODER_GOLAY
        configuration; labels without any file are skipped. Records
        whose sd.BER_USER_SRC is >= 0.1 are printed.

        Parameters:
        ----------
            eng:
                    forwarded unchanged to evaluateDataSet

        Returns:
            the per-label srMean results concatenated along axis 1
    """
    # window settings
    dRectWnd, dSMWnd, dSCWnd = 2.0, 2.0, 0.1

    # coder settings
    strCoder = ecc.CODER_GOLAY
    n, k, m, r = 23, 12, 1, 3
    nInterleaving = 25
    print("%s: n=%d, k=%d, m=%d, r=%d, interleave=%d" % (
        strCoder, n, k, m, r, nInterleaving))

    strDataDir = "../../data/evaluation/gesture/"
    lsMeans = []
    for strGesture in ('g1', 'g2', 'g3'):
        lsGestureFiles = cf.getFileList(strDataDir, strGesture)
        if len(lsGestureFiles) == 0:
            continue

        srAvg, _, dfRecords = evaluateDataSet(
            strGesture, strDataDir, lsGestureFiles,
            dRectWnd, dSMWnd, dSCWnd,
            eng, strCoder, n, k, m, r, nInterleaving)
        lsMeans.append(srAvg)

        # report the records with a high bit error rate
        dfHighBER = dfRecords[dfRecords[sd.BER_USER_SRC] >= 0.1]
        if dfHighBER.shape[0] == 0:
            continue
        print("--records with high BER--")
        print(dfHighBER[[sd.FILE_NAME, sd.BER_USER_SRC, sd.BER_USER_EC]])
        print("----\n")

    return pd.concat(lsMeans, axis=1)
Пример #4
0
def evaluateDistance(eng):
    """
        Evaluate the effect of the distance between A and B.

        For each label in ('d1', 'd2', 'd3') the matching files of the
        distance data folder are evaluated with a fixed ecc.CODER_GOLAY
        configuration. Records whose sd.BER_USER_SRC is >= 0.1 are
        printed.

        Parameters:
        ----------
            eng:
                    forwarded unchanged to evaluateDataSet

        Returns:
            the per-label srMean results concatenated along axis 1
    """
    # window settings
    dRectWnd, dSMWnd, dSCWnd = 2.0, 2.0, 0.1

    # coder settings
    strCoder = ecc.CODER_GOLAY
    m, n, k = 1, 23, 12
    r = int(math.floor((n - k) / 2.0))
    nInterleaving = 25
    print("%s: n=%d, k=%d, m=%d, r=%d, interleave=%d" % (
        strCoder, n, k, m, r, nInterleaving))

    strDataDir = "../../data/evaluation/distance/"
    lsMeans = []
    for strDistance in ('d1', 'd2', 'd3'):
        lsDistanceFiles = cf.getFileList(strDataDir, strDistance)

        srAvg, _, dfRecords = evaluateDataSet(
            strDistance, strDataDir, lsDistanceFiles,
            dRectWnd, dSMWnd, dSCWnd,
            eng, strCoder, n, k, m, r, nInterleaving)
        lsMeans.append(srAvg)

        # report the records with a high bit error rate
        dfHighBER = dfRecords[dfRecords[sd.BER_USER_SRC] >= 0.1]
        if dfHighBER.shape[0] > 0:
            print("--records with high BER--")
            print(dfHighBER[[sd.FILE_NAME, sd.BER_USER_SRC,
                             sd.BER_USER_EC]])
            print("----\n")

    return pd.concat(lsMeans, axis=1)
Пример #5
0
def evaluateShapeCodingParams(eng):
    """
        Evaluate the effect of the shape coding window.

        Every file of the BER data folder is evaluated once per
        candidate window in np.arange(0.05, 0.3, 0.05), using a fixed
        ecc.CODER_GOLAY configuration.

        Parameters:
        ----------
            eng:
                    forwarded to sd.evaluateSingleData as `eng`

        Returns:
            dfMean:
                    mean of the results, grouped by the sd.WND_SC column
            dfResult:
                    pandas.DataFrame built from all individual results
    """
    # select data
    strWorkingDir = "../../data/evaluation/BER/"
    lsFilePath = cf.getFileList(strWorkingDir, None)

    # params
    lsSCWnd = np.arange(0.05, 0.3, 0.05)
    dRectWnd = 2.0
    dSMWnd = 2.0
    strCoder = ecc.CODER_GOLAY
    m = 1
    n = 23
    k = 12
    r = 2
    nInterleaving = 25
    print("%s: n=%d, k=%d, m=%d, r=%d, interleave=%d" % (
        strCoder, n, k, m, r, nInterleaving))

    # test
    lsResult = []
    for dCodingWnd in lsSCWnd:
        print("evalauting SCWnd=%.2f..." % dCodingWnd)
        for fn in lsFilePath:
            # The keyword used to be spelled `dSMDurction`; the other
            # callers of sd.evaluateSingleData in this file pass
            # `dSMDuration`.
            # NOTE(review): confirm against the signature of
            # sd.evaluateSingleData.
            lsDataResult = sd.evaluateSingleData(
                strWorkingDir, fn,
                dRectDuration=dRectWnd,
                dSMDuration=dSMWnd,
                dSCDuration=dCodingWnd,
                eng=eng, strCoder=strCoder,
                n=n, k=k, m=m, r=r,
                nInterleaving=nInterleaving)
            lsResult.extend(lsDataResult)

    dfResult = pd.DataFrame(lsResult)
    dfMean = dfResult.groupby(dfResult[sd.WND_SC]).mean()
    return dfMean, dfResult
Пример #6
0
def validateOnSH(strInPath, strOutPath, bSerialize=False):
    '''
        This function validates user diversity on the Shanghai data set.

        One personal model is selected per input file: the file is
        cross validated (10 as the last argument of crossValidate) and
        the entry with the highest score is kept.

        param:
                strInPath  - path for separate files of top users
                strOutPath - path to serialize model
                bSerialize - only when it is exactly True, the models
                             and the variable importance table are
                             written under strOutPath

        return:
                dcModels             - dict: IMSI -> (best score, best model)
                dfVariableImportance - DataFrame with one row per IMSI

        Note: we need another script to distribute records of top users
              into separate files, and this function will only read
              separate files from strInPath
    '''
    lsColumnNames = [
        'BEGIN_TIME', 'BEGIN_TIME_MSEL', 'MSISDN', 'IMSI', 'SERVER_IP',
        'SERVER_PORT', 'APN', 'PROT_CATEGORY', 'PROT_TYPE', 'LAC', 'SAC',
        'CI', 'IMEI', 'RAT', 'HOST', 'STREAMING_URL', 'STREAMING_FILESIZE',
        'STREAMING_DW_PACKETS', 'STREAMING_DOWNLOAD_DELAY', 'ASSOCIATED_ID',
        'L4_UL_THROUGHPUT', 'L4_DW_THROUGHPUT', 'use_less']

    # find xdr
    lsXDR = common_function.getFileList(strInPath, "out")

    dcVariableImportance = {}   # variable importance of each personal model
    dcModels = {}               # dict of personal models
    for xdr in lsXDR:
        # load data
        print("processing %s..." % xdr)
        dfData = pd.read_csv(xdr, sep='|', names=lsColumnNames)
        del dfData['use_less']
        # same columns the positional indices 17 and 16 used to select
        dfData['DOWNLOAD_RATIO'] = \
            dfData['STREAMING_DW_PACKETS'] * 1.0 / dfData['STREAMING_FILESIZE']

        # the file name without its directory and extension is the IMSI
        strIMSI = xdr.split('/')[-1].split('.')[0]

        # prepare data set
        mtX, arrY, lsTrainingFeatureNames = preprocessDataSet(
            dfData, g_lsSelectedColumns, g_lsNumericColumns,
            g_lsCategoricalColumns, 'DOWNLOAD_RATIO')

        # cross validation; the result maps model -> score
        dcPersonalModels = crossValidate(mtX, arrY, g_modelParams, 10)
        bestModel, fBestScore = max(dcPersonalModels.items(),
                                    key=operator.itemgetter(1))
        dcVariableImportance[strIMSI] = getVariableImportance(
            bestModel, lsTrainingFeatureNames)

        dcModels[strIMSI] = (fBestScore, bestModel)

        lsScores = list(dcPersonalModels.values())
        print("model:%s, #record=%d, best=%0.2f, mean=%.2f, std=%0.2f. \n)" %
              (strIMSI, len(arrY), fBestScore,
               np.mean(lsScores), np.std(lsScores)))

    dfVariableImportance = pd.DataFrame(dcVariableImportance).T

    # serialize models
    if bSerialize is True:
        # Write the accumulated personal models. The previous code wrote
        # dcPersonalModels, i.e. only the cross-validation result of the
        # last file, and failed with a NameError when no file was found.
        common_function.serialize2File(strOutPath + 'serDcModels.out',
                                       dcModels)
        dfVariableImportance.to_csv(
            strOutPath + 'dfVariableImportance_all.out')

    return dcModels, dfVariableImportance
Пример #7
0
def evaluateReconciliationParams(eng):
    """
        Evaluate the effect of reconciliation parameters, i.e., m and r,
        on system performance.

        The data folder, the coder, the window lengths and the candidate
        values of m and r are fixed inside the function. For
        ecc.CODER_RS, n = 2**m - 1 and k = n - 2*r.

        Parameters:
        ----------
            eng:
                    instance of matlab engine, forwarded to
                    sd.evaluateSingleData
        Returns:
            dfSummary:
                    mean of the results, grouped by the sd.R column
            dfResult:
                    a pandas.DataFrame consisting of all performance
                    results
            srMatchingRate:
                    a pandas.Series indexed by r holding the fraction of
                    results whose sd.ERR_USER_EC is 0 (NaN when there is
                    no result for that r)
    """
    # select data
    strWorkingDir = "../../data/evaluation/reconciliation/"
    lsFilePath = cf.getFileList(strWorkingDir, None)

    # parameter
    strCoder = ecc.CODER_RS
    lsM = [4]
    lsR = range(1, 8)
    dRectWnd = 2.0
    dSMWnd = 2.0
    dSCWnd = 0.15

    lsResult = []

    def _evaluateAllFiles(n, k, m, r):
        # run one coder configuration on every data file
        for fn in lsFilePath:
            lsResult.extend(sd.evaluateSingleData(
                strWorkingDir, fn,
                dRectDuration=dRectWnd, dSMDuration=dSMWnd,
                dSCDuration=dSCWnd,
                eng=eng, strCoder=strCoder, n=n, k=k, m=m, r=r))

    # evaluate
    if strCoder == ecc.CODER_RS:
        for m in lsM:
            n = 2**m - 1
            for r in lsR:
                k = n - 2*r
                # k only shrinks as r grows, so larger r cannot be valid
                if k < 1 or n*m >= 500:
                    break
                print("testing m=%d, r=%d..." % (m, r))
                _evaluateAllFiles(n, k, m, r)
    elif strCoder == ecc.CODER_GOLAY:
        _evaluateAllFiles(23, 12, 1, 2)

    # result
    dfResult = pd.DataFrame(lsResult)

    # NOTE(review): the original returned `dfSummary` without ever
    # defining it, so every call ended in a NameError. The grouped mean
    # per r mirrors what evaluateShapeCodingParams does per window --
    # confirm this is the intended summary.
    dfSummary = dfResult.groupby(dfResult[sd.R]).mean()

    dcMatchingRate = {}
    for r in lsR:
        srErrors = dfResult[sd.ERR_USER_EC][dfResult[sd.R] == r]
        nTotalKey = srErrors.count()
        if nTotalKey == 0:
            # this r was skipped or produced no result: no rate defined
            dcMatchingRate[r] = float('nan')
            continue
        nMatchedKey = srErrors[srErrors == 0].count()
        dcMatchingRate[r] = nMatchedKey * 1.0 / nTotalKey
    srMatchingRate = pd.Series(dcMatchingRate)

    return dfSummary, dfResult, srMatchingRate