Example #1
import datetime
import os

import numpy

# Project-local modules used below.
import analysis
import bbdata
import dataio
import markov_anneal
import ncluster


def makeModelCounts(
    splits, modelLocation, dataLocation, neighborhoodLocation=None, minBehavior=0, compress=2, splitLength=8
):
    """
    Makes a set of counts for a given dataset and models.  
    
    Neighborhood location specifies if the models and data need to be preclustered.
    
    Returns the datavector and the associated split times.
    """
    files = os.listdir(modelLocation)

    neighborhood = False
    dVector = []
    times = []

    if neighborhoodLocation:
        neighborclusters = ncluster.parse(neighborhoodLocation)
        neighborhood = True

    # Iterate over splits.
    for s in splits:
        oldSplit = datetime.datetime.strptime(s[0], "%Y-%m-%d %H:%M:%S")
        newSplit = datetime.datetime.strptime(s[1], "%Y-%m-%d %H:%M:%S")

        tmpDoc = []
        # Loop over all models
        for f in files:
            # It is a data file.
            if f.split(".")[-1] == "dat":
                # Open it and grab the models and sensor list.
                fn = dataio.loadData(os.path.join(modelLocation, f))
                fn.matrixToModel(fn.modelList)

                cd, td = bbdata.getdata(oldSplit, newSplit, comp=compress, sens=fn.sensors, readLocation=dataLocation)

                # Optionally collapse the raw readings into neighborhood clusters.
                cd2 = cd
                if neighborhood:
                    local = neighborclusters[str(fn.sensors)]
                    cd2 = ncluster.convertNeighborhood(cd, local)

                # Shape the data as a 2-D column array.
                cd2 = numpy.array(cd2, ndmin=2)
                cd2 = cd2.T

                sData = markov_anneal.splitLocalMax(cd2, td, splitLength)

                # Score the splits against each model; fall back to zero
                # counts if analysis.ratio fails.
                try:
                    val, counts = analysis.ratio(sData.values(), fn.models)
                except Exception:
                    counts = [0] * len(fn.models)
                    val = [0] * len(fn.models)
                tmpDoc += counts

        # Record this window's counts and its start time if enough counts
        # were collected.
        if len(tmpDoc) >= minBehavior:
            dVector.append(tmpDoc)
            times.append(oldSplit)

    return dVector, times
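
A minimal usage sketch of makeModelCounts (the split timestamps, directory
paths, and neighborhood file below are hypothetical, chosen only to
illustrate the expected argument shapes):

    splits = [
        ("2008-03-01 00:00:00", "2008-03-02 00:00:00"),
        ("2008-03-02 00:00:00", "2008-03-03 00:00:00"),
    ]
    # Each .dat file under ./models/ is loaded as one model/sensor block.
    dVector, times = makeModelCounts(
        splits,
        "./models/",
        "./data/",
        neighborhoodLocation="./neighborhoods.txt",
        minBehavior=1,
    )
    # dVector[i] holds the per-model counts for the window starting at times[i].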
Example #2
        print(i)
        i += 1
        oldSplit = datetime.datetime.strptime(s[0], "%Y-%m-%d %H:%M:%S")
        newSplit = datetime.datetime.strptime(s[1], "%Y-%m-%d %H:%M:%S")

        tmpDoc = []
        # Loop over all models.
        for f in files:
            # It is a data file.
            if f.split('.')[-1] == 'dat':
                # Open it and grab the models and sensor list.
                fn = dataio.loadData(modelLocation + str(f))
                fn.matrixToModel(fn.modelList)

                cd, td = bbdata.getdata(oldSplit, newSplit,
                                        comp=compress,
                                        sens=fn.sensors,
                                        readLocation=dataLocation)

                local = neighborclusters[str(fn.sensors)]
                cd2 = ncluster.convertNeighborhood(cd, local)
                cd2 = numpy.array(cd2, ndmin=2)
                cd2 = cd2.T

                sData = markov_anneal.splitLocalMax(cd2, td, splitLength)

                # For each split, make a document matrix and append it to the
                # ongoing term-document matrix.
                try:
                    val, counts = analysis.ratio(sData.values(), fn.models)
                except Exception:
                    counts = [0] * len(fn.models)
                    val = [0] * len(fn.models)
                tmpDoc += counts
Example #4
                
                #sData = markov_anneal.splitLocalMax(cd2, td, splitLen)

                #try:
                #    val, counts = analysis.ratio(sData.values(), fn.models)
                #except Exception as e:
                #    counts = [0] * len(fn.models)
                #    val = [0] * len(fn.models)

                #tmpDoc += counts

        cd, td = bbdata.getdata(oldSplit, newSplit,
                                comp=compress,
                                sens=bbdata.allSensors,
                                vDays=validDays,
                                readLocation=dataDirectory)

        # Uncompress the window and sum along axis 0.
        cd2 = bbdata.uncompressData(cd, 50)
        b = numpy.array(cd2)
        b = numpy.sum(b, axis=0)

        tmpDoc = list(b)
                
        #if sum(tmpDoc) >= minBehaviour:
        #    tdMatrix.append(tmpDoc)
        #    print len(tdMatrix)

        firstRun = False
Example #5
            fn.matrixToModel(fn.modelList)

            models.append(fn)

    # For each window, go through each sensor block and get LSA projection
    # information.
    while ct + window < et:
        print(ct)

        tmpDoc = []

        # Iterate through each model.
        for m in models:

            cd, td = bbdata.getdata(ct, ct + window,
                                    comp=compress,
                                    sens=m.sensors,
                                    readLocation=dataLocation)

            sData = markov_anneal.splitLocalMax(cd, td, splitLen)

            # For each split, make a document vector and append it to the
            # ongoing document.
            try:
                val, counts = analysis.ratio(sData.values(), m.models)
            except Exception:
                counts = [0] * len(m.models)
                val = [0] * len(m.models)

            tmpDoc += counts

        print(tmpDoc)
Example #6
bestOut = None

if __name__ == "__main__":

    #Generate the data first.
    st = datetime.datetime.strptime(st, "%Y-%m-%d %H:%M:%S")
    et = datetime.datetime.strptime(et, "%Y-%m-%d %H:%M:%S")

    #Get the sensor blocks
    for i in range(len(sensors)):
        print "Sensors:" + str(sensors[i])

        cd, td = bbdata.getdata(st, et,
                                pStart=periodStart,
                                pEnd=periodEnd,
                                vDays=validDays,
                                comp=compress,
                                sens=sensors[i],
                                readLocation=dataDirectory)
        
        # Collapse the raw readings into neighborhood clusters.
        neighborclusters = ncluster.parse(neighborhoodLocation)
        local = neighborclusters[str(sensors[i])]
        cd2 = ncluster.convertNeighborhood(cd, local)
        cd2 = numpy.array(cd2, ndmin=2)
        cd2 = cd2.T
        
        #obs = 2**len(sensors[i])
        #sData = markov_anneal.splitLocalMax(cd, td, splitLen)

        obs = 9
        
Example #9
        print(i)
        i += 1
        oldSplit = datetime.datetime.strptime(s[0], "%Y-%m-%d %H:%M:%S")
        newSplit = datetime.datetime.strptime(s[1], "%Y-%m-%d %H:%M:%S")
        
        tmpDoc = []
        # Loop over all models.
        for f in files:
            # It is a data file.
            if f.split('.')[-1] == 'dat':
                # Open it and grab the models and sensor list.
                fn = dataio.loadData(modelLocation + str(f))
                fn.matrixToModel(fn.modelList)

                cd, td = bbdata.getdata(oldSplit, newSplit,
                                        comp=compress,
                                        sens=fn.sensors,
                                        readLocation=dataLocation)
                
                #cd2 = cd
                local = neighborclusters[str(fn.sensors)]
                cd2 = ncluster.convertNeighborhood(cd, local)
                cd2 = numpy.array(cd2, ndmin=2)
                cd2 = cd2.T
                
                sData = markov_anneal.splitLocalMax(cd2, td, splitLength)

                # print(len(sData))

                # For each split, make a document matrix and append it to the
                # ongoing term-document matrix.
                try:
                    val, counts = analysis.ratio(sData.values(), fn.models)
                except Exception:
                    counts = [0] * len(fn.models)
                    val = [0] * len(fn.models)
                tmpDoc += counts