import os
import datetime

import numpy

# Project modules referenced below (flat import paths assumed).
import analysis
import bbdata
import dataio
import markov_anneal
import ncluster


def makeModelCounts(splits, modelLocation, dataLocation,
                    neighborhoodLocation=None, minBehavior=0,
                    compress=2, splitLength=8):
    """
    Make a set of counts for a given dataset and models.

    neighborhoodLocation specifies whether the models and data need to be
    preclustered.  Returns the data vector and the associated split times.
    """
    files = os.listdir(modelLocation)
    neighborhood = False
    dVector = []
    times = []

    if neighborhoodLocation:
        neighborclusters = ncluster.parse(neighborhoodLocation)
        neighborhood = True

    # Iterate over splits.
    for s in splits:
        oldSplit = datetime.datetime.strptime(s[0], "%Y-%m-%d %H:%M:%S")
        newSplit = datetime.datetime.strptime(s[1], "%Y-%m-%d %H:%M:%S")
        tmpDoc = []

        # Loop over all models.
        for f in files:
            # Only process data files.
            if f.split('.')[-1] == 'dat':
                # Open it and grab the models and sensor list.
                fn = dataio.loadData(modelLocation + str(f))
                fn.matrixToModel(fn.modelList)

                cd, td = bbdata.getdata(oldSplit, newSplit,
                                        comp=compress,
                                        sens=fn.sensors,
                                        readLocation=dataLocation)

                # Optionally precluster the raw data into neighborhoods and
                # reshape it into a single observation column.
                cd2 = cd
                if neighborhood:
                    local = neighborclusters[str(fn.sensors)]
                    cd2 = ncluster.convertNeighborhood(cd, local)
                    cd2 = numpy.array(cd2, ndmin=2)
                    cd2 = cd2.T

                sData = markov_anneal.splitLocalMax(cd2, td, splitLength)

                # Score each split against the models; fall back to zero
                # counts if the ratio computation fails.
                try:
                    val, counts = analysis.ratio(sData.values(), fn.models)
                except Exception:
                    counts = [0] * len(fn.models)
                    val = [0] * len(fn.models)

                tmpDoc += counts

        if len(tmpDoc) >= minBehavior:
            dVector.append(tmpDoc)
            times.append(oldSplit)

        oldSplit = newSplit

    return dVector, times
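# Minimal usage sketch of makeModelCounts.  The paths, split boundaries, and
# threshold below are hypothetical placeholders, not values from the original
# experiments.
def _exampleMakeModelCounts():
    splits = [("2008-03-01 00:00:00", "2008-03-02 00:00:00"),
              ("2008-03-02 00:00:00", "2008-03-03 00:00:00")]

    dVector, times = makeModelCounts(splits,
                                     modelLocation="models/",
                                     dataLocation="data/",
                                     neighborhoodLocation="neighborhoods.dat",
                                     minBehavior=1)

    # One count vector per split that passed the minBehavior filter, plus the
    # matching split start times.
    for t, doc in zip(times, dVector):
        print t, len(doc)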
# Variant of the split loop that always applies neighborhood clustering and
# prints a running split index.
print i
i += 1

oldSplit = datetime.datetime.strptime(s[0], "%Y-%m-%d %H:%M:%S")
newSplit = datetime.datetime.strptime(s[1], "%Y-%m-%d %H:%M:%S")
tmpDoc = []

# Loop over all models.
for f in files:
    # It is a data file.
    if f.split('.')[-1] == 'dat':
        # Open it and grab the models and sensor list.
        fn = dataio.loadData(modelLocation + str(f))
        fn.matrixToModel(fn.modelList)

        cd, td = bbdata.getdata(oldSplit, newSplit,
                                comp=compress,
                                sens=fn.sensors,
                                readLocation=dataLocation)

        local = neighborclusters[str(fn.sensors)]
        cd2 = ncluster.convertNeighborhood(cd, local)
        cd2 = numpy.array(cd2, ndmin=2)
        cd2 = cd2.T

        sData = markov_anneal.splitLocalMax(cd2, td, splitLength)

        # For each split, make a document vector and append it to the
        # ongoing term-document matrix.
        try:
            val, counts = analysis.ratio(sData.values(), fn.models)
        except Exception:
            counts = [0] * len(fn.models)
            val = [0] * len(fn.models)
# Earlier model-based counting path, kept commented out for reference:
#sData = markov_anneal.splitLocalMax(cd2, td, splitLen)
#try:
#    val, counts = analysis.ratio(sData.values(), fn.models)
#except Exception, e:
#    counts = [0] * len(fn.models)
#    val = [0] * len(fn.models)
#tmpDoc += counts

# Build the document vector directly from the raw data instead: pull every
# sensor, uncompress, and sum along the first axis to get one count per column.
cd, td = bbdata.getdata(oldSplit, newSplit,
                        comp=compress,
                        sens=bbdata.allSensors,
                        vDays=validDays,
                        readLocation=dataDirectory)

cd2 = bbdata.uncompressData(cd, 50)
b = numpy.array(cd2)
b = numpy.sum(b, axis=0)

tmpDoc = list(b)

#if sum(tmpDoc) >= minBehaviour:
#    tdMatrix.append(tmpDoc)
#    print len(tdMatrix)

firstRun = False
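# Toy illustration of the summing step above, using only numpy: collapsing the
# time axis of a small 0/1 activation window into one count per column.  The
# data here is made up for illustration.
def _exampleAxisSum():
    window = numpy.array([[1, 0, 1],
                          [0, 0, 1],
                          [1, 1, 0],
                          [1, 0, 0]])

    # axis=0 sums down each column: [3, 1, 2]
    print list(numpy.sum(window, axis=0))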
fn.matrixToModel(fn.modelList)
models.append(fn)

# For each window, go through each sensor block and get LSA projection
# information.
while ct + window < et:
    print ct
    tmpDoc = []

    # Iterate through each model.
    for m in models:
        cd, td = bbdata.getdata(ct, ct + window,
                                comp=compress,
                                sens=m.sensors,
                                readLocation=dataLocation)

        sData = markov_anneal.splitLocalMax(cd, td, splitLen)

        # For each split, make a document vector and append it to the
        # ongoing document.
        try:
            val, counts = analysis.ratio(sData.values(), m.models)
        except Exception:
            counts = [0] * len(m.models)
            val = [0] * len(m.models)

        tmpDoc += counts

    print tmpDoc
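# The fragment above does not show how ct advances; a minimal sketch of the
# usual sliding-window pattern.  The start/end times and the one-hour window
# are assumptions, not values taken from this code.
def _exampleWindowLoop():
    ct = datetime.datetime(2008, 3, 1)
    et = datetime.datetime(2008, 3, 2)
    window = datetime.timedelta(hours=1)

    while ct + window < et:
        # ... build tmpDoc for [ct, ct + window) as in the loop above ...
        ct = ct + window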
bestOut = None

if __name__ == "__main__":
    # Generate the data first.
    st = datetime.datetime.strptime(st, "%Y-%m-%d %H:%M:%S")
    et = datetime.datetime.strptime(et, "%Y-%m-%d %H:%M:%S")

    # Get the sensor blocks.
    for i in range(len(sensors)):
        print "Sensors:" + str(sensors[i])

        cd, td = bbdata.getdata(st, et,
                                pStart=periodStart,
                                pEnd=periodEnd,
                                vDays=validDays,
                                comp=compress,
                                sens=sensors[i],
                                readLocation=dataDirectory)

        # Cluster the raw data into neighborhoods, then reshape the result
        # into a single observation column.
        neighborclusters = ncluster.parse(neighborhoodLocation)
        local = neighborclusters[str(sensors[i])]
        cd2 = ncluster.convertNeighborhood(cd, local)
        cd2 = numpy.array(cd2, ndmin=2)
        cd2 = cd2.T

        #obs = 2**len(sensors[i])
        #sData = markov_anneal.splitLocalMax(cd, td, splitLen)
        obs = 9
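# Toy illustration of the ndmin/transpose step above, using only numpy: a flat
# clustered sequence becomes a single observation column.  The values stand in
# for convertNeighborhood output and are made up.
def _exampleColumnReshape():
    clustered = [3, 1, 4, 1, 5]
    col = numpy.array(clustered, ndmin=2).T

    print col.shape   # (5, 1)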