def initialDBScanSciLearn(self):
    """Seed the potential micro-clusters by running DBSCAN over the buffer.

    The buffered samples are clustered with ``DBSCAN`` (``eps=8``,
    ``min_samples=self.minPts``, brute-force neighbour search).  Every
    resulting label other than ``-1`` yields one ``MicroCluster`` that
    receives all rows carrying that label and is then inserted into
    ``self.pMicroCluster``.

    Side effect: a ``'clusters'`` column holding the DBSCAN labels is added
    to ``self.buffer``.
    """
    db = DBSCAN(eps=8, min_samples=self.minPts, algorithm='brute').fit(self.buffer)
    labels = db.labels_
    self.buffer['clusters'] = labels

    for clusterId in np.unique(labels):
        # scikit-learn's DBSCAN labels noise points with -1; they form no cluster.
        if clusterId == -1:
            continue

        members = self.buffer[self.buffer['clusters'] == clusterId]
        members = members.drop('clusters', axis=1)

        firstSample = Sample(members.iloc[0].tolist())
        mc = MicroCluster(firstSample, self.currentTimestamp, self.lamb)

        # Start at 1: row 0 was already handed to the constructor above
        # (presumably stored there -- confirm against MicroCluster).
        # Iterating range(len(members) - 1) instead would feed row 0 a second
        # time and never reach the last row.
        for rowNumber in range(1, len(members)):
            rowSample = Sample(members.iloc[rowNumber].tolist())
            mc.insertSample(rowSample, self.currentTimestamp)

        self.pMicroCluster.insert(mc)
def initWithoutDBScan(self):
    """Initialise the model with one micro-cluster built from the whole buffer.

    Each buffered row is wrapped in a ``Sample`` (built from the row values
    and the row position, timestamped with the 1-based position) and inserted
    into a single ``MicroCluster``, which is then stored in
    ``self.pMicroCluster``.  The largest radius the cluster reaches while it
    is being filled is tracked; when ``self.epsilon`` is the string
    ``'auto'`` it ends up set to that largest radius.  Any other epsilon is
    left untouched.
    """
    # NOTE(review): this sample is never inserted anywhere -- the loop below
    # builds its own sample for row 0.  Kept so the call sequence (and the
    # failure on an empty buffer) stays as it was.
    first_sample = Sample(self.buffer.iloc[0].values, 0)
    first_sample.set_timestamp(1)

    seed_cluster = MicroCluster(1, self.lamb, self.pMicroCluster.N + 1)
    largest_radius = 0

    for position in range(len(self.buffer)):
        row_sample = Sample(self.buffer.iloc[position].values, position)
        row_sample.set_timestamp(position + 1)
        seed_cluster.insert_sample(row_sample)
        # Remember the peak radius seen so far, not just the final one.
        largest_radius = max(largest_radius, seed_cluster.radius)

    self.pMicroCluster.insert(seed_cluster)

    if isinstance(self.epsilon, str) and self.epsilon == 'auto':
        # NOTE(review): this value is overwritten on the very next line;
        # confirm which of the two 'auto' rules is the intended one.
        self.epsilon = self.pMicroCluster.clusters[0].radius * self.radiusFactor
        self.epsilon = largest_radius
def initWithoutDBScan(self):
    """Initialise the model with one micro-cluster built from the whole buffer.

    Each buffered row is wrapped in a ``Sample`` (built from the row values
    and the row position, timestamped with the 1-based position) and inserted
    into a single ``MicroCluster`` at ``self.currentTimestamp``.  The cluster
    is then stored in ``self.pMicroCluster``.  When ``self.epsilon`` is the
    string ``'auto'`` it is replaced by the radius of the first stored
    cluster multiplied by ``self.radiusFactor``.
    """
    # NOTE(review): this sample is never inserted anywhere -- the loop below
    # builds its own sample for row 0.  Kept so the call sequence (and the
    # failure on an empty buffer) stays as it was.
    firstSample = Sample(self.buffer.iloc[0].values, 0)
    firstSample.setTimestamp(1)

    seedCluster = MicroCluster(1, self.lamb)

    for position in range(len(self.buffer)):
        rowSample = Sample(self.buffer.iloc[position].values, position)
        rowSample.setTimestamp(position + 1)
        seedCluster.insertSample(rowSample, self.currentTimestamp)

    self.pMicroCluster.insert(seedCluster)

    # Only the literal string 'auto' triggers the derived epsilon.
    if isinstance(self.epsilon, str) and self.epsilon == 'auto':
        firstRadius = self.pMicroCluster.clusters[0].radius
        self.epsilon = firstRadius * self.radiusFactor
def initWithoutDBScan(self):
    """Initialise the model with one micro-cluster built from the whole buffer.

    Each buffer entry is wrapped in a ``Sample`` (built from the entry and
    its position, timestamped with the 1-based position) and inserted into a
    single ``MicroCluster`` at ``self.currentTimestamp``.  The cluster is
    then stored in ``self.pMicroCluster``.  The largest radius the cluster
    reaches while it is being filled is tracked; when ``self.epsilon`` is the
    string ``'auto'`` it ends up set to that largest radius.  Any other
    epsilon is left untouched.
    """
    # NOTE(review): this sample is never inserted anywhere -- the loop below
    # builds its own sample for entry 0.  Kept so the call sequence (and the
    # failure on an empty buffer) stays as it was.
    firstSample = Sample(self.buffer[0], 0)
    firstSample.setTimestamp(1)

    seedCluster = MicroCluster(1, self.lamb, self.pMicroCluster.N + 1)
    largestRadius = 0

    for position in range(len(self.buffer)):
        entrySample = Sample(self.buffer[position], position)
        entrySample.setTimestamp(position + 1)
        seedCluster.insertSample(entrySample, self.currentTimestamp)
        # Remember the peak radius seen so far, not just the final one.
        largestRadius = max(largestRadius, seedCluster.radius)

    self.pMicroCluster.insert(seedCluster)

    if isinstance(self.epsilon, str) and self.epsilon == 'auto':
        # NOTE(review): this value is overwritten on the very next line;
        # confirm which of the two 'auto' rules is the intended one.
        self.epsilon = self.pMicroCluster.clusters[0].radius * self.radiusFactor
        self.epsilon = largestRadius
def runOnNewSample(self, sample):
    """Process one incoming sample and report whether it is an outlier.

    The timestamp is advanced first: in simulation mode (module-level
    ``simulation`` flag) ``self.currentTimestamp`` is incremented and stamped
    onto the sample, otherwise it is set to ``time.time()``.

    While the model is not yet initialised the sample is only buffered; once
    ``self.numberInitialSamples`` samples are buffered the learner is reset
    and seeded via ``initialDBScanSciLearn``.

    Afterwards each sample goes through these steps:

    1. Try to merge it into the nearest potential micro-cluster; the merge is
       accepted when the cluster radius after a trial insertion stays within
       ``self.epsilon``.
    2. Otherwise try the nearest outlier micro-cluster in the same way; a
       cluster whose weight then exceeds ``self.beta * self.mu`` is moved to
       the potential micro-clusters.
    3. Otherwise open a new micro-cluster for the sample.  It is stored as a
       potential micro-cluster if its center lies within ``2 * epsilon`` of
       any existing potential micro-cluster, else as an outlier micro-cluster.
    4. Whenever ``currentTimestamp % self.tp == 0``, prune potential clusters
       lighter than ``beta * mu`` and outlier clusters lighter than their
       age-dependent threshold.

    Args:
        sample: The sample to process; must offer ``setTimestamp`` in
            simulation mode.

    Returns:
        ``None`` during the initialisation phase.  Afterwards the outlier
        flag (``True`` unless the sample was merged into, or stored as, a
        potential micro-cluster), or -- when ``self.exportVariables`` is
        truthy -- a dict with keys ``'pMicroClusters'``, ``'oMicroClusters'``
        and ``'result'``.
    """
    if simulation:
        self.currentTimestamp += 1
        sample.setTimestamp(self.currentTimestamp)
    else:
        self.currentTimestamp = time.time()

    ### INITIALIZATION PHASE ###
    if not self.inizialized:
        self.buffer.append(sample)
        if len(self.buffer) >= self.numberInitialSamples:
            self.resetLearningImpl()
            self.initialDBScanSciLearn()
            self.inizialized = True
        return None

    def fitsWithinEpsilon(cluster):
        # Trial-insert into a deep copy so a rejected merge leaves the real
        # cluster untouched.
        trial = copy.deepcopy(cluster)
        trial.insertSample(sample, self.currentTimestamp)
        return trial.radius <= self.epsilon

    ### MERGING PHASE ###
    merged = False
    TrueOutlier = True
    returnOutlier = True

    if self.pMicroCluster.clusters:
        closestMicroCluster = self.nearestCluster(
            sample, self.currentTimestamp, kind='cluster')
        if fitsWithinEpsilon(closestMicroCluster):
            closestMicroCluster.insertSample(sample, self.currentTimestamp)
            merged = True
            TrueOutlier = False
            returnOutlier = False
            self.updateAll(closestMicroCluster)

    if not merged and self.oMicroCluster.clusters:
        closestMicroCluster = self.nearestCluster(
            sample, self.currentTimestamp, kind='outlier')
        if fitsWithinEpsilon(closestMicroCluster):
            closestMicroCluster.insertSample(sample, self.currentTimestamp)
            merged = True
            # Heavy enough: promote from outlier to potential micro-cluster.
            if closestMicroCluster.weight > self.beta * self.mu:
                self.oMicroCluster.clusters.remove(closestMicroCluster)
                self.pMicroCluster.insert(closestMicroCluster)
            self.updateAll(closestMicroCluster)

    if not merged:
        newOutlierMicroCluster = MicroCluster(1, self.lamb)
        newOutlierMicroCluster.insertSample(sample, self.currentTimestamp)

        # Close to an existing potential cluster means not a true outlier.
        nearPotential = any(
            np.linalg.norm(
                clusterTest.center - newOutlierMicroCluster.center
            ) < 2 * self.epsilon
            for clusterTest in self.pMicroCluster.clusters
        )
        if nearPotential:
            TrueOutlier = False

        if TrueOutlier:
            self.oMicroCluster.insert(newOutlierMicroCluster)
        else:
            self.pMicroCluster.insert(newOutlierMicroCluster)
            returnOutlier = False
        self.updateAll(newOutlierMicroCluster)

    ### PRUNING PHASE ###
    # NOTE(review): outside simulation mode currentTimestamp is a float from
    # time.time(), so this exact-zero modulo test will rarely be true --
    # confirm that pruning is reached in live mode.
    if self.currentTimestamp % self.tp == 0:
        # Rebuild both lists in place instead of popping while iterating:
        # removing during iteration skips the element after each removal and
        # leaves stale clusters behind.  Slice assignment keeps the same list
        # objects, so outside references to them stay valid.
        minWeight = self.beta * self.mu
        potentials = self.pMicroCluster.clusters
        potentials[:] = [
            cluster for cluster in potentials
            if not cluster.weight < minWeight
        ]

        # The denominator does not depend on the cluster; compute it once.
        xs2 = math.pow(2, -self.lamb * self.tp) - 1
        outliers = self.oMicroCluster.clusters
        survivors = []
        for cluster in outliers:
            age = self.currentTimestamp - cluster.creationTimeStamp + self.tp
            xs1 = math.pow(2, -self.lamb * age) - 1
            xsi = xs1 / xs2
            if not cluster.weight < xsi:
                survivors.append(cluster)
        outliers[:] = survivors

    if self.exportVariables:
        return {
            'pMicroClusters': self.pMicroCluster.clusters,
            'oMicroClusters': self.oMicroCluster.clusters,
            'result': returnOutlier
        }
    return returnOutlier