def get_kmeans_clusters(data, count_centers):
    """Cluster the rows of ``data`` with K-Means and record the run's score.

    The data array of every row is fed to a K-Means instance whose initial
    centers come from the K-Means++ initializer.  Each resulting cluster is
    wrapped in a ``Cluster`` whose name and colour are a colour drawn from
    ``Constants.DEFAULT_COLOR_SET``.

    As a side effect the ratio ``SSB / SST`` (with ``SSB`` obtained from
    ``calculate_ssb(SST, SSW)``) is appended to the module-level
    ``RS_RESULT`` list, which is then printed.

    :param data: container exposing ``getRows()``; every row exposes
        ``getDataArray()``.
    :param count_centers: number of initial centers to generate.
    :return: list of ``Cluster`` objects, one per cluster found.
    """
    input_data = [row.getDataArray() for row in data.getRows()]
    SST = calculate_sst(input_data)

    # Initialize the centers with the K-Means++ method, then run K-Means.
    initial_centers = kmeans_plusplus_initializer(
        input_data, count_centers).initialize()
    kmeans_instance = kmeans(input_data, initial_centers)
    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()

    # Draw distinct colours while the palette is large enough, so that two
    # clusters never end up with the same name/colour; fall back to
    # independent draws only when there are more clusters than colours.
    palette = Constants.DEFAULT_COLOR_SET
    if len(clusters) <= len(palette):
        colours = random.sample(palette, len(clusters))
    else:
        colours = [random.choice(palette) for _ in clusters]

    result_clusters = []
    SSW = 0
    for colour, cluster in zip(colours, clusters):
        # Resolve the rows once and reuse them for both the SSW term and
        # the Cluster object.
        cluster_rows = KMeansWindow.get_rows_kmeans(data, cluster)
        # NOTE(review): reads the private ``_dataArray``; presumably
        # equivalent to ``getDataArray()`` - confirm and switch if so.
        SSW += calculate_ssw([row._dataArray for row in cluster_rows])

        result_cluster = Cluster(cluster_rows)
        result_cluster.setName(colour)
        result_cluster.setColor(colour)
        result_clusters.append(result_cluster)

    SSB = calculate_ssb(SST, SSW)
    # With zero total variance the ratio is undefined; record 0.0 instead
    # of failing with a division by zero after the clustering succeeded.
    RS_RESULT.append(SSB / SST if SST else 0.0)
    print(RS_RESULT)
    return result_clusters
def get_cure_clusters(data, count_clusters=3):
    """Cluster the rows of ``data`` with CURE and record the run's score.

    The data array of every row is fed to a ``cure`` instance.  Each
    resulting cluster is wrapped in a ``Cluster`` whose name and colour are
    a colour drawn from ``Constants.DEFAULT_COLOR_SET``.

    As a side effect the ratio ``SSB / SST`` (with ``SSB`` obtained from
    ``calculate_ssb(SST, SSW)``) is appended to the module-level
    ``RS_RESULT`` list, which is then printed.

    :param data: container exposing ``getRows()``; every row exposes
        ``getDataArray()``.
    :param count_clusters: number of clusters requested from CURE.
    :return: list of ``Cluster`` objects, one per cluster found.
    """
    input_data = [row.getDataArray() for row in data.getRows()]
    SST = calculate_sst(input_data)

    cure_instance = cure(input_data, count_clusters)
    cure_instance.process()
    clusters = cure_instance.get_clusters()

    # Draw distinct colours while the palette is large enough, so that two
    # clusters never end up with the same name/colour; fall back to
    # independent draws only when there are more clusters than colours.
    palette = Constants.DEFAULT_COLOR_SET
    if len(clusters) <= len(palette):
        colours = random.sample(palette, len(clusters))
    else:
        colours = [random.choice(palette) for _ in clusters]

    result_clusters = []
    SSW = 0
    for colour, cluster in zip(colours, clusters):
        # NOTE(review): ``cluster`` is handed to calculate_ssw exactly as
        # returned by get_clusters(); confirm that it holds data points
        # rather than row indices for the clustering library in use.
        SSW += calculate_ssw(cluster)

        result_cluster = Cluster(CureWindow.get_rows(data, cluster))
        result_cluster.setName(colour)
        result_cluster.setColor(colour)
        result_clusters.append(result_cluster)

    SSB = calculate_ssb(SST, SSW)
    # With zero total variance the ratio is undefined; record 0.0 instead
    # of failing with a division by zero after the clustering succeeded.
    RS_RESULT.append(SSB / SST if SST else 0.0)
    print(RS_RESULT)
    return result_clusters
def performAlgorithm(self):
    """Run the whole clustering in one go and hand the clusters to the parent.

    Does nothing when ``prepareData()`` reports invalid input.  Otherwise
    clusters are grown one at a time: the first still-unclustered row seeds
    a cluster, and the cluster is extended with every neighbour (within
    ``self.radius``) that ``isElemSuitable`` accepts, until it stops
    growing.  The rows of a finished cluster are removed from both working
    lists before the next cluster is started.  Every cluster is named
    ``"dbscan<N>"`` and coloured from ``Constants.EXTENDED_COLOR_SET``
    (cycling), and the full list is passed to ``parent().addClusters``.
    """
    if not self.prepareData():
        return

    # prepareData() rebuilds rowsToClusterize only.  Rebuild the neighbour
    # pool as well, otherwise a repeated run searches a stale (emptied)
    # pool and rowsToConsider.remove() below raises ValueError.
    self.rowsToConsider = list(self.rowsToClusterize)

    print(*self.rowsToClusterize)
    iterCounter = 0
    while self.rowsToClusterize:
        iterCounter += 1
        seed = self.rowsToClusterize[0]
        self.supposedClusterElems = {seed}

        # Only rows added in the previous pass can contribute new
        # neighbours (the neighbour pool does not change while a cluster
        # grows), so expand from that frontier instead of rescanning the
        # whole cluster on every pass.
        frontier = {seed}
        while frontier:
            clusterExtension = set()
            for member in frontier:
                for neighbor in self.findNeighbors(member, self.radius):
                    if (neighbor in self.supposedClusterElems
                            or neighbor in clusterExtension):
                        continue
                    if self.isElemSuitable(neighbor, self.radius,
                                           self.neighborsamount):
                        clusterExtension.add(neighbor)
            self.supposedClusterElems.update(clusterExtension)
            frontier = clusterExtension

        cluster = Cluster(
            self.workData.buildClusterDataFromRows(
                list(self.supposedClusterElems)))
        cluster.setColor(
            Constants.EXTENDED_COLOR_SET[
                (iterCounter - 1) % len(Constants.EXTENDED_COLOR_SET)])
        cluster.setName("dbscan" + str(iterCounter))
        self.resultClusters.append(cluster)

        # Remove the rows of the finished cluster from the source sets.
        for row in self.supposedClusterElems:
            self.rowsToClusterize.remove(row)
            self.rowsToConsider.remove(row)

    self.parent().addClusters(self.resultClusters)
def get_agglomerative_clusters(data, count_clusters, line_type):
    """Cluster the rows of ``data`` agglomeratively and record the run's score.

    The data array of every row is fed to an ``agglomerative`` instance
    configured with ``link=line_type``.  Each resulting cluster is wrapped
    in a ``Cluster`` whose name and colour are a colour drawn from
    ``Constants.DEFAULT_COLOR_SET``.

    As a side effect the ratio ``SSB / SST`` (with ``SSB`` obtained from
    ``calculate_ssb(SST, SSW)``) is appended to the module-level
    ``RS_RESULT`` list, which is then printed.

    :param data: container exposing ``getRows()``; every row exposes
        ``getDataArray()``.
    :param count_clusters: number of clusters to produce.
    :param line_type: linkage selector passed through as ``link``.
    :return: list of ``Cluster`` objects, one per cluster found.
    """
    input_data = [row.getDataArray() for row in data.getRows()]
    SST = calculate_sst(input_data)

    agglomerative_instance = agglomerative(input_data, count_clusters,
                                           link=line_type)
    agglomerative_instance.process()
    clusters = agglomerative_instance.get_clusters()

    # Draw distinct colours while the palette is large enough, so that two
    # clusters never end up with the same name/colour; fall back to
    # independent draws only when there are more clusters than colours.
    palette = Constants.DEFAULT_COLOR_SET
    if len(clusters) <= len(palette):
        colours = random.sample(palette, len(clusters))
    else:
        colours = [random.choice(palette) for _ in clusters]

    result_clusters = []
    SSW = 0
    for colour, cluster in zip(colours, clusters):
        # Resolve the rows once and reuse them for both the SSW term and
        # the Cluster object.
        cluster_rows = AgglomerativeWindow.get_rows_agglomerative(
            data, cluster)
        # NOTE(review): reads the private ``_dataArray``; presumably
        # equivalent to ``getDataArray()`` - confirm and switch if so.
        SSW += calculate_ssw([row._dataArray for row in cluster_rows])

        result_cluster = Cluster(cluster_rows)
        result_cluster.setName(colour)
        result_cluster.setColor(colour)
        result_clusters.append(result_cluster)

    SSB = calculate_ssb(SST, SSW)
    # With zero total variance the ratio is undefined; record 0.0 instead
    # of failing with a division by zero after the clustering succeeded.
    RS_RESULT.append(SSB / SST if SST else 0.0)
    print(RS_RESULT)
    return result_clusters
def translateClusters(self, clope):
    """Convert a finished CLOPE run into ``Cluster`` objects for the parent.

    For every cluster number in ``clope.clusters`` (ascending order) the
    rows whose transaction number is mapped to that cluster in
    ``clope.transaction`` are collected into a ``Cluster`` named
    ``"clope<num>"`` and coloured from ``Constants.EXTENDED_COLOR_SET``
    (cycling).  The clusters are appended to ``self.resultClusters`` and
    the whole list is passed to ``parent().addClusters``.

    :param clope: object exposing ``clusters`` (mapping keyed by cluster
        number) and ``transaction`` (mapping of transaction number to
        cluster number).
    """
    # Group the transaction numbers by cluster in a single pass instead of
    # rescanning the whole transaction map for every cluster.  Insertion
    # order is kept, so rows appear in the same order as before.
    transactionsByCluster = {}
    for transactionNum, clusterNum in clope.transaction.items():
        transactionsByCluster.setdefault(clusterNum, []).append(
            transactionNum)

    for num in sorted(clope.clusters.keys()):
        rows = [self.workData._rows[i]
                for i in transactionsByCluster.get(num, [])]
        cluster = Cluster(self.workData.buildClusterDataFromRows(rows))
        cluster.setColor(
            Constants.EXTENDED_COLOR_SET[
                (num - 1) % len(Constants.EXTENDED_COLOR_SET)])
        cluster.setName("clope" + str(num))
        self.resultClusters.append(cluster)

    self.parent().addClusters(self.resultClusters)
def get_xmeans_clusters(data, count_centers):
    """Cluster the rows of ``data`` with X-Means and record the run's score.

    The data array of every row is fed to an ``xmeans`` instance (created
    with ``ccore=True``) whose initial centers come from the K-Means++
    initializer.  Each resulting cluster is wrapped in a ``Cluster`` whose
    name and colour are a colour drawn from
    ``Constants.DEFAULT_COLOR_SET``.

    As a side effect the ratio ``SSB / SST`` (with ``SSB`` obtained from
    ``calculate_ssb(SST, SSW)``) is appended to the module-level
    ``RS_RESULT`` list, which is then printed.

    :param data: container exposing ``getRows()``; every row exposes
        ``getDataArray()``.
    :param count_centers: number of initial centers to generate.
    :return: list of ``Cluster`` objects, one per cluster found.
    """
    input_data = [row.getDataArray() for row in data.getRows()]
    SST = calculate_sst(input_data)

    # Initial centers are optional for X-Means; they are prepared with the
    # K-Means++ method here to avoid random initial centers.
    initial_centers = kmeans_plusplus_initializer(
        input_data, count_centers).initialize()
    xmeans_instance = xmeans(input_data, initial_centers, ccore=True)
    xmeans_instance.process()
    clusters = xmeans_instance.get_clusters()

    # Draw distinct colours while the palette is large enough, so that two
    # clusters never end up with the same name/colour; fall back to
    # independent draws only when there are more clusters than colours.
    palette = Constants.DEFAULT_COLOR_SET
    if len(clusters) <= len(palette):
        colours = random.sample(palette, len(clusters))
    else:
        colours = [random.choice(palette) for _ in clusters]

    result_clusters = []
    SSW = 0
    for colour, cluster in zip(colours, clusters):
        # Resolve the rows once and reuse them for both the SSW term and
        # the Cluster object.
        cluster_rows = XMeansWindow.get_rows_kmeans(data, cluster)
        # NOTE(review): reads the private ``_dataArray``; presumably
        # equivalent to ``getDataArray()`` - confirm and switch if so.
        SSW += calculate_ssw([row._dataArray for row in cluster_rows])

        result_cluster = Cluster(cluster_rows)
        result_cluster.setName(colour)
        result_cluster.setColor(colour)
        result_clusters.append(result_cluster)

    SSB = calculate_ssb(SST, SSW)
    # With zero total variance the ratio is undefined; record 0.0 instead
    # of failing with a division by zero after the clustering succeeded.
    RS_RESULT.append(SSB / SST if SST else 0.0)
    print(RS_RESULT)
    return result_clusters
class DBScanWindow(QMainWindow):
    """Dialog that runs DBSCAN-style clustering over the parent's data.

    The algorithm can be executed in one go (``performAlgorithm``) or step
    by step (``exstepbystep`` / ``stepAndVisualize`` /
    ``takemultiplesteps``).  In step mode the row currently examined is
    published to the parent as ``sphere = (row, radius)`` and the parent
    canvas is refreshed.
    """

    def __init__(self, parent):
        """Build the widgets and initialise the algorithm state.

        :param parent: owning window; must expose ``globalData``.
        """
        super().__init__(parent)

        # State shared between the steps of the step-by-step execution:
        # performing a step requires the state reached by the previous ones.
        self.workData = self.parent().globalData
        self.rowsToClusterize = list(self.workData)  # rows without a cluster
        self.rowsToConsider = list(self.workData)    # neighbour search pool
        self.resultClusters = []
        self.supposedClusterElems = set()
        self.stepIterator = 0
        self.clusterIterator = 0
        self.neighborsCurrent = set()
        self.neighborsToConsider = set()
        self.neighborsConsidered = set()
        self.cluster = None
        self.prevClusterSize = 0
        self.currClusterSize = 0
        # Row examined by the latest step; defined up front so that it
        # exists even before the first step has run.
        self.staterow = None

        self.centralWidget = QWidget()
        self.setCentralWidget(self.centralWidget)
        self.layout = QVBoxLayout(self.centralWidget)

        self.vicinityLabel = QLabel("Радиус окрестности")
        self.vicinityEdit = QLineEdit()
        self.checkforamountLabel = QLabel("Число соседей")
        self.checkforamountEdit = QLineEdit()
        self.layout.addWidget(self.vicinityLabel)
        self.layout.addWidget(self.vicinityEdit)
        self.layout.addWidget(self.checkforamountLabel)
        self.layout.addWidget(self.checkforamountEdit)

        self.confirmationButton = QPushButton("Выполнить алгоритм")
        self.confirmationButton.clicked.connect(self.performAlgorithm)
        self.layout.addWidget(self.confirmationButton)

        self.exstepbystepButton = QPushButton("Выполнить пошагово")
        self.exstepbystepButton.clicked.connect(self.exstepbystep)
        self.layout.addWidget(self.exstepbystepButton)

        self.buttongroup = QWidget()
        self.buttongrouplayout = QHBoxLayout(self.buttongroup)
        self.nextstepButton = QPushButton("След Шаг")
        self.nextstepButton.clicked.connect(self.stepAndVisualize)
        self.nextstepButton.setEnabled(False)
        # NOTE(review): no slot is connected to this button anywhere in
        # this class, so clicking it currently does nothing.
        self.continueButton = QPushButton("Завершить")
        self.continueButton.setEnabled(False)
        self.buttongrouplayout.addWidget(self.nextstepButton)
        self.buttongrouplayout.addWidget(self.continueButton)
        self.layout.addWidget(self.buttongroup)

        self.multiplestepsbutton = QPushButton("Выполнить N шагов")
        self.multiplestepsbutton.clicked.connect(self.takemultiplesteps)
        self.multiplestepsbutton.setEnabled(False)
        self.label = QLabel("N = ")
        self.lineedit = QLineEdit()
        self.lineedit.setPlaceholderText("N")
        self.widgetgroup = QWidget()
        self.widgetgrouplayout = QHBoxLayout(self.widgetgroup)
        self.widgetgroup.setEnabled(False)
        self.widgetgrouplayout.addWidget(self.label)
        self.widgetgrouplayout.addWidget(self.lineedit)
        self.layout.addWidget(self.multiplestepsbutton)
        self.layout.addWidget(self.widgetgroup)

        self.setGeometry(100, 100, 200, 200)
        self.setWindowTitle("DBScan")
        self.show()

    def _showWarning(self, text):
        """Show a modal warning box with the given message text."""
        msg = QMessageBox()
        msg.setIcon(QMessageBox.Warning)
        msg.setText(text)
        msg.setWindowTitle("Внимание")
        msg.exec_()

    def performAlgorithm(self):
        """Run the whole clustering in one go and hand the clusters over.

        Does nothing when ``prepareData()`` reports invalid input.
        Otherwise clusters are grown one at a time: the first
        still-unclustered row seeds a cluster, and the cluster is extended
        with every neighbour (within ``self.radius``) accepted by
        ``isElemSuitable`` until it stops growing.  The finished clusters
        are passed to ``parent().addClusters``.
        """
        if not self.prepareData():
            return

        print(*self.rowsToClusterize)
        iterCounter = 0
        while self.rowsToClusterize:
            iterCounter += 1
            seed = self.rowsToClusterize[0]
            self.supposedClusterElems = {seed}

            # Only rows added in the previous pass can contribute new
            # neighbours (the neighbour pool does not change while a
            # cluster grows), so expand from that frontier instead of
            # rescanning the whole cluster on every pass.
            frontier = {seed}
            while frontier:
                clusterExtension = set()
                for member in frontier:
                    for neighbor in self.findNeighbors(member, self.radius):
                        if (neighbor in self.supposedClusterElems
                                or neighbor in clusterExtension):
                            continue
                        if self.isElemSuitable(neighbor, self.radius,
                                               self.neighborsamount):
                            clusterExtension.add(neighbor)
                self.supposedClusterElems.update(clusterExtension)
                frontier = clusterExtension

            cluster = Cluster(
                self.workData.buildClusterDataFromRows(
                    list(self.supposedClusterElems)))
            cluster.setColor(
                Constants.EXTENDED_COLOR_SET[
                    (iterCounter - 1) % len(Constants.EXTENDED_COLOR_SET)])
            cluster.setName("dbscan" + str(iterCounter))
            self.resultClusters.append(cluster)

            # Remove the rows of the finished cluster from the source sets.
            for row in self.supposedClusterElems:
                self.rowsToClusterize.remove(row)
                self.rowsToConsider.remove(row)

        self.parent().addClusters(self.resultClusters)

    def performStep(self):
        """Advance the step-by-step execution by exactly one step.

        A step is one of: starting a new cluster from the first
        unclustered row, examining one pending neighbour, widening the
        neighbour frontier after the cluster grew, or finishing the current
        cluster.  When no cluster is open and no rows are left the window
        is closed instead.

        :return: ``True`` when a step was performed, ``False`` when the
            work was already finished and the window was closed.
        """
        if self.cluster is None:
            if not self.rowsToClusterize:
                self.close()
                return False

            # Start a new cluster from the first unclustered row.
            self.clusterIterator += 1
            self.staterow = self.rowsToClusterize[0]
            self.neighborsConsidered.add(self.staterow)
            self.neighborsCurrent = self.findNeighbors(
                self.staterow, self.radius)
            self.neighborsToConsider = set(self.neighborsCurrent)
            self.cluster = Cluster(
                self.workData.buildClusterDataFromRows([self.staterow]))
            self.cluster.setName("dbscan" + str(self.clusterIterator))
            self.cluster.setColor(
                Constants.EXTENDED_COLOR_SET[
                    (self.clusterIterator - 1)
                    % len(Constants.EXTENDED_COLOR_SET)])
            self.rowsToClusterize.remove(self.staterow)
            self.parent().addCluster(self.cluster)
            self.currClusterSize = 1
            self.prevClusterSize = 0
            return True

        if self.neighborsToConsider:
            # Examine one pending neighbour of the open cluster.
            self.staterow = self.neighborsToConsider.pop()
            self.neighborsConsidered.add(self.staterow)
            if self.isElemSuitable(self.staterow, self.radius,
                                   self.neighborsamount):
                self.cluster.addRow(self.staterow)
                if self.staterow in self.rowsToClusterize:
                    self.rowsToClusterize.remove(self.staterow)
            return True

        # All pending neighbours were examined: check whether the cluster
        # grew since the previous frontier.
        self.prevClusterSize = self.currClusterSize
        self.currClusterSize = len(self.cluster)
        if self.currClusterSize != self.prevClusterSize:
            # It grew: the next frontier is every not-yet-examined
            # neighbour of the current frontier.
            for elem in self.neighborsCurrent:
                self.neighborsToConsider.update(
                    self.findNeighbors(elem, self.radius))
            self.neighborsToConsider = (
                self.neighborsToConsider - self.neighborsConsidered)
            self.neighborsCurrent = set(self.neighborsToConsider)
        else:
            # It did not grow: the cluster is complete, retire its rows.
            for elem in self.cluster:
                self.rowsToConsider.remove(elem)
            self.cluster = None
        return True

    def isElemSuitable(self, sourcerow, radius, amount):
        """Return True when ``sourcerow`` has at least ``amount`` neighbours
        within ``radius``."""
        return len(self.findNeighbors(sourcerow, radius)) >= amount

    def findNeighbors(self, sourcerow, radius):
        """Return the set of rows from ``rowsToConsider`` (other than
        ``sourcerow``) whose distance to ``sourcerow`` is below ``radius``.

        Distances are computed by ``sourcerow.distanceTo`` using the
        significance factors of the parent's ``globalData``.
        """
        significancefactors = self.parent().globalData.getSignificanceFactors()
        candidates = set(self.rowsToConsider) - {sourcerow}
        return {
            row for row in candidates
            if sourcerow.distanceTo(row, significancefactors) < radius
        }

    def visualize(self, sourcerow, radius):
        """Publish ``(sourcerow, radius)`` as the parent's sphere and
        refresh the parent canvas."""
        self.parent().sphere = (sourcerow, radius)
        self.parent().refreshCanvas()

    def stepAndVisualize(self):
        """Perform one step and show the row it examined."""
        # Skip the drawing when the step closed the window: closeEvent has
        # just cleared the parent's sphere and it must stay cleared.
        if self.performStep():
            self.visualize(self.staterow, self.radius)

    def takemultiplesteps(self):
        """Perform the number of steps typed into the "N" field, then show
        the row examined by the last one.

        Shows a warning and does nothing when the field does not hold an
        integer.
        """
        # Guard only the parsing, so that a ValueError raised while
        # stepping is not misreported as a bad step count.
        try:
            amount = int(self.lineedit.text())
        except ValueError:
            self._showWarning("Число шагов задано некорректно")
            return

        stepped = False
        for _ in range(amount):
            stepped = self.performStep()
            if not stepped:
                # Work finished and the window closed itself.
                break
        if stepped:
            self.visualize(self.staterow, self.radius)

    def exstepbystep(self):
        """Switch the dialog into step-by-step mode and perform the first
        step; does nothing when the input parameters are invalid."""
        if self.prepareData():
            self.confirmationButton.setEnabled(False)
            self.continueButton.setEnabled(True)
            self.nextstepButton.setEnabled(True)
            self.checkforamountEdit.setEnabled(False)
            self.vicinityEdit.setEnabled(False)
            self.exstepbystepButton.setEnabled(False)
            self.multiplestepsbutton.setEnabled(True)
            self.widgetgroup.setEnabled(True)
            self.stepAndVisualize()

    def prepareData(self):
        """Prepare the data needed by the algorithm.

        Reads the radius and the neighbour count from the input fields and
        resets the working collections from the parent's ``globalData``.
        A warning box is shown for a value that cannot be parsed.

        :return: True when the preparation succeeded, False otherwise.
        """
        try:
            self.radius = float(self.vicinityEdit.text())
        except ValueError:
            self._showWarning("Радиус задан некорректно")
            return False

        try:
            self.neighborsamount = int(self.checkforamountEdit.text())
        except ValueError:
            self._showWarning("Число соседей задано некорректно")
            return False

        self.workData = self.parent().globalData
        self.rowsToClusterize = list(self.workData)
        # The neighbour pool is consumed by every run, so it has to be
        # rebuilt together with rowsToClusterize; otherwise a second run
        # searches an emptied pool and fails when removing rows from it.
        self.rowsToConsider = list(self.workData)
        self.resultClusters = []
        self.supposedClusterElems = set()
        return True

    def closeEvent(self, event):
        """Clear the parent's sphere/circle overlays, refresh the parent's
        canvas and cluster table, and accept the close request."""
        self.parent().sphere = None
        self.parent().circle = None
        self.parent().refreshCanvas()
        self.parent().refreshClusterTable()
        event.accept()