示例#1
0
def loadArffFile(filePath):
    """Load an ARFF file and split it into features and labels.

    The last column of the loaded table is treated as the label column.

    Args:
        filePath: Location of the ARFF file; handed unchanged to ``ArffFile``.

    Returns:
        tuple: ``(features, labels)`` where ``features`` is the table
        without its last column and ``labels`` is that last column.
    """
    source = ArffFile(filePath)
    # Work on a copy of the loaded table (presumably a pandas DataFrame --
    # TODO confirm against ArffFile.getData).
    table = source.getData().copy()

    target = table.columns[-1]
    features = table.drop(target, axis=1)
    # Labels are read from a fresh getData() call rather than from the copy.
    labels = source.getData()[target]
    return features, labels
示例#2
0
文件: main.py 项目: vbadenas/MAI-IML
    def loadArffFile(arffFilePath):
        """Read an ARFF file and return its features and labels separately.

        Args:
            arffFilePath: Location of the ARFF file; passed directly to
                ``ArffFile``.

        Returns:
            tuple: ``(features, labels)``. ``features`` is the loaded table
            minus its last column; ``labels`` is that last column.
        """
        # Load the file and take a private copy of its table.
        dataset = ArffFile(arffFilePath)
        frame = dataset.getData().copy()

        # The final column holds the label: strip it from the features ...
        labelName = frame.columns[-1]
        features = frame.drop(labelName, axis=1)
        # ... and fetch it on its own from a fresh getData() call.
        labels = dataset.getData()[labelName]

        return features, labels
示例#3
0
def main():
    """Write a scatter plot and a box plot image for each listed dataset.

    For every ARFF file below, a folder named after the file's stem is
    created under ``RESULTS_PATH`` and two images are saved into it:
    ``scatterplot.png`` and ``boxplot.png``.
    """
    figureSize = (15, 9)
    datasetPaths = (
        Path("../datasets/vote.arff"),
        Path("../datasets/adult.arff"),
        Path("../datasets/pen-based.arff"),
    )

    for datasetPath in datasetPaths:
        # One output folder per dataset, created on demand.
        outputDir = RESULTS_PATH / datasetPath.stem
        outputDir.mkdir(parents=True, exist_ok=True)

        dataset = ArffFile(datasetPath)
        frame = dataset.getData()
        # Box plot covers every column except the last one.
        features = frame.drop(frame.columns[-1], axis=1)

        # Scatter plot: rendered by the dataset object, then saved and closed.
        # NOTE(review): presumably scatterPlot draws on the current
        # matplotlib figure -- confirm in ArffFile.
        dataset.scatterPlot(ignoreLabel=True, show=False, figsize=figureSize)
        plt.savefig(outputDir / "scatterplot.png")
        plt.close()

        # Box plot on a fresh figure of the same size.
        plt.figure(figsize=figureSize)
        features.boxplot()
        plt.xticks(rotation='vertical')
        plt.tight_layout()
        plt.savefig(outputDir / "boxplot.png")
        plt.close()
示例#4
0
import sys
sys.path.append(".")

import json
from pathlib import Path
from src.dataset import ArffFile

# Folder that is scanned for ``*.json`` configuration files.
configFolderPath = Path("./configs")

# Show the scatter plot of the dataset referenced by each config file.
for configPath in configFolderPath.glob('*.json'):
    # JSON text is UTF-8 by specification; decode it explicitly instead of
    # relying on the platform's locale encoding.
    with open(configPath, encoding="utf-8") as f:
        confData = json.load(f)

    # The dataset location is read from the config's "path" entry.
    arffFilePath = confData["path"]
    arffFile = ArffFile(arffFilePath)
    arffFile.scatterPlot(ignoreLabel=True, show=True)
示例#5
0
def findEps(data, Ks=(2,), datasetName="adult"):
    """Plot the sorted K-th nearest-neighbour distance curve for each K.

    For every ``K`` in ``Ks`` the distance from each sample to its K-th
    neighbour is computed, sorted in ascending order and drawn as one line.
    The figure is saved under ``results/<datasetName>/dbscan/`` and then
    shown.

    Args:
        data: Table of samples exposing ``to_numpy()`` (presumably a pandas
            DataFrame of numeric features -- TODO confirm with callers).
        Ks: Sized iterable of neighbour counts to plot, one curve per value.
        datasetName: Name used in the plot title and in the output folder.
            Defaults to ``"adult"``, which reproduces the original title
            and save path.
    """
    from pathlib import Path  # local import keeps this block self-contained

    X = data.to_numpy()
    # One evenly spaced colour from the Spectral colormap per K.
    colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(Ks))]
    for K, col in zip(Ks, colors):
        # K + 1 neighbours are requested because the query points are the
        # fitted points themselves, so each sample is presumably returned as
        # its own nearest neighbour; the last column is then the K-th other
        # sample.
        neigh = NearestNeighbors(n_neighbors=K + 1)
        distances, _ = neigh.fit(X).kneighbors(X)
        distances = np.sort(distances[:, -1], axis=0)
        plt.plot(distances, color=tuple(col), label=f'{K} Nearest Neighbor')

    plt.xlabel('Points sorted according to distance of Kth Nearest Neighbor')
    plt.ylabel('Kth Nearest Neighbor Distance')
    plt.title(f"K-dist plot for '{datasetName}' dataset")
    plt.legend()
    # Lay out only after labels, title and legend exist so they are taken
    # into account.
    plt.tight_layout()

    outputPath = Path(
        f"results/{datasetName}/dbscan/dbscanDistanceToKthNeighbor.png"
    )
    # Make sure the destination folder exists before saving.
    outputPath.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(outputPath)
    plt.show()


if __name__ == "__main__":
    # Load the 'adult' dataset and work on a copy of its table.
    arffFile = ArffFile(Path("./datasets/adult.arff"))
    frame = arffFile.getData().copy()

    # Separate the last column (the label) from the remaining features.
    labelColumn = frame.columns[-1]
    y = frame[labelColumn]
    data = frame.drop(labelColumn, axis=1)

    # Candidate parameter grids; ``y`` and ``eps`` are only consumed by the
    # currently disabled ``evaluate`` call below.
    minPts = list(range(10, 51, 10))
    eps = [.55, .6, .65, .70, .75]

    # Draw one K-distance curve per candidate minPts value.
    findEps(data, Ks=minPts)
    # evaluate(data, y, eps, minPts)