Пример #1
0
def extract_compareClustering(clusterClass):
    """Fit several clustering estimators on the selected pixels and log the results.

    The input image and the selection mask are read from hard-coded paths.
    Each estimator is fitted on the pixels returned by
    ``DataProcess.selectArea(select, n_bands, -1, ...)``; its labels are
    written back into a full-size label array, the remaining area selected
    with value ``0`` gets one extra label (``max(labels_) + 1``), and a
    silhouette score, a visualisation and a timing record are produced.

    Args:
        clusterClass: mapping of a display name to an estimator object that
            exposes ``fit(X)`` and, after fitting, a ``labels_`` attribute.

    Returns:
        None. Results are written to disk (visualisation and an appended
        text record per estimator).
    """
    # Get data, n_bands=4
    norm_img_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\newest"
    img = "4Band_Subtracted_20040514_20050427"

    dataset = oi.open_tiff(norm_img_path, img)
    H = dataset[1]
    W = dataset[2]
    n_bands = dataset[3]
    org_data = art.tif2vec(dataset[0])  # NOTE: this step is really important

    select_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\EXTRACT"
    select_img = "SOMOCLU_20_20_HDBSCAN_cl_2_2004_2005_min_cluster_size_4_alg_best_"
    simg = oi.open_tiff(select_path, select_img)
    select = simg[0]  # (2720000)

    # Index sets for the area to cluster (-1) and the remaining area (0).
    changePos = DataProcess.selectArea(select, n_bands, -1, isStack=True)
    ns_changePos = DataProcess.selectArea(select, n_bands, -1, isStack=False)
    ns_nonChangePos = DataProcess.selectArea(select, n_bands, 0, isStack=False)

    X_train = org_data[changePos].reshape(-1, n_bands)

    # One label per pixel; reused (and overwritten) for every estimator.
    result = np.zeros_like(select.reshape(-1, 1))

    # Loop-invariant output directory.
    save_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\sklearn_clustering\\compare"

    for cls_name, cls_class in clusterClass.items():
        print("running", cls_name, "...")
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed intervals.
        t0 = time.perf_counter()
        cls_class.fit(X_train)
        usingTime = time.perf_counter() - t0

        # combine the result
        result[ns_changePos] = cls_class.labels_
        result[ns_nonChangePos] = np.max(cls_class.labels_) + 1

        # Pass the labels as a flat vector: the (N, 1) column shape is only
        # needed for the indexed assignments above.
        # NOTE(review): no random_state is given, so the 10000-sample subset
        # (and therefore the score) differs between runs.
        evaluation = silhouette_score(X=org_data,
                                      labels=result.ravel(),
                                      metric='euclidean',
                                      sample_size=10000)

        DataProcess.visualize_class(
            result.reshape(H, W),
            save_path + '\\' + cls_name + "_change_area_class")

        # save using time
        print("save the information to txt file...")
        with open(
                save_path + '/' +
                "Outlier Detection Algorithms Running Time.txt", 'a') as f:
            f.write("detetion algorithm: " + cls_name + "\nsilhouette_score:" +
                    str(evaluation) + "\ndetection using time: " +
                    str(usingTime))
            f.write("\n----------------------------------------------\n")
Пример #2
0
def runClusteringBased(img_path, img_name, data_path, data_name,
                       outlier_save_path, clusteringPara, outlierPara,
                       o_filter="highRank"):
    """Cluster the data, score outliers from the clusters and save all outputs.

    Steps: load the data from CSV, run the clustering algorithm named in
    ``clusteringPara[0]``, save and visualise the cluster labels, compute
    outlier scores, turn them into outlier labels, save those (CSV and
    shapefile), compute a silhouette coefficient and append a status record
    to ``runningstatus.txt`` under ``data_path``.

    Args:
        img_path, img_name: forwarded to the visualisation and shapefile
            helpers; ``img_name`` is also used as the base name of saved files.
        data_path, data_name: location of the CSV read by
            ``DataProcess.csv_to_array``; ``data_path`` also receives the
            status file.
        outlier_save_path: directory handed to the save helpers.
        clusteringPara: ``clusteringPara[0]`` is the algorithm name (one of
            "kMeans", "Affinity", "MeanShift", "Spectral", "Agglomerative",
            "DBSCAN", "BIRCH"); ``clusteringPara[1]`` is the sequence of
            positional arguments unpacked into ``getCluster``. For
            "Agglomerative", ``clusteringPara[1][6]`` is appended to the
            name used in output file names.
        outlierPara: ``outlierPara[0]`` is the scoring method (only "LDCOF"
            is supported); ``outlierPara[1:4]`` are forwarded to it.
        o_filter: how scores become labels; only "highRank" is supported.

    Returns:
        Tuple ``(clusteringScore, usingTime)``. ``usingTime`` spans the
        clustering call plus saving/visualising the cluster labels.

    Raises:
        SystemExit: if the algorithm name is not supported (after printing
            a message), as before.
        ValueError: if ``outlierPara[0]`` or ``o_filter`` is not supported.
    """
    # clusteringPara[0] is the name, the rest are parameters
    # TODO:change score and filter para
    org_data = DataProcess.csv_to_array(data_path, data_name)
    AlgorithmName = clusteringPara[0]
    print("running " + AlgorithmName + " for clustering...")
    t0 = time.time()

    # Every supported name matches the attribute of `cl` that implements it,
    # so one lookup replaces the chain of identical branches.
    supported_algorithms = ("kMeans", "Affinity", "MeanShift", "Spectral",
                            "Agglomerative", "DBSCAN", "BIRCH")
    if AlgorithmName not in supported_algorithms:
        print("algorithm name ilegal")
        # Same exception type and exit status as the former exit() call,
        # without depending on the helper injected by the `site` module.
        raise SystemExit
    d_label = getattr(cl, AlgorithmName).getCluster(org_data,
                                                    *clusteringPara[1])
    if AlgorithmName == "Agglomerative":
        AlgorithmName = AlgorithmName + '_' + clusteringPara[1][6]
    AlgorithmName += '_'

    # save the cluster information
    saveclass_extend_name = '_' + AlgorithmName + "cluster_label"
    DataProcess.int_to_csv(outlier_save_path, img_name, d_label,
                           saveclass_extend_name)
    DataProcess.visualize_class(img_path, img_name, outlier_save_path,
                                img_name + saveclass_extend_name)
    t1 = time.time()

    print("running " + outlierPara[0] +
          " for calculating the outlier scores...")
    if outlierPara[0] == "LDCOF":
        d_score = cb.calLDCOF.findLDCOF(org_data, d_label, outlierPara[1],
                                        outlierPara[2], outlierPara[3])
    else:
        # Previously fell through and failed later with a NameError.
        raise ValueError("unsupported outlier score method: " +
                         repr(outlierPara[0]))

    if o_filter == "highRank":
        # NOTE(review): 98 is presumably a rank/percentile cut-off - confirm.
        outlier_label = sc2r.highRank.getOutliers(d_score, 98)
    else:
        # Previously fell through and failed later with a NameError.
        raise ValueError("unsupported outlier filter: " + repr(o_filter))

    # save the label information for further usage
    savelabel_extend_name = '_' + AlgorithmName + "outlier_label"
    DataProcess.int_to_csv(outlier_save_path, img_name, outlier_label,
                           savelabel_extend_name)
    GeoProcess.getSHP(
        img_path, img_name, outlier_save_path, AlgorithmName,
        outlier_label)  # FIXME: the .tif file could not be specified the path

    # calculate the Silhouette Coefficient as a reference of the performance of the outcome
    # NOTE: due to the limited memory, sample_size is set to 10000, which may make the score less reliable
    print("calculating Silhouette Coefficients...")
    clusteringScore = cl.Silhouette.getSilhouette(org_data,
                                                  d_label,
                                                  sample_size=10000)
    usingTime = t1 - t0
    print("save the information to txt file...")
    with open(data_path + '/' + "runningstatus.txt", 'a') as f:
        f.write("clustering algorithm: " + AlgorithmName +
                "\nsilhouette coefficient: " + str(clusteringScore) +
                "\nclstering using time: " + str(usingTime))
        f.write("\n----------------------------------------------\n")
    return clusteringScore, usingTime