Python DataProcess примеры использования

Язык программирования: Python

Пространство имен/Пакет: mypackages.processing

Класс/Тип: DataProcess

Примеров на hotexamples.com: 6

Python DataProcess - 6 примеров найдено. Это лучшие примеры Python кода для mypackages.processing.DataProcess, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

selectArea(3)

int_to_csv(2)

scaleNormalize(2)

visualize_class(2)

csv_to_array(1)

img_to_csv(1)

saveHeatMap(1)

Пример #1

Показать файл

    def __init__(
            self,
            isExtract=True,
            root_dir="C:\\Users\\DELL\\Projects\\VHR_CD\\image-v2-timeseries\\newest",
            filename="4Band_Subtracted_20040514_20050427",
            select_path="C:\\Users\\DELL\\Projects\\VHR_CD\\image-v2-timeseries\\EXTRACT",
            select_img="SOMOCLU_20_20_HDBSCAN_cl_2_2004_2005_min_cluster_size_4_alg_best_"):
        """Load an image, optionally keep only the masked pixels, and min-max scale it.

        Args:
            isExtract (boolean): when true, open the mask image and keep only
                the positions ``DataProcess.selectArea`` returns for label -1
            root_dir (string): directory handed to ``oi.open_tiff`` for the image
            filename (string): image name handed to ``oi.open_tiff``
            select_path (string): directory of the mask image; only used when
                ``isExtract`` is true
            select_img (string): name of the mask image; only used when
                ``isExtract`` is true
        """
        self.isExtract = isExtract
        self.root_dir = root_dir
        self.file_name = filename

        # open_tiff's result is indexed as [0]=pixel data, [1]=H, [2]=W, [3]=n_bands.
        self.dataset = oi.open_tiff(root_dir, filename)
        self.H = self.dataset[1]
        self.W = self.dataset[2]
        self.n_bands = self.dataset[3]
        # flatten and transform the array
        self.npdataset = art.tif2vec(self.dataset[0])

        if self.isExtract:
            # extract out the changed area
            self.select_path = select_path
            self.select_img = select_img
            self.simg = oi.open_tiff(self.select_path, self.select_img)
            self.select = self.simg[0]  #(2720000)

            # NOTE(review): label -1 presumably marks changed pixels and 0
            # unchanged ones; isStack=True presumably yields positions matching
            # the band-stacked layout of npdataset -- confirm against selectArea.
            self.changePos = DataProcess.selectArea(self.select,
                                                    self.n_bands,
                                                    -1,
                                                    isStack=True)
            self.ns_changePos = DataProcess.selectArea(self.select,
                                                       self.n_bands,
                                                       -1,
                                                       isStack=False)
            self.ns_nonChangePos = DataProcess.selectArea(self.select,
                                                          self.n_bands,
                                                          0,
                                                          isStack=False)

            self.npdataset = self.npdataset[self.changePos].reshape(
                -1, self.n_bands)

        # normalization (min-max along axis 0)
        self.nmax = self.npdataset.max(axis=0)
        self.nmin = self.npdataset.min(axis=0)
        value_range = self.nmax - self.nmin
        # A constant column has zero range; dividing by it would fill the
        # column with NaN. Divide by 1 instead so such a column becomes 0.
        value_range = np.where(value_range == 0, 1, value_range)
        self.norm_data = (self.npdataset - self.nmin) / value_range

        # TODO:don't know what's for yet, add only to be compatible to TensorDataset
        self.target_data = np.zeros_like(self.norm_data)

        # clear the memory: the raw image is no longer needed once normalised
        self.dataset = None

Пример #2

Показать файл

def extract_compareClustering(clusterClass):
    """Fit each clustering model on the masked pixels and record score and time.

    For every entry of ``clusterClass`` the model is fitted on the extracted
    pixels, its labels are written back into a full-size label array, a
    silhouette score is computed, the label map is visualised and a summary is
    appended to a text file in the (hard-coded) comparison directory.

    Args:
        clusterClass (dict): maps a display name to an estimator exposing
            ``fit(X)`` and ``labels_``.
    """
    # Get data, n_bands=4
    norm_img_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\newest"
    img = "4Band_Subtracted_20040514_20050427"

    dataset = oi.open_tiff(norm_img_path, img)
    H = dataset[1]
    W = dataset[2]
    n_bands = dataset[3]
    org_data = art.tif2vec(dataset[0])  #NOTE: this step is really important

    select_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\EXTRACT"
    select_img = "SOMOCLU_20_20_HDBSCAN_cl_2_2004_2005_min_cluster_size_4_alg_best_"
    simg = oi.open_tiff(select_path, select_img)
    select = simg[0]  #(2720000)

    # NOTE(review): label -1 presumably marks changed pixels, 0 unchanged ones.
    changePos = DataProcess.selectArea(select, n_bands, -1, isStack=True)
    ns_changePos = DataProcess.selectArea(select, n_bands, -1, isStack=False)
    ns_nonChangePos = DataProcess.selectArea(select, n_bands, 0, isStack=False)

    X_train = org_data[changePos].reshape(-1, n_bands)

    result = np.zeros_like(select.reshape(-1, 1))

    # Loop-invariant output directory.
    save_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\sklearn_clustering\\compare"

    for cls_name, cls_class in clusterClass.items():
        print("running", cls_name, "...")
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        t0 = time.perf_counter()
        cls_class.fit(X_train)
        usingTime = time.perf_counter() - t0

        # combine the result: unchanged pixels get one extra label past the
        # largest label the model produced
        result[ns_changePos] = cls_class.labels_
        result[ns_nonChangePos] = np.max(cls_class.labels_) + 1

        evaluation = silhouette_score(X=org_data,
                                      labels=result,
                                      metric='euclidean',
                                      sample_size=10000)

        DataProcess.visualize_class(
            result.reshape(H, W),
            save_path + '\\' + cls_name + "_change_area_class")

        # save using time
        print("save the information to txt file...")
        with open(
                save_path + '/' +
                "Outlier Detection Algorithms Running Time.txt", 'a') as f:
            f.write("detetion algorithm: " + cls_name + "\nsilhouette_score:" +
                    str(evaluation) + "\ndetection using time: " +
                    str(usingTime))
            f.write("\n----------------------------------------------\n")

Пример #3

Показать файл

 def anomaly(self,scores):
     """Scale outlier scores to (0, 500), label the outliers and export them.

     Stores the scaled scores in ``self.score_result`` and the labels returned
     by ``highRank.getOutliers`` in ``self.outlier_result``, then hands the
     labels to ``GeoProcess.getSHP``.

     Args:
         scores: raw outlier scores accepted by ``DataProcess.scaleNormalize``.
     """
     # The flag lives on the instance; a bare `isExtract` is not defined in
     # this scope and would raise NameError.
     if self.isExtract:
         self.score_result=np.empty_like(self.select.reshape(-1,1))
         # scale the scores and write them to the extracted positions
         self.score_result[self.ns_changePos]=DataProcess.scaleNormalize(scores,(0,500)).reshape(-1,)
         # every other position gets score 0
         self.score_result[self.ns_nonChangePos]=0
     else:
         self.score_result=DataProcess.scaleNormalize(scores,(0,500)).reshape(-1,)
     # give labels
     self.outlier_result=highRank.getOutliers(self.score_result,99)
     # generate picture (output directory and name prefix are hard-coded)
     GeoProcess.getSHP(img_path=self.root_dir,img_name=self.file_name,
         save_path="C:\\Users\\DELL\\Projects\\VHR_CD\\repository\\code-v2",extend_name="VAE_noEXT_",result_array=self.outlier_result)

Пример #4

Показать файл

def RunPyodOutlier(classifiers, outlier_save_path, isExtract=True):
    """Run each outlier detector on the image and save labels, scores and timing.

    Args:
        classifiers (dict): maps a display name to a detector exposing
            ``fit(X)``, ``labels_`` and ``decision_scores_``.
        outlier_save_path (str): directory that receives the CSV, shapefile,
            heat map and the running-time text file.
        isExtract (bool): when true, fit only on the positions selected from
            the mask image and write zeros everywhere else.
    """
    # Get data, n_bands=4
    norm_img_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\newest"
    img = "4Band_Subtracted_20040514_20050427"

    dataset = oi.open_tiff(norm_img_path, img)
    H = dataset[1]
    W = dataset[2]
    n_bands = dataset[3]
    org_data = art.tif2vec(dataset[0])  #NOTE: this step is really important

    #NOTE: Normalize the scale of the original data (divide by the axis-0 maximum)
    org_data = org_data / org_data.max(axis=0)

    #TODO: normalize the data?

    if isExtract:
        # extract out the changed area
        select_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\EXTRACT"
        select_img = "SOMOCLU_20_20_HDBSCAN_cl_2_2004_2005_min_cluster_size_4_alg_best_"
        simg = oi.open_tiff(select_path, select_img)
        select = simg[0]  #(2720000)

        # NOTE(review): label -1 presumably marks changed pixels, 0 unchanged.
        changePos = DataProcess.selectArea(select, n_bands, -1, isStack=True)
        ns_changePos = DataProcess.selectArea(select,
                                              n_bands,
                                              -1,
                                              isStack=False)
        ns_nonChangePos = DataProcess.selectArea(select,
                                                 n_bands,
                                                 0,
                                                 isStack=False)

        X_train = org_data[changePos].reshape(-1, n_bands)
        print("shape of original data: ", org_data.shape)
        print("shape of extracted data: ", X_train.shape)
        # to save the final result
        # NOTE(review): both buffers inherit the mask's dtype; if that is an
        # integer type the scaled scores are truncated on assignment -- confirm.
        outlier_result = np.zeros_like(select.reshape(-1, 1))
        score_result = np.empty_like(select.reshape(-1, 1))
    else:
        X_train = org_data.reshape(-1, n_bands)
        print("shape of training data: ", X_train.shape)

    for clf_name, clf in classifiers.items():
        if not isExtract:
            clf_name = "no_extract_" + clf_name

        print("running " + clf_name + "...")
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        t0 = time.perf_counter()
        clf.fit(X_train)
        usingTime = time.perf_counter() - t0
        # get the prediction labels and outlier scores of the training data
        y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
        y_train_scores = clf.decision_scores_  # raw outlier scores

        if isExtract:
            # combine the extraction non-changed label&&scores and the algorithm result
            outlier_result[ns_changePos] = y_train_pred
            outlier_result[ns_nonChangePos] = 0
            score_result[ns_changePos] = DataProcess.scaleNormalize(
                y_train_scores, (0, 500)).reshape(-1, )
            score_result[ns_nonChangePos] = 0
        else:
            # no mask: the detector output already covers every pixel
            outlier_result = y_train_pred
            score_result = DataProcess.scaleNormalize(y_train_scores,
                                                      (0, 500)).reshape(-1, )

        print("the scale of the y_train_score is:", y_train_scores.min(),
              y_train_scores.max())
        print("the scale of the score_result is:", score_result.min(),
              score_result.max())

        # save the outlier detection result as .csv and via GeoProcess.getSHP
        DataProcess.int_to_csv(outlier_save_path, img, outlier_result,
                               clf_name + "_outliers")
        GeoProcess.getSHP(norm_img_path, img, outlier_save_path,
                          clf_name + "_outliers", outlier_result)

        #save the outlier scores as heatmap
        DataProcess.saveHeatMap(score_result.reshape(H, W),
                                outlier_save_path + "\\" + clf_name)

        print("save the information to txt file...")
        with open(
                outlier_save_path + '/' +
                "Outlier Detection Algorithms Running Time.txt", 'a') as f:
            f.write("detetion algorithm: " + clf_name +
                    "\ndetection using time: " + str(usingTime))
            f.write("\n----------------------------------------------\n")

Пример #5

Показать файл

def runClusteringBased(img_path, img_name, data_path, data_name,
                       outlier_save_path, clusteringPara, outlierPara,
                       o_filter="highRank"):
    """Cluster the data, score outliers from the clusters and save everything.

    Args:
        img_path: image directory forwarded to the visualisation/export helpers.
        img_name: image name; also used as prefix of the saved files.
        data_path: directory of the input CSV; ``runningstatus.txt`` is
            appended there.
        data_name: name of the input CSV.
        outlier_save_path: directory receiving the label CSVs and exports.
        clusteringPara: ``[name, params]`` where ``name`` selects the
            clustering wrapper in ``cl`` and ``params`` is unpacked into its
            ``getCluster`` call after the data.
        outlierPara: ``[name, p1, p2, p3]``; only ``"LDCOF"`` is supported.
        o_filter: how scores become labels; only ``"highRank"`` is supported.

    Returns:
        tuple: ``(clusteringScore, usingTime)`` -- the silhouette value and the
        elapsed seconds between starting the clustering and finishing the
        save/visualise of the cluster labels.

    Raises:
        SystemExit: if the clustering algorithm name is not recognised.
        ValueError: if ``outlierPara[0]`` or ``o_filter`` is unsupported.
    """
    #clusteringPara[0] is the name, the rest are parameters
    #TODO:change score and filter para
    supported_algorithms = ("kMeans", "Affinity", "MeanShift", "Spectral",
                            "Agglomerative", "DBSCAN", "BIRCH")

    org_data = DataProcess.csv_to_array(data_path, data_name)
    AlgorithmName = clusteringPara[0]
    print("running " + AlgorithmName + " for clustering...")

    if AlgorithmName not in supported_algorithms:
        print("algorithm name ilegal")
        # Same exception the site-provided exit() raised, without depending
        # on the `site` module having injected that helper.
        raise SystemExit

    # Reject unsupported scoring/filter options before the expensive
    # clustering; previously they surfaced later as an unbound-variable error.
    if outlierPara[0] != "LDCOF":
        raise ValueError("unsupported outlier score algorithm: " +
                         str(outlierPara[0]))
    if o_filter != "highRank":
        raise ValueError("unsupported outlier filter: " + str(o_filter))

    t0 = time.time()
    # Every supported name is also the attribute name of its wrapper in `cl`.
    d_label = getattr(cl, AlgorithmName).getCluster(org_data,
                                                    *(clusteringPara[1]))
    if AlgorithmName == "Agglomerative":
        # the 7th clustering parameter is appended to the name used in outputs
        AlgorithmName = AlgorithmName + '_' + clusteringPara[1][6]
    AlgorithmName += '_'

    #save the cluster information
    saveclass_extend_name = '_' + AlgorithmName + "cluster_label"
    DataProcess.int_to_csv(outlier_save_path, img_name, d_label,
                           saveclass_extend_name)
    DataProcess.visualize_class(img_path, img_name, outlier_save_path,
                                img_name + saveclass_extend_name)
    # NOTE: t1 is taken after saving, so the reported time includes that I/O.
    t1 = time.time()

    print("running " + outlierPara[0] +
          " for calculating the outlier scores...")
    d_score = cb.calLDCOF.findLDCOF(org_data, d_label, outlierPara[1],
                                    outlierPara[2], outlierPara[3])

    outlier_label = sc2r.highRank.getOutliers(d_score, 98)

    #save the label information for further usage
    savelabel_extend_name = '_' + AlgorithmName + "outlier_label"
    DataProcess.int_to_csv(outlier_save_path, img_name, outlier_label,
                           savelabel_extend_name)
    GeoProcess.getSHP(
        img_path, img_name, outlier_save_path, AlgorithmName,
        outlier_label)  #FIXME: the .tif file could not be specified the path

    #calculate the Silhouette Coefficient as a reference of the performance of the outcome
    #NOTE:due to the limited memory, I adjust the sample_size to 10000,which may cause the score less reliable
    print("calculating Silhouette Coefficients...")
    clusteringScore = cl.Silhouette.getSilhouette(org_data,
                                                  d_label,
                                                  sample_size=10000)
    usingTime = t1 - t0
    print("save the information to txt file...")
    with open(data_path + '/' + "runningstatus.txt", 'a') as f:
        f.write("clustering algorithm: " + AlgorithmName +
                "\nsilhouette coefficient: " + str(clusteringScore) +
                "\nclstering using time: " + str(usingTime))
        f.write("\n----------------------------------------------\n")
    return clusteringScore, usingTime

Пример #6

Показать файл

from mypackages.processing import DataProcess
from mypackages.processing import GeoProcess
from mypackages import clustering as cl
from mypackages import clusteringBased as cb
from mypackages import scoresToResults as sc2r
from mypackages.processing import open_image as oi

import numpy as np

# Script: hand one image to DataProcess.img_to_csv.
# norm_img_path / norm_data_path are the two directories passed to the call,
# img is the image name. NOTE(review): presumably the first directory is the
# source and the second receives the CSV -- confirm against img_to_csv.
norm_img_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\Encoded_dataset\\Encoded_models_2018-10-03_1337\\subtracted_norm_from_norm"
norm_data_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\raw_data\\1337_sub"
img = "Subtracted_20040514_20050427"
# `raw` is not used anywhere in the visible part of this script.
raw = "Subtracted_20040514_20050427_raw_data"

# Disabled: list every image/CSV in a directory. The names it references
# (norm_subtracted_path, norm_subtracted_save) are not defined in this script.
#get the paths and names in the dir
# img_path_name,img_f_names = DataProcess.file_name(norm_subtracted_path,".TIF")
# data_path_name,data_f_names = DataProcess.file_name(norm_subtracted_save,".csv")

# Run img_to_csv for the single image named above.
DataProcess.img_to_csv(norm_img_path, norm_data_path, img)

# Disabled: batch version of the call above over all listed images.
#transform all the images in the path into csv (done)
# for name in img_f_names:
# DataProcess.img_to_csv(norm_subtracted_path,norm_subtracted_save,name)