Пример #1
0
def BoxPlotByCluster(DataDirectory, OutDirectory, fname_prefix,  raster_name, stream_order=1, max_value=10):
    """
    Make a boxplot of the results of the clustering compared to the raster specified
    by raster_name.

    The clustered profile points are read from
    OutDirectory + fname_prefix + '_profiles_clustered_SO<stream_order>.csv',
    the raster is sampled at each point, and the sampled values are grouped by
    the point's 'cluster_id' before being drawn as one box per cluster.

    Args:
        DataDirectory (str): directory prefix of the raster (joined by plain
            string concatenation, so it must end with a path separator).
        OutDirectory (str): directory prefix of the clustered profiles csv
            (same concatenation rule as DataDirectory).
        fname_prefix (str): prefix of the clustered profiles csv file name.
        raster_name (str): file name of the raster, including its extension.
        stream_order (int): stream order used to build the csv file name.
        max_value (float or None): only raster values strictly below this
            threshold are plotted. The default of 10 is the cut-off that used
            to be hard-coded; pass None to keep every non-NaN value.

    Returns:
        None. The figure is displayed with plt.show().
    """
    raster_path = DataDirectory + raster_name

    # read in the raster and its georeferencing
    this_raster = IO.ReadRasterArrayBlocks(raster_path)
    EPSG_string = IO.GetUTMEPSG(raster_path)
    _ndv, _xsize, ysize, _geot, _projection, _dtype = IO.GetGeoInfo(raster_path)
    CellSize, XMin, _xmax, YMin, _ymax = IO.GetUTMMaxMin(raster_path)
    n_rows, n_cols = np.shape(this_raster)[:2]

    pts = PT.LSDMap_PointData(OutDirectory+fname_prefix+'_profiles_clustered_SO{}.csv'.format(stream_order),data_type ='csv')
    easting, northing = pts.GetUTMEastingNorthing(EPSG_string=EPSG_string)
    cluster_id = pts.QueryData('cluster_id', PANDEX=True)

    # dict for the data: one list of sampled raster values per cluster
    data = {k: [] for k in set(cluster_id)}
    for x, (i, j) in enumerate(zip(northing, easting)):
        # convert the UTM coordinates to rows and cols; the row index is
        # flipped with ysize because it decreases as northing increases.
        # NOTE(review): the column is truncated while the row is rounded; this
        # asymmetry is kept exactly as in the original conversion.
        col_point = int((j - XMin) / CellSize)
        row_point = (ysize - 1) - int(round((i - YMin) / CellSize))

        # Skip points outside the raster. Without this check a negative index
        # silently wraps around to the opposite edge of the array and an
        # index past the end raises IndexError.
        if not (0 <= row_point < n_rows and 0 <= col_point < n_cols):
            continue

        # check for data at this cell
        this_value = this_raster[row_point][col_point]
        if np.isnan(this_value):
            continue
        if max_value is None or this_value < max_value:
            data[cluster_id[x]].append(this_value)

    print(data)

    # nothing to unpack or plot if the csv held no clustered points
    if not data:
        return

    # now make a boxplot
    labels, these_data = zip(*data.items())  # 'transpose' items to parallel key, value lists

    plt.boxplot(these_data)
    plt.xticks(range(1, len(labels) + 1), labels)
    plt.show()
Пример #2
0
def GetLithologyPercentages(DataDirectory, OutDirectory, fname_prefix, raster_name, stream_order=1):
    """
    Get the percentage of the nodes in each cluster that drain each lithology.

    The clustered profile points are read from
    OutDirectory + fname_prefix + '_profiles_clustered_SO<stream_order>.csv',
    the raster is sampled at each point, and for every cluster the share of
    each distinct raster value among its sampled points is printed.

    Args:
        DataDirectory (str): directory prefix of the raster (joined by plain
            string concatenation, so it must end with a path separator).
        OutDirectory (str): directory prefix of the clustered profiles csv
            (same concatenation rule as DataDirectory).
        fname_prefix (str): prefix of the clustered profiles csv file name.
        raster_name (str): file name of the raster, including its extension.
        stream_order (int): stream order used to build the csv file name.

    Returns:
        dict: maps each cluster id to a dict of {raster value: percentage of
        that cluster's sampled points}. Clusters without any sampled point map
        to an empty dict.
    """
    from collections import Counter

    raster_path = DataDirectory + raster_name

    # read in the raster and its georeferencing
    this_raster = IO.ReadRasterArrayBlocks(raster_path)
    EPSG_string = IO.GetUTMEPSG(raster_path)
    _ndv, _xsize, ysize, _geot, _projection, _dtype = IO.GetGeoInfo(raster_path)
    CellSize, XMin, _xmax, YMin, _ymax = IO.GetUTMMaxMin(raster_path)
    n_rows, n_cols = np.shape(this_raster)[:2]

    pts = PT.LSDMap_PointData(OutDirectory+fname_prefix+'_profiles_clustered_SO{}.csv'.format(stream_order),data_type ='csv')
    easting, northing = pts.GetUTMEastingNorthing(EPSG_string=EPSG_string)
    cluster_id = pts.QueryData('cluster_id', PANDEX=True)

    # dict for the data: one list of sampled raster values per cluster
    data = {k: [] for k in set(cluster_id)}
    for x, (i, j) in enumerate(zip(northing, easting)):
        # convert the UTM coordinates to rows and cols; the row index is
        # flipped with ysize because it decreases as northing increases.
        # NOTE(review): the column is truncated while the row is rounded; this
        # asymmetry is kept exactly as in the original conversion.
        col_point = int((j - XMin) / CellSize)
        row_point = (ysize - 1) - int(round((i - YMin) / CellSize))

        # Skip points outside the raster. Without this check a negative index
        # silently wraps around to the opposite edge of the array and an
        # index past the end raises IndexError.
        if not (0 <= row_point < n_rows and 0 <= col_point < n_cols):
            continue

        # check for data at this cell
        this_value = this_raster[row_point][col_point]
        if not np.isnan(this_value):
            data[cluster_id[x]].append(this_value)

    # you have the values. now what percentage are each?
    percentages = {}
    for key, liths in data.items():
        c = Counter(liths)
        print(c)
        n_values = len(liths)
        cluster_percentages = {}
        # an empty cluster has an empty Counter, so no division by zero occurs
        for lith, count in c.items():
            cluster_percentages[lith] = count/n_values * 100
            print(lith,": ",cluster_percentages[lith])
        percentages[key] = cluster_percentages

    return percentages