Пример #1
0
def BoxPlotByCluster(DataDirectory, OutDirectory, fname_prefix,  raster_name, stream_order=1):
    """
    Make a boxplot of the results of the clustering compared to the raster specified
    by raster_name
    """
    #df = pd.read_csv(OutDirectory+fname_prefix+'_profiles_clustered_SO{}.csv'.format(stream_order))

    # read in the raster
    raster_ext = '.bil'
    this_raster = IO.ReadRasterArrayBlocks(DataDirectory+raster_name)
    EPSG_string = IO.GetUTMEPSG(DataDirectory+raster_name)
    NDV, xsize, ysize, GeoT, Projection, DataType = IO.GetGeoInfo(DataDirectory+raster_name)
    CellSize,XMin,XMax,YMin,YMax = IO.GetUTMMaxMin(DataDirectory+raster_name)

    pts = PT.LSDMap_PointData(OutDirectory+fname_prefix+'_profiles_clustered_SO{}.csv'.format(stream_order),data_type ='csv')
    easting, northing = pts.GetUTMEastingNorthing(EPSG_string=EPSG_string)
    cluster_id = pts.QueryData('cluster_id', PANDEX=True)
    clusters = list(set(cluster_id))

    # dict for the data
    data = {k: [] for k in clusters}
    for x, (i, j) in enumerate(zip(northing, easting)):
    # convert to rows and cols
        X_coordinate_shifted_origin = j - XMin;
        Y_coordinate_shifted_origin = i - YMin;

        col_point = int(X_coordinate_shifted_origin/CellSize);
        row_point = (ysize - 1) - int(round(Y_coordinate_shifted_origin/CellSize));
        # check for data at this cell
        this_value = this_raster[row_point][col_point]
        if not np.isnan(this_value):
            if this_value < 10:
                # get the cluster id
                data[cluster_id[x]].append(this_value)

    print(data)

    # now make a boxplot
    labels, these_data = [*zip(*data.items())]  # 'transpose' items to parallel key, value lists

    plt.boxplot(these_data)
    plt.xticks(range(1, len(labels) + 1), labels)
    plt.show()
Пример #2
0
def GetLithologyPercentages(DataDirectory, OutDirectory, fname_prefix, raster_name, stream_order=1):
    """
    Get the percentage of the nodes in each cluster that drain each lithology
    """
    from collections import Counter
    # read in the raster
    raster_ext = '.bil'
    this_raster = IO.ReadRasterArrayBlocks(DataDirectory+raster_name)
    EPSG_string = IO.GetUTMEPSG(DataDirectory+raster_name)
    NDV, xsize, ysize, GeoT, Projection, DataType = IO.GetGeoInfo(DataDirectory+raster_name)
    CellSize,XMin,XMax,YMin,YMax = IO.GetUTMMaxMin(DataDirectory+raster_name)

    pts = PT.LSDMap_PointData(OutDirectory+fname_prefix+'_profiles_clustered_SO{}.csv'.format(stream_order),data_type ='csv')
    easting, northing = pts.GetUTMEastingNorthing(EPSG_string=EPSG_string)
    cluster_id = pts.QueryData('cluster_id', PANDEX=True)
    clusters = list(set(cluster_id))

    # dict for the data
    data = {k: [] for k in clusters}
    for x, (i, j) in enumerate(zip(northing, easting)):
    # convert to rows and cols
        X_coordinate_shifted_origin = j - XMin;
        Y_coordinate_shifted_origin = i - YMin;

        col_point = int(X_coordinate_shifted_origin/CellSize);
        row_point = (ysize - 1) - int(round(Y_coordinate_shifted_origin/CellSize));
        # check for data at this cell
        this_value = this_raster[row_point][col_point]
        if not np.isnan(this_value):
            data[cluster_id[x]].append(this_value)

    # you have the values. now what percentage are each?
    for key, liths in data.items():
        c = Counter(liths)
        n_ndv = c[0.0]
        print(c)
        [print(x,": ",vals/len(liths) * 100) for x, vals in c.items()]
Пример #3
0
def MakeBoxPlotsKsnLithology(DataDirectory, fname_prefix, raster_name, theta=0.45, label_list=[]):
    """
    Make boxplots of ksn compared to lithology raster. Lithology should have integer
    values for the different rock types (rock type with 0 is excluded). Pass in list of
    labels for the different units, which must be the same length as the number of lithology codes.
    If none is passed then just use the integer values for labelling.
    """
    from scipy import stats

    # read in the raster
    raster_ext = '.bil'
    this_raster = IO.ReadRasterArrayBlocks(DataDirectory+raster_name)
    #EPSG_string = IO.GetUTMEPSG(DataDirectory+raster_name)
    EPSG_string='epsg:32611'
    print(EPSG_string)
    NDV, xsize, ysize, GeoT, Projection, DataType = IO.GetGeoInfo(DataDirectory+raster_name)
    CellSize,XMin,XMax,YMin,YMax = IO.GetUTMMaxMin(DataDirectory+raster_name)

    pts = PT.LSDMap_PointData(DataDirectory+fname_prefix+'_ksn.csv',data_type ='csv')
    print(pts)
    easting, northing = pts.GetUTMEastingNorthing(EPSG_string=EPSG_string)
    ksn = pts.QueryData('ksn', PANDEX=True)
    #print(ksn)

    # get the unique values in the raster
    raster_values = np.unique(this_raster)
    raster_values = raster_values[1:]


    # dict for the data
    data = {k: [] for k in raster_values}

    for x, (i, j) in enumerate(zip(northing, easting)):
    # convert to rows and cols
        X_coordinate_shifted_origin = j - XMin;
        Y_coordinate_shifted_origin = i - YMin;

        col_point = int(X_coordinate_shifted_origin/CellSize);
        row_point = (ysize - 1) - int(round(Y_coordinate_shifted_origin/CellSize));
        # check for data at this cell
        this_raster_value = this_raster[row_point][col_point]
        if not np.isnan(this_raster_value) and this_raster_value != 0:
            data[this_raster_value].append(ksn[x])

    # set up a figure
    fig,ax = plt.subplots(nrows=1,ncols=1, figsize=(5,5), sharex=True, sharey=True)

    labels, dict = [*zip(*data.items())]  # 'transpose' items to parallel key, value lists
    print(label_list)
    if label_list:
        labels = label_list
    print(labels)
    box = plt.boxplot(dict, patch_artist=True)
    plt.xticks(range(1, len(labels) + 1), labels)
    plt.ylabel('$k_{sn}$', fontsize=14)

    # get the medians for plotting as an upper label
    medians = []
    print("========SOME KSN STATISTICS=========")
    for key, value in data.items():
        print("Key {}, median ksn = {}".format(key, np.median(value)))
        medians.append(np.median(value))
        print("Key {}, IQR = {}".format(key, stats.iqr(value)))
    print("========================================")
    pos = np.arange(len(labels)) + 1
    upperLabels = [str(np.round(s, 2)) for s in medians]

    # change the colours for each lithology
    colors=['#60609fff', '#fdbb7fff', '#935353ff', '#f07b72ff']
    for patch, color in zip(box['boxes'], colors):
        patch.set_facecolor(color)
        patch.set_alpha(0.9)
        patch.set_edgecolor('k')
    for cap in box['caps']:
        cap.set(color='k')
    for wh in box['whiskers']:
        wh.set(color='k')
    for med in box['medians']:
        med.set(color='k')
    for flier, color in zip(box['fliers'], colors):
        flier.set_markeredgecolor(color)
        flier.set_markerfacecolor(color)
        flier.set_markersize(2)

    # for tick, label in zip(range(len(labels)), ax.get_xticklabels()):
    #     k = tick % 2
    #     ax.text(pos[tick], top - (top*0.05), upperLabels[tick],
    #              horizontalalignment='center', color=colors[k])

    ax.grid(color='0.8', linestyle='--', which='major', zorder=1)
    plt.title('Boxplots of $k_{sn}$ by lithology', fontsize=14)
    plt.savefig(DataDirectory+fname_prefix+'_boxplot_lith_ksn.png', dpi=300, transparent=True)
    plt.clf()



    # Do some stats, yo
    # KS test to see if we can distinguish the distributions at a confidence level of p = 0.05
    # https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.kstest.html
    # relief
    keys = list(data.keys())
    values = list(data.values())
    k=0
    for i in range(len(keys)-1):
        for j in range(i+1, len(keys)):
            print("KS test between {} and {}".format(keys[i], keys[j]))
            d, p = stats.ks_2samp(values[i], values[j])
            print(d, p)
            k += 1
Пример #4
0
def PlotLithologyWithClusters(DataDirectory,
                              OutDirectory,
                              fname_prefix,
                              stream_order=1,
                              shapefile_name='geol.shp',
                              geol_field='geol'):
    """
        Make a hillshade of the raster with the channels coloured by the cluster
        value. Rasterise a geology shapefile and drape on the top.
        Uses the LSDPlottingTools libraries. https://github.com/LSDtopotools/LSDMappingTools

        Args:
            stream_order: the stream order of the profiles that you are analysing
            shapefile_name: name of the lithology shapefile
            geol_field: the field of the shapefile that has the lithology information

        Author: FJC
        """
    import LSDPlottingTools as LSDP
    from LSDMapFigure.PlottingRaster import MapFigure

    df = pd.read_csv(DataDirectory + fname_prefix + '_all_tribs.csv')
    cluster_df = pd.read_csv(
        OutDirectory + fname_prefix +
        '_profiles_clustered_SO{}.csv'.format(stream_order))

    # set figure sizes based on format
    fig_width_inches = 4.92126

    # some raster names
    raster_ext = '.bil'
    BackgroundRasterName = fname_prefix + raster_ext
    HSName = fname_prefix + '_hs' + raster_ext

    if not os.path.isfile(DataDirectory + HSName):
        print("Making a hillshade for you")
        # make a hillshade
        BM.GetHillshade(DataDirectory + BackgroundRasterName,
                        DataDirectory + HSName)

    # create the map figure
    MF = MapFigure(HSName, DataDirectory, coord_type="UTM")
    res = IO.GetUTMMaxMin(DataDirectory + BackgroundRasterName)[0]

    #rasterise the shapefile
    new_shp, geol_dict = VT.geologic_maps_modify_shapefile(
        DataDirectory + shapefile_name, geol_field)
    lith_raster = VT.Rasterize_geologic_maps_pythonic(new_shp, res, geol_field)
    MF.add_drape_image(lith_raster,
                       "",
                       colourmap=plt.cm.jet,
                       alpha=0.4,
                       show_colourbar=False,
                       discrete_cmap=True,
                       cbar_type=int,
                       mask_value=0)

    clusters = cluster_df.cluster_id.unique()
    for cl in clusters:
        # plot the whole channel network in black
        ChannelPoints = LSDP.LSDMap_PointData(df,
                                              data_type="pandas",
                                              PANDEX=True)
        MF.add_point_data(ChannelPoints,
                          show_colourbar="False",
                          unicolor='0.9',
                          manual_size=2,
                          zorder=1,
                          alpha=0.5)
        # plot the clustered profiles in the correct colour
        this_df = cluster_df[cluster_df.cluster_id == cl]
        this_colour = str(this_df.colour.unique()[0])
        ClusteredPoints = LSDP.LSDMap_PointData(this_df,
                                                data_type="pandas",
                                                PANDEX=True)
        MF.add_point_data(ClusteredPoints,
                          show_colourbar="False",
                          zorder=100,
                          unicolor=this_colour,
                          manual_size=3)

    MF.save_fig(fig_width_inches=fig_width_inches,
                FigFileName=OutDirectory + fname_prefix +
                '_lith_clusters_SO{}.png'.format(stream_order),
                FigFormat='png',
                Fig_dpi=300,
                fixed_cbar_characters=6,
                adjust_cbar_characters=False,
                transparent=True)  # Save the figure