Пример #1
0
def extract_statistics(image_file, boxes_gpd, n_retain, spectra_ml_csv):
    """
    Sample spectra from labelled boxes of one image and store them in the spectra CSV.

    The image is read, rescaled with ``rescale_s2`` and multiplied by an OSM mask
    (the mask is also written to ``dir_osm`` as a single-band raster). Four
    normalized band ratios are computed, ``n_retain`` boxes are drawn at random
    (fixed seed 99), their spectra are passed to ``extract_rgb_spectra`` and the
    box pixels are then set to NaN before ``add_background`` is called on the
    remaining pixels.

    :param image_file: path of the raster file to read
    :param boxes_gpd: GeoDataFrame-like object whose ``geometry`` entries provide ``bounds``
    :param n_retain: number of boxes to draw without replacement
    :param spectra_ml_csv: CSV file that is read, extended and overwritten
    :raises ValueError: from ``np.random.choice`` if ``n_retain`` exceeds the number of boxes
    """
    spectra_ml = pd.read_csv(spectra_ml_csv, index_col=0)
    arr, meta = rio_read_all_bands(image_file)
    arr = rescale_s2(arr)
    lat, lon = lat_from_meta(meta), lon_from_meta(meta)
    bbox_epsg4326 = list(np.flip(metadata_to_bbox_epsg4326(meta)))
    osm_mask = get_osm_mask(bbox_epsg4326, meta["crs"], arr[0], {
        "lat": lat,
        "lon": lon
    }, dir_osm)

    # Write the mask with a copied profile so the image metadata itself stays untouched.
    osm_file = os.path.join(dir_osm, "osm%s" % os.path.basename(image_file))
    osm_meta = dict(meta, count=1, dtype=osm_mask.dtype)
    with rio.open(osm_file, "w", **osm_meta) as tgt:
        tgt.write(osm_mask, 1)
    arr *= osm_mask

    # (band, counterpart) index pairs of the normalized ratios; the last one is green vs. blue
    ratio_pairs = [(0, 2), (1, 0), (2, 0), (1, 2)]
    ratios = np.zeros((len(ratio_pairs), arr.shape[1], arr.shape[2]))
    for ratio_idx, (band_idx, counterpart_idx) in enumerate(ratio_pairs):
        ratios[ratio_idx] = normalized_ratio(arr[band_idx], arr[counterpart_idx])

    # shift lat lon to pixel center
    lat_shifted, lon_shifted = shift_lat(lat, 0.5), shift_lon(lon, 0.5)
    means_arr = [np.nanmean(arr[band_idx]) for band_idx in range(4)]

    geometries = boxes_gpd.geometry
    np.random.seed(99)
    for i in np.random.choice(len(boxes_gpd), n_retain, replace=False):
        # `i` is a position, not an index label, hence .iloc
        box = geometries.iloc[i].bounds
        x0 = get_smallest_deviation(lon_shifted, box[0])
        x1 = get_smallest_deviation(lon_shifted, box[2])
        y1 = get_smallest_deviation(lat_shifted, box[1])
        y0 = get_smallest_deviation(lat_shifted, box[3])
        rows, cols = slice(y0, y1 + 1), slice(x0, x1 + 1)
        sub_arr = arr[0:4, rows, cols].copy()
        sub_ratios = ratios[:, rows, cols].copy()
        spectra_ml = extract_rgb_spectra(spectra_ml, sub_arr, sub_ratios,
                                         means_arr)
        # mask out box reflectances in order to avoid using them as background
        arr[:, rows, cols] = np.nan
        ratios[:, rows, cols] = np.nan
    print("Number of training boxes: %s" % n_retain)
    # ensure equal number of blueish, greenish and reddish spectra
    spectra_ml = add_background(spectra_ml, arr, ratios, means_arr,
                                int(n_retain))
    spectra_ml.to_csv(spectra_ml_csv)
Пример #2
0
 def preprocess_bands(self, band_stack, subset_box=None):
     """
     Rescale the first four bands, optionally crop them, mask them with the
     OSM mask and hand the result to ``self._build_variables``.

     Sets ``self.lat`` / ``self.lon`` and, when cropping, updates height, width
     and transform in ``self.meta``.

     :param band_stack: array-like whose first four entries along axis 0 are used
     :param subset_box: optional dict with ymin, ymax, xmin, xmax used as slice bounds
     :raises ValueError: if the OSM mask contains no non-zero value
     """
     reflectances = band_stack[0:4].copy()
     del band_stack  # only the copy is needed from here on
     # NaN -> 0 before rescaling, 0 -> NaN afterwards
     nan_cells = np.isnan(reflectances)
     reflectances[nan_cells] = 0
     reflectances = rescale_s2(reflectances)
     zero_cells = reflectances == 0
     reflectances[zero_cells] = np.nan

     lat, lon = lat_from_meta(self.meta), lon_from_meta(self.meta)
     self.lat, self.lon = lat, lon

     if subset_box is not None:
         rows = slice(subset_box["ymin"], subset_box["ymax"])
         cols = slice(subset_box["xmin"], subset_box["xmax"])
         reflectances = reflectances[:, rows, cols]
         self.lat, self.lon = self.lat[rows], self.lon[cols]
         self.meta["height"], self.meta["width"] = reflectances.shape[1:3]
         # keep the transform coefficients, replace only the two offsets
         a, b, _, d, e, _ = list(self.meta["transform"])[:6]
         self.meta["transform"] = Affine(a, b, self.lon[0], d, e, self.lat[0])

     bbox_epsg4326 = list(np.flip(metadata_to_bbox_epsg4326(self.meta)))
     crs_string = "EPSG:" + str(self.meta["crs"].to_epsg())
     coordinates = {"lat": self.lat, "lon": self.lon}
     osm_mask = self._get_osm_mask(bbox_epsg4326, crs_string, reflectances[0],
                                   coordinates, dirs["osm"])
     if not np.count_nonzero(osm_mask):
         raise ValueError("No OSM roads of requested road types in aoi")

     reflectances *= osm_mask
     reflectances[reflectances == 0] = np.nan
     del osm_mask
     self._build_variables(reflectances)
Пример #3
0
 def pre_process(self, band_dict, metadata, subset_box=None):
     """
     Mask the bands to OSM roads, optionally crop them and rescale them to 0-1.

     Sets ``self.metadata``, ``self.crs``, ``self.lat``, ``self.lon`` and
     ``self.box_epsg4326`` and creates ``self.dir_ancil`` if it is missing.

     :param band_dict: dict holding 4 arrays with shape (height, width), keys are B02, B03, B04, B08
     :param metadata: dict metadata from rasterio IO; must contain "crs" and may contain "lat" and "lon"
     :param subset_box: optional dict with int ymin, ymax, xmin, xmax (upper bounds exclusive)
     :return: array stacked in the order B04, B03, B02, B08; cells that are 0 after rescaling are NaN
     :raises TypeError: if band_dict or metadata is not a dict
     :raises KeyError: if a band is missing from band_dict
     """
     self.metadata = metadata
     if not isinstance(band_dict, dict):
         raise TypeError("'band_dict' must be a dictionary")
     if any(band not in band_dict for band in ("B02", "B03", "B04", "B08")):
         raise KeyError(
             "'band_dict' must contain 'B02', 'B03', 'B04', 'B08'")
     if not isinstance(metadata, dict):
         raise TypeError("'metadata' must be a dictionary")
     self.crs = metadata["crs"]
     try:
         self.lat, self.lon = metadata["lat"], metadata["lon"]
     except KeyError:
         # coordinates not delivered with the metadata, derive them instead
         self.lat, self.lon = lat_from_meta(metadata), lon_from_meta(
             metadata)
     box_epsg4326 = metadata_to_bbox_epsg4326(metadata)
     os.makedirs(self.dir_ancil, exist_ok=True)
     self.box_epsg4326 = list(np.flip(box_epsg4326))
     osm_mask = self.get_osm_mask(self.box_epsg4326, metadata["crs"],
                                  band_dict["B02"], {
                                      "lat": self.lat,
                                      "lon": self.lon
                                  }, self.dir_ancil)
     # binarize the mask: 1 where it is non-zero, NaN elsewhere
     osm_mask[osm_mask != 0] = 1
     osm_mask[osm_mask == 0] = np.nan
     band_stack_np = np.array([
         band_dict["B04"], band_dict["B03"], band_dict["B02"],
         band_dict["B08"]
     ])
     band_stack_np *= osm_mask
     if subset_box is not None:
         rows = slice(subset_box["ymin"], subset_box["ymax"])
         cols = slice(subset_box["xmin"], subset_box["xmax"])
         band_stack_np = band_stack_np[:, rows, cols]
         # same slices as for the bands so that the coordinates match the array shape
         self.lat = self.lat[rows]
         self.lon = self.lon[cols]
     band_stack_np_rescaled = band_stack_np.copy()
     del band_stack_np
     # NaN -> 0 before rescaling, 0 -> NaN afterwards
     band_stack_np_rescaled[np.isnan(band_stack_np_rescaled)] = 0
     band_stack_np_rescaled = rescale(band_stack_np_rescaled, 0, 1)
     band_stack_np_rescaled[band_stack_np_rescaled == 0] = np.nan
     return band_stack_np_rescaled
Пример #4
0
 def validate_boxes(self):
     """
     Compare predicted boxes with labelled boxes for every validation tile.

     Tiles are read from the "validation_tiles" column of tiles.csv in
     ``dir_training``. If no detections file exists for a tile, the detection is
     run on the image with the longest file name among the tile's images,
     restricted to the extent of the labelled boxes. For score thresholds
     0, 0.1, ... below 2 the predictions are matched to the labels (a match
     needs an intersection over union above 0.25, each label is matched at most
     once) and one row of metrics is collected. All rows are written to
     ``boxes_validation_file``, which is removed at the start if it exists.
     """
     tiles_pd = pd.read_csv(os.path.join(dir_training, "tiles.csv"), sep=",")
     try:
         os.remove(boxes_validation_file)
     except FileNotFoundError:
         pass
     boxes_validation_pd = pd.DataFrame()
     for tile in tiles_pd["validation_tiles"]:
         print(tile)
         try:
             imgs = glob(dir_s2_subsets + os.sep + "*" + tile + "*.tif")
         except TypeError:  # nan
             continue
         validation_boxes = gpd.read_file(glob(os.path.join(dir_labels, "*%s*.gpkg" % tile))[0])
         existing_detections = glob(os.path.join(self.dirs["detections"], "*%s*.gpkg" % tile))
         if existing_detections:
             prediction_boxes_file = existing_detections[0]
         else:
             # no detections yet: run the detector on the image with the longest file name
             img_file = max(imgs, key=len)
             name = os.path.basename(img_file).split(".tif")[0]
             print(img_file)
             prediction_boxes_file = os.path.join(self.dirs["detections"], name + "_boxes.gpkg")
             rf_td = RFTruckDetector()
             band_data = rf_td.read_bands(img_file)
             # subset to label extent
             lat, lon = lat_from_meta(rf_td.meta), lon_from_meta(rf_td.meta)
             extent = validation_boxes.total_bounds  # process only subset where boxes given
             ymin, ymax = np.argmin(np.abs(lat - extent[3])), np.argmin(np.abs(lat - extent[1]))
             xmin, xmax = np.argmin(np.abs(lon - extent[0])), np.argmin(np.abs(lon - extent[2]))
             rf_td.preprocess_bands(band_data, {"ymin": ymin, "xmin": xmin, "ymax": ymax + 1, "xmax": xmax + 1})
             # do detection
             prediction_array = rf_td.predict()
             prediction_boxes = rf_td.extract_objects(prediction_array)
             rf_td.prediction_raster_to_gtiff(prediction_array,
                                              os.path.join(self.dirs["detections"], name + "_raster"))
             rf_td.prediction_boxes_to_gpkg(prediction_boxes, prediction_boxes_file)
             del prediction_array, band_data, rf_td  # release before the evaluation
         prediction_boxes = gpd.read_file(prediction_boxes_file)
         validation_geometries = list(validation_boxes.geometry)
         n_validation = len(validation_boxes)
         # iterate over score thresholds and plot precision and recall curve
         for score_threshold in np.arange(0, 2, 0.1):
             # thresholds only increase, hence filtering the already filtered boxes is sufficient
             prediction_boxes = prediction_boxes[prediction_boxes["score"] > score_threshold]
             matched_validation = set()  # positions of labels that have been matched already
             intersection_over_union = []
             for prediction_box in prediction_boxes.geometry:
                 for validation_idx, validation_box in enumerate(validation_geometries):
                     if validation_idx in matched_validation:
                         continue
                     if not prediction_box.intersects(validation_box):
                         continue
                     union = prediction_box.union(validation_box)
                     intersection = prediction_box.intersection(validation_box)
                     iou = intersection.area / union.area
                     if iou > 0.25:
                         matched_validation.add(validation_idx)
                         intersection_over_union.append(iou)
                         break
             tp = len(intersection_over_union)
             n_predictions = len(prediction_boxes)
             fn = n_validation - tp
             fp = n_predictions - tp
             # every ratio falls back to 0 when its denominator is 0
             precision = tp / n_predictions if n_predictions else 0
             recall = tp / n_validation if n_validation else 0
             accuracy = tp / (tp + fp + fn) if (tp + fp + fn) else 0
             mean_iou = np.mean(intersection_over_union) if intersection_over_union else np.nan
             row_idx = len(boxes_validation_pd)
             boxes_validation_pd.loc[row_idx, "detection_file"] = prediction_boxes_file
             boxes_validation_pd.loc[row_idx, "accuracy"] = accuracy
             boxes_validation_pd.loc[row_idx, "precision"] = precision
             boxes_validation_pd.loc[row_idx, "recall"] = recall
             boxes_validation_pd.loc[row_idx, "score_threshold"] = score_threshold
             boxes_validation_pd.loc[row_idx, "n_prediction_boxes"] = n_predictions
             boxes_validation_pd.loc[row_idx, "n_validation_boxes"] = n_validation
             boxes_validation_pd.loc[row_idx, "IoU"] = mean_iou
     boxes_validation_pd.to_csv(boxes_validation_file)
Пример #5
0
    def validate_acquisition_wise(self, period, download_missing=False, dir_archive=None):
        """
        Run detection and station comparison for every sub-period of a period.

        For each sub-period returned by ``self.generate_process_periods(period)``:

        1. if a detections file exists, it is validated directly,
        2. else if a merged band file exists in the archive, detection is run on
           it and the result is validated,
        3. else the sub-period is skipped, unless ``download_missing`` is set, in
           which case the data is requested through ``SentinelHub``.

        Finally ``self.plot_bast_comparison(self.validation_file)`` is called.

        :param period: passed on to ``self.generate_process_periods``
        :param download_missing: request missing acquisitions instead of skipping
            them. The default False keeps the previous behaviour, in which the
            download code could never be reached.
        :param dir_archive: directory holding the merged band files. The default
            None selects "G:\\archive", the value that was hard-coded before.
        """
        if dir_archive is None:
            # NOTE(review): machine-specific path, kept as default for backward compatibility
            dir_archive = "G:\\archive"
        dates_between = self.generate_process_periods(period)
        station_folder = "zst" + self.station_name.split("(")[1].split(")")[0]
        if len(station_folder) == 4:
            # only one character between the first parentheses, use the second pair instead
            station_folder = "zst" + self.station_name.split(") ")[1].split("(")[1][0:-1]
        # NOTE(review): `year` is not defined in this method, presumably module-level - confirm
        station_file = os.path.join(self.dirs["station_counts"], station_folder, station_folder + "_%s.csv" %
                                    str(year))

        for sub_period in dates_between:
            print("At date: %s" % sub_period)
            self.date = sub_period[0]
            area_id = additional_stations.get(self.station_name_clear, self.station_name_clear)
            detections_file = os.path.join(self.dirs["detections"], "s2_detections_%s_%s.gpkg" %
                                           (self.date, area_id))
            merged_file = os.path.join(dir_archive, "s2_bands_%s_%s_%s_merged.tiff" % (area_id,
                                                                                      sub_period[0],
                                                                                      sub_period[1]))
            if os.path.exists(detections_file):
                self.validate_with_bast(sub_period[0], detections_file, station_file, merged_file)
                continue
            if os.path.exists(merged_file):
                self._detect_and_validate_acquisition(sub_period[0], detections_file, station_file, merged_file)
                continue
            if not download_missing:
                continue

            # merged file is missing: request it, unless this date has been checked before
            sh_bbox = (self.bbox_epsg4326[1], self.bbox_epsg4326[0], self.bbox_epsg4326[3], self.bbox_epsg4326[2])
            kwargs = dict(bbox=sh_bbox, period=sub_period, dataset=DataCollection.SENTINEL2_L2A,
                          bands=["B04", "B03", "B02", "B08"], resolution=10, dir_save=self.dirs["s2"],
                          merged_file=merged_file, mosaicking_order="leastCC")
            obs_file = os.path.join(dir_archive, "obs.csv")  # check if acquisition has been checked
            try:
                obs_pd = pd.read_csv(obs_file, index_col=0)
            except FileNotFoundError:
                obs_pd = pd.DataFrame()
                os.makedirs(os.path.dirname(obs_file), exist_ok=True)
                obs_pd.to_csv(obs_file)
            try:
                yet_checked = sub_period[0] in np.array(obs_pd[merged_file])
            except KeyError:
                yet_checked = False
            if yet_checked:
                continue
            sh = SentinelHub()
            sh.set_credentials(SH_CREDENTIALS_FILE)
            if not sh.data_available(kwargs):
                self.register_non_available_date(sub_period[0], obs_pd, obs_file, merged_file)
                continue
            # check if data has enough non-cloudy observations, using only the cloud mask
            clm_file = os.path.join(dir_archive, "clm.tiff")
            sh.get_data(**dict(kwargs, bands=["CLM"], merged_file=clm_file))
            has_obs = self.has_observations(clm_file)
            try:
                os.remove(clm_file)
            except FileNotFoundError:
                pass
            if not has_obs:
                # add date for file in order to avoid duplicate check
                self.register_non_available_date(sub_period[0], obs_pd, obs_file, merged_file)
                continue
            print("Processing: %s" % sub_period[0])
            sh.get_data(**kwargs)  # get full data
            self._detect_and_validate_acquisition(sub_period[0], detections_file, station_file, merged_file)
        self.plot_bast_comparison(self.validation_file)

    def _detect_and_validate_acquisition(self, date, detections_file, station_file, merged_file):
        """
        Run the detector on a merged band file, write the boxes and validate them.

        Sets ``self.detections_file``, ``self.lat`` and ``self.lon`` once the boxes
        have been written.

        :return: False if writing the boxes raised ValueError (nothing is
            validated then), True otherwise
        """
        detector = RFTruckDetector()
        band_stack = detector.read_bands(merged_file)
        detector.preprocess_bands(band_stack[0:4])
        prediction = detector.predict()
        prediction_boxes = detector.extract_objects(prediction)
        try:
            detector.prediction_boxes_to_gpkg(prediction_boxes, detections_file)
        except ValueError:
            print("Number of detections: %s, cannot write" % len(prediction_boxes))
            return False
        self.detections_file = detections_file
        with rio.open(merged_file, "r") as src:
            meta = src.meta
        self.lat = lat_from_meta(meta)
        self.lon = lon_from_meta(meta)
        del detector, band_stack, prediction  # release the arrays before the comparison
        self.validate_with_bast(date, detections_file, station_file, merged_file)
        return True