if not args.local:
    account_name = (get_setting("azure")["account_name"]
                    if setting_exists("azure") else "storageaccountperma8980")
    account_key = (get_setting("azure")["account_key"]
                   if setting_exists("azure") else None)
    store = stuett.ABSStore(
        container="hackathon-on-permafrost",
        prefix=prefix,
        account_name=account_name,
        account_key=account_key,
    )
    annotation_store = stuett.ABSStore(
        container="hackathon-on-permafrost",
        prefix="annotations",
        account_name=account_name,
        account_key=account_key,
    )
else:
    store = stuett.DirectoryStore(Path(data_path).joinpath(prefix))
    if "2017-01-01/20170101_080018.JPG" not in store and "MH36/2017/EHE.D/4D.MH36.A.EHE.D.20171231_230000.miniseed" not in store:
        raise RuntimeError(
            f"Please provide a valid path to the permafrost {prefix} data or see README how to download it"
        )
    annotation_store = stuett.DirectoryStore(
        Path(data_path).joinpath("annotations"))
    if label_filename not in annotation_store:
        print(
            "WARNING: Please provide a valid path to the permafrost annotation data or see README how to download it"
        )


################## START OF IDEA ################
#################################################
def get_seismic_transform():
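    # The original function body was not captured in this snippet. Below is a
    # minimal sketch of a plausible seismic transform; the dB conversion and
    # z-normalization are assumptions, not the original code.
    def transform(x):
        # Clip to a small positive value to avoid log(0), then convert to dB.
        x_db = 10.0 * np.log10(np.maximum(np.abs(x), 1e-10))
        # Standardize so the network sees zero-mean, unit-variance inputs.
        return (x_db - x_db.mean()) / (x_db.std() + 1e-8)

    return transform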
# Getting either cloud or local data file
if not args.local:
    account_name = (get_setting("azure")["account_name"]
                    if setting_exists("azure") else "storageaccountperma8980")
    account_key = (get_setting("azure")["account_key"]
                   if setting_exists("azure") else None)
    store = stuett.ABSStore(
        container="hackathon-on-permafrost",
        prefix="timeseries_derived_data_products",
        account_name=account_name,
        account_key=account_key,
    )
else:
    timeseries_folder = Path(data_path).joinpath(
        "timeseries_derived_data_products").resolve()
    store = stuett.DirectoryStore(timeseries_folder)
    if vaisalawxt520windpth_file not in store:
        raise RuntimeError(
            "Please provide a valid path to the permafrost data or see README how to download it"
        )

vaisalawxt520windpth_node = stuett.data.CsvSource(vaisalawxt520windpth_file,
                                                  store=store)
vaisalawxt520windpth = vaisalawxt520windpth_node()

# Create figure
# vaisalawxt520windpth = vaisalawxt520windpth.loc[:,['temperature_5cm','temperature_10cm','temperature_100cm']]
vaisalawxt520windpth = vaisalawxt520windpth.drop(dim="name",
                                                 labels=["position"])

fig = go.Figure()

data_path = Path(args.path)

if not args.local:
    account_name = (get_setting("azure")["account_name"]
                    if setting_exists("azure") else "storageaccountperma8980")
    account_key = (get_setting("azure")["account_key"]
                   if setting_exists("azure") else None)
    store = stuett.ABSStore(
        container="hackathon-on-permafrost",
        prefix="seismic_data/4D/",
        account_name=account_name,
        account_key=account_key,
    )
else:
    seismic_folder = Path(data_path).joinpath("seismic_data/4D/")
    store = stuett.DirectoryStore(seismic_folder)
    if "MH36/2017/EHE.D/4D.MH36.A.EHE.D.20171231_230000.miniseed" not in store:
        raise RuntimeError(
            "Please provide a valid path to the permafrost data or see README how to download it"
        )

seismic_node = stuett.data.SeismicSource(
    store=store,
    station="MH36",
    channel=["EHE", "EHN", "EHZ"],
    start_time="2017-08-02 10:00:00",
    end_time="2017-08-02 10:01:00",
)
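
# A small usage sketch (the variable name is ours): following the node-call
# pattern used for the CSV source earlier in this file, calling the node
# executes the request and loads the actual waveform data.
seismic_data = seismic_node()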
""" If you have access to the [arclink service](arclink.ethz.ch/) you can use it to load your data
    You can use the following two lines of code, but be careful not to accidentally publish your password!
    ```
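# A hedged sketch of an arclink-based load using obspy's (since deprecated)
# ArcLink client; host, port, and user below are placeholders, while the
# network/station/location codes are taken from the miniseed filename above.
# from obspy import UTCDateTime
# from obspy.clients.arclink import Client
# client = Client(user="you@example.org", host="arclink.ethz.ch", port=18001)
# st = client.get_waveforms("4D", "MH36", "A", "EHE",
#                           UTCDateTime("2017-08-02T10:00:00"),
#                           UTCDateTime("2017-08-02T10:01:00"))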
        if setting_exists("azure")
        else "storageaccountperma8980"
    )
    account_key = (
        get_setting("azure")["account_key"] if setting_exists("azure") else None
    )
    if args.to_data_storage:
        output_store = stuett.ABSStore(
            container="hackathon-on-permafrost",
            prefix="annotations",
            account_name=account_name,
            account_key=account_key,
            blob_service_kwargs={},
        )
    else:
        output_store = stuett.DirectoryStore(annotations_path)
        input_store = stuett.ABSStore(
            container="hackathon-public-rw",
            prefix="",
            account_name=account_name,
            account_key=account_key,
        )
else:
    input_store = stuett.DirectoryStore(args.user_annotations)
    output_store = stuett.DirectoryStore(annotations_path)

print(args.user_annotations)

annotation_dict = {}
for key in input_store.keys():
    fn = Path(key)
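
# The `__init__` below belongs to a dataset class whose `class` statement was
# not captured; the name `PermafrostImageDataset` is assumed here so that the
# method reads in context.
class PermafrostImageDataset: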
    def __init__(
        self,
        local,
        data_path="../data",
        transform=None,
        time_slice={
            "start_time": "2017-01-01",
            "end_time": "2017-12-31"
        },
    ):
        """
        Args:
            local (bool): Whether to read the dataset from a local storage
                location or from a public Azure share.
            data_path (str, optional): If the data should be read from a local
                location, then this folder will denote the location of the
                dataset.
            transform (callable, optional): Optional transform to be applied
                on images.
            time_slice (dict): Can be used to create different train and test
                sets (see the usage sketch after this constructor). Note that
                this is not an ideal split, because the time ranges are not
                interleaved: if time information is used as a network input
                but the network has never seen values from the corresponding
                month, it cannot make confident predictions.
        """
        if transform is not None:
            raise NotImplementedError("transform not implemented!")
        self.transform = transform

        # This sensor contains near-surface temperature readings and is on the
        # south side and therefore receives a lot of sunshine.
        rock_temperature_file_mh10 = "MH10_temperature_rock_2017.csv"  # South

        radiation_file = "MH15_radiometer__conv_2017.csv"

        if not local:
            account_name = (get_setting("azure")["account_name"]
                            if setting_exists("azure") else
                            "storageaccountperma8980")
            account_key = (get_setting("azure")["account_key"]
                           if setting_exists("azure") else None)

            ts_store = stuett.ABSStore(
                container="hackathon-on-permafrost",
                prefix="timeseries_derived_data_products",
                account_name=account_name,
                account_key=account_key,
            )

            img_store = stuett.ABSStore(
                container="hackathon-on-permafrost",
                prefix="timelapse_images_fast",
                account_name=account_name,
                account_key=account_key,
            )

        else:
            timeseries_folder = (Path(data_path).joinpath(
                "timeseries_derived_data_products").resolve())
            ts_store = stuett.DirectoryStore(timeseries_folder)
            if rock_temperature_file_mh10 not in ts_store:
                raise RuntimeError("Please provide a valid path to the " +
                                   "permafrost data!")

            img_store = stuett.DirectoryStore(
                Path(data_path).joinpath("timelapse_images_fast"))
            if "2017-01-01/20170101_080018.JPG" not in store:
                raise RuntimeError("Please provide a valid path to the " +
                                   "permafrost images.")

        # self._ts_store = ts_store
        self._img_store = img_store

        ### Load timeseries data.
        rock_temperature_node_mh10 = stuett.data.CsvSource(
            rock_temperature_file_mh10, store=ts_store)
        rock_temp_mh10 = rock_temperature_node_mh10(time_slice)

        radiation_node = stuett.data.CsvSource(radiation_file, store=ts_store)
        radiation = radiation_node(time_slice)

        net_radiation = radiation.loc[:, ["net_radiation"]]
        surface_temp = rock_temp_mh10.loc[:, ["temperature_nearsurface_t2"]]
        target_temp = rock_temp_mh10.loc[:, ["temperature_10cm"]]

        ### Load image filenames.
        image_node = stuett.data.MHDSLRFilenames(
            store=img_store,
            force_write_to_remote=True,
            as_pandas=False,
        )
        image_fns = image_node(time_slice)

        ### Find image filenames that were captured close to temperature
        ### measures.
        # With close we mean within a 20min window.
        # Temperature/radiation values that have no corresponding image are
        # ignored.

        # Sanity check!
        # for t1, t2 in zip(radiation['time'], rock_temp_mh10['time']):
        #    assert (t1 == t2)

        j = 0
        n = len(image_fns["time"])

        measurement_pairs = []

        for i, t in enumerate(rock_temp_mh10["time"].values):
            while j < n:
                # Translate difference in timestamps to minutes before casting
                # to int.
                diff = ((image_fns["time"][j] -
                         t).values.astype("timedelta64[m]").astype(int))

                if diff > 10:
                    # Image too far in the future, ignore sensor value.
                    break

                absdiff = np.abs(diff)
                if absdiff < 10:
                    # The image is very close, simply check whether the next
                    # picture is even closer. Otherwise, we take the current
                    # image.
                    if j + 1 < n:
                        absdiff2 = np.abs(
                            (image_fns["time"][j + 1] -
                             t).values.astype("timedelta64[m]").astype(int))
                    else:
                        absdiff2 = None

                    if absdiff2 is None or absdiff < absdiff2:
                        measurement_pairs.append((i, j))
                        j += 1
                    else:
                        measurement_pairs.append((i, j + 1))
                        j += 2

                    break

                j += 1

        ### Build dataset (make sure that there are no None values in the
        ### timeseries measurements).
        self._img_fns = []
        self._surface_temp = []
        self._target_temp = []
        self._timestamps = []
        self._radiation = []

        # This is coarse time information that one may provide as additional
        # information. We encode the (normalized) month and daytime information,
        # as this information may be quite helpful when judging temperature
        # values.
        # Though, it might also tempt the regression system to ignore all
        # other information and solely predict based on this information
        # (as a strong local minimum).
        self._month = []
        self._daytime = []

        assert np.all(~np.isnan(net_radiation.values))
        assert np.all(~np.isnan(surface_temp.values))
        # assert(np.all(~np.isnan(target_temp.values)))

        for i, j in measurement_pairs:
            if np.any(np.isnan(target_temp.values[i, 0])):
                continue

            self._target_temp.append(target_temp.values[i, 0])
            self._surface_temp.append(surface_temp.values[i, 0])
            self._radiation.append(net_radiation.values[i, 0])

            self._timestamps.append(target_temp["time"].values[i])
            ts = pd.to_datetime(self._timestamps[-1])
            self._month.append(ts.month)
            self._daytime.append(ts.hour * 60 + ts.minute)

            self._img_fns.append(str(image_fns.values[0, j]))

        self._target_temp = np.array(self._target_temp, dtype=np.float32)
        self._surface_temp = np.array(self._surface_temp, dtype=np.float32)
        self._radiation = np.array(self._radiation, dtype=np.float32)

        self._month = np.array(self._month, dtype=np.float32)
        self._daytime = np.array(self._daytime, dtype=np.float32)

        # Normalize regression values.
        self.target_temp_mean = self._target_temp.mean()
        self.target_temp_std = self._target_temp.std()

        self.surface_temp_mean = self._surface_temp.mean()
        self.surface_temp_std = self._surface_temp.std()

        self.radiation_mean = self._radiation.mean()
        self.radiation_std = self._radiation.std()

        self._target_temp = (self._target_temp -
                             self.target_temp_mean) / self.target_temp_std

        self._surface_temp = (self._surface_temp -
                              self.surface_temp_mean) / self.surface_temp_std

        self._radiation = (self._radiation -
                           self.radiation_mean) / self.radiation_std

        self._month = (self._month - self._month.mean()) / self._month.std()
        self._daytime = (self._daytime -
                         self._daytime.mean()) / self._daytime.std()

        print("dataset contains %d samples." % len(self._img_fns))
        get_setting("azure")["account_name"]
        if setting_exists("azure")
        else "storageaccountperma8980"
    )
    account_key = (
        get_setting("azure")["account_key"] if setting_exists("azure") else None
    )
    annotation_store = stuett.ABSStore(
        container="hackathon-on-permafrost",
        prefix="annotations",
        account_name=account_name,
        account_key=account_key,
        blob_service_kwargs={},
    )
else:
    annotation_store = stuett.DirectoryStore(annotations_path)


df = stuett.read_csv_with_store(annotation_store, "automatic_labels_mountaineers.csv")
df['start_time'] = pd.to_datetime(df['start_time'])
df['end_time'] = pd.to_datetime(df['end_time'])
df.index = df['start_time']
df = df['2017-08-01':'2017-08-02']


fig = go.Figure(
    layout=dict(
        xaxis={"type": "date"},
        xaxis_range=[
            pd.to_datetime("2017-08-01"),
            pd.to_datetime("2017-08-02"),