Пример #1
0
    def get(self, url_part, params=None, headers=None):
        """
        Send an authenticated GET request for the given url part.

        The token is validated first (``self._validate_token()``), then the
        request is issued with an Authorization header built from
        ``self.token``:

            ``Authorization: {token_type} {access_token}``

        :param url_part: The url part to request
        :param params: additional parameters for the request
        :type params: dict
        :param headers: extra request headers; they are merged over the
            Authorization header, so a caller-supplied ``Authorization``
            entry replaces the generated one
        :return: the result of ``get_url`` for this request
        :raises: PermissionDenied (presumably from token validation; the
            raising code is not visible here)
        """
        self._validate_token()

        # Start from the Authorization header, then layer caller headers on top
        request_headers = {
            "Authorization": "{token_type} {access_token}".format(**self.token)
        }
        request_headers.update(headers or {})

        return get_url(
            url_part,
            params=params,
            headers=request_headers,
            verify=self.verify_ssl,
        )
Пример #2
0
    def get_hydrological_unit_codes(self):
        """
        Get the hydrological unit codes for USGS.

        The codes are requested from ``URL_USGS_HUC`` with a short timeout.
        If the request times out, the connection fails, or the service
        answers with anything other than HTTP 200, the stored copy in
        ``usgs_huc_codes.CONTENT`` is returned instead.

        :return: the HUC listing text, either live or from the stored copy
        """

        try:
            response = get_url(URL_USGS_HUC, timeout=0.5)
            if response.status_code == 200:
                return response.text
            # Do not fail over silently on an unexpected HTTP status
            logger.warning(
                f"HTTP {response.status_code} for {URL_USGS_HUC} - Failing over to stored HUC codes"
            )
        except requests.exceptions.ReadTimeout:
            logger.warning(
                f"Read Timeout for {URL_USGS_HUC} - Failing over to stored HUC codes"
            )
        except requests.exceptions.ConnectTimeout:
            logger.warning(
                f"Connection Timeout for {URL_USGS_HUC} - Failing over to stored HUC codes"
            )
        except requests.exceptions.ConnectionError:
            # ConnectTimeout is a subclass of ConnectionError, so this handler
            # must come after it; it covers DNS failures, refused connections, etc.
            logger.warning(
                f"Connection Error for {URL_USGS_HUC} - Failing over to stored HUC codes"
            )

        return usgs_huc_codes.CONTENT
Пример #3
0
    def get_observed_properties_variables(self, usgs_sites):
        """
        Build a mapping of location id to the parameter codes it reports.

        The daily values (``dv``) endpoint is queried with the given ids
        joined by commas in the ``huc`` query parameter, restricted to the
        mean statistic (statCd=00003). Each returned time series name is
        split on ``:`` to obtain the location id and the parameter code.

        :param usgs_sites: iterable of id strings to query
        :return: dict of location id -> list of parameter codes; empty when
            the response is falsy or its status is not 200
        """

        # Only search for the mean data statCd=00003.
        base_url = self.datasource.location
        id_list = ",".join(usgs_sites)
        url = '{}dv?huc={}&format=json&statCd=00003'.format(base_url, id_list)
        response_variables = get_url(url)

        variables_by_location = {}
        if not response_variables or response_variables.status_code != 200:
            return variables_by_location

        for series in response_variables.json()['value']['timeSeries']:
            # name is "<agency>:<location id>:<parameter>:<statistic>"
            _, location_id, parameter, _statistic = series['name'].split(":")
            variables_by_location.setdefault(location_id, []).append(parameter)

        logging.debug(
            "Location DataTypes: {}".format(variables_by_location))
        return variables_by_location
Пример #4
0
def test_get_url():
    """Smoke test: get_url on a public URL gives a truthy response with HTTP 200."""

    google_response = get_url("http://www.google.com")

    # The response object itself must be truthy before checking the status
    assert google_response
    assert google_response.status_code == 200
Пример #5
0
def generator_usgs_measurement_timeseries_tvp_observation(
        view, query: QueryMeasurementTimeseriesTVP):
    """
    Request USGS Daily Values and yield each returned time series.

    =================== === ===================
    USGS NWIS               Broker
    =================== === ===================
    ``nwis/dv``          >> ``data_points/``
    =================== === ===================

    The query is translated into ``dv`` request parameters (dates,
    parameter codes, statistic codes, sites or huc). On HTTP 200 every entry
    of ``value.timeSeries`` in the JSON body is yielded; on any other status
    the response text (with markup tags blanked out) is logged as an error
    and nothing is yielded.

    :param view: object whose ``datasource.location`` is the service base url
    :param query: Query information for this request
    :returns: a generator object that yields the raw time series JSON
        objects from the response
    """

    # Temporal resolution is always daily.
    search_params: List[Tuple[str, Any]] = [("startDT", query.start_date)]

    if query.end_date:
        search_params.append(("endDT", query.end_date))

    parameter_codes = ",".join(
        str(o) for o in query.observed_property_variables)
    search_params.append(("parameterCd", parameter_codes))

    if query.statistic:
        # Translate the requested statistics to USGS statistic codes,
        # skipping (and reporting) the ones without a mapping
        stat_codes: List[str] = []
        for stat in query.statistic:
            usgs_stat = USGS_STATISTIC_MAP.get(stat)
            if usgs_stat:
                stat_codes.append(usgs_stat)
            else:
                logger.info(
                    f"USGS Daily Values service does not support statistic {stat}"
                )
        search_params.append(("statCd", ",".join(stat_codes)))
    else:
        search_params.append(("siteStatus", "all"))

    # An id longer than 2 characters is searched as a station,
    # anything shorter is searched as a huc
    feature_key = "sites" if len(query.monitoring_features[0]) > 2 else "huc"
    search_params.append((feature_key, ",".join(query.monitoring_features)))

    # Station locations only, JSON format
    search_params.append(("siteType", "ST"))
    search_params.append(("format", "json"))

    # Request the data points
    response = get_url('{}dv'.format(view.datasource.location),
                       params=search_params)

    if response.status_code != 200:
        import re
        tag_pattern = re.compile(r'<.*?>')
        logger.error("HTTP {}: {}".format(response.status_code,
                                          tag_pattern.sub(' ', response.text)))
        return

    try:
        json_obj = response.json()

        # There is a valid json response
        if json_obj:
            for data_json in json_obj['value']['timeSeries']:
                yield data_json

    except json.decoder.JSONDecodeError:
        logger.error("JSON Not Returned: {}".format(response.content))
Пример #6
0
    def list(self, query: QueryMeasurementTimeseriesTVP):
        """
        Get the Data Points for USGS Daily Values

        =================== === ======================
        USGS NWIS               Broker
        =================== === ======================
        ``nwis/dv``          >> ``measurement_tvp_timeseries/``
        =================== === ======================

        Site metadata is requested first from the ``site/`` endpoint and
        indexed by ``site_no``. Each time series returned by
        ``generator_usgs_measurement_timeseries_tvp_observation`` is then
        turned into one observation object. Nothing is yielded when the
        query has no monitoring features.

        :param query: Query information for this request
        :returns: a generator object that yields :class:`~basin3d.synthesis.models.measurement.MeasurementTimeseriesTVPObservation`
            objects
        """
        feature_obj_dict = {}
        if not query.monitoring_features:
            return None

        search_params = ",".join(query.monitoring_features)

        # Choose the sites/huc filter from the first id, exactly as the data
        # request does. Judging by the length of the joined string would send
        # several short huc codes (e.g. "14,15") as site numbers.
        if len(query.monitoring_features[0]) > 2:
            url = '{}site/?sites={}'.format(self.datasource.location,
                                            search_params)
        else:
            url = '{}site/?huc={}'.format(self.datasource.location,
                                          search_params)

        # Site metadata is best effort: a failure here only means the
        # observations are yielded without a feature of interest
        usgs_site_response = None
        try:
            usgs_site_response = get_url(url)
            logging.debug("{}.{}".format(self.__class__.__name__, "list"),
                          url=url)
        except Exception as e:
            logging.warning(
                "Could not connect to USGS site info: {}".format(e))

        if usgs_site_response:
            for v in iter_rdb_to_json(usgs_site_response.text):
                if v["site_no"]:
                    feature_obj_dict[v["site_no"]] = v

        # Iterate over data objects returned
        for data_json in generator_usgs_measurement_timeseries_tvp_observation(
                self, query):
            unit_of_measurement = data_json["variable"]["unit"]['unitCode']
            timezone_offset = data_json["sourceInfo"]["timeZoneInfo"][
                "defaultTimeZone"]["zoneOffset"]

            # name has agency, sitecode, parameter id and stat code
            #   e.g. "USGS:385106106571000:00060:00003"
            _, feature_id, parameter, statistic = data_json["name"].split(":")

            if feature_id in feature_obj_dict:
                monitoring_feature = _load_point_obj(
                    datasource=self,
                    json_obj=feature_obj_dict[feature_id],
                    feature_observed_properties=dict(),
                    observed_property_variables=
                    "Find observed property variables at monitoring feature url"
                )
            else:
                # ToDo: expand this to use the info in the data return
                # ToDo: log message
                monitoring_feature = None

            # deal with statistic
            basin3d_statistic = "NOT SET"
            if statistic:
                basin3d_statistic = map_statistic_code(statistic)

            result_points = []
            result_qualifiers = set()

            for values in data_json["values"]:
                result_qualifiers.update(
                    self.get_result_qualifiers(values["qualifier"]))

                for value in values["value"]:

                    # VAL: with result quality here, the quality of last value in the timeseries will be used.
                    #      Is the value the same throughout the time series?
                    result_quality = self.result_quality(value['qualifiers'])

                    # TODO: write some tests for this which will require mocking a data return.
                    # Keep every point unless the query asks for a specific result quality
                    if not query.result_quality or query.result_quality == result_quality:

                        # Get the broker parameter
                        try:
                            try:
                                data: Optional[float] = float(value['value'])
                                # Hardcoded unit conversion for river discharge parameters
                                data, unit_of_measurement = convert_discharge(
                                    data, parameter, unit_of_measurement)
                            except Exception as e:
                                # A value that cannot be parsed or converted is
                                # kept as a point with no value
                                logger.error(str(e))
                                data = None
                            # What do do with bad values?

                            result_points.append(
                                TimeValuePair(timestamp=value['dateTime'],
                                              value=data))

                        except Exception as e:
                            logger.error(e)

            # Without a requested result quality, derive one for the whole
            # series from the qualifiers seen; later checks override earlier ones
            timeseries_result_quality = query.result_quality
            if not timeseries_result_quality:
                if ResultQualityEnum.CHECKED in result_qualifiers:
                    timeseries_result_quality = ResultQualityEnum.CHECKED
                if ResultQualityEnum.UNCHECKED in result_qualifiers:
                    timeseries_result_quality = ResultQualityEnum.UNCHECKED
                if (ResultQualityEnum.PARTIALLY_CHECKED in result_qualifiers
                        and ResultQualityEnum.CHECKED in result_qualifiers):
                    timeseries_result_quality = ResultQualityEnum.PARTIALLY_CHECKED

            measurement_timeseries_tvp_observation = MeasurementTimeseriesTVPObservation(
                self,
                id=
                feature_id,  # FYI: this field is not unique and thus kinda useless
                unit_of_measurement=unit_of_measurement,
                feature_of_interest_type=FeatureTypeEnum.POINT,
                feature_of_interest=monitoring_feature,
                # zoneOffset is split on ":" and its first part used as the hour offset
                utc_offset=int(timezone_offset.split(":")[0]),
                result_points=result_points,
                observed_property_variable=parameter,
                result_quality=timeseries_result_quality,
                aggregation_duration=query.aggregation_duration,
                time_reference_position=TimeMetadataMixin.
                TIME_REFERENCE_MIDDLE,
                statistic=basin3d_statistic)

            yield measurement_timeseries_tvp_observation
Пример #7
0
    def list(self, query: QueryMonitoringFeature):
        """
        Get the monitoring features (HUC-based features or points) for a query.

        =================== === ===================
        USGS NWIS               Broker
        =================== === ===================
        ``new_huc_rdb.txt``  >> ``MonitoringFeature/``
        =================== === ===================

        Unless the requested feature type is POINT, features are built from
        the hydrological unit code (HUC) listing and classified by the length
        of each code (fewer than 4 characters: region, 4: subregion, 6: basin,
        8: subbasin). For POINT, sites are requested from the ``site/``
        endpoint. An unsupported feature type only produces a warning.

        :param query: The query information object
        :returns: a generator object that yields :class:`~basin3d.synthesis.models.field.MonitoringFeature`
            objects
        """

        # Use the enum's value when a FeatureTypeEnum was given, otherwise the
        # raw query value.
        # NOTE(review): with this and/or form a falsy ``.value`` would fall
        # back to the enum member itself - confirm no member has a falsy value.
        feature_type = isinstance(
            query.feature_type,
            FeatureTypeEnum) and query.feature_type.value or query.feature_type
        if feature_type in USGSDataSourcePlugin.feature_types or feature_type is None:

            # Convert parent_features
            # Ids shorter than 4 characters are collected as regions and
            # 8-character ids as subbasins; every id is kept in parent_features.
            usgs_regions = []
            usgs_subbasins = []
            parent_features = []
            if query.parent_features:
                for value in query.parent_features:
                    parent_features.append(value)
                    if len(value) < 4:
                        usgs_regions.append(value)
                    elif len(value) == 8:
                        usgs_subbasins.append(value)

            # Anything other than an explicit POINT request is served from the
            # HUC listing.
            # NOTE(review): feature_type may hold the enum value here while it
            # is compared with the enum member - presumably the two compare
            # equal; verify against FeatureTypeEnum.
            if not feature_type or feature_type != FeatureTypeEnum.POINT:

                huc_text = self.get_hydrological_unit_codes()
                logging.debug("{}.{}".format(self.__class__.__name__, "list"),
                              url=URL_USGS_HUC)

                # Keep every HUC row when no parent features were given,
                # otherwise only rows whose code starts with one of them
                for json_obj in [
                        o for o in iter_rdb_to_json(huc_text)
                        if not parent_features or
                    [p for p in parent_features if o["huc"].startswith(p)]
                ]:

                    # Classify the row by code length; a branch is only taken
                    # when the requested feature type is unset or matches
                    monitoring_feature = None
                    if (feature_type is None
                            or feature_type == FeatureTypeEnum.REGION) and len(
                                json_obj["huc"]) < 4:
                        monitoring_feature = self._load_huc_obj(
                            json_obj, feature_type=FeatureTypeEnum.REGION)

                    # Subregion: related to the region given by its first 2 characters
                    elif (feature_type is None or feature_type
                          == FeatureTypeEnum.SUBREGION) and len(
                              json_obj["huc"]) == 4:
                        monitoring_feature = self._load_huc_obj(
                            json_obj,
                            feature_type=FeatureTypeEnum.SUBREGION,
                            related_sampling_feature=json_obj["huc"][0:2],
                            related_sampling_feature_type=FeatureTypeEnum.
                            REGION)

                    # Basin: related to the subregion given by its first 4 characters
                    elif (feature_type is None
                          or feature_type == FeatureTypeEnum.BASIN) and len(
                              json_obj["huc"]) == 6:
                        monitoring_feature = self._load_huc_obj(
                            json_obj,
                            feature_type=FeatureTypeEnum.BASIN,
                            related_sampling_feature=json_obj["huc"][0:4],
                            related_sampling_feature_type=FeatureTypeEnum.
                            SUBREGION)

                    elif (feature_type is None
                          or feature_type == FeatureTypeEnum.SUBBASIN) and len(
                              json_obj["huc"]) == 8:
                        # Prefixes of length 2, 4 and 6 of this code
                        hucs = {json_obj["huc"][0:i] for i in range(2, 8, 2)}

                        # Filter by regions if it is set
                        # (a subbasin rejected here yields nothing and is not logged)
                        if not usgs_regions or not hucs.isdisjoint(
                                usgs_regions):

                            # This is a Cataloging Unit (See https://water.usgs.gov/GIS/huc_name.html)
                            monitoring_feature = self._load_huc_obj(
                                json_obj=json_obj,
                                feature_type=FeatureTypeEnum.SUBBASIN,
                                description=
                                "{} Watershed: Drainage basin code is defined by the USGS State "
                                "Office where the site is located.".format(
                                    json_obj["basin"]),
                                related_sampling_feature=json_obj["huc"][0:6],
                                related_sampling_feature_type=FeatureTypeEnum.
                                BASIN)

                    else:
                        # No branch matched the code length / requested type
                        logger.debug("Ignoring HUC {}".format(json_obj["huc"]))

                    # Determine whether to yield the monitoring feature object:
                    # when specific monitoring features were requested, only
                    # those codes are yielded; otherwise every built feature is
                    if monitoring_feature:
                        if query.monitoring_features and json_obj[
                                'huc'] in query.monitoring_features:
                            yield monitoring_feature
                        elif not query.monitoring_features:
                            yield monitoring_feature

            # points: USGS calls these sites
            else:
                if query.monitoring_features:
                    # Requested sites are used directly
                    usgs_sites = ",".join(query.monitoring_features)
                    feature_observed_properties = self.get_observed_properties_variables(
                        query.monitoring_features)
                else:
                    # Get the variables with data
                    # The sites to request are the locations returned for the
                    # subbasins taken from parent_features
                    feature_observed_properties = self.get_observed_properties_variables(
                        usgs_subbasins)
                    usgs_sites = ",".join(feature_observed_properties.keys())

                # Filter by locations with data
                url = '{}site/?sites={}'.format(self.datasource.location,
                                                usgs_sites)
                usgs_site_response = get_url(url)
                logging.debug("{}.{}".format(self.__class__.__name__, "list"),
                              url=url)

                # Nothing is yielded when the response is falsy or not HTTP 200
                if usgs_site_response and usgs_site_response.status_code == 200:

                    for v in iter_rdb_to_json(usgs_site_response.text):
                        yield _load_point_obj(datasource=self,
                                              json_obj=v,
                                              feature_observed_properties=
                                              feature_observed_properties)

        else:
            logger.warning(
                f"Feature type {feature_type} not supported by {self.datasource.name}."
            )