class MarineWeatherRealtimeLoader(BaseLoader):
    """Marine weather real-time loader"""
    def __init__(self, conn_config=None):
        """initializer"""

        BaseLoader.__init__(self)

        self.conn = ElasticsearchConnector(conn_config or {})
        self.filepath = None
        self.region_name_code = None
        self.language = None
        self.root = None
        self.area = {}
        self.items = []

        # create marine weather indices if they don't exist
        for item in MAPPINGS:
            SETTINGS['mappings']['properties']['properties'][
                'properties'] = MAPPINGS[item]
            self.conn.create(INDEX_NAME.format(item), SETTINGS)

    def parse_filename(self):
        """
        Parses a marine weather forecast XML filename to get the
        region name code and language.
        :return: `bool` of parse status
        """
        # parse filepath
        pattern = '{region_name_code}_{language}.xml'
        filename = self.filepath.name
        parsed_filename = parse(pattern, filename)

        # set class variables
        self.region_name_code = parsed_filename.named['region_name_code']
        self.language = parsed_filename.named['language']

        return True
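
    # Example (hypothetical filename): parsing 'fqcn11_e.xml' with the
    # pattern above yields region_name_code='fqcn11' and language='e'.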

    def create_datetime_dict(self, datetime_elems):
        """
        Used to pass a pair of timeStamp elements from the XML. These elements
        contain the UTC and local time for various marine forecast
        sections (warnings, regular forecasts, extended forecasts). The
        first element contains UTC datetime info and the second local datetime
        info.
        :param datetime_elems: list of lmxl `Element` objects representing the
        dateTime nodes to parse.
        :returns: `dict` with "utc" and "local" keys containing respective
        parsed datetime objects.
        """
        datetime_utc = datetime.strptime(
            datetime_elems[0].find('timeStamp').text, '%Y%m%d%H%M')
        local_offset = float(datetime_elems[1].attrib['UTCOffset'])

        datetime_local = datetime_utc + timedelta(hours=local_offset)
        datetime_local = datetime_local.replace(
            tzinfo=timezone(timedelta(hours=local_offset)))

        return {'utc': datetime_utc, 'local': datetime_local}
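
    # Worked example (hypothetical values): a UTC timeStamp of '202301151800'
    # with a local UTCOffset of -3.5 (e.g. Newfoundland Standard Time) yields
    # utc = datetime(2023, 1, 15, 18, 0) and
    # local = datetime(2023, 1, 15, 14, 30,
    #                  tzinfo=timezone(timedelta(hours=-3.5))).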

    def set_area_info(self):
        """
        Gets the area name from the marine weather XML document and
        looks up the equivalent meteocode forecast polygon feature ID to
        query the forecast_polygons_water ES index for the corresponding
        document. If the document is found, populates the self.area attribute
        with the region name, subregion name, area name and the associated
        geometry.
        :return: `bool` representing successful setting of self.area attribute
        """
        area_name = self.root.find('area').text

        with open(
                os.path.join(
                    MSC_PYGEOAPI_BASEPATH,
                    'lib/msc_pygeoapi/',
                    'resources/meteocode_lookup.json',
                )) as json_file:
            meteocode_lookup = json.load(json_file)
            forecast_id = meteocode_lookup[self.region_name_code]

        try:
            result = self.conn.Elasticsearch.get(
                index='forecast_polygons_water_detail',
                id=forecast_id,
                _source=['geometry'],
            )
            self.area = {
                # get area element value
                **{
                    'name': area_name
                },
                # get area element attribute values
                **{
                    key: self.root.find('area').attrib[key]
                    for key in ['countryCode', 'region', 'subRegion']
                },
                **result['_source'],
            }

            return True

        except exceptions.NotFoundError:
            LOGGER.warning("Could not get forecast polygon document with id: "
                           "{}".format(forecast_id))
            return False

    def generate_warnings(self):
        """
        Generates and yields a series of marine weather warnings
        for a given marine weather area. Warnings are returned
        as Elasticsearch bulk API upsert actions, with a single
        document for the marine weather region in GeoJSON to match the
        Elasticsearch index mappings.
        :returns: Generator of Elasticsearch actions to upsert the marine
                  weather warnings.
        """
        warnings = self.root.findall('warnings/')

        feature = {'type': 'Feature', 'geometry': {}, 'properties': {}}

        feature['geometry'] = self.area['geometry']

        feature['properties']['area_{}'.format(
            self.language)] = self.area['name']
        feature['properties']['region_{}'.format(
            self.language)] = self.area['region']
        feature['properties']['sub_region_{}'.format(
            self.language)] = self.area['subRegion']
        feature['properties']['warnings_{}'.format(self.language)] = []

        if warnings:
            for elem in warnings:
                datetimes = self.create_datetime_dict(
                    elem.findall('event/dateTime'))
                location = {
                    'location_{}'.format(self.language):
                    elem.attrib['name'],
                    'issued_datetime_utc_{}'.format(self.language):
                    strftime_rfc3339(datetimes['utc']),
                    'issued_datetime_local_{}'.format(self.language):
                    strftime_rfc3339(datetimes['local']),
                    'event_type_{}'.format(self.language):
                    elem.find('event').attrib['type'],
                    'event_category_{}'.format(self.language):
                    elem.find('event').attrib['category'],
                    'event_name_{}'.format(self.language):
                    elem.find('event').attrib['name'],
                    'event_status_{}'.format(self.language):
                    elem.find('event').attrib['status'],
                }
                feature['properties']['warnings_{}'.format(
                    self.language)].append(location)

        self.items.append(feature)

        action = {
            '_id': self.filepath.stem.split('_')[0],
            '_index': 'marine_weather_warnings',
            '_op_type': 'update',
            'doc': feature,
            'doc_as_upsert': True,
        }

        yield action
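
    # The yielded action uses the Elasticsearch bulk helpers format
    # (_op_type 'update' with doc/doc_as_upsert), so it can be consumed by
    # elasticsearch.helpers.bulk() or, as in load_data below, presumably by
    # self.conn.submit_elastic_package().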

    def generate_regular_forecasts(self):
        """
        Generates and yields a series of marine weather regular forecasts
        for a given marine weather area. Each regular forecast is returned
        as Elasticsearch bulk API upsert actions, with documents in GeoJSON to
        match the Elasticsearch index mappings.
        :returns: Generator of Elasticsearch actions to upsert the marine
                  weather regular forecast.
        """
        regular_forecasts = self.root.findall('regularForecast/')
        feature = {'type': 'Feature', 'geometry': {}, 'properties': {}}

        feature['geometry'] = self.area['geometry']
        feature['properties']['area_{}'.format(
            self.language)] = self.area['name']
        feature['properties']['region_{}'.format(
            self.language)] = self.area['region']
        feature['properties']['sub_region_{}'.format(
            self.language)] = self.area['subRegion']
        feature['properties']['forecasts_{}'.format(self.language)] = []

        if regular_forecasts:
            datetimes = self.create_datetime_dict([
                element for element in regular_forecasts
                if element.tag == 'dateTime'
            ])
            feature['properties']['issued_datetime_utc'] = strftime_rfc3339(
                datetimes['utc'])
            feature['properties']['issued_datetime_local'] = strftime_rfc3339(
                datetimes['local'])

            locations = [
                element for element in regular_forecasts
                if element.tag == 'location'
            ]
            for location in locations:
                location = {
                    'location_{}'.format(self.language):
                    location.attrib['name']
                    if 'name' in location.attrib else self.area['name'],
                    'period_of_coverage_{}'.format(self.language):
                    location.find('weatherCondition/periodOfCoverage').text
                    if location.find('weatherCondition/periodOfCoverage')
                    is not None else None,
                    'wind_{}'.format(self.language):
                    location.find('weatherCondition/wind').text
                    if location.find('weatherCondition/wind') is not None else
                    None,
                    'weather_visibility_{}'.format(self.language):
                    location.find('weatherCondition/weatherVisibility').text
                    if location.find('weatherCondition/weatherVisibility')
                    is not None else None,
                    'air_temperature_{}'.format(self.language):
                    location.find('weatherCondition/airTemperature').text
                    if location.find('weatherCondition/airTemperature')
                    is not None else None,
                    'freezing_spray_{}'.format(self.language):
                    location.find('weatherCondition/freezingSpray').text
                    if location.find('weatherCondition/freezingSpray')
                    is not None else None,
                    'status_statement_{}'.format(self.language):
                    location.find('statusStatement').text
                    if location.find('statusStatement') is not None else None,
                }
                feature['properties']['forecasts_{}'.format(
                    self.language)].append(location)

        self.items.append(feature)

        action = {
            '_id': self.filepath.stem.split('_')[0],
            '_index': 'marine_weather_regular-forecasts',
            '_op_type': 'update',
            'doc': feature,
            'doc_as_upsert': True,
        }

        yield action

    def generate_extended_forecasts(self):
        """
        Generates and yields a series of marine weather extended forecasts
        for a given marine weather area. Each extended forecast is returned
        as Elasticsearch bulk API upsert actions, with documents in GeoJSON to
        match the Elasticsearch index mappings.
        :returns: Generator of Elasticsearch actions to upsert the marine
                  weather extended forecast.
        """
        extended_forecasts = self.root.findall('extendedForecast/')
        feature = {'type': 'Feature', 'geometry': {}, 'properties': {}}

        feature['geometry'] = self.area['geometry']
        feature['properties']['area_{}'.format(
            self.language)] = self.area['name']
        feature['properties']['region_{}'.format(
            self.language)] = self.area['region']
        feature['properties']['sub_region_{}'.format(
            self.language)] = self.area['subRegion']
        feature['properties']['extended_forecasts_{}'.format(
            self.language)] = []

        if extended_forecasts:
            datetimes = self.create_datetime_dict([
                element for element in extended_forecasts
                if element.tag == 'dateTime'
            ])
            feature['properties']['issued_datetime_utc'] = strftime_rfc3339(
                datetimes['utc'])
            feature['properties']['issued_datetime_local'] = strftime_rfc3339(
                datetimes['local'])

            locations = [
                element for element in extended_forecasts
                if element.tag == 'location'
            ]
            for location in locations:
                location = {
                    'location_{}'.format(self.language):
                    location.attrib['name']
                    if 'name' in location.attrib else self.area['name'],
                    'forecast_periods_{}'.format(self.language):
                    [{
                        'forecast_period_{}'.format(self.language):
                        forecast_period.attrib['name'],
                        'forecast_{}'.format(self.language):
                        forecast_period.text,
                    } for forecast_period in location.findall(
                        'weatherCondition/')],
                    'status_statement_{}'.format(self.language):
                    location.find('statusStatement').text
                    if location.find('statusStatement') is not None else None,
                }
                feature['properties']['extended_forecasts_{}'.format(
                    self.language)].append(location)

        self.items.append(feature)

        action = {
            '_id': self.filepath.stem.split('_')[0],
            '_index': 'marine_weather_extended-forecasts',
            '_op_type': 'update',
            'doc': feature,
            'doc_as_upsert': True,
        }

        yield action

    def load_data(self, filepath):
        """
        loads data from event to target
        :returns: `bool` of status result
        """

        self.filepath = Path(filepath)
        self.parse_filename()

        LOGGER.debug('Received file {}'.format(self.filepath))

        self.root = etree.parse(str(self.filepath.resolve())).getroot()

        # set area info for both languages from XML
        self.set_area_info()

        warnings = self.generate_warnings()
        regular_forecasts = self.generate_regular_forecasts()
        extended_forecasts = self.generate_extended_forecasts()

        for package in [warnings, regular_forecasts, extended_forecasts]:
            self.conn.submit_elastic_package(package, request_size=80000)
        return True
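
# A minimal usage sketch (assumes a reachable Elasticsearch instance and a
# valid marine weather XML file; the path below is hypothetical):
#
#   loader = MarineWeatherRealtimeLoader()
#   loader.load_data('/data/marine_weather/xml/fqcn11_e.xml')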


class CapAlertsRealtimeLoader(BaseLoader):
    """Cap Alerts real-time loader"""

    def __init__(self, conn_config=None):
        """initializer"""

        BaseLoader.__init__(self)

        self.conn = ElasticsearchConnector(conn_config or {})
        self.conn.create(INDEX_NAME, mapping=SETTINGS)
        self.references_arr = []

    def load_data(self, filepath):
        """
        Function from the base class to load the data into ES.

        :param filepath: filepath of the CAP alert file to parse

        :returns: `bool` of status result
        """

        data = self.weather_warning2geojson(filepath)

        try:
            self.bulk_data = []
            for doc in data:
                op_dict = {
                    'index': {
                        '_index': INDEX_NAME,
                        '_type': '_doc'
                    }
                }
                op_dict['index']['_id'] = doc['properties']['identifier']
                self.bulk_data.append(op_dict)
                self.bulk_data.append(doc)
            r = self.conn.Elasticsearch.bulk(
                index=INDEX_NAME, body=self.bulk_data
            )

            LOGGER.debug('Result: {}'.format(r))

            previous_alerts = self.delete_references_alerts()

            click.echo('done importing in ES')

            if previous_alerts:
                LOGGER.debug('Deleted old warnings')
            else:
                LOGGER.debug('New warning, no deletion needed')
            return True

        except Exception as err:
            LOGGER.warning('Error bulk indexing: {}'.format(err))
            return False

    def delete_references_alerts(self):
        """Delete old alerts documents"""

        if self.references_arr:

            click.echo('Deleting old alerts')

            query = {
                'query': {
                    'terms': {
                        'properties.reference': self.references_arr
                    }
                }
            }

            self.conn.Elasticsearch.delete_by_query(
                index=INDEX_NAME, body=query
            )

            return True

        else:
            return False

    def weather_warning2geojson(self, filepath):
        """
        Create GeoJSON that will be used to display weather alerts

        :param filepath: filepath to the cap-xml file

        :returns: list of GeoJSON features
        """

        # define the variables we'll need
        now = datetime.utcnow()

        french_alert = {}
        english_alert = {}
        english_alert_remove = []

        timeformat = '%Y-%m-%dT%H:%M:%SZ'

        # parse the cap-xml file at filepath and add its alerts to the geojson
        LOGGER.info('Processing CAP document {}'.format(filepath))

        # with the lxml library we parse the xml file
        try:
            tree = etree.parse(filepath)
        except Exception as err:
            LOGGER.warning('Cannot parse {}: {}'.format(filepath, err))
            # tree is undefined beyond this point, so bail out early
            return []

        url = 'https://dd.weather.gc.ca/alerts/{}'.\
            format(filepath.split('alerts')[1])

        root = tree.getroot()

        b_xml = '{urn:oasis:names:tc:emergency:cap:1.2}'

        identifier = _get_element(root,
                                  '{}identifier'.format(b_xml))

        references = _get_element(root,
                                  '{}references'.format(b_xml))

        if references:
            for ref in references.split(' '):
                self.references_arr.append(ref.split(',')[1])

        for grandchild in root.iter('{}info'.format(b_xml)):
            expires = _get_date_format(_get_element(grandchild,
                                       '{}expires'.format(b_xml)))\
                      .strftime(timeformat)

            status_alert = _get_element(grandchild,
                                        '{}parameter[last()-4]/'
                                        '{}value'.format(b_xml,
                                                         b_xml))

            if _get_date_format(expires) > now:
                language = _get_element(grandchild,
                                        '{}language'.format(b_xml))
                if language == 'fr-CA':
                    headline = _get_element(grandchild,
                                            '{}headline'.format(b_xml))

                    description_fr = '{}description'.format(b_xml)
                    descript = _get_element(grandchild, description_fr)
                    descript = descript.replace("\n", " ").strip()

                    for i in grandchild.iter('{}area'.format(b_xml)):
                        tag = _get_element(i,
                                           '{}polygon'.format(b_xml))
                        name = _get_element(i,
                                            '{}areaDesc'.format(b_xml))

                        for j in grandchild.iter('{}geocode'.format(b_xml)):
                            str_value_name = '{}valueName'.format(b_xml)
                            valueName = _get_element(j, str_value_name)

                            if valueName == 'layer:EC-MSC-SMC:1.0:CLC':
                                geocode_value = '{}value'.format(b_xml)
                                geocode = _get_element(j, geocode_value)

                        id_warning = '{}_{}'.format(identifier, geocode)

                        if id_warning not in french_alert:
                            french_alert[id_warning] = (id_warning,
                                                        name,
                                                        headline,
                                                        descript)
                else:
                    headline = _get_element(grandchild,
                                            '{}headline'.format(b_xml))

                    description = '{}description'.format(b_xml)
                    descript = _get_element(grandchild, description)
                    descript = descript.replace("\n", " ").strip()

                    effective_date =\
                        _get_element(grandchild,
                                     '{}effective'.format(b_xml))
                    effective = _get_date_format(effective_date)
                    effective = effective.strftime(timeformat)

                    warning = _get_element(grandchild,
                                           '{}parameter[1]/'
                                           '{}value'.format(b_xml,
                                                            b_xml))

                    # There can be many <area> covered by one
                    # <info> so we have to loop through them
                    for i in grandchild.iter('{}area'.format(b_xml)):
                        tag = _get_element(i, '{}polygon'.format(b_xml))
                        name = _get_element(i, '{}areaDesc'.format(b_xml))

                        for j in grandchild.iter('{}geocode'.format(b_xml)):
                            valueName = \
                                _get_element(j, '{}valueName'.format(b_xml))
                            if valueName == 'layer:EC-MSC-SMC:1.0:CLC':
                                geocode = \
                                    _get_element(j, '{}value'.format(b_xml))

                        split_tag = re.split(' |,', tag)

                        id_warning = '{}_{}'.format(identifier, geocode)

                        if id_warning not in english_alert:
                            english_alert[id_warning] = (split_tag,
                                                         name,
                                                         headline,
                                                         effective,
                                                         expires,
                                                         warning,
                                                         status_alert,
                                                         id_warning,
                                                         descript,
                                                         url)

        LOGGER.info('Done processing')
        for j in english_alert:
            if _get_date_format(english_alert[j][4]) < now:
                english_alert_remove.append(j)
                # We can't remove an element of a dictionary while looping
                # over it, so we remove the expired warnings in a second step
        for key in english_alert_remove:
            del english_alert[key]
            del french_alert[key]

        # to proceed we need the same number of warnings in English and in
        # French; data is initialized here so the return below is always valid
        data = []
        if len(french_alert) == len(english_alert):
            LOGGER.info('Creating %d features', len(english_alert))

            for num_poly in english_alert:
                poly = []
                for el in list(reversed(range(0,
                                              len(english_alert[num_poly][0]),
                                              2))):
                    if len(english_alert[num_poly][0]) > 1:
                        poly.append([float(english_alert[num_poly][0][el + 1]),
                                     float(english_alert[num_poly][0][el]),
                                     0.0])

                # temporary fix for duplicate neighbouring coordinates
                # poly = [k for k, g in groupby(poly)]
                no_dup_poly = []
                for k in poly:
                    if k not in no_dup_poly:
                        no_dup_poly.append(k)
                no_dup_poly.append(poly[-1])

                id_ = english_alert[num_poly][7]

                AlertLocation = {
                    'type': "Feature",
                    'properties': {
                        'identifier': id_,
                        'area': english_alert[num_poly][1],
                        'reference': identifier,
                        'zone': french_alert[num_poly][1],
                        'headline': english_alert[num_poly][2],
                        'titre': french_alert[num_poly][2],
                        'descrip_en': english_alert[num_poly][8],
                        'descrip_fr': french_alert[num_poly][3],
                        'effective': english_alert[num_poly][3],
                        'expires': english_alert[num_poly][4],
                        'alert_type': english_alert[num_poly][5],
                        'status': english_alert[num_poly][6],
                        'references': self.references_arr,
                        'url': english_alert[num_poly][9]
                    },
                    'geometry': {
                        'type': "Polygon",
                        'coordinates': [no_dup_poly]
                    }
                }

                data.append(AlertLocation)

        return data
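
# A minimal usage sketch (assumes a reachable Elasticsearch instance; the CAP
# file path below is hypothetical, but note that weather_warning2geojson
# expects the path to contain 'alerts' when it builds the public URL):
#
#   loader = CapAlertsRealtimeLoader()
#   loader.load_data('/data/alerts/cap/20230101/CWAO/12/hypothetical.cap')
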
class ForecastPolygonsLoader(BaseLoader):
    """Forecast polygons (land/water) loader"""
    def __init__(self, conn_config=None):
        """initializer"""

        BaseLoader.__init__(self)

        self.conn = ElasticsearchConnector(conn_config or {})
        self.filepath = None
        self.version = None
        self.zone = None
        self.items = []

        # create forecast polygon indices if they don't exist
        for index in INDICES:
            zone = index.split('_')[2]
            SETTINGS['mappings']['properties']['properties'][
                'properties'] = FILE_PROPERTIES[zone]
            self.conn.create(index, SETTINGS)

    def parse_filename(self):
        """
        Parses a meteocode filename in order to get the version,
        zone (land/water) and type (proj, unproj, kmz, etc.)
        :return: `bool` of parse status
        """
        # parse filepath
        pattern = 'MSC_Geography_Pkg_V{version:w}_{zone}_{type}.zip'
        filename = self.filepath.name
        parsed_filename = parse(pattern, filename)

        # set class variables
        self.version = parsed_filename.named['version'].replace('_', '.')
        self.zone = parsed_filename.named['zone']

        return True
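
    # Example (hypothetical filename): parsing
    # 'MSC_Geography_Pkg_V23_Land_Shp.zip' with the pattern above yields
    # version '23' and zone 'Land'.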

    def generate_geojson_features(self, shapefile_name):
        """
        Generates and yields a series of meteocode geodata features,
        one for each feature in <self.filepath/self.filepath.stem/
        shapefile_name>. Features are returned as Elasticsearch bulk API
        upsert actions, with documents in GeoJSON to match the Elasticsearch
        index mappings.
        :returns: Generator of Elasticsearch actions to upsert the forecast
                  polygons for given shapefile in zip archive
        """
        filepath = str(
            (self.filepath / self.filepath.stem / shapefile_name).resolve())
        data = ogr.Open(r'/vsizip/{}'.format(filepath))
        lyr = data.GetLayer()

        for feature in lyr:
            feature_json = feature.ExportToJson(as_object=True,
                                                options=['RFC7946=YES'])
            feature_json['properties']['version'] = self.version

            _id = feature_json['properties']['FEATURE_ID']

            self.items.append(feature_json)

            action = {
                '_id': '{}'.format(_id),
                '_index': INDEX_NAME.format(self.zone.lower(),
                                            shapefile_name.split('_')[2]),
                '_op_type': 'update',
                'doc': feature_json,
                'doc_as_upsert': True
            }

            yield action

    def load_data(self, filepath):
        """
        loads data from event to target
        :returns: `bool` of status result
        """

        self.filepath = Path(filepath)

        # set class variables from filename
        self.parse_filename()
        LOGGER.debug('Received file {}'.format(self.filepath))

        for shapefile in SHAPEFILES_TO_LOAD[self.filepath.stem]:
            # generate geojson features
            package = self.generate_geojson_features(shapefile)
            self.conn.submit_elastic_package(package, request_size=80000)

        return True
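
# A minimal usage sketch (assumes a reachable Elasticsearch instance and a
# zip archive whose stem appears in SHAPEFILES_TO_LOAD; the path below is
# hypothetical):
#
#   loader = ForecastPolygonsLoader()
#   loader.load_data('/data/MSC_Geography_Pkg_V23_Land_Shp.zip')
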
class CitypageweatherRealtimeLoader(BaseLoader):
    """Current conditions real-time loader"""
    def __init__(self, conn_config=None):
        """initializer"""

        BaseLoader.__init__(self)

        self.conn = ElasticsearchConnector(conn_config or {})
        self.conn.create(INDEX_NAME, mapping=SETTINGS)

    def load_data(self, filepath):
        """
        Function from the base class to load the data into ES.

        :param filepath: filepath for parsing the current conditions file

        :returns: `bool` of status result
        """

        with open(
                os.path.join(MSC_PYGEOAPI_BASEPATH,
                             'resources/wxo_lookup.json')) as json_file:
            wxo_lookup = json.load(json_file)

        data = self.xml2json_cpw(wxo_lookup, filepath)

        try:
            r = self.conn.Elasticsearch.index(
                index=INDEX_NAME,
                id=data['properties']['identifier'],
                body=data)
            LOGGER.debug('Result: {}'.format(r))
            return True
        except Exception as err:
            LOGGER.warning('Error indexing: {}'.format(err))
            return False

    def _get_element(self, node, path, attrib=None):
        """
        Convenience function to resolve lxml.etree.Element handling

        :param node: xml node
        :param path: path in the xml node
        :param attrib: attribute to get in the node

        :returns: attribute value or element text, or None
        """

        val = node.find(path)
        if attrib is not None and val is not None:
            return val.attrib.get(attrib)
        if hasattr(val, 'text') and val.text not in [None, '']:
            return val.text
        return None
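
    # Example (hypothetical XML): given
    #   <currentConditions>
    #     <pressure tendency="falling">100.2</pressure>
    #   </currentConditions>
    # _get_element(root, 'currentConditions/pressure') returns '100.2' and
    # _get_element(root, 'currentConditions/pressure', 'tendency') returns
    # 'falling'.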

    def if_none(self, type_, value):
        """
        Convenience function to avoid errors when
        converting to int or float

        :param type_: f for float and i for int
        :param value: value to convert to float/int

        :returns: converted variable
        """

        if type_ == 'f':
            variable = float(value) if value else 'null'
        elif type_ == 'i':
            variable = int(value) if value else 'null'

        return variable
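
    # Examples: if_none('f', '3.2') returns 3.2, while if_none('i', '')
    # returns the sentinel 'null'; those sentinel values are filtered out of
    # the final GeoJSON properties at the end of xml2json_cpw().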

    def xml2json_cpw(self, wxo_lookup, xml):
        """
        main for generating weather data

        :param wxo_lookup: lookup dict used to map site codes to city codes
        :param xml: xml file to parse and convert to json

        :returns: current conditions as a GeoJSON feature
        """

        feature = {}
        row = {}

        LOGGER.debug('Processing XML: {}'.format(xml))
        LOGGER.debug('Fetching English elements')

        try:
            root = etree.parse(xml).getroot()
        except Exception as err:
            LOGGER.error('ERROR: cannot process data: {}'.format(err))
            return None

        if root.findall("currentConditions/"):
            sitecode = os.path.basename(xml)[:-6]
            try:
                citycode = wxo_lookup[sitecode]['citycode']
            except KeyError as err:
                LOGGER.error('ERROR: cannot find sitecode {} : '
                             'err: {}'.format(sitecode, err))
                return None

            location_name = root.find('location/name')
            x = float(location_name.attrib.get('lon')[:-1])
            y = float(location_name.attrib.get('lat')[:-1])

            # hemisphere and meridian checks are independent, so no elif here
            if location_name.attrib.get('lat')[-1] == 'S':
                y *= -1  # south means negative latitude

            if location_name.attrib.get('lon')[-1] in ['W', 'O']:
                x *= -1  # west means negative longitude

            feature['geom'] = [x, y, 0.0]
            icon = self._get_element(root, 'currentConditions/iconCode')

            if icon:
                row['icon'] = 'https://weather.gc.ca/' \
                              'weathericons/{}.gif'.format(icon)
            else:
                row['icon'] = None

            for dates in root.findall("currentConditions/dateTime"
                                      "[@zone='UTC'][@name='observation']"):
                timestamp = dates.find('timeStamp')
                if timestamp is not None:
                    dt2 = datetime.strptime(timestamp.text, '%Y%m%d%H%M%S')
                    row['timestamp'] = dt2.strftime('%Y-%m-%dT%H:%M:%SZ')

            row['rel_hum'] = self._get_element(
                root, 'currentConditions/relativeHumidity')
            row['speed'] = self._get_element(root,
                                             'currentConditions/wind/speed')
            row['gust'] = self._get_element(root,
                                            'currentConditions/wind/gust')
            row['direction'] = self._get_element(
                root, 'currentConditions/wind/direction')
            row['bearing'] = self._get_element(
                root, 'currentConditions/wind/bearing')
            row['temp'] = self._get_element(root,
                                            'currentConditions/temperature')
            row['dewpoint'] = self._get_element(root,
                                                'currentConditions/dewpoint')
            row['windchill'] = self._get_element(
                root, 'currentConditions/windChill')

            if xml.endswith('e.xml'):
                row['name'] = self._get_element(root, 'location/name')
                row['station_en'] = self._get_element(
                    root, 'currentConditions/station')
                row['cond_en'] = self._get_element(
                    root, 'currentConditions/condition')
                row['pres_en'] = self._get_element(
                    root, 'currentConditions/pressure')
                row['prestnd_en'] = self._get_element(
                    root, 'currentConditions/pressure', 'tendency')
                row['url_en'] = 'https://weather.gc.ca/city/pages/' \
                                '{}_metric_e.html'.format(citycode)

                row['national'] = 0
                if row['name'] in NATIONAL_CITIES:
                    row['national'] = 1

                LOGGER.debug('Adding feature')
                LOGGER.debug('Setting geometry')

                conditions = {
                    'type': "Feature",
                    'properties': {
                        'identifier': citycode,
                        'name': row['name'],
                        'station_en': row['station_en'],
                        'icon': row['icon'],
                        'cond_en': row['cond_en'],
                        'temp': self.if_none('f', row['temp']),
                        'dewpoint': self.if_none('f', row['dewpoint']),
                        'windchill': self.if_none('i', row['windchill']),
                        'pres_en': self.if_none('f', row['pres_en']),
                        'prestnd_en': row['prestnd_en'],
                        'rel_hum': self.if_none('i', row['rel_hum']),
                        'speed': self.if_none('i', row['speed']),
                        'gust': self.if_none('i', row['gust']),
                        'direction': row['direction'],
                        'bearing': self.if_none('f', row['bearing']),
                        'timestamp': row['timestamp'],
                        'url_en': row['url_en'],
                        'national': int(row['national'])
                    },
                    'geometry': {
                        'type': "Point",
                        'coordinates': feature['geom']
                    }
                }

            elif xml.endswith('f.xml'):
                LOGGER.debug('Processing {}'.format(xml))

                row['nom'] = self._get_element(root, 'location/name')
                row['station_fr'] = self._get_element(
                    root, 'currentConditions/station')
                row['cond_fr'] = self._get_element(
                    root, 'currentConditions/condition')
                row['pres_fr'] = self._get_element(
                    root, 'currentConditions/pressure')
                row['prestnd_fr'] = self._get_element(
                    root, 'currentConditions/pressure', 'tendency')
                row['url_fr'] = 'https://meteo.gc.ca/city/pages/' \
                                '{}_metric_f.html'.format(citycode)

                row['national'] = 0
                if row['nom'] in NATIONAL_CITIES:
                    row['national'] = 1

                LOGGER.debug('Adding feature')
                LOGGER.debug('Setting geometry')

                conditions = {
                    'type': "Feature",
                    'properties': {
                        'identifier': citycode,
                        'nom': row['nom'],
                        'station_fr': row['station_fr'],
                        'icon': row['icon'],
                        'cond_fr': row['cond_fr'],
                        'temp': self.if_none('f', row['temp']),
                        'dewpoint': self.if_none('f', row['dewpoint']),
                        'windchill': self.if_none('i', row['windchill']),
                        'pres_fr': self.if_none('f', row['pres_fr']),
                        'prestnd_fr': row['prestnd_fr'],
                        'rel_hum': self.if_none('i', row['rel_hum']),
                        'speed': self.if_none('i', row['speed']),
                        'gust': self.if_none('i', row['gust']),
                        'direction': row['direction'],
                        'bearing': self.if_none('f', row['bearing']),
                        'timestamp': row['timestamp'],
                        'url_fr': row['url_fr'],
                        'national': int(row['national'])
                    },
                    'geometry': {
                        'type': "Point",
                        'coordinates': feature['geom']
                    }
                }

            conditions['properties'] = {
                key: val
                for key, val in conditions['properties'].items()
                if val != 'null'
            }  # noqa
            return conditions
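
# A minimal usage sketch (assumes a reachable Elasticsearch instance and the
# wxo_lookup resource file; the citypage XML path below is hypothetical --
# the trailing 'e.xml'/'f.xml' suffix selects English or French handling):
#
#   loader = CitypageweatherRealtimeLoader()
#   loader.load_data('/data/citypage_weather/xml/ON/s0000458_e.xml')
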
class HurricanesRealtimeLoader(BaseLoader):
    """Hurricanes Real-time loader"""

    def __init__(self, conn_config=None):
        """initializer"""

        BaseLoader.__init__(self)

        self.conn = ElasticsearchConnector(conn_config or {})
        self.filepath = None
        self.date_ = None
        self.fh = None
        self.storm_name = None
        self.storm_variable = None
        self.items = []

        # create storm variable indices if they don't exist
        for item in FILE_PROPERTIES:
            SETTINGS['mappings']['properties']['properties'][
                'properties'
            ] = FILE_PROPERTIES[item]
            self.conn.create(INDEX_NAME.format(item), SETTINGS)

    def parse_filename(self):
        """
        Parses a hurricane filename in order to get the date, forecast issued
        time, storm name, and storm variable.
        :return: `bool` of parse status
        """
        # parse filepath
        pattern = '{date_}_{fh}_{storm_name}.{storm_variable}.' \
                  '{file_extension}'
        filename = self.filepath.name
        parsed_filename = parse(pattern, filename)

        # set class variables
        self.date_ = datetime.strptime(parsed_filename.named['date_'],
                                       '%Y%m%d')
        self.fh = parsed_filename.named['fh']
        self.storm_name = parsed_filename.named['storm_name']
        self.storm_variable = parsed_filename.named['storm_variable']

        return True
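
    # Example (hypothetical filename): parsing '20200921_0600Z_TEDDY.pts.shp'
    # with the pattern above yields date_ = datetime(2020, 9, 21),
    # fh = '0600Z', storm_name = 'TEDDY' and storm_variable = 'pts'.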

    def check_shapefile_deps(self):
        """
        Check that all shapefile dependencies are available
        :return: `bool` of check result
        """
        dependencies = ['.shp', '.shx', '.dbf', '.prj']
        return all([self.filepath.with_suffix(suffix).exists() for
                    suffix in dependencies])

    # TODO: Remove once upstream data is patched
    @staticmethod
    def clean_consecutive_coordinates(coordinates):
        """
        Temporary fix for issues with upstream data.
        Removes consecutive coordinate points from GeoJSON coordinates
        :param coordinates: list of GeoJSON coordinates
        :return: list of coordinates with consecutive duplicate points removed
        """
        return [[k for k, g in groupby(coordinate)] for
                coordinate in coordinates]
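
    # Example: clean_consecutive_coordinates([[[0, 0], [0, 0], [1, 1]]])
    # returns [[[0, 0], [1, 1]]].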

    def deactivate_old_forecasts(self):
        """
        Deactivates previously added forecasts for a specific storm name.
        :return: `bool` of deactivation status
        """
        query = {
            "script": "ctx._source.properties.active=false",
            "query": {
                "bool": {
                    "must": [
                        {"match": {"properties.STORMNAME": self.storm_name}},
                        {"match": {"properties.active": True}},
                    ]
                }
            }
        }

        try:
            self.conn.Elasticsearch.update_by_query(index=INDEX_NAME.format(
                self.storm_variable), body=query)
        except ConflictError:
            LOGGER.warning("Conflict error detected. Refreshing index and "
                           "retrying update by query.")
            self.conn.Elasticsearch.indices.refresh(index=INDEX_NAME.format(
                self.storm_variable))
            self.conn.Elasticsearch.update_by_query(index=INDEX_NAME.format(
                self.storm_variable), body=query)

        return True

    def generate_geojson_features(self):
        """
        Generates and yields a series of storm forecasts,
        one for each feature in <self.filepath>. Forecasts are returned as
        Elasticsearch bulk API upsert actions, with documents in GeoJSON to
        match the Elasticsearch index mappings.
        :returns: Generator of Elasticsearch actions to upsert the storm
                  forecasts
        """
        driver = ogr.GetDriverByName('ESRI Shapefile')
        filepath = str(self.filepath.resolve())
        data = driver.Open(filepath, 0)
        lyr = data.GetLayer(0)
        file_datetime_str = strftime_rfc3339(self.date_)

        for feature in lyr:
            feature_json = feature.ExportToJson(as_object=True)
            feature_json['properties']['active'] = True
            feature_json['properties'][
                'filename'] = self.filepath.stem
            feature_json['properties'][
                'filedate'] = file_datetime_str  # noqa

            # TODO: Remove once upstream data is patched
            # clean rad consecutive coordinates in geometry (temporary fix)
            if self.storm_variable == 'rad':
                feature_json['geometry'][
                    'coordinates'] = self.clean_consecutive_coordinates(
                    feature_json['geometry']['coordinates'])

            # format pts ADVDATE
            if self.storm_variable == 'pts':
                feature_json['properties']['ADVDATE'] = \
                    strftime_rfc3339(
                        datetime.strptime(
                            feature_json['properties']['ADVDATE'],
                            '%y%m%d/%H%M'
                        )
                    )

            self.items.append(feature_json)

            action = {
                '_id': '{}-{}-{}-{}-{}'.format(self.storm_name,
                                               self.storm_variable,
                                               file_datetime_str,
                                               self.fh,
                                               feature_json['id']),
                '_index': INDEX_NAME.format(self.storm_variable),
                '_op_type': 'update',
                'doc': feature_json,
                'doc_as_upsert': True
            }

            yield action

    def load_data(self, filepath):
        """
        loads data from event to target
        :returns: `bool` of status result
        """

        self.filepath = Path(filepath)

        # set class variables from filename
        self.parse_filename()

        LOGGER.debug('Received file {}'.format(self.filepath))

        # check for shapefile dependencies
        if self.check_shapefile_deps():

            # deactivate old forecasts for current storm name
            self.deactivate_old_forecasts()

            # generate geojson features
            package = self.generate_geojson_features()
            self.conn.submit_elastic_package(package, request_size=80000)

            return True

        else:
            LOGGER.debug("All Shapefile dependencies not found. Ignoring "
                         "file...")
            return False
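
# A minimal usage sketch (assumes a reachable Elasticsearch instance and that
# the .shp/.shx/.dbf/.prj sidecar files all exist; the path below is
# hypothetical):
#
#   loader = HurricanesRealtimeLoader()
#   loader.load_data('/data/hurricanes/20200921_0600Z_TEDDY.pts.shp')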


class ClimateArchiveLoader(BaseLoader):
    """Climate archive loader"""

    def __init__(self, db_conn_string, conn_config=None):
        """initializer"""

        super().__init__()

        self.conn = ElasticsearchConnector(conn_config or {})

        # setup DB connection
        try:
            self.db_conn = cx_Oracle.connect(db_conn_string)
        except Exception as err:
            msg = f'Could not connect to Oracle: {err}'
            LOGGER.critical(msg)
            raise click.ClickException(msg)

        self.cur = self.db_conn.cursor()

    def create_index(self, index):
        """
        Creates the given Elasticsearch index. If the index already exists,
        it is deleted and re-created, along with its mapping.

        :param index: name of the index to be created.
        """

        if index == 'stations':
            mapping = {
                "settings": {"number_of_shards": 1, "number_of_replicas": 0},
                "mappings": {
                    "_meta": {"geomfields": {"geometry": "POINT"}},
                    "properties": {
                        "type": {"type": "text"},
                        "properties": {
                            "properties": {
                                "PROV_STATE_TERR_CODE": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "STN_ID": {"type": "integer"},
                                "STATION_NAME": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "ENG_PROV_NAME": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "FRE_PROV_NAME": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "COUNTRY": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "LATITUDE": {"type": "integer"},
                                "LONGITUDE": {"type": "integer"},
                                "TIMEZONE": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "ELEVATION": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "CLIMATE_IDENTIFIER": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "TC_IDENTIFIER": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "WMO_IDENTIFIER": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "STATION_TYPE": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "NORMAL_CODE": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "PUBLICATION_CODE": {"type": "integer"},
                                "DISPLAY_CODE": {"type": "integer"},
                                "ENG_STN_OPERATOR_ACRONYM": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "FRE_STN_OPERATOR_ACRONYM": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "ENG_STN_OPERATOR_NAME": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "FRE_STN_OPERATOR_NAME": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "HAS_MONTHLY_SUMMARY": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "HAS_NORMALS_DATA": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "DLY_FIRST_DATE": {
                                    "type": "date",
                                    "format": "yyyy-MM-dd HH:mm:ss",
                                },
                                "DLY_LAST_DATE": {
                                    "type": "date",
                                    "format": "yyyy-MM-dd HH:mm:ss",
                                },
                                "FIRST_DATE": {
                                    "type": "date",
                                    "format": "yyyy-MM-dd HH:mm:ss",
                                },
                                "LAST_DATE": {
                                    "type": "date",
                                    "format": "yyyy-MM-dd HH:mm:ss",
                                },
                            }
                        },
                        "geometry": {"type": "geo_shape"},
                    },
                },
            }

            index_name = 'climate_station_information'
            self.conn.create(index_name, mapping, overwrite=True)

        if index == 'normals':
            mapping = {
                "settings": {"number_of_shards": 1, "number_of_replicas": 0},
                "mappings": {
                    "_meta": {"geomfields": {"geometry": "POINT"}},
                    "properties": {
                        "type": {"type": "text"},
                        "properties": {
                            "properties": {
                                "STN_ID": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "MONTH": {"type": "integer"},
                                "VALUE": {"type": "integer"},
                                "OCCURRENCE_COUNT": {"type": "integer"},
                                "PUBLICATION_CODE": {"type": "integer"},
                                "CLIMATE_IDENTIFIER": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "NORMAL_CODE": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "NORMAL_ID": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "ID": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "PROVINCE_CODE": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "E_NORMAL_ELEMENT_NAME": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "F_NORMAL_ELEMENT_NAME": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "PERIOD": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "PERIOD_BEGIN": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "PERIOD_END": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "STATION_NAME": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "YEAR_COUNT_NORMAL_PERIOD": {
                                    "type": "integer"
                                },
                                "MAX_DURATION_MISSING_YEARS": {
                                    "type": "integer"
                                },
                                "FIRST_YEAR_NORMAL_PERIOD": {
                                    "type": "integer"
                                },
                                "LAST_YEAR_NORMAL_PERIOD": {"type": "integer"},
                                "FIRST_YEAR": {"type": "integer"},
                                "LAST_YEAR": {"type": "integer"},
                                "TOTAL_OBS_COUNT": {"type": "integer"},
                                "PERCENT_OF_POSSIBLE_OBS": {"type": "integer"},
                                "CURRENT_FLAG": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "FIRST_OCCURRENCE_DATE": {
                                    "type": "date",
                                    "format": "yyyy-MM-dd HH:mm:ss",
                                },
                                "DATE_CALCULATED": {
                                    "type": "date",
                                    "format": "yyyy-MM-dd HH:mm:ss",
                                },
                            }
                        },
                        "geometry": {"type": "geo_shape"},
                    },
                },
            }

            index_name = 'climate_normals_data'
            self.conn.create(index_name, mapping, overwrite=True)

        if index == 'monthly_summary':
            mapping = {
                "settings": {"number_of_shards": 1, "number_of_replicas": 0},
                "mappings": {
                    "_meta": {"geomfields": {"geometry": "POINT"}},
                    "properties": {
                        "type": {"type": "text"},
                        "properties": {
                            "properties": {
                                "CLIMATE_IDENTIFIER": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "STN_ID": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "STATION_NAME": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "ID": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "PROVINCE_CODE": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "LATITUDE": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "LONGITUDE": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "MEAN_TEMPERATURE": {"type": "float"},
                                "NORMAL_MEAN_TEMPERATURE": {"type": "float"},
                                "MAX_TEMPERATURE": {"type": "float"},
                                "MIN_TEMPERATURE": {"type": "float"},
                                "TOTAL_SNOWFALL": {"type": "float"},
                                "NORMAL_SNOWFALL": {"type": "float"},
                                "TOTAL_PRECIPITATION": {"type": "float"},
                                "NORMAL_PRECIPITATION": {"type": "float"},
                                "BRIGHT_SUNSHINE": {"type": "float"},
                                "NORMAL_SUNSHINE": {"type": "float"},
                                "SNOW_ON_GROUND_LAST_DAY": {"type": "float"},
                                "DAYS_WITH_VALID_MIN_TEMP": {
                                    "type": "integer"
                                },
                                "DAYS_WITH_VALID_MEAN_TEMP": {
                                    "type": "integer"
                                },
                                "DAYS_WITH_VALID_MAX_TEMP": {
                                    "type": "integer"
                                },
                                "DAYS_WITH_VALID_SNOWFALL": {
                                    "type": "integer"
                                },
                                "DAYS_WITH_VALID_PRECIP": {"type": "integer"},
                                "DAYS_WITH_VALID_SUNSHINE": {
                                    "type": "integer"
                                },
                                "DAYS_WITH_PRECIP_GE_1MM": {"type": "integer"},
                                "HEATING_DEGREE_DAYS": {"type": "integer"},
                                "COOLING_DEGREE_DAYS": {"type": "integer"},
                                "LOCAL_YEAR": {"type": "integer"},
                                "LOCAL_MONTH": {"type": "integer"},
                                "LAST_UPDATED": {
                                    "type": "date",
                                    "format": "yyyy-MM-dd HH:mm:ss",
                                },
                                "LOCAL_DATE": {
                                    "type": "date",
                                    "format": "yyyy-MM",
                                },
                            }
                        },
                        "geometry": {"type": "geo_shape"},
                    },
                },
            }

            index_name = 'climate_public_climate_summary'
            self.conn.create(index_name, mapping, overwrite=True)

        if index == 'daily_summary':
            mapping = {
                "settings": {"number_of_shards": 1, "number_of_replicas": 0},
                "mappings": {
                    "_meta": {"geomfields": {"geometry": "POINT"}},
                    "properties": {
                        "type": {"type": "text"},
                        "properties": {
                            "properties": {
                                "CLIMATE_IDENTIFIER": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "STN_ID": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "STATION_NAME": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "SOURCE": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "ID": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "MAX_TEMPERATURE_FLAG": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "MIN_TEMPERATURE_FLAG": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "MEAN_TEMPERATURE_FLAG": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "PROVINCE_CODE": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "MAX_REL_HUMIDITY_FLAG": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "MIN_REL_HUMIDITY_FLAG": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "TOTAL_RAIN_FLAG": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "TOTAL_SNOW_FLAG": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "TOTAL_PRECIPITATION_FLAG": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "SNOW_ON_GROUND_FLAG": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "DIRECTION_MAX_GUST_FLAG": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "SPEED_MAX_GUST_FLAG": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "HEATING_DEGREE_DAYS_FLAG": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "COOLING_DEGREE_DAYS_FLAG": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "MEAN_TEMPERATURE": {"type": "float"},
                                "TOTAL_RAIN": {"type": "float"},
                                "MAX_TEMPERATURE": {"type": "float"},
                                "MIN_TEMPERATURE": {"type": "float"},
                                "MAX_REL_HUMIDITY": {"type": "float"},
                                "MIN_REL_HUMIDITY": {"type": "float"},
                                "TOTAL_SNOW": {"type": "float"},
                                "SNOW_ON_GROUND": {"type": "float"},
                                "TOTAL_PRECIPITATION": {"type": "float"},
                                "DIRECTION_MAX_GUST": {"type": "float"},
                                "SPEED_MAX_GUST": {"type": "float"},
                                "HEATING_DEGREE_DAYS": {"type": "integer"},
                                "COOLING_DEGREE_DAYS": {"type": "integer"},
                                "LOCAL_YEAR": {"type": "integer"},
                                "LOCAL_MONTH": {"type": "integer"},
                                "LOCAL_DAY": {"type": "integer"},
                                "LOCAL_DATE": {
                                    "type": "date",
                                    "format": "yyyy-MM-dd HH:mm:ss",
                                },
                            }
                        },
                        "geometry": {"type": "geo_shape"},
                    },
                },
            }

            index_name = 'climate_public_daily_data'
            self.conn.create(index_name, mapping, overwrite=True)

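    # A minimal usage sketch (the loader variable is hypothetical): each
    # index is (re)created before a full bulk load, e.g.:
    #
    #   loader.create_index('monthly_summary')
    #   loader.create_index('daily_summary')
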
    def generate_stations(self):
        """
        Queries stations data from the db, and reformats
        data so it can be inserted into Elasticsearch.

        Returns a generator of dictionaries that represent upsert actions
        into Elasticsearch's bulk API.

        :returns: generator of bulk API upsert actions.
        """

        try:
            self.cur.execute('select * from CCCS_PORTAL.STATION_INFORMATION')
        except Exception as err:
            LOGGER.error(
                f'Could not fetch records from oracle due to: {str(err)}.'
            )

        for row in self.cur:
            insert_dict = dict(zip([x[0] for x in self.cur.description], row))
            for key in insert_dict:
                # This is a quick fix for trailing spaces and should not be
                # here. Data should be fixed on db side.
                try:
                    insert_dict[key] = insert_dict[key].strip()
                except Exception as err:
                    LOGGER.debug(
                        f'Could not strip value {insert_dict[key]} due to '
                        f'{str(err)}, skipping'
                    )

                # Transform Date fields from datetime to string.
                if 'DATE' in key:
                    insert_dict[key] = (
                        str(insert_dict[key])
                        if insert_dict[key] is not None
                        else insert_dict[key]
                    )

            coords = [
                float(insert_dict['LONGITUDE_DECIMAL_DEGREES']),
                float(insert_dict['LATITUDE_DECIMAL_DEGREES']),
            ]
            del insert_dict['LONGITUDE_DECIMAL_DEGREES']
            del insert_dict['LATITUDE_DECIMAL_DEGREES']
            climate_identifier = insert_dict['CLIMATE_IDENTIFIER']
            wrapper = {
                'type': 'Feature',
                'properties': insert_dict,
                'geometry': {'type': 'Point', 'coordinates': coords},
            }

            action = {
                '_id': climate_identifier,
                '_index': 'climate_station_information',
                '_op_type': 'update',
                'doc': wrapper,
                'doc_as_upsert': True,
            }
            yield action

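    # Sketch of consuming the generator above with Elasticsearch's bulk
    # helpers (the loader variable is hypothetical; the connector exposes
    # the raw client as .Elasticsearch, as used elsewhere in this module):
    #
    #   from elasticsearch.helpers import streaming_bulk
    #   for ok, response in streaming_bulk(
    #           loader.conn.Elasticsearch,
    #           loader.generate_stations(),
    #           raise_on_error=False):
    #       if not ok:
    #           LOGGER.error(response)
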
    def generate_normals(self, stn_dict, normals_dict, periods_dict):
        """
        Queries normals data from the db, and reformats
        data so it can be inserted into Elasticsearch.

        Returns a generator of dictionaries that represent upsert actions
        into Elasticsearch's bulk API.

        :param stn_dict: mapping of station IDs to station information.
        :param normals_dict: mapping of normal IDs to normals information.
        :param periods_dict: mapping of normal period IDs to
                            normal period information.
        :returns: generator of bulk API upsert actions.
        """

        try:
            self.cur.execute('select * from CCCS_PORTAL.NORMALS_DATA')
        except Exception as err:
            LOGGER.error(
                f'Could not fetch records from oracle due to: {str(err)}.'
            )

        for row in self.cur:
            insert_dict = dict(zip([x[0] for x in self.cur.description], row))

            for key in insert_dict:
                # Transform Date fields from datetime to string.
                if 'DATE' in key:
                    insert_dict[key] = (
                        str(insert_dict[key])
                        if insert_dict[key] is not None
                        else insert_dict[key]
                    )
            insert_dict['ID'] = '{}.{}.{}'.format(
                insert_dict['STN_ID'],
                insert_dict['NORMAL_ID'],
                insert_dict['MONTH'],
            )
            if insert_dict['STN_ID'] in stn_dict:
                coords = stn_dict[insert_dict['STN_ID']]['coordinates']
                insert_dict['STATION_NAME'] = stn_dict[insert_dict['STN_ID']][
                    'STATION_NAME'
                ]
                insert_dict['PROVINCE_CODE'] = stn_dict[insert_dict['STN_ID']][
                    'PROVINCE_CODE'
                ]
                insert_dict['E_NORMAL_ELEMENT_NAME'] = normals_dict[
                    insert_dict['NORMAL_ID']
                ]['E_NORMAL_ELEMENT_NAME']
                insert_dict['F_NORMAL_ELEMENT_NAME'] = normals_dict[
                    insert_dict['NORMAL_ID']
                ]['F_NORMAL_ELEMENT_NAME']
                insert_dict['PERIOD'] = normals_dict[insert_dict['NORMAL_ID']][
                    'PERIOD'
                ]
                insert_dict['PERIOD_BEGIN'] = periods_dict[
                    insert_dict['NORMAL_PERIOD_ID']
                ]['PERIOD_BEGIN']
                insert_dict['PERIOD_END'] = periods_dict[
                    insert_dict['NORMAL_PERIOD_ID']
                ]['PERIOD_END']
                insert_dict['CLIMATE_IDENTIFIER'] = stn_dict[
                    insert_dict['STN_ID']
                ]['CLIMATE_IDENTIFIER']

                del insert_dict['NORMAL_PERIOD_ID']
                wrapper = {
                    'type': 'Feature',
                    'properties': insert_dict,
                    'geometry': {'type': 'Point', 'coordinates': coords},
                }
                action = {
                    '_id': insert_dict['ID'],
                    '_index': 'climate_normals_data',
                    '_op_type': 'update',
                    'doc': wrapper,
                    'doc_as_upsert': True,
                }
                yield action
            else:
                LOGGER.error(
                    f"Bad STN ID: {insert_dict['STN_ID']}, skipping"
                    f" records for this station"
                )

    def generate_monthly_data(self, stn_dict, date=None):
        """
        Queries monthly data from the db, and reformats
        data so it can be inserted into Elasticsearch.

        Returns a generator of dictionaries that represent upsert actions
        into Elasticsearch's bulk API.

        :param stn_dict: mapping of station IDs to station information.
        :param date: date to start fetching data from.
        :returns: generator of bulk API upsert actions.
        """

        if not date:
            try:
                self.cur.execute(
                    'select * from CCCS_PORTAL.PUBLIC_CLIMATE_SUMMARY'
                )
            except Exception as err:
                LOGGER.error(
                    f'Could not fetch records from oracle due to: {str(err)}.'
                )
        else:
            try:
                self.cur.execute(
                    (
                        f"select * from CCCS_PORTAL.PUBLIC_CLIMATE_SUMMARY "
                        f"WHERE LAST_UPDATED > TO_TIMESTAMP("
                        f"'{date} 00:00:00', 'YYYY-MM-DD HH24:MI:SS')"
                    )
                )
            except Exception as err:
                LOGGER.error(
                    f'Could not fetch records from oracle due to: {str(err)}.'
                )

        for row in self.cur:
            insert_dict = dict(zip([x[0] for x in self.cur.description], row))
            # Transform Date fields from datetime to string.
            insert_dict['LAST_UPDATED'] = (
                str(insert_dict['LAST_UPDATED'])
                if insert_dict['LAST_UPDATED'] is not None
                else insert_dict['LAST_UPDATED']
            )

            insert_dict['ID'] = '{}.{}.{}'.format(
                insert_dict['STN_ID'],
                insert_dict['LOCAL_YEAR'],
                insert_dict['LOCAL_MONTH'],
            )
            if insert_dict['STN_ID'] in stn_dict:
                coords = stn_dict[insert_dict['STN_ID']]['coordinates']
                insert_dict['PROVINCE_CODE'] = stn_dict[insert_dict['STN_ID']][
                    'PROVINCE_CODE'
                ]
                wrapper = {
                    'type': 'Feature',
                    'properties': insert_dict,
                    'geometry': {'type': 'Point', 'coordinates': coords},
                }
                action = {
                    '_id': insert_dict['ID'],
                    '_index': 'climate_public_climate_summary',
                    '_op_type': 'update',
                    'doc': wrapper,
                    'doc_as_upsert': True,
                }
                yield action
            else:
                LOGGER.error(
                    f"Bad STN ID: {insert_dict['STN_ID']}, skipping"
                    f" records for this station"
                )

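    # Note: generate_monthly_data interpolates `date` directly into the SQL
    # above; an equivalent sketch using a cx_Oracle bind variable would be:
    #
    #   self.cur.execute(
    #       "select * from CCCS_PORTAL.PUBLIC_CLIMATE_SUMMARY "
    #       "where LAST_UPDATED > TO_TIMESTAMP(:dt, 'YYYY-MM-DD HH24:MI:SS')",
    #       dt=f'{date} 00:00:00',
    #   )
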
    def generate_daily_data(self, stn_dict, date=None):
        """
        Queries daily data from the db, and reformats
        data so it can be inserted into Elasticsearch.

        Returns a generator of dictionaries that represent upsert actions
        into Elasticsearch's bulk API.

        :param stn_dict: mapping of station IDs to station information.
        :param date: date to start fetching data from.
        :returns: generator of bulk API upsert actions.
        """

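        # note: daily data is fetched one station at a time below, presumably
        # to keep each Oracle result set small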
        for station in stn_dict:
            if not date:
                try:
                    self.cur.execute(
                        f'select * from CCCS_PORTAL.PUBLIC_DAILY_DATA '
                        f'where STN_ID={station}'
                    )
                except Exception as err:
                    LOGGER.error(
                        f'Could not fetch records from oracle due to:'
                        f' {str(err)}.'
                    )
            else:
                try:
                    self.cur.execute(
                        (
                            f"select * from CCCS_PORTAL.PUBLIC_DAILY_DATA "
                            f"where STN_ID={station} and "
                            f"LOCAL_DATE > TO_TIMESTAMP('{date} 00:00:00', "
                            f"'YYYY-MM-DD HH24:MI:SS')"
                        )
                    )
                except Exception as err:
                    LOGGER.error(
                        f'Could not fetch records from oracle due to:'
                        f' {str(err)}.'
                    )

            for row in self.cur:
                insert_dict = dict(
                    zip([x[0] for x in self.cur.description], row)
                )
                # Transform Date fields from datetime to string.
                insert_dict['LOCAL_DATE'] = (
                    str(insert_dict['LOCAL_DATE'])
                    if insert_dict['LOCAL_DATE'] is not None
                    else insert_dict['LOCAL_DATE']
                )

                insert_dict['ID'] = '{}.{}.{}.{}'.format(
                    insert_dict['CLIMATE_IDENTIFIER'],
                    insert_dict['LOCAL_YEAR'],
                    insert_dict['LOCAL_MONTH'],
                    insert_dict['LOCAL_DAY'],
                )
                if insert_dict['STN_ID'] in stn_dict:
                    coords = stn_dict[insert_dict['STN_ID']]['coordinates']
                    insert_dict['PROVINCE_CODE'] = stn_dict[
                        insert_dict['STN_ID']
                    ]['PROVINCE_CODE']
                    insert_dict['STATION_NAME'] = stn_dict[
                        insert_dict['STN_ID']
                    ]['STATION_NAME']
                    wrapper = {
                        'type': 'Feature',
                        'properties': insert_dict,
                        'geometry': {'type': 'Point', 'coordinates': coords},
                    }
                    action = {
                        '_id': insert_dict['ID'],
                        '_index': 'climate_public_daily_data',
                        '_op_type': 'update',
                        'doc': wrapper,
                        'doc_as_upsert': True,
                    }
                    yield action
                else:
                    LOGGER.error(
                        f"Bad STN ID: {insert_dict['STN_ID']}, skipping"
                        f" records for this station"
                    )

    def get_station_data(self, station, starting_from):
        """
        Creates a mapping of station ID to station coordinates, station name,
        climate identifier and province information.

        :param station: `int` station ID to fetch; when falsy, all stations
                        are fetched.
        :param starting_from: if truthy, fetch all stations with
                              STN_ID >= `station` instead of an exact match.

        :returns: A dictionary of dictionaries containing station
                coordinates, names and province information keyed by
                station ID.
        """
        stn_dict = collections.OrderedDict()
        try:
            if station:
                if starting_from:
                    self.cur.execute(
                        (
                            f'select STN_ID, LONGITUDE_DECIMAL_DEGREES, '
                            f'LATITUDE_DECIMAL_DEGREES, ENG_PROV_NAME, '
                            f'FRE_PROV_NAME, PROV_STATE_TERR_CODE, '
                            f'STATION_NAME, CLIMATE_IDENTIFIER '
                            f'from CCCS_PORTAL.STATION_INFORMATION '
                            f'where STN_ID >= {station} '
                            f'order by STN_ID'
                        )
                    )
                else:
                    self.cur.execute(
                        (
                            f'select STN_ID, LONGITUDE_DECIMAL_DEGREES, '
                            f'LATITUDE_DECIMAL_DEGREES, ENG_PROV_NAME, '
                            f'FRE_PROV_NAME, PROV_STATE_TERR_CODE, '
                            f'STATION_NAME, CLIMATE_IDENTIFIER '
                            f'from CCCS_PORTAL.STATION_INFORMATION '
                            f'where STN_ID = {station} '
                            f'order by STN_ID'
                        )
                    )
            else:
                self.cur.execute(
                    (
                        'select STN_ID, LONGITUDE_DECIMAL_DEGREES, '
                        'LATITUDE_DECIMAL_DEGREES, ENG_PROV_NAME, '
                        'FRE_PROV_NAME, PROV_STATE_TERR_CODE, '
                        'STATION_NAME, CLIMATE_IDENTIFIER '
                        'from CCCS_PORTAL.STATION_INFORMATION '
                        'order by STN_ID'
                    )
                )
        except Exception as err:
            LOGGER.error(
                f'Could not fetch records from oracle due to: {str(err)}.'
            )

        for row in self.cur:
            stn_dict[row[0]] = {
                'coordinates': [row[1], row[2]],
                'ENG_PROV_NAME': row[3],
                'FRE_PROV_NAME': row[4],
                'PROVINCE_CODE': row[5].strip(),  # quick fix for trailing
                # spaces; remove once data is fixed on db side
                'STATION_NAME': row[6],
                'CLIMATE_IDENTIFIER': row[7].strip(),
            }
        return stn_dict

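    # Shape of the returned mapping (sketch; values are hypothetical):
    #
    #   {1234: {'coordinates': [-75.7, 45.4],
    #           'ENG_PROV_NAME': 'ONTARIO',
    #           'FRE_PROV_NAME': 'ONTARIO',
    #           'PROVINCE_CODE': 'ON',
    #           'STATION_NAME': 'OTTAWA CDA',
    #           'CLIMATE_IDENTIFIER': '6105976'}}
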
    def get_normals_data(self):
        """
        Creates a mapping of normal ID to English/French element names and
        period.

        :returns: A dictionary of dictionaries containing
                element names and period keyed by normal ID.
        """
        normals_dict = {}
        try:
            self.cur.execute(
                (
                    'select NORMAL_ID, E_NORMAL_ELEMENT_NAME, '
                    'F_NORMAL_ELEMENT_NAME, PERIOD '
                    'from CCCS_PORTAL.VALID_NORMALS_ELEMENTS'
                )
            )
        except Exception as err:
            LOGGER.error(
                f'Could not fetch records from oracle due to: {str(err)}.'
            )

        for row in self.cur:
            normals_dict[row[0]] = {
                'E_NORMAL_ELEMENT_NAME': row[1],
                'F_NORMAL_ELEMENT_NAME': row[2],
                'PERIOD': row[3],
            }
        return normals_dict

    def get_normals_periods(self):
        """
        Creates a mapping of normal period ID to period begin and end.

        :returns: A dictionary of dictionaries containing
                period begin and end keyed by normal period ID.
        """
        period_dict = {}
        try:
            self.cur.execute(
                (
                    'select NORMAL_PERIOD_ID, PERIOD_BEGIN, PERIOD_END '
                    'from CCCS_PORTAL.NORMAL_PERIODS'
                )
            )
        except Exception as err:
            LOGGER.error(
                f'Could not fetch records from oracle due to: {str(err)}.'
            )

        for row in self.cur:
            period_dict[row[0]] = {
                'PERIOD_BEGIN': row[1],
                'PERIOD_END': row[2],
            }
        return period_dict
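
    # Typical composition sketch (the loader variable is hypothetical): the
    # lookup tables built above feed generate_normals:
    #
    #   stn_dict = loader.get_station_data(None, False)
    #   normals_dict = loader.get_normals_data()
    #   periods_dict = loader.get_normals_periods()
    #   actions = loader.generate_normals(stn_dict, normals_dict,
    #                                     periods_dict)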
class LtceLoader(BaseLoader):
    """LTCE data loader"""

    def __init__(self, db_string=None, conn_config={}):
        """initializer"""

        BaseLoader.__init__(self)
        self.conn = ElasticsearchConnector(conn_config)
        self.db_conn = None

        # setup DB connection
        if db_string is not None:
            try:
                self.db_conn = cx_Oracle.connect(db_string)
                self.cur = self.db_conn.cursor()
            except Exception as err:
                msg = 'Could not connect to Oracle: {}'.format(err)
                LOGGER.critical(msg)
                raise click.ClickException(msg)
        else:
            LOGGER.debug("No DB connection string passed. Indexing disabled.")
            self.db_conn = self.cur = None

        for item in MAPPINGS:
            SETTINGS['mappings']['properties']['properties'][
                'properties'
            ] = MAPPINGS[item]
            self.conn.create(INDEX_NAME.format(item), SETTINGS)

    def get_stations_info(self, element_name, station_id):
        """
        Queries LTCE station data for a given element name (DAILY MINIMUM
        TEMPERATURE, DAILY MAXIMUM TEMPERATURE, etc.), and virtual station ID.
        Returns the earliest start date of all returned stations, and the
        end date, climate identifier, and coordinates of the most recently
        threaded station.
        :param element_name: `str` of element name
        :param station_id: `str` of virtual climate station id
        :return: `dict` of stations information
        """
        query = {
            "query": {
                "bool": {
                    "filter": {
                        "bool": {
                            "must": [
                                {
                                    "term": {
                                        "properties.VIRTUAL_CLIMATE_ID.raw": station_id  # noqa
                                    }
                                },
                                {
                                    "term": {
                                        "properties.ELEMENT_NAME_E.raw": element_name  # noqa
                                    }
                                },
                            ]
                        }
                    }
                }
            }
        }

        results = self.conn.Elasticsearch.search(
            body=query,
            index='ltce_stations',
            _source=[
                'properties.CLIMATE_IDENTIFIER',
                'properties.ENG_STN_NAME',
                'properties.FRE_STN_NAME',
                'properties.START_DATE',
                'properties.END_DATE',
                'properties.PROVINCE_CODE',
                'geometry.coordinates',
            ],
        )

        results = [result['_source'] for result in results['hits']['hits']]

        oldest_station = None
        most_recent_station = None

        for index, station in enumerate(results):
            # retrieve station start and end date
            dates = (
                station['properties']['START_DATE'],
                station['properties']['END_DATE'],
            )

            # convert station dates to datetime objects
            (
                station['properties']['START_DATE'],
                station['properties']['END_DATE'],
            ) = (start_date, end_date) = [
                datetime.strptime(date, DATETIME_RFC3339_FMT)
                if date is not None
                else None
                for date in dates
            ]

            # assign first station as oldest and most recent
            if index == 0:
                oldest_station = station
                most_recent_station = station
                continue

            # then compare all remaining stations and replace as necessary
            if start_date < oldest_station['properties']['START_DATE']:
                oldest_station = station
            if most_recent_station['properties']['END_DATE'] is not None and (
                end_date is None
                or end_date > most_recent_station['properties']['END_DATE']
            ):
                most_recent_station = station

        stations_info = {
            'record_begin': strftime_rfc3339(
                oldest_station['properties']['START_DATE']
            ),
            'record_end': strftime_rfc3339(
                most_recent_station['properties']['END_DATE']
            )
            if most_recent_station['properties']['END_DATE']
            else None,
            'climate_identifier': most_recent_station['properties'][
                'CLIMATE_IDENTIFIER'
            ],
            'eng_stn_name': most_recent_station['properties']['ENG_STN_NAME'],
            'fre_stn_name': most_recent_station['properties']['FRE_STN_NAME'],
            'coords': [
                most_recent_station['geometry']['coordinates'][0],
                most_recent_station['geometry']['coordinates'][1],
            ],
            'province_code': most_recent_station['properties'][
                'PROVINCE_CODE'
            ],
        }

        return stations_info

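    # Result shape (sketch; values are hypothetical, dates RFC3339 strings):
    #
    #   {'record_begin': '1872-03-01T00:00:00Z', 'record_end': None,
    #    'climate_identifier': '6105976', 'eng_stn_name': 'OTTAWA CDA',
    #    'fre_stn_name': 'OTTAWA CDA', 'coords': [-75.7, 45.4],
    #    'province_code': 'ON'}
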
    def generate_stations(self):
        """
        Queries stations data from the db, and reformats
        data so it can be inserted into Elasticsearch.

        Returns a generator of dictionaries that represent upsert actions
        into Elasticsearch's bulk API.

        :returns: generator of bulk API upsert actions.
        """

        try:
            self.cur.execute(
                (
                    "SELECT ARKEON2DWH.VIRTUAL_STATION_INFO_F_MVW.*,"
                    "ARKEON2DWH.STATION_INFORMATION.ENG_STN_NAME,"
                    "ARKEON2DWH.STATION_INFORMATION.FRE_STN_NAME,"
                    "ARKEON2DWH.WXO_CITY_INFORMATION_MVW.LAT,"
                    "ARKEON2DWH.WXO_CITY_INFORMATION_MVW.LON,"
                    "ARKEON2DWH.WXO_CITY_INFORMATION_MVW.PROVINCECODE "
                    "FROM ARKEON2DWH.VIRTUAL_STATION_INFO_F_MVW "
                    "LEFT JOIN ARKEON2DWH.STATION_INFORMATION "
                    "ON ARKEON2DWH.VIRTUAL_STATION_INFO_F_MVW.STN_ID = "
                    "ARKEON2DWH.STATION_INFORMATION.STN_ID "
                    "LEFT JOIN ARKEON2DWH.WXO_CITY_INFORMATION_MVW "
                    "ON ARKEON2DWH.VIRTUAL_STATION_INFO_F_MVW.WXO_CITY_CODE = "
                    "ARKEON2DWH.WXO_CITY_INFORMATION_MVW.CITYCODE "
                    "WHERE "
                    "ARKEON2DWH.VIRTUAL_STATION_INFO_F_MVW.ELEMENT_NAME_E IN "
                    "('DAILY MINIMUM TEMPERATURE', 'DAILY MAXIMUM TEMPERATURE',"  # noqa
                    "'DAILY TOTAL PRECIPITATION', 'DAILY TOTAL SNOWFALL')"
                )
            )
        except Exception as err:
            LOGGER.error(
                'Could not fetch records from oracle due to: {}.'.format(
                    str(err)
                )
            )

        for row in self.cur:
            insert_dict = dict(zip([x[0] for x in self.cur.description], row))
            for key in insert_dict:
                if key in ['START_DATE', 'END_DATE']:
                    insert_dict[key] = (
                        strftime_rfc3339(insert_dict[key])
                        if insert_dict[key] is not None
                        else insert_dict[key]
                    )

            es_id = slugify(
                '{}-{}-{}-{}-{}'.format(
                    insert_dict['VIRTUAL_CLIMATE_ID'],
                    insert_dict["ELEMENT_NAME_E"],
                    insert_dict["CLIMATE_IDENTIFIER"],
                    insert_dict["START_DATE"],
                    insert_dict["END_DATE"],
                )
            )

            coords = [
                float(insert_dict['LON']),
                float(insert_dict['LAT']),
            ]

            # rename PROVINCECODE field to PROVINCE_CODE
            insert_dict['PROVINCE_CODE'] = insert_dict['PROVINCECODE']

            # cleanup unwanted fields retained from SQL join
            fields_to_delete = [
                'STN_ID',
                'ENG_PROV_NAME',
                'FRE_PROV_NAME',
                'REGION_CODE',
                'CRITERIA',
                'NOTES',
                'VIRTUAL_STN_INFO_UPDATE_ID',
                'CURRENT_FLAG',
                'LON',
                'LAT',
                'PROVINCECODE',
            ]
            for field in fields_to_delete:
                insert_dict.pop(field)

            # set properties.IDENTIFIER
            insert_dict['IDENTIFIER'] = es_id

            wrapper = {
                'id': es_id,
                'type': 'Feature',
                'properties': insert_dict,
                'geometry': {'type': 'Point', 'coordinates': coords},
            }

            action = {
                '_id': es_id,
                '_index': 'ltce_stations',
                '_op_type': 'update',
                'doc': wrapper,
                'doc_as_upsert': True,
            }

            yield action

    def generate_daily_temp_extremes(self):
        """
        Queries daily temperature extremes data from the db, and reformats
        it so it can be inserted into Elasticsearch.

        Returns a generator of dictionaries that represent upsert actions
        into Elasticsearch's bulk API.

        :returns: generator of bulk API upsert actions.
        """

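        # The query below joins the eight record/extreme virtual temperature
        # tables on VIRTUAL_CLIMATE_ID + LOCAL_MONTH + LOCAL_DAY, so each row
        # carries every min/max record value for one station-day.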
        try:
            self.cur.execute(
                (
                    "SELECT t1.*, t2.*, t3.*, t4.*, t5.*, t6.*, t7.*, t8.* "
                    "FROM ARKEON2DWH.RECORD_HIGH_VIRTUAL_MAX_TEMP t1 "
                    "JOIN ARKEON2DWH.RECORD_LOW_VIRTUAL_MAX_TEMP t2 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t2.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t2.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t2.LOCAL_DAY "
                    "JOIN ARKEON2DWH.RECORD_LOW_VIRTUAL_MIN_TEMP t3 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t3.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t3.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t3.LOCAL_DAY "
                    "JOIN ARKEON2DWH.RECORD_HIGH_VIRTUAL_MIN_TEMP t4 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t4.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t4.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t4.LOCAL_DAY "
                    "JOIN ARKEON2DWH.EXTREME_HIGH_VIRTUAL_MAX_TEMP t5 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t5.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t5.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t5.LOCAL_DAY "
                    "JOIN ARKEON2DWH.EXTREME_LOW_VIRTUAL_MAX_TEMP t6 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t6.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t6.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t6.LOCAL_DAY "
                    "JOIN ARKEON2DWH.EXTREME_HIGH_VIRTUAL_MIN_TEMP t7 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t7.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t7.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t7.LOCAL_DAY "
                    "JOIN ARKEON2DWH.EXTREME_LOW_VIRTUAL_MIN_TEMP t8 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t8.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t8.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t8.LOCAL_DAY "
                )
            )
        except Exception as err:
            LOGGER.error(
                'Could not fetch records from oracle due to: {}.'.format(
                    str(err)
                )
            )

        # dictionary to store stations information once retrieved
        stations_dict = {}
        for row in self.cur:
            insert_dict = dict(zip([x[0] for x in self.cur.description], row))

            for key in insert_dict:
                if key in ['LAST_UPDATED']:
                    insert_dict[key] = (
                        strftime_rfc3339(insert_dict[key])
                        if insert_dict[key] is not None
                        else insert_dict[key]
                    )

            virtual_climate_id = insert_dict['VIRTUAL_CLIMATE_ID']
            es_id = '{}-{}-{}'.format(
                insert_dict['VIRTUAL_CLIMATE_ID'],
                insert_dict["LOCAL_MONTH"],
                insert_dict["LOCAL_DAY"],
            )
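            # e.g. with hypothetical values VIRTUAL_CLIMATE_ID='8202000',
            # LOCAL_MONTH=7 and LOCAL_DAY=1, es_id becomes '8202000-7-1'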

            # retrieve station record begin/end info and cache it in
            # stations_dict if not already fetched
            if virtual_climate_id not in stations_dict:
                stations_dict[virtual_climate_id] = {}
                stations_dict[virtual_climate_id][
                    'MIN'
                ] = self.get_stations_info(
                    'DAILY MINIMUM TEMPERATURE', virtual_climate_id
                )
                stations_dict[virtual_climate_id][
                    'MAX'
                ] = self.get_stations_info(
                    'DAILY MAXIMUM TEMPERATURE', virtual_climate_id
                )

            # check if TEMPERATURE MIN/MAX for most recent threaded station
            # have the same climate identifier value
            min_climate_identifier = stations_dict[virtual_climate_id]['MIN'][
                'climate_identifier'
            ]
            max_climate_identifier = stations_dict[virtual_climate_id]['MAX'][
                'climate_identifier'
            ]

            if min_climate_identifier == max_climate_identifier:
                insert_dict['CLIMATE_IDENTIFIER'] = stations_dict[
                    virtual_climate_id
                ]['MAX']['climate_identifier']
                insert_dict['ENG_STN_NAME'] = stations_dict[
                    virtual_climate_id
                ]['MAX']['eng_stn_name']
                insert_dict['FRE_STN_NAME'] = stations_dict[
                    virtual_climate_id
                ]['MAX']['fre_stn_name']
                insert_dict['PROVINCE_CODE'] = stations_dict[
                    virtual_climate_id
                ]['MAX']['province_code']

            else:
                LOGGER.error(
                    f'Currently threaded station climate identifier value '
                    f'does not match between DAILY MINIMUM TEMPERATURE '
                    f'({min_climate_identifier}) and DAILY MAXIMUM '
                    f'TEMPERATURE ({max_climate_identifier}) station threads '
                    f'for virtual climate ID {virtual_climate_id}.'
                )
                continue

            # set new fields
            for level in ['MIN', 'MAX']:
                # set new insert_dict keys
                insert_dict[
                    '{}_TEMP_RECORD_BEGIN'.format(level)
                ] = stations_dict[virtual_climate_id][level]['record_begin']
                insert_dict[
                    '{}_TEMP_RECORD_END'.format(level)
                ] = stations_dict[virtual_climate_id][level]['record_end']

            # cleanup unwanted fields retained from SQL join
            fields_to_delete = [
                'LOCAL_TIME',
                'VIRTUAL_MEAS_DISPLAY_CODE',
                'ENG_STN_NAME',
                'FRE_STN_NAME',
                'CLIMATE_IDENTIFIER',
            ]
            for field in fields_to_delete:
                insert_dict.pop(field)

            # set properties.IDENTIFIER
            insert_dict['IDENTIFIER'] = es_id

            wrapper = {
                'id': es_id,
                'type': 'Feature',
                'properties': insert_dict,
                'geometry': {
                    'type': 'Point',
                    'coordinates': stations_dict[virtual_climate_id]['MAX'][
                        'coords'
                    ],
                },
            }

            action = {
                '_id': es_id,
                '_index': 'ltce_temp_extremes',
                '_op_type': 'update',
                'doc': wrapper,
                'doc_as_upsert': True,
            }

            yield action

    def generate_daily_precip_extremes(self):
        """
        Queries daily precipitation extremes data from the db, and reformats
        it so it can be inserted into Elasticsearch.

        Returns a generator of dictionaries that represent upsert actions
        into Elasticsearch's bulk API.

        :returns: generator of bulk API upsert actions.
        """

        try:
            self.cur.execute(
                (
                    "SELECT t1.*, t2.* "
                    "FROM ARKEON2DWH.RECORD_VIRTUAL_PRECIPITATION t1 "
                    "JOIN ARKEON2DWH.EXTREME_VIRTUAL_PRECIPITATION t2 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t2.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t2.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t2.LOCAL_DAY "
                )
            )
        except Exception as err:
            LOGGER.error(
                'Could not fetch records from oracle due to: {}.'.format(
                    str(err)
                )
            )

        stations_dict = {}

        for row in self.cur:
            insert_dict = dict(zip([x[0] for x in self.cur.description], row))

            for key in insert_dict:
                if key in ['LAST_UPDATED']:
                    insert_dict[key] = (
                        strftime_rfc3339(insert_dict[key])
                        if insert_dict[key] is not None
                        else insert_dict[key]
                    )

            virtual_climate_id = insert_dict['VIRTUAL_CLIMATE_ID']
            es_id = '{}-{}-{}'.format(
                insert_dict['VIRTUAL_CLIMATE_ID'],
                insert_dict["LOCAL_MONTH"],
                insert_dict["LOCAL_DAY"],
            )

            # retrieve and cache station record begin/end info if needed
            if virtual_climate_id not in stations_dict:
                stations_dict[virtual_climate_id] = self.get_stations_info(
                    'DAILY TOTAL PRECIPITATION', virtual_climate_id
                )

            insert_dict['RECORD_BEGIN'] = stations_dict[virtual_climate_id][
                'record_begin'
            ]
            insert_dict['RECORD_END'] = stations_dict[virtual_climate_id][
                'record_end'
            ]

            insert_dict['CLIMATE_IDENTIFIER'] = stations_dict[
                virtual_climate_id
            ]['climate_identifier']
            insert_dict['ENG_STN_NAME'] = stations_dict[virtual_climate_id][
                'eng_stn_name'
            ]
            insert_dict['FRE_STN_NAME'] = stations_dict[virtual_climate_id][
                'fre_stn_name'
            ]

            insert_dict['PROVINCE_CODE'] = stations_dict[virtual_climate_id][
                'province_code'
            ]
            # cleanup unwanted fields retained from SQL join
            fields_to_delete = [
                'LOCAL_TIME',
                'VIRTUAL_MEAS_DISPLAY_CODE',
                'ENG_STN_NAME',
                'FRE_STN_NAME',
                'CLIMATE_IDENTIFIER',
                'LAST_UPDATED',
            ]
            for field in fields_to_delete:
                insert_dict.pop(field)

            # set properties.IDENTIFIER
            insert_dict['IDENTIFIER'] = es_id

            wrapper = {
                'id': es_id,
                'type': 'Feature',
                'properties': insert_dict,
                'geometry': {
                    'type': 'Point',
                    'coordinates': stations_dict[virtual_climate_id]['coords'],
                },
            }

            action = {
                '_id': es_id,
                '_index': 'ltce_precip_extremes',
                '_op_type': 'update',
                'doc': wrapper,
                'doc_as_upsert': True,
            }

            yield action

    def generate_daily_snow_extremes(self):
        """
        Queries daily snowfall extremes data from the db, and reformats
        it so it can be inserted into Elasticsearch.

        Returns a generator of dictionaries that represent upsert actions
        into Elasticsearch's bulk API.

        :returns: generator of bulk API upsert actions.
        """

        try:
            self.cur.execute(
                (
                    "SELECT t1.*, t2.* "
                    "FROM ARKEON2DWH.RECORD_VIRTUAL_SNOWFALL t1 "
                    "JOIN ARKEON2DWH.EXTREME_VIRTUAL_SNOWFALL t2 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t2.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t2.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t2.LOCAL_DAY "
                )
            )
        except Exception as err:
            LOGGER.error(
                'Could not fetch records from oracle due to: {}.'.format(
                    str(err)
                )
            )

        stations_dict = {}

        for row in self.cur:
            insert_dict = dict(zip([x[0] for x in self.cur.description], row))

            for key in insert_dict:
                if key in ['LAST_UPDATED']:
                    insert_dict[key] = (
                        strftime_rfc3339(insert_dict[key])
                        if insert_dict[key] is not None
                        else insert_dict[key]
                    )

            virtual_climate_id = insert_dict['VIRTUAL_CLIMATE_ID']
            es_id = '{}-{}-{}'.format(
                insert_dict['VIRTUAL_CLIMATE_ID'],
                insert_dict["LOCAL_MONTH"],
                insert_dict["LOCAL_DAY"],
            )

            # retrieve and cache station record begin/end info if needed
            if virtual_climate_id not in stations_dict:
                stations_dict[virtual_climate_id] = self.get_stations_info(
                    'DAILY TOTAL SNOWFALL', virtual_climate_id
                )

            insert_dict['RECORD_BEGIN'] = stations_dict[virtual_climate_id][
                'record_begin'
            ]
            insert_dict['RECORD_END'] = stations_dict[virtual_climate_id][
                'record_end'
            ]

            insert_dict['CLIMATE_IDENTIFIER'] = stations_dict[
                virtual_climate_id
            ]['climate_identifier']
            insert_dict['ENG_STN_NAME'] = stations_dict[virtual_climate_id][
                'eng_stn_name'
            ]
            insert_dict['FRE_STN_NAME'] = stations_dict[virtual_climate_id][
                'fre_stn_name'
            ]

            insert_dict['PROVINCE_CODE'] = stations_dict[virtual_climate_id][
                'province_code'
            ]

            # cleanup unwanted fields retained from SQL join
            fields_to_delete = [
                'LOCAL_TIME',
                'VIRTUAL_MEAS_DISPLAY_CODE',
                'ENG_STN_NAME',
                'FRE_STN_NAME',
                'CLIMATE_IDENTIFIER',
                'LAST_UPDATED',
            ]
            for field in fields_to_delete:
                insert_dict.pop(field)

            # set properties.IDENTIFIER
            insert_dict['IDENTIFIER'] = es_id

            wrapper = {
                'id': es_id,
                'type': 'Feature',
                'properties': insert_dict,
                'geometry': {
                    'type': 'Point',
                    'coordinates': stations_dict[virtual_climate_id]['coords'],
                },
            }

            action = {
                '_id': es_id,
                '_index': 'ltce_snow_extremes',
                '_op_type': 'update',
                'doc': wrapper,
                'doc_as_upsert': True,
            }

            yield action
class AhccdLoader(BaseLoader):
    """AHCCD Loader"""

    def __init__(self, conn_config={}):
        """initializer"""

        super().__init__()

        self.conn = ElasticsearchConnector(conn_config)

    def create_index(self, index):
        """
        Creates the Elasticsearch index at self.conn. If the index already
        exists, it is deleted and re-created. The mappings for the two types
        are also created.

        :param index: Identifier for the index to be created.
        """

        if index == 'annual':
            mapping = {
                "settings": {"number_of_shards": 1, "number_of_replicas": 0},
                "mappings": {
                    "_meta": {"geomfields": {"geometry": "POINT"}},
                    "properties": {
                        "type": {"type": "text"},
                        "properties": {
                            "properties": {
                                "identifier__identifiant": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "lat__lat": {"type": "float"},
                                "lon__long": {"type": "float"},
                                "province__province": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "period_group__groupe_periode": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "period_value__valeur_periode": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "pressure_sea_level__pression_niveau_mer": {
                                    "type": "float"
                                },
                                "pressure_sea_level_units__pression_niveau_mer_unite": {  # noqa
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "pressure_station__pression_station": {
                                    "type": "float"
                                },
                                "pressure_station_units__pression_station_unites": {  # noqa
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "rain__pluie": {"type": "float"},
                                "rain_units__pluie_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "snow__neige": {"type": "float"},
                                "snow_units__neige_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "station_id__id_station": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "temp_max__temp_max": {"type": "float"},
                                "temp_max_units__temp_max_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "temp_mean__temp_moyenne": {"type": "float"},
                                "temp_mean_units__temp_moyenne_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "temp_min__temp_min": {"type": "float"},
                                "temp_min_units__temp_min_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "total_precip__precip_totale": {
                                    "type": "float"
                                },
                                "total_precip_units__precip_totale_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "wind_speed__vitesse_vent": {"type": "float"},
                                "wind_speed_units__vitesse_vent_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "year__annee": {"type": "integer"},
                            }
                        },
                        "geometry": {"type": "geo_shape"},
                    },
                },
            }

            self.conn.create('ahccd_annual', mapping=mapping, overwrite=True)

        if index == 'monthly':
            mapping = {
                "settings": {"number_of_shards": 1, "number_of_replicas": 0},
                "mappings": {
                    "_meta": {"geomfields": {"geometry": "POINT"}},
                    "properties": {
                        "type": {"type": "text"},
                        "properties": {
                            "properties": {
                                "date": {
                                    "type": "date",
                                    "format": "yyyy-MM||yyyy",
                                },
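                                # the yyyy-MM||yyyy format above lets the
                                # date field accept either year-month or
                                # bare-year values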
                                "identifier__identifiant": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "lat__lat": {"type": "float"},
                                "lon__long": {"type": "float"},
                                "province__province": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "period_group__groupe_periode": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "period_value__valeur_periode": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "pressure_sea_level__pression_niveau_mer": {
                                    "type": "float"
                                },
                                "pressure_sea_level_units__pression_niveau_mer_unite": {  # noqa
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "pressure_station__pression_station": {
                                    "type": "float"
                                },
                                "pressure_station_units__pression_station_unites": {  # noqa
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "rain__pluie": {"type": "float"},
                                "rain_units__pluie_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "snow__neige": {"type": "float"},
                                "snow_units__neige_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "station_id__id_station": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "temp_max__temp_max": {"type": "float"},
                                "temp_max_units__temp_max_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "temp_mean__temp_moyenne": {"type": "float"},
                                "temp_mean_units__temp_moyenne_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "temp_min__temp_min": {"type": "float"},
                                "temp_min_units__temp_min_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "total_precip__precip_totale": {
                                    "type": "float"
                                },
                                "total_precip_units__precip_totale_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "wind_speed__vitesse_vent": {"type": "float"},
                                "wind_speed_units__vitesse_vent_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "year__annee": {"type": "integer"},
                            }
                        },
                        "geometry": {"type": "geo_shape"},
                    },
                },
            }

            self.conn.create('ahccd_monthly', mapping=mapping, overwrite=True)

        if index == 'seasonal':
            mapping = {
                "settings": {"number_of_shards": 1, "number_of_replicas": 0},
                "mappings": {
                    "_meta": {"geomfields": {"geometry": "POINT"}},
                    "properties": {
                        "type": {"type": "text"},
                        "properties": {
                            "properties": {
                                "identifier__identifiant": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "lat__lat": {"type": "float"},
                                "lon__long": {"type": "float"},
                                "province__province": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "period_group__groupe_periode": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "period_value__valeur_periode": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "pressure_sea_level__pression_niveau_mer": {
                                    "type": "float"
                                },
                                "pressure_sea_level_units__pression_niveau_mer_unite": {  # noqa
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "pressure_station__pression_station": {
                                    "type": "float"
                                },
                                "pressure_station_units__pression_station_unites": {  # noqa
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "rain__pluie": {"type": "float"},
                                "rain_units__pluie_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "snow__neige": {"type": "float"},
                                "snow_units__neige_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "station_id__id_station": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "temp_max__temp_max": {"type": "float"},
                                "temp_max_units__temp_max_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "temp_mean__temp_moyenne": {"type": "float"},
                                "temp_mean_units__temp_moyenne_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "temp_min__temp_min": {"type": "float"},
                                "temp_min_units__temp_min_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "total_precip__precip_totale": {
                                    "type": "float"
                                },
                                "total_precip_units__precip_totale_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "wind_speed__vitesse_vent": {"type": "float"},
                                "wind_speed_units__vitesse_vent_unites": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "year__annee": {"type": "integer"},
                            }
                        },
                        "geometry": {"type": "geo_shape"},
                    },
                },
            }

            self.conn.create('ahccd_seasonal', mapping=mapping, overwrite=True)

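        # the stations and trends indexes below share an identical
        # property mapping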
        if index == 'stations':
            mapping = {
                "settings": {"number_of_shards": 1, "number_of_replicas": 0},
                "mappings": {
                    "_meta": {"geomfields": {"geometry": "POINT"}},
                    "properties": {
                        "type": {"type": "text"},
                        "properties": {
                            "properties": {
                                "identifier__identifiant": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "station_id__id_station": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "station_name__nom_station": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "measurement_type__type_mesure": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "period__periode": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "trend_value__valeur_tendance": {
                                    "type": "float"
                                },
                                "elevation__elevation": {"type": "float"},
                                "province__province": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "joined__rejoint": {"type": "integer"},
                                "year_range__annees": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                            }
                        },
                        "geometry": {"type": "geo_shape"},
                    },
                },
            }

            self.conn.create('ahccd_stations', mapping=mapping, overwrite=True)

        if index == 'trends':
            mapping = {
                "settings": {"number_of_shards": 1, "number_of_replicas": 0},
                "mappings": {
                    "_meta": {"geomfields": {"geometry": "POINT"}},
                    "properties": {
                        "type": {"type": "text"},
                        "properties": {
                            "properties": {
                                "identifier__identifiant": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "station_id__id_station": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "station_name__nom_station": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "measurement_type__type_mesure": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "period__periode": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "trend_value__valeur_tendance": {
                                    "type": "float"
                                },
                                "elevation__elevation": {"type": "float"},
                                "province__province": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                                "joined__rejoint": {"type": "integer"},
                                "year_range__annees": {
                                    "type": "text",
                                    "fields": {"raw": {"type": "keyword"}},
                                },
                            }
                        },
                        "geometry": {"type": "geo_shape"},
                    },
                },
            }

            self.conn.create('ahccd_trends', mapping=mapping, overwrite=True)

    def generate_docs(self, fp, index):
        """
        Reads AHCCD data from the file at fp and reformats it so it can
        be inserted into Elasticsearch.

        Returns a generator of dictionaries representing upsert actions
        for Elasticsearch's bulk API.

        :param fp: path to the raw JSON data file to load.
        :param index: name of index to load.
        :returns: generator of bulk API upsert actions.
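
        Example (illustrative sketch only; assumes an elasticsearch-py
        client is exposed as ``self.conn.Elasticsearch`` and uses a
        hypothetical ``/path/to/ahccd_annual.json`` input file)::

            from elasticsearch.helpers import streaming_bulk

            for ok, response in streaming_bulk(
                self.conn.Elasticsearch,
                self.generate_docs('/path/to/ahccd_annual.json', 'annual'),
                raise_on_error=False,
            ):
                if not ok:
                    LOGGER.error(response)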
        """

        if index not in [
            'stations',
            'monthly',
            'annual',
            'seasonal',
            'trends',
        ]:
            LOGGER.error(f'Unrecognized AHCCD data type: {index}')
            return

        try:
            with open(fp) as f:
                contents = json.load(f)
        except Exception as err:
            LOGGER.error(f'Could not open JSON file due to: {err}.')
            return

        for record in contents['features']:
            # all AHCCD indexes follow the ahccd_<type> naming convention
            index_name = f'ahccd_{index}'

            if index == 'stations':
                # station documents are keyed by their station ID
                stn_id = record['properties']['station_id__id_station']
                record['properties']['identifier__identifiant'] = stn_id
            elif index == 'monthly':
                # monthly identifiers follow a <station_id>.<year>.<month>
                # pattern; derive the yyyy-MM date value from the year and
                # month components
                year, month = record['properties'][
                    'identifier__identifiant'
                ].split('.')[1:3]
                record['properties']['date'] = f'{year}-{month}'
                del record['properties']['year__annee']
            elif index == 'trends':
                # trend records have no natural identifier, so compose one
                # from the station ID, period and measurement type
                identifier = '{}.{}.{}'.format(
                    record['properties']['station_id__id_station'],
                    record['properties']['period__periode'],
                    record['properties']['measurement_type__type_mesure'],
                )
                record['properties']['identifier__identifiant'] = identifier

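            # wrap each GeoJSON feature in a bulk update action;
            # doc_as_upsert creates the document if it does not already
            # exist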
            action = {
                '_id': record['properties']['identifier__identifiant'],
                '_index': index_name,
                '_op_type': 'update',
                'doc': record,
                'doc_as_upsert': True,
            }
            yield action