Example no. 1
0
    def find_updated_data(self, raster_url_list):
        """
        Filter raster_url_list down to the rasters whose S3 timestamps are
        newer than the 'last_updated' date recorded in the config spreadsheet.

        :param raster_url_list: list of raster URLs to check
        :return: the subset of raster_url_list updated since last_updated
        """
        # Nothing to check -- skip the spreadsheet and S3 round trips entirely
        if not raster_url_list:
            return []

        # Date of the last successful update, stored as m/d/Y in the config sheet
        config_sheet_datetime_text = gs.get_value('tech_title', self.name,
                                                  'last_updated', self.gfw_env)
        config_sheet_datetime = datetime.datetime.strptime(
            config_sheet_datetime_text, '%m/%d/%Y')

        # Source bucket is dataset-specific
        if self.name == 'umd_landsat_alerts':
            bucket = 'gfw-gee-glad-export'
        else:
            bucket = 'terra-i'

        # Map of object name -> last-modified datetime for everything in the bucket
        # (was previously copied into an empty dict via .update(); assign directly)
        bucket_timestamps = aws.get_timestamps(bucket)

        updated_raster_url_list = []

        for raster_url in raster_url_list:

            # Object key is the URL path with the slashes stripped
            raster_name = urlparse.urlparse(raster_url).path.replace('/', '')
            raster_timestamp = bucket_timestamps[raster_name]

            if raster_timestamp > config_sheet_datetime:
                updated_raster_url_list.append(raster_url)

        return updated_raster_url_list
Example no. 2
0
    def check_current_version(self, wdpa_gdb):
        """
        Compare the version of the gdb we just downloaded against the version
        recorded in the metadata spreadsheet.

        Exits the workflow (after logging a 'Checked' message) when the
        metadata is already current; otherwise writes the new version text and
        citation back to the spreadsheet and returns so processing continues.
        :param wdpa_gdb: the unzipped gdb just downloaded from wdpa
        :return:
        """

        # Spreadsheet coordinates for the metadata response sheet
        unique_col = 'Technical Title'
        unique_val = 'wdpa_protected_areas'
        update_col = 'Frequency of Updates'
        sheet_name = 'Form Responses 1'
        gs_key = r'1hJ48cMrADMEJ67L5hTQbT5hhV20YCJHpN1NwjXiC3pI'

        current_version_text = gs.get_value(unique_col, unique_val, update_col,
                                            sheet_name, gs_key)

        # WDPA_<Mon>_<Year>_Public -> <Mon>_<Year> -> (month, year)
        base_name = os.path.splitext(os.path.basename(wdpa_gdb))[0]
        month_year = base_name.replace('WDPA_', '').replace('_Public', '')
        download_month, download_year = self.parse_month_abbrev(month_year)

        # Format to match what's stored in the metadata sheet
        download_version_text = 'Monthly. Current version: {0}, {1}.'.format(
            download_month, download_year)

        if current_version_text == download_version_text:
            # Already up to date -- nothing to process
            logging.info('No new data on the wdpa site')

            # The log-reading/email script looks for this 'Checked' line; it
            # shows we checked the layer but it didn't need updating
            logging.critical('Checked | {0}'.format(self.name))
            sys.exit(0)

        # New version found -- record it in the metadata table and continue
        logging.debug(
            'Current WDPA version text is {0}, downloaded version is {1} Updating '
            'dataset now.'.format(current_version_text,
                                  download_version_text))
        gs.set_value(unique_col, unique_val, update_col, sheet_name,
                     download_version_text, gs_key)

        # Refresh the citation with the new month/year as well
        citation_str = (r'IUCN and UNEP-WCMC ({y}), The World Database on Protected Areas (WDPA) [On-line], '
                        r'{m}, {y}, Cambridge, UK: UNEP-WCMC. Available at: www.protectedplanet.net. Accessed '
                        r'through Global Forest Watch in [insert month/year]. '
                        r'www.globalforestwatch.org').format(m=download_month, y=download_year)

        gs.set_value(unique_col, unique_val, 'Citation', sheet_name,
                     citation_str, gs_key)
Example no. 3
0
    def check_current_version(self, wdpa_gdb):
        """
        Compare the version of the gdb we just downloaded against the version
        recorded in the metadata spreadsheet.

        Exits the workflow (after logging a 'Checked' message) when the
        metadata is already current; otherwise logs the mismatch and returns
        so processing continues.
        :param wdpa_gdb: the unzipped gdb just downloaded from wdpa
        :return:
        """

        # Spreadsheet coordinates for the metadata response sheet
        unique_col = "Technical Title"
        unique_val = "wdpa_protected_areas"
        update_col = "Frequency of Updates"
        sheet_name = "sheet1"
        gs_key = r"1hJ48cMrADMEJ67L5hTQbT5hhV20YCJHpN1NwjXiC3pI"

        current_version_text = gs.get_value(unique_col, unique_val, update_col, sheet_name, gs_key)

        # WDPA_<version>_Public -> <version>, normalized by parse_month_abbrev
        base_name = os.path.splitext(os.path.basename(wdpa_gdb))[0]
        version = self.parse_month_abbrev(
            base_name.replace("WDPA_", "").replace("_Public", ""))

        # Format to match what's stored in the metadata sheet
        download_version_text = "Monthly. Current version: {0}.".format(version)

        if current_version_text != download_version_text:
            # New version found -- log it and continue processing the dataset
            logging.debug(
                "Current WDPA version text is {0}, downloaded version is {1} Updating "
                "dataset now.".format(current_version_text, download_version_text)
            )
            return

        # Already up to date -- nothing to process
        logging.info("No new data on the wdpa site")

        # The log-reading/email script looks for this 'Checked' line; it
        # shows we checked the layer but it didn't need updating
        logging.critical("Checked | {0}".format(self.name))
        sys.exit(0)
    def find_updated_data(self, raster_url_list):
        """
        Filter raster_url_list down to the rasters in the source S3 bucket
        whose timestamps are newer than the 'last_updated' date recorded in
        the config spreadsheet.

        :param raster_url_list: list of raster URLs; the bucket name is taken
                                from the host of the first URL
        :return: the subset of raster_url_list updated since last_updated
        """
        # Guard: raster_url_list[0] below would raise IndexError on an empty
        # list, and there is nothing to check anyway
        if not raster_url_list:
            return []

        # Date of the last successful update, stored as m/d/Y in the config sheet
        config_sheet_datetime_text = gs.get_value('tech_title', self.name, 'last_updated', self.gfw_env)
        config_sheet_datetime = datetime.datetime.strptime(config_sheet_datetime_text, '%m/%d/%Y')

        # Bucket name is the first dotted component of the URL's host,
        # e.g. my-bucket.s3.amazonaws.com -> my-bucket
        netloc = urlparse.urlparse(raster_url_list[0]).netloc
        bucket = netloc.split('.')[0]

        # Map of object name -> last-modified datetime for everything in the bucket
        bucket_timestamps = aws.get_timestamps(bucket)

        updated_raster_url_list = []

        for raster_url in raster_url_list:

            # Object key is the URL path with the slashes stripped
            raster_name = urlparse.urlparse(raster_url).path.replace('/', '')
            raster_timestamp = bucket_timestamps[raster_name]

            if raster_timestamp > config_sheet_datetime:
                updated_raster_url_list.append(raster_url)

        return updated_raster_url_list