Example #1
    def execute(self, context):
        input_paths = context["task_instance"].xcom_pull(self.get_inputs_from,
                                                         key=XCOM_RETURN_KEY)
        if input_paths is None:
            log.info("Nothing to process")
            return None

        output_paths = []
        for input_path in input_paths:
            levels = get_overview_levels(self.max_overview_level)
            log.info("Generating overviews for {!r}...".format(input_path))
            command = get_gdaladdo_command(
                input_path,
                overview_levels=levels,
                resampling_method=self.resampling_method,
                compress_overview=self.compress_overview)
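            # gdaladdo adds overviews to the existing file, so the output
            # path is simply the input path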
            output_path = input_path
            output_paths.append(output_path)
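            # note: env replaces the whole environment of the Bash subprocess;
            # the "$LD_LIBRARY_PATH" in the value is not expanded when the
            # variable is set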
            env = {'LD_LIBRARY_PATH': '/usr/local/lib:$LD_LIBRARY_PATH'}
            bo = BashOperator(task_id='bash_operator_addo_{}'.format(
                os.path.basename(input_path)),
                              env=env,
                              bash_command=command)
            bo.execute(context)

        log.info(output_paths)
        return output_paths
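
The helpers get_overview_levels and get_gdaladdo_command come from elsewhere in the plugin and are not shown on this page; below is a minimal sketch of what they might look like, assuming power-of-two overview levels and the standard gdaladdo CLI layout (both assumptions, not the plugin's actual code):

def get_overview_levels(max_level):
    # Assumed behaviour: successive powers of two up to max_level,
    # e.g. max_level=8 -> [2, 4, 8].
    levels = []
    level = 2
    while level <= max_level:
        levels.append(level)
        level *= 2
    return levels


def get_gdaladdo_command(source, overview_levels, resampling_method,
                         compress_overview=None):
    # Assumed layout of the gdaladdo invocation used above.
    compress = ('--config COMPRESS_OVERVIEW {} '.format(compress_overview)
                if compress_overview else '')
    return 'gdaladdo {}-r {} {} {}'.format(
        compress, resampling_method, source,
        ' '.join(str(level) for level in overview_levels))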
Example #2
    def execute(self, context):
        log.info('--------------------GDAL_PLUGIN Warp running------------')
        task_instance = context['task_instance']
        log.info("""
            target_srs: {}
            tile_size: {}
            overwrite: {}
            dstdir: {}
            get_inputs_from: {}
            """.format(
            self.target_srs,
            self.tile_size,
            self.overwrite,
            self.dstdir,
            self.get_inputs_from,
        )
        )

        dstdir = self.dstdir

        input_paths = task_instance.xcom_pull(self.get_inputs_from, key=XCOM_RETURN_KEY)
        if input_paths is None:
            log.info('Nothing to process')
            return None

        output_paths = []
        overwrite_flag = '-overwrite' if self.overwrite else ''
        for srcfile in input_paths:
            log.info('srcfile: %s', srcfile)
            srcfilename = os.path.basename(srcfile)
            dstfile = os.path.join(dstdir, srcfilename)
            log.info('dstfile: %s', dstfile)

            # build gdalwarp command (using the flag computed above rather
            # than mutating self.overwrite on every iteration)
            gdalwarp_command = (
                    'gdalwarp ' + overwrite_flag + ' -t_srs ' + self.target_srs +
                    ' -co TILED=YES -co BLOCKXSIZE=' + self.tile_size +
                    ' -co BLOCKYSIZE=' + self.tile_size + ' ' + srcfile + ' ' +
                    dstfile
            )
            log.info('The complete GDAL warp command is: %s', gdalwarp_command)
            env = {
                'LD_LIBRARY_PATH': '/usr/local/lib:$LD_LIBRARY_PATH'
            }
            bo = BashOperator(
                task_id="bash_operator_warp",
                env=env,
                bash_command=gdalwarp_command
            )
            bo.execute(context)
            output_paths.append(dstfile)

        return output_paths
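
A note on the command construction: concatenating raw strings breaks as soon as a path or SRS contains spaces or shell metacharacters. Here is a sketch of a quoting-safe builder for the same gdalwarp invocation, using six.moves.shlex_quote to stay compatible with the six dependency these snippets already use (build_gdalwarp_command is an illustrative name, not part of the plugin):

from six.moves import shlex_quote


def build_gdalwarp_command(overwrite, target_srs, tile_size, srcfile, dstfile):
    # Quote every user-supplied argument so it survives the bash -c call.
    parts = ['gdalwarp']
    if overwrite:
        parts.append('-overwrite')
    parts += [
        '-t_srs', shlex_quote(target_srs),
        '-co', 'TILED=YES',
        '-co', 'BLOCKXSIZE={}'.format(tile_size),
        '-co', 'BLOCKYSIZE={}'.format(tile_size),
        shlex_quote(srcfile),
        shlex_quote(dstfile),
    ]
    return ' '.join(parts)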
Example #3
    def execute(self, context):
        input_paths = context["task_instance"].xcom_pull(self.get_inputs_from, key=XCOM_RETURN_KEY)

        if input_paths is None:
            log.info("Nothing to process")
            return None

        # If the message from XCom is a string with a single file path, wrap
        # it in a list
        if isinstance(input_paths, six.string_types):
            input_paths = [input_paths]

        if not input_paths:
            log.info("Nothing to process")
            return None

        log.info(input_paths[0])
        working_dir = os.path.dirname(input_paths[0])
        try:
            os.makedirs(working_dir)
        except OSError as exc:
            if exc.errno == 17:
                pass  # directory already exists
            else:
                raise

        output_paths = []
        for input_path in input_paths:
            output_img_filename = 'translated_{}'.format(
                os.path.basename(input_path))
            output_path = os.path.join(working_dir, output_img_filename)
            output_paths.append(output_path)
            command = get_gdal_translate_command(
                source=input_path, destination=output_path,
                output_type=self.output_type,
                creation_options=self.creation_options
            )

            log.info("The complete GDAL translate command is: {}".format(command))
            env = {
                'LD_LIBRARY_PATH': '/usr/local/lib:$LD_LIBRARY_PATH'
            }
            b_o = BashOperator(
                task_id="bash_operator_translate",
                env=env,
                bash_command=command
            )
            b_o.execute(context)

        log.info(output_paths)
        return output_paths
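
The try/except OSError around os.makedirs is the Python 2 idiom for "create the directory if it is missing"; on Python 3 it collapses to a single call:

import os

# Python 3 equivalent of the try/except around os.makedirs above
os.makedirs(working_dir, exist_ok=True)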
Example #4
    def execute(self, context):
        log.info(context)
        log.info("###########")
        log.info("## RSYNC ##")
        log.info('Host: %s', self.host)
        log.info('User: %s', self.remote_usr)
        log.info('Remote dir: %s', self.remote_dir)
        log.info('SSH Key: %s', self.ssh_key_file)

        # pull the input files from the default XCom key of the
        # get_inputs_from task
        files_str = ""
        files = context['task_instance'].xcom_pull(
            task_ids=self.get_inputs_from, key=XCOM_RETURN_KEY)

        # stop processing if there are no products
        if files is None:
            log.info("Nothing to process.")
            self._do_skip_downstream_tasks(context)
            return

        if isinstance(files, six.string_types):
            files_str = files
        else:
            for f in files:
                files_str += " " + f
            log.info("Retrieving input from task_id '{}'' and key '{}'".format(
                self.get_inputs_from, XCOM_RETURN_KEY))

        bash_command = (
            'rsync -avHPze "ssh -i ' + self.ssh_key_file +
            ' -o StrictHostKeyChecking=no" ' + files_str + ' ' +
            self.remote_usr + '@' + self.host + ':' + self.remote_dir)
        bo = BashOperator(task_id='bash_operator_rsync_',
                          bash_command=bash_command)
        bo.execute(context)

        # construct list of filenames uploaded to remote host
        files_list = files_str.split()
        filenames_list = list(
            os.path.join(self.remote_dir, os.path.basename(path))
            for path in files_list)
        if filenames_list:
            log.info("Uploaded files: {}".format(pprint.pformat(files_list)))
            return filenames_list
        self._do_skip_downstream_tasks(context)
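
self._do_skip_downstream_tasks is defined elsewhere on the operator. A purely hypothetical sketch of one common way such a helper is written in Airflow 1.x, skipping all downstream tasks and ending the current one as skipped (the body below is an assumption, not the plugin's actual code):

from airflow.exceptions import AirflowSkipException

def _do_skip_downstream_tasks(self, context):
    # Hypothetical sketch: mark every downstream task as skipped, then
    # raise so this task itself finishes in the "skipped" state.
    downstream_tasks = context['task'].get_flat_relatives(upstream=False)
    log.info("Skipping downstream tasks: %s",
             [task.task_id for task in downstream_tasks])
    if downstream_tasks:
        # self.skip is available when the operator mixes in SkipMixin
        self.skip(context['dag_run'], context['ti'].execution_date,
                  downstream_tasks)
    raise AirflowSkipException("Nothing to process")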
Example #5
    def execute(self, context):
        products = list()
        ids = []

        if self.input_product is not None:
            log.info("Processing single product: " + self.input_product)
            products.append(self.input_product)
        elif self.get_inputs_from is not None:
            log.info("Getting inputs from: " + self.get_inputs_from)
            inputs = context['task_instance'].xcom_pull(
                task_ids=self.get_inputs_from, key=XCOM_RETURN_KEY)
            for input_product in inputs:
                products.append(input_product)
        else:
            self.downloaded_products = context['task_instance'].xcom_pull(
                'dhus_download_task', key='downloaded_products')
            if self.downloaded_products:
                products = list(self.downloaded_products.keys())
                log.info(self.downloaded_products)
                for p in self.downloaded_products:
                    ids.append(self.downloaded_products[p]["id"])
                # print was a Python 2 statement; log the first key instead
                log.info("downloaded products keys: %s",
                         list(self.downloaded_products.keys())[0])

        if not products:
            log.info("Nothing to process.")
            return

        thumbnail_paths = list()
        for product in products:
            log.info("Processing {}".format(product))
            with s2reader.open(product) as safe_product:
                for granule in safe_product.granules:
                    try:
                        zipf = zipfile.ZipFile(product, 'r')
                        # ZipFile.read() has no mode argument (its second
                        # parameter is a password), so drop the stray 'r'
                        imgdata = zipf.read(granule.pvi_path)
                        img = Blob(imgdata)
                        img = Image(img)
                        img.scale(self.thumb_size_x + 'x' + self.thumb_size_y)
                        img.quality(80)
                        # str.strip() strips characters, not a suffix, so
                        # derive the unzipped product directory via splitext
                        product_dir = os.path.splitext(product)[0]
                        thumbnail_name = os.path.join(product_dir,
                                                      "thumbnail.jpg")
                        if os.path.isdir(product_dir):
                            product_rmdir_cmd = "rm -r {} ".format(product_dir)
                            product_rmdir_BO = BashOperator(
                                task_id="product_rmdir_{}".format(
                                    os.path.basename(product_dir)),
                                bash_command=product_rmdir_cmd)
                            product_rmdir_BO.execute(context)
                        product_mkdir_cmd = "mkdir {} ".format(product_dir)
                        product_mkdir_BO = BashOperator(
                            task_id="product_mkdir_{}".format(
                                os.path.basename(product_dir)),
                            bash_command=product_mkdir_cmd)
                        product_mkdir_BO.execute(context)
                        if self.output_dir is not None:
                            thumbnail_name = os.path.join(
                                self.output_dir, "thumbnail.jpeg")
                            log.info("Writing thumbnail to {}".format(
                                thumbnail_name))
                            img.write(thumbnail_name)
                        else:
                            img.write(str(thumbnail_name))
                        thumbnail_paths.append(thumbnail_name)
                        # XCOM expects a single file so we push it here:
                        context['task_instance'].xcom_push(
                            key='thumbnail_jpeg_abs_path',
                            value=str(thumbnail_name))
                        context['task_instance'].xcom_push(key='ids',
                                                           value=ids)
                        break
                    except Exception as e:  # avoid swallowing KeyboardInterrupt
                        log.error(
                            "Unable to extract thumbnail from {}: {}".format(
                                product, e))
        return thumbnail_paths
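
One caveat that applies throughout these examples: the original product.strip(".zip") does not remove a suffix; str.strip removes any of the listed characters from both ends, which silently corrupts names that start or end with 'z', 'i', 'p', or '.'. The rewrite above therefore uses os.path.splitext. For illustration:

>>> "S2A_zip_poi.zip".strip(".zip")  # strips any of '.', 'z', 'i', 'p'
'S2A_zip_po'
>>> import os
>>> os.path.splitext("S2A_zip_poi.zip")[0]
'S2A_zip_poi'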
Example #6
    def execute(self, context):
        if self.get_inputs_from is not None:
            log.info("Getting inputs from: {}".format(self.get_inputs_from))
            self.downloaded_products, self.archived_products = context[
                'task_instance'].xcom_pull(task_ids=self.get_inputs_from,
                                           key=XCOM_RETURN_KEY)
        else:
            log.info("Getting inputs from: dhus_download_task")
            self.downloaded_products = context['task_instance'].xcom_pull(
                'dhus_download_task', key='downloaded_products')
        if self.downloaded_products is None:
            log.info("Nothing to process.")
            return

        for product in self.downloaded_products.keys():
            log.info("Processing: {}".format(product))
            with s2reader.open(product) as s2_product:
                coords = []
                links = []
                metadata = s2_product._product_metadata
                granule = s2_product.granules[0]
                granule_metadata = granule._metadata
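                # Parse the WKT footprint string, e.g.
                # "POLYGON ((x1 y1, x2 y2, ...))", into a nested list of
                # "x,y" strings; the granule footprints below are parsed
                # the same way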
                product_footprint = [
                    [[m.replace(" ", ",")]
                     for m in str(s2_product.footprint).replace(", ", ",").
                     partition('((')[-1].rpartition('))')[0].split(",")]
                ]
                for item in product_footprint[0]:
                    [x_coordinate, y_coordinate] = item[0].split(",")
                    coords.append([float(x_coordinate), float(y_coordinate)])
                final_metadata_dict = {
                    "type": "Feature",
                    "geometry": {
                        "type": "Polygon",
                        "coordinates": [coords]
                    },
                    "properties": {
                        "eop:identifier":
                            s2_product.manifest_safe_path.rsplit('.SAFE', 1)[0],
                        "timeStart": s2_product.product_start_time,
                        "timeEnd": s2_product.product_stop_time,
                        "originalPackageLocation": os.path.join(
                            self.original_package_download_base_url,
                            os.path.basename(self.archived_products.pop(0))),
                        "thumbnailURL": None,
                        "quicklookURL": None,
                        "eop:parentIdentifier": "SENTINEL2",
                        "eop:productionStatus": None,
                        "eop:acquisitionType": None,
                        "eop:orbitNumber": s2_product.sensing_orbit_number,
                        "eop:orbitDirection": s2_product.sensing_orbit_direction,
                        "eop:track": None,
                        "eop:frame": None,
                        "eop:swathIdentifier": metadata.find(
                            './/Product_Info/Datatake').attrib['datatakeIdentifier'],
                        "opt:cloudCover": int(float(
                            metadata.findtext(".//Cloud_Coverage_Assessment"))),
                        "opt:snowCover": None,
                        "eop:productQualityStatus": None,
                        "eop:productQualityDegradationStatus": None,
                        "eop:processorName": None,
                        "eop:processingCenter": None,
                        "eop:creationDate": None,
                        "eop:modificationDate": None,
                        "eop:processingDate": None,
                        "eop:sensorMode": None,
                        "eop:archivingCenter":
                            granule_metadata.findtext('.//ARCHIVING_CENTRE'),
                        "eop:processingMode": None,
                        "eop:availabilityTime": s2_product.generation_time,
                        "eop:acquisitionStation": None,
                        "eop:acquisitionSubtype": None,
                        "eop:startTimeFromAscendingNode": None,
                        "eop:completionTimeFromAscendingNode": None,
                        "eop:illuminationAzimuthAngle": metadata.findtext(
                            './/Mean_Sun_Angle/AZIMUTH_ANGLE'),
                        "eop:illuminationZenithAngle": metadata.findtext(
                            './/Mean_Sun_Angle/ZENITH_ANGLE'),
                        "eop:illuminationElevationAngle": None,
                        "eop:resolution": None
                    }
                }
                for i in self.bands_res.values():
                    features_list = []
                    granule_counter = 1
                    for granule in s2_product.granules:
                        granule_coords = []
                        granule_coordinates = [[
                            [m.replace(" ", ",")]
                            for m in str(granule.footprint).replace(", ", ",").
                            partition('((')[-1].rpartition('))')[0].split(",")
                        ]]

                        for item in granule_coordinates[0]:
                            [granule_x_coordinate,
                             granule_y_coordinate] = item[0].split(",")
                            granule_coords.append([
                                float(granule_x_coordinate),
                                float(granule_y_coordinate)
                            ])
                        zipped_product = zipfile.ZipFile(product)
                        for file_name in zipped_product.namelist():
                            if (file_name.endswith('.jp2')
                                    and not file_name.endswith('PVI.jp2')):
                                features_list.append({
                                    "type": "Feature",
                                    "geometry": {
                                        "type": "Polygon",
                                        "coordinates": [granule_coords]
                                    },
                                    "properties": {
                                        "location": os.path.join(
                                            self.remote_dir,
                                            granule.granule_path.rsplit("/")[-1],
                                            "IMG_DATA",
                                            file_name.rsplit("/")[-1]),
                                        "band": self.bands_dict[file_name.rsplit(
                                            "/")[-1].rsplit(".")[0][-3:]]
                                    },
                                    "id": "GRANULE.{}".format(granule_counter)
                                })
                                granule_counter += 1
            final_granules_dict = {
                "type": "FeatureCollection",
                "features": features_list
            }

            timeStart = final_metadata_dict["properties"]["timeStart"]
            timeEnd = final_metadata_dict["properties"]["timeEnd"]
            # create description.html and dump it to file
            log.info("Creating description.html")
            tr = TemplatesResolver()
            htmlAbstract = tr.generate_product_abstract({
                "timeStart": timeStart,
                "timeEnd": timeEnd,
                "originalPackageLocation":
                    final_metadata_dict["properties"]["originalPackageLocation"]
            })
            log.debug(pprint.pformat(htmlAbstract))
            final_metadata_dict['htmlDescription'] = htmlAbstract

            # str.strip() strips characters rather than the ".zip" suffix,
            # so use splitext to get the product directory
            product_dir = os.path.splitext(product)[0]
            with open(product_dir + '/description.html',
                      'w') as product_outfile:
                product_outfile.write(htmlAbstract)
            # Note here that the SRID is a property of the granule, not the product
            final_metadata_dict["properties"]["crs"] = granule.srid
            with open(product_dir + '/product.json',
                      'w') as product_outfile:
                json.dump(final_metadata_dict, product_outfile, indent=4)
            with open(product_dir + '/granules.json',
                      'w') as granules_outfile:
                json.dump(final_granules_dict, granules_outfile, indent=4)

            product_identifier = s2_product.manifest_safe_path.rsplit(
                '.SAFE', 1)[0]
            bbox = get_bbox_from_granules_coordinates(granule_coordinates)

            ows_links_dict = create_owslinks_dict(
                product_identifier=product_identifier,
                timestart=timeStart,
                timeend=timeEnd,
                granule_bbox=bbox,
                gs_workspace=self.gs_workspace,
                gs_wms_layer=self.gs_wms_layer,
                gs_wms_width=self.gs_wms_width,
                gs_wms_height=self.gs_wms_height,
                gs_wms_format=self.gs_wms_format,
                gs_wms_version=self.gs_wms_version,
                gs_wfs_featuretype=self.gs_wfs_featuretype,
                gs_wfs_format=self.gs_wfs_format,
                gs_wfs_version=self.gs_wfs_version,
                gs_wcs_coverage_id=self.gs_wcs_coverage_id,
                gs_wcs_scale_i=self.gs_wcs_scale_i,
                gs_wcs_scale_j=self.gs_wcs_scale_j,
                gs_wcs_format=self.gs_wcs_format,
                gs_wcs_version=self.gs_wcs_version,
            )

            log.info("ows links: {}".format(pprint.pformat(ows_links_dict)))

            with open(product_dir + '/owsLinks.json',
                      'w') as owslinks_outfile:
                json.dump(ows_links_dict, owslinks_outfile, indent=4)

        self.custom_archived = []
        for archive_line in self.downloaded_products.keys():
            jp2_files_paths = []
            # extract into the archive path minus its ".zip" suffix
            archive_dir = os.path.splitext(archive_line)[0]
            archived_product = zipfile.ZipFile(archive_line, 'r')
            for file_name in archived_product.namelist():
                if (file_name.endswith('.jp2')
                        and not file_name.endswith('PVI.jp2')):
                    archived_product.extract(file_name, archive_dir)
                    jp2_files_paths.append(
                        os.path.join(archive_dir, file_name))
                if file_name.endswith('MTD_TL.xml'):
                    archived_product.extract(file_name, archive_dir)
                    mtd_tl_xml = os.path.join(archive_dir, file_name)
            tree = ET.parse(mtd_tl_xml)
            root = tree.getroot()
            geometric_info = root.find(
                root.tag.split('}', 1)[0] + "}Geometric_Info")
            tile_geocoding = geometric_info.find("Tile_Geocoding")
            wld_files = []
            prj_files = []
            for jp2_file in jp2_files_paths:
                wld_name = os.path.splitext(jp2_file)[0]
                gdalinfo_cmd = "gdalinfo {} > {}".format(
                    jp2_file, wld_name + ".prj")
                gdalinfo_BO = BashOperator(
                    task_id="bash_operator_gdalinfo_{}".format(wld_name[-3:]),
                    bash_command=gdalinfo_cmd)
                gdalinfo_BO.execute(context)
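                # keep only lines 5-28 of the gdalinfo output, i.e. the
                # projection description that will serve as the .prj sidecar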
                sed_cmd = "sed -i -e '1,4d;29,$d' {}".format(wld_name + ".prj")
                sed_BO = BashOperator(task_id="bash_operator_sed_{}".format(
                    wld_name[-3:]),
                                      bash_command=sed_cmd)
                sed_BO.execute(context)
                prj_files.append(wld_name + ".prj")
                wld_file = open(wld_name + ".wld", "w")
                wld_files.append(wld_name + ".wld")
                for key, value in self.bands_res.items():
                    if wld_name[-3:] in value:
                        element = key
                geo_position = tile_geocoding.find(
                    './/Geoposition[@resolution="{}"]'.format(element))
                wld_file.write(
                    geo_position.find("XDIM").text + "\n" + "0" + "\n" + "0" +
                    "\n")
                wld_file.write(geo_position.find("YDIM").text + "\n")
                wld_file.write(geo_position.find("ULX").text + "\n")
                wld_file.write(geo_position.find("ULY").text + "\n")
                # close explicitly rather than waiting for garbage collection
                wld_file.close()
            parent_dir = os.path.dirname(jp2_files_paths[0])
            self.custom_archived.append(os.path.dirname(parent_dir))
            log.info(os.path.dirname(parent_dir))
        log.info(self.custom_archived)
        context['task_instance'].xcom_push(key='downloaded_products',
                                           value=self.downloaded_products)
        context['task_instance'].xcom_push(
            key='downloaded_products_with_wldprj',
            value=' '.join(self.custom_archived))
        return self.custom_archived
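
For reference, the six lines written to each .wld file above follow the standard world-file layout: pixel X size, two rotation terms (zero here), pixel Y size, then the upper-left corner coordinates. The same logic as a standalone sketch (write_world_file is an illustrative name, not part of the plugin):

def write_world_file(path, x_dim, y_dim, ulx, uly):
    # World file layout: pixel X size, row rotation, column rotation,
    # pixel Y size (negative for north-up images), upper-left X, upper-left Y.
    with open(path, "w") as wld:
        for value in (x_dim, 0, 0, y_dim, ulx, uly):
            wld.write("{}\n".format(value))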