Example #1
    def stream_from_sqs(
        self,
        sqs_queue,
        product: Optional[OutputProduct] = None,
        visibility_timeout: int = 300,
        **kw,
    ) -> Iterator[Task]:
        from odc.aws.queue import get_messages, get_queue
        from ._sqs import SQSWorkToken

        product = self._resolve_product(product)

        # Accept either a queue name or an existing boto3 Queue resource
        if isinstance(sqs_queue, str):
            sqs_queue = get_queue(sqs_queue)

        for msg in get_messages(sqs_queue, visibility_timeout=visibility_timeout, **kw):
            # TODO: switch to JSON for SQS message body
            # Wrap the message so the consumer can acknowledge or release it
            # once processing of the task completes
            token = SQSWorkToken(msg, visibility_timeout)
            tidx = parse_task(msg.body)
            yield self.load_task(tidx, product, source=token)
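
A short consumption sketch: reader below stands in for whatever object
defines stream_from_sqs (its class is not shown in this excerpt), and the
queue name is made up for illustration.

# Each SQS message yields one loaded Task; the message stays invisible to
# other consumers for visibility_timeout seconds while it is handled.
for task in reader.stream_from_sqs("stats-tasks-queue", visibility_timeout=600):
    handle(task)  # handle() is a placeholder for user code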
Example #2
import logging
import sys

from odc.aws.queue import get_messages, get_queue


def cli(queue, to_queue, limit, dryrun):
    """
    Redrive all messages from the given SQS queue to the destination queue
    """

    logging.basicConfig(
        level=logging.INFO,
        format="[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s",
        stream=sys.stdout,
    )

    _log = logging.getLogger(__name__)

    dead_queue = get_queue(queue)
    alive_queue = get_queue(to_queue)

    messages = get_messages(dead_queue)

    count = 0

    # Queue attributes are returned as strings, so convert before comparing
    count_messages = int(dead_queue.attributes.get("ApproximateNumberOfMessages", 0))

    if count_messages == 0:
        _log.info("No messages to redrive")
        return

    if not dryrun:
        for message in messages:
            response = alive_queue.send_message(MessageBody=message.body)
            if response["ResponseMetadata"]["HTTPStatusCode"] == 200:
                message.delete()
                count += 1
                if limit and count >= limit:
                    break
            else:
                _log.error(f"Unable to send message {message} to queue")
        _log.info(f"Completed sending {count} messages to the queue")
    else:
        _log.warning(
            f"DRYRUN enabled, would have pushed approx {count_messages} messages to the queue"
        )
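
A minimal invocation sketch. In the full source cli is presumably wired up
as a command-line entry point (the decorators are not shown here), but the
function body can be exercised directly; the queue names are hypothetical.

# Dry run first: reports the approximate message count without sending
cli(queue="my-dead-letter-queue", to_queue="my-processing-queue",
    limit=None, dryrun=True)

# Real run, capped at 100 redriven messages
cli(queue="my-dead-letter-queue", to_queue="my-processing-queue",
    limit=100, dryrun=False)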
Example #3
    def get_tasks_from_queue(self, queue, limit, queue_timeout):
        """Retrieve messages from the named queue, returning an iterable of (AlchemistTasks, SQS Messages)"""
        alive_queue = get_queue(queue)
        messages = get_messages(alive_queue,
                                limit,
                                visibility_timeout=queue_timeout)

        for message in messages:
            message_body = json.loads(message.body)
            uuid = message_body.get("id", None)
            if uuid is None:
                # This is probably a message created from an SNS notification,
                # so the payload is double JSON encoded
                message_body = json.loads(message_body["Message"])
                uuid = message_body.get("id", None)
            transform = message_body.get("transform", None)

            if transform and transform != self.transform_name:
                _LOG.error(
                    f"Your transform doesn't match the transform in the message. Ignoring {uuid}"
                )
                continue
            task = self.generate_task_by_uuid(uuid)
            if task:
                yield task, message
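
For reference, a sketch of the two message body shapes this loop
distinguishes; the field values are illustrative, not taken from a real
queue.

import json

# Plain SQS message: the dataset id sits at the top level of the body
direct = json.loads('{"id": "some-dataset-uuid", "transform": "wofs"}')

# SNS-delivered message: SQS receives the SNS envelope, and the original
# payload is JSON-encoded a second time under the "Message" key
wrapped = json.loads(
    '{"Type": "Notification", '
    '"Message": "{\\"id\\": \\"some-dataset-uuid\\", \\"transform\\": \\"wofs\\"}"}'
)
inner = json.loads(wrapped["Message"])
assert inner["id"] == "some-dataset-uuid"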
Example #4
import logging
from typing import Tuple

import pandas as pd
from datacube import Datacube
from datacube.index.hl import Doc2Dataset
from toolz import dicttoolz

from odc.aws.queue import get_messages


def queue_to_odc(
    queue,
    dc: Datacube,
    products: list,
    record_path=None,
    transform=None,
    limit=None,
    update=False,
    archive=False,
    allow_unsafe=False,
    odc_metadata_link=False,
    region_code_list_uri=None,
    **kwargs,
) -> Tuple[int, int]:
    ds_success = 0
    ds_failed = 0

    region_codes = None
    if region_code_list_uri:
        try:
            region_codes = set(
                pd.read_csv(region_code_list_uri).values.ravel())
        except FileNotFoundError as e:
            logging.error(f"Could not find region_code file with error: {e}")
        # Covers both a missing file (region_codes still None) and an empty list
        if not region_codes:
            raise SQStoDCException(
                f"Region code list is empty, please check the list at: {region_code_list_uri}"
            )

    doc2ds = Doc2Dataset(dc.index, products=products, **kwargs)

    # This is a generator of messages
    messages = get_messages(queue, limit)

    for message in messages:
        try:
            # Extract metadata from message
            metadata = extract_metadata_from_message(message)
            if archive:
                # Archive metadata
                do_archiving(metadata, dc)
            else:
                if not record_path:
                    # Extract metadata and URI from a STAC or similar
                    # json structure for indexing
                    metadata, uri = handle_json_message(
                        metadata, transform, odc_metadata_link)
                else:
                    # Extract metadata from an S3 bucket notification
                    # or similar for indexing
                    metadata, uri = handle_bucket_notification_message(
                        metadata, record_path)

                # If we have a region_code filter, do it here
                if region_code_list_uri:
                    region_code = dicttoolz.get_in(
                        ["properties", "odc:region_code"], metadata)
                    if region_code not in region_codes:
                        # We don't want to keep this one, so delete the message
                        message.delete()
                        # And fail it...
                        raise SQStoDCException(
                            f"Region code {region_code} not in list of allowed region codes, ignoring this dataset."
                        )

                # Index the dataset; this stays inside the else branch, since
                # the archive path never extracts a URI to index
                do_index_update_dataset(
                    metadata, uri, dc, doc2ds, update, allow_unsafe
                )

            ds_success += 1
            # Success, so delete the message.
            message.delete()
        except SQStoDCException as err:
            logging.error(err)
            ds_failed += 1

    return ds_success, ds_failed
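
A minimal call sketch, assuming a reachable Datacube index; the queue and
product names are hypothetical.

from datacube import Datacube
from odc.aws.queue import get_queue

dc = Datacube()
queue = get_queue("my-indexing-queue")  # hypothetical queue name

added, failed = queue_to_odc(
    queue,
    dc,
    products=["my_product"],  # hypothetical ODC product name
    limit=100,
)
print(f"Indexed {added} datasets, {failed} failures")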