Exemplo n.º 1
0
    def _create_data_stream(self, data):
        """
        Creates the data stream in case of object processing
        """
        extra_get_args = {}

        if 'url' in data:
            url = data['url']
            logger.info('Getting dataset from {}'.format(url.path))
            if url.data_byte_range is not None:
                range_str = 'bytes={}-{}'.format(*url.data_byte_range)
                extra_get_args['Range'] = range_str
                logger.info('Chunk: {} - Range: {}'.format(url.part, extra_get_args['Range']))
            resp = requests.get(url.path, headers=extra_get_args, stream=True)
            url.data_stream = resp.raw

        if 'obj' in data:
            obj = data['obj']
            obj.storage_backend
            storage_handler = Storage(self.pywren_config, obj.storage_backend).get_storage_handler()
            logger.info('Getting dataset from {}://{}/{}'.format(obj.storage_backend, obj.bucket, obj.key))
            if obj.data_byte_range is not None:
                extra_get_args['Range'] = 'bytes={}-{}'.format(*obj.data_byte_range)
                logger.info('Chunk: {} - Range: {}'.format(obj.part, extra_get_args['Range']))
                sb = storage_handler.get_object(obj.bucket, obj.key, stream=True, extra_get_args=extra_get_args)
                obj.data_stream = WrappedStreamingBodyPartition(sb, obj.chunk_size, obj.data_byte_range)
            else:
                obj.data_stream = storage_handler.get_object(obj.bucket, obj.key, stream=True)
Exemplo n.º 2
0
    def _load_object(self, data):
        """
        Loads the object in /tmp in case of object processing
        """
        extra_get_args = {}

        if 'url' in data:
            url = data['url']
            logger.info('Getting dataset from {}'.format(url.path))
            if url.data_byte_range is not None:
                range_str = 'bytes={}-{}'.format(*url.data_byte_range)
                extra_get_args['Range'] = range_str
                logger.info('Chunk: {} - Range: {}'.format(
                    url.part, extra_get_args['Range']))
            resp = requests.get(url.path, headers=extra_get_args, stream=True)
            url.data_stream = resp.raw

        if 'obj' in data:
            obj = data['obj']
            logger.info('Getting dataset from {}://{}/{}'.format(
                obj.backend, obj.bucket, obj.key))

            if obj.backend == self.internal_storage.backend:
                storage = self.internal_storage.storage
            else:
                storage = Storage(pywren_config=self.pywren_config,
                                  storage_backend=obj.backend)

            if obj.data_byte_range is not None:
                extra_get_args['Range'] = 'bytes={}-{}'.format(
                    *obj.data_byte_range)
                logger.info('Chunk: {} - Range: {}'.format(
                    obj.part, extra_get_args['Range']))
                sb = storage.get_object(obj.bucket,
                                        obj.key,
                                        stream=True,
                                        extra_get_args=extra_get_args)
                wsb = WrappedStreamingBodyPartition(sb, obj.chunk_size,
                                                    obj.data_byte_range)
                obj.data_stream = wsb
            else:
                sb = storage.get_object(obj.bucket,
                                        obj.key,
                                        stream=True,
                                        extra_get_args=extra_get_args)
                obj.data_stream = sb