Exemplo n.º 1
0
        def __iter__(self):
            """Yield lines for each date from ``start_date`` through ``end_date``.

            Dates are visited one day at a time and both ends are inclusive;
            nothing is yielded when ``start_date`` is later than ``end_date``.
            Every date gets its own temporary file and its own
            ``ScribeReader``, which is also stored on ``self.reader``.
            """
            day = self.start_date
            while self.end_date >= day:
                # The scratch file only lives for the date being processed.
                with TemporaryFile() as spool:
                    reader = ScribeReader(
                        self.stream_name,
                        s3_connections=[self.s3_connection],
                        ostream=spool,
                    )
                    self.reader = reader

                    # NOTE(review): presumably each step of get_for_date()
                    # writes a batch of data into ``spool`` -- confirm against
                    # ScribeReader.
                    for _step in reader.get_for_date(day):
                        # Rewind so whatever is in the file can be read back.
                        spool.flush()
                        spool.seek(0)
                        for record in spool:
                            yield record
                        # Empty the file before the next step.
                        spool.seek(0)
                        spool.truncate(0)

                    day += timedelta(days=1)
Exemplo n.º 2
0
    class DateRangeContextManager(object):
        """Iterable context manager over a stream's lines for a range of dates.

        Entering the context returns the instance itself and leaving it does
        nothing; all the work happens while iterating.
        """

        def __init__(self, host, stream_name, start_date, end_date, aws_access_key_id, aws_secret_access_key):
            """Record the date range and build the S3 connection.

            Args:
                host: Passed together with ``stream_name`` to ``get_s3_info``
                    to look up the S3 host and bucket.
                stream_name: Name of the stream to read.
                start_date: First date to iterate over (inclusive).
                end_date: Last date to iterate over (inclusive).
                aws_access_key_id: Forwarded to ``ScribeS3``.
                aws_secret_access_key: Forwarded to ``ScribeS3``.
            """
            self.stream_name = stream_name
            self.start_date = start_date
            self.end_date = end_date

            # The value returned as the bucket is split further into the
            # bucket proper and a key prefix.
            s3_host, bucket_spec = get_s3_info(host, stream_name)
            bucket_name, key_prefix = _split_bucket_and_prefix(bucket_spec)
            connection = ScribeS3(
                s3_host=s3_host,
                s3_bucket=bucket_name,
                s3_key_prefix=key_prefix,
                aws_access_key_id=aws_access_key_id,
                aws_secret_access_key=aws_secret_access_key,
            )
            self.s3_connection = connection

        def __enter__(self):
            """Return this instance as the value bound by ``with ... as``."""
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            """Do nothing on exit; exceptions from the body are not suppressed."""
            return None

        def __iter__(self):
            """Yield lines for each date from ``start_date`` through ``end_date``.

            Dates are visited one day at a time and both ends are inclusive;
            nothing is yielded when ``start_date`` is later than ``end_date``.
            Every date gets its own temporary file and its own
            ``ScribeReader``, which is also stored on ``self.reader``.
            """
            day = self.start_date
            while self.end_date >= day:
                # The scratch file only lives for the date being processed.
                with TemporaryFile() as spool:
                    reader = ScribeReader(
                        self.stream_name,
                        s3_connections=[self.s3_connection],
                        ostream=spool,
                    )
                    self.reader = reader

                    # NOTE(review): presumably each step of get_for_date()
                    # writes a batch of data into ``spool`` -- confirm against
                    # ScribeReader.
                    for _step in reader.get_for_date(day):
                        # Rewind so whatever is in the file can be read back.
                        spool.flush()
                        spool.seek(0)
                        for record in spool:
                            yield record
                        # Empty the file before the next step.
                        spool.seek(0)
                        spool.truncate(0)

                    day += timedelta(days=1)