def get_access_raw(marker=None, end=None, limit=10000, chunk_output=False):
    """
    Iterate over every log object in the log container.

    Pages through the container listing in batches of ``limit`` entries,
    which matters when the container holds more than 10,000 logs: most
    swift servers cap a single listing request at that number.

    :param marker: (string) resume listing after this object name
    :param end: (string) stop listing at this object name (end_marker)
    :param limit: (int) page size requested per container listing
    :param chunk_output: (bool) if True, yield each page as a list;
        otherwise yield the individual object entries one by one
    """
    client = StandardClient(**settings.swiftly_config)
    while True:
        page = client.get_container(settings.LOG_CONTAINER,
                                    marker=marker,
                                    end_marker=end,
                                    limit=limit)[-1]
        if not page:
            # Empty listing means we have walked past the last object.
            break
        # Advance the marker so the next request starts after this page.
        marker = page[-1]['name']
        if chunk_output:
            yield page
        else:
            for entry in page:
                yield entry
def get_log_data(name):
    """
    Yield the valid log lines from one gzipped log object.

    :param name: (string) Name of the log in the access_raw container
    """
    client = StandardClient(**settings.swiftly_config)
    response = client.get_object(settings.LOG_CONTAINER, name, stream=False)
    # The object body is the last element of the swiftly response tuple;
    # wrap it so it can be decompressed and read line-by-line as text.
    raw = io.BytesIO(response[-1])
    gz_stream = gzip.GzipFile(mode='rb', fileobj=raw)
    # Closing the TextIOWrapper closes the whole reader chain.
    with io.TextIOWrapper(io.BufferedReader(gz_stream)) as lines:
        for log_line in lines:
            if line_is_valid(log_line):
                yield log_line
# NOTE: Python 2 module (StringIO, cPickle, print statements).
from StringIO import StringIO
from swiftly.client import StandardClient
import os
import random
import gzip
import datetime
import cPickle as pickle
import sys
# from custom_bloom import filter_logs, get_logs, CustomBloomFilter
from custom_bloom_filter import filter_logs, get_logs, CustomBloomFilter, SIZE_OF_BLOOMFILTER, NO_OF_HASH_FUNCTION

# Module-level swift client used by the helpers below.
# SECURITY NOTE(review): auth_key is a hard-coded credential checked into
# source, and insecure=True disables TLS certificate verification -- the
# secret should be rotated and loaded from configuration/environment, and
# certificate verification re-enabled.
client = StandardClient(
    auth_url='https://swauth.ord1.swift.racklabs.com/auth/v1.0',
    auth_user='******',
    auth_key='VHZmEKSJm6nNs',
    insecure=True)


def get_objects_by_date_range(start="2017/04/16/10", end="2017/04/16/11"):
    # Lists "access_raw" between two hour-granular path prefixes.
    # get_container returns a tuple; indexes 2 and 3 are presumably the
    # response headers and the decoded object list -- TODO confirm
    # against the swiftly StandardClient docs.
    container = client.get_container("access_raw", marker=start, end_marker=end, decode_json=True)
    c = container[2]
    object_list = container[3]
    print "- " * 5
    print "-- Container Stats --"
    print "Total size of access_raw: {}".format(