Exemplo n.º 1
0
def get_access_raw(marker=None, end=None, limit=10000, chunk_output=False):
    """
    Page through the listing of the log container, one request at a time.

    Each request resumes from the name of the last entry returned by the
    previous one, so containers holding more entries than a single listing
    request returns (commonly capped at 10,000 by swift servers) can still be
    walked in full.

    :param marker: passed as ``marker`` to the first listing request; later
                   requests use the name of the last entry already seen
    :param end: passed as ``end_marker`` to every listing request
    :param limit: passed as ``limit`` to every listing request
    :param chunk_output: when true, yield each page (the whole listing
                         returned by one request); otherwise yield the
                         entries one by one
    """
    swift = StandardClient(**settings.swiftly_config)

    while True:
        response = swift.get_container(settings.LOG_CONTAINER,
                                       marker=marker,
                                       end_marker=end,
                                       limit=limit)
        # The listing itself is the last element of the response.
        listing = response[-1]

        # An empty page means there is nothing left to read.
        if not listing:
            return

        # Remember where this page ended so the next request continues
        # from there.
        marker = listing[-1]['name']

        if chunk_output:
            yield listing
            continue

        for entry in listing:
            yield entry
Exemplo n.º 2
0
def get_log_data(name):
    """ Yield the valid lines of one gzip-compressed log object

    The object is fetched in full from the log container, decompressed with
    gzip, decoded as text (with the default ``io.TextIOWrapper`` encoding)
    and filtered through ``line_is_valid``.

    :param name: (string) Name of the log in the access_raw container
    """
    swift = StandardClient(**settings.swiftly_config)
    response = swift.get_object(settings.LOG_CONTAINER, name, stream=False)

    # The object contents are the last element of the response.
    compressed = io.BytesIO(response[-1])
    unzipped = gzip.GzipFile(mode='rb', fileobj=compressed)
    reader = io.TextIOWrapper(io.BufferedReader(unzipped))

    with reader as text:
        for line in text:
            if not line_is_valid(line):
                continue
            yield line
from StringIO import StringIO
from swiftly.client import StandardClient
import os
import random
import gzip
import datetime
import cPickle as pickle
import sys

# from custom_bloom import filter_logs, get_logs, CustomBloomFilter
from custom_bloom_filter import filter_logs, get_logs, CustomBloomFilter, SIZE_OF_BLOOMFILTER, NO_OF_HASH_FUNCTION

# Module-level Swift client, created at import time and used by
# get_objects_by_date_range below (via client.get_container).
# NOTE(review): the auth user and key are hard-coded literals in source;
# consider loading them from the environment or a config file, and rotating
# this key if the file has been shared.
client = StandardClient(
    auth_url='https://swauth.ord1.swift.racklabs.com/auth/v1.0',
    auth_user='******',
    auth_key='VHZmEKSJm6nNs',
    # NOTE(review): presumably turns off SSL certificate verification --
    # confirm against the swiftly StandardClient docs before relying on it.
    insecure=True)


def get_objects_by_date_range(start="2017/04/16/10", end="2017/04/16/11"):
    container = client.get_container("access_raw",
                                     marker=start,
                                     end_marker=end,
                                     decode_json=True)
    c = container[2]
    object_list = container[3]

    print "- " * 5
    print "-- Container Stats --"
    print "Total size of access_raw: {}".format(