def main(args): read_client = KronosClient(args.read_url, namespace=args.read_namespace) write_client = KronosClient(args.write_url, namespace=args.write_namespace, blocking=False) start_time = time.time() time_step = timedelta(seconds=args.copy_period_seconds) for stream in args.stream_file: stream = stream.rstrip() print 'Starting stream', stream, time.time() - start_time start = args.start # Keep track of the last ID we read, so we re-run queries from # there. last_read_id = None while start <= args.end: print '...start is', start, time.time() - start_time end = min(args.end, start + time_step) if last_read_id is None: read_stream = read_client.get(stream, start, end) else: read_stream = read_client.get(stream, None, end, start_id=last_read_id) for event in read_stream: if event[ID_FIELD] != last_read_id: last_read_id = event[ID_FIELD] del event[ID_FIELD] write_client.put({stream: [event]}) start += time_step write_client.flush() print 'Completed stream', stream, time.time() - start_time
def _run_query(self, start_time, end_time, unique_id=None): """Executes a Python query string and returns events Acts as a wrapper around exec that injects necessary local variables into the scope of the user-provided query blob. :param start_time: Python datetime to be injected into query :param end_time: Python datetime to be injected into query :param unique_id: An unused flag that allows the scheduler to hash this function uniquely based on its args when it passes through """ # XXX(derek): DEPRECATION WARNING # Use of the implicit Kronos client in pycode queries is deprecated client = KronosClient(self._app.config['KRONOS_URL'], namespace=self._app.config['KRONOS_NAMESPACE'], blocking=False, sleep_block=0.2) locals_dict = { 'kronos_client': client, 'events': [], 'start_time': start_time, 'end_time': end_time, } try: exec self._query in {}, locals_dict # No globals. except: _, exception, tb = sys.exc_info() raise PyCodeError(exception, traceback.format_tb(tb)) # Retrieve the `events` variable as computed by the pycode. events = locals_dict.get('events', []) return events
def main(args): client = KronosClient(args.kronos_url) results = client.get(args.stream, args.start, args.end, namespace=args.namespace) if args.display == 'print': if args.type == 'json': events = [] for event in results: events.append(event) print json.dumps(events) elif args.type == 'one-per-line': for event in results: print event elif args.display == 'csv': writer = csv.DictWriter(sys.stdout, args.fields) if not args.remove_header: writer.writeheader() for event in results: row_values = {} for field in args.fields: field_value = get_property(event, field) row_values[field] = (field_value.encode('utf-8') if isinstance( field_value, unicode) else field_value) writer.writerow(row_values) elif args.display == 'aggregate': aggregates = aggregate_stream(results, AGGREGATORS[args.aggregator], args.field, args.time_bucket_width) print 'Bucket, Aggregate' for bucket, aggregate in aggregates: print '%s, %s' % (datetime.fromtimestamp(bucket), aggregate) else: raise Exception('Invalid display option {}'.format(args.display))
def _run_query(self, start_time, end_time, unique_id=None): """Executes a Python query string and returns events Acts as a wrapper around exec that injects necessary local variables into the scope of the user-provided query blob. :param start_time: Python datetime to be injected into query :param end_time: Python datetime to be injected into query :param unique_id: An unused flag that allows the scheduler to hash this function uniquely based on its args when it passes through """ client = KronosClient(self._app.config['KRONOS_URL'], namespace=self._app.config['KRONOS_NAMESPACE'], blocking=False, sleep_block=0.2) locals_dict = { 'kronos_client': client, 'events': [], 'start_time': start_time, 'end_time': end_time, } try: exec self._query in {}, locals_dict # No globals. except: _, exception, tb = sys.exc_info() raise PyCodeError(exception, traceback.format_tb(tb)) events = sorted(locals_dict.get('events', []), key=lambda event: event['@time']) return events
def streams():
  """Return the sorted list of stream names in the configured namespace."""
  namespace = current_app.config['KRONOS_NAMESPACE']
  client = KronosClient(current_app.config['KRONOS_URL'],
                        namespace=namespace)
  names = sorted(client.get_streams(namespace=namespace))
  return {'streams': names}
def execute_kronos_stream(self, node):
  """Synchronously fetch the node's stream from its Kronos host over the
  node's configured time range and namespace."""
  from pykronos import KronosClient
  kc = KronosClient(node.host, blocking=True)
  return kc.get(node.stream, node.start_time, node.end_time,
                namespace=node.namespace)
def main(args): client = KronosClient(args.kronos_url, namespace=args.namespace) unfiltered_streams = [(stream, None, args.user_field) for stream in args.streams] stream_sizes = funnel_analyze(client, unfiltered_streams, args.start, args.end, args.end, {}, None) # TODO(marcua): print something more meaningful here. print stream_sizes
def streams():
  """Return the sorted list of stream names in the configured namespace."""
  namespace = app.config['KRONOS_NAMESPACE']
  client = KronosClient(app.config['KRONOS_URL'], namespace=namespace)
  names = sorted(client.get_streams(namespace=namespace))
  return {'streams': names}
def setUp(self):
  """Create a Kronos client and the HTTP paths these tests hit."""
  self.kronos_client = KronosClient('http://localhost:9191')
  self.server_url = 'http://localhost:9192/%s'
  self.index_path = '1.0/index'
  self.source_path = '1.0/sources'
  self.streams_path = '1.0/streams/kronos'
  self.schema_path = '1.0/streams/kronos/%s'
  self.query_path = '1.0/query'
  self.executor = None
def get_events(i):
  """Fetch the i-th of `executor.parallelism` equal time slices of the
  node's stream (closure over node, delta, executor)."""
  client = KronosClient(node._host, blocking=True)
  slice_start = node.start_time + (i * delta)
  # The last slice absorbs any remainder up to node.end_time; earlier
  # slices end one unit before the next slice starts.
  if i == executor.parallelism - 1:
    slice_end = node.end_time
  else:
    slice_end = slice_start + delta - 1
  return list(client.get(node.stream, slice_start, slice_end,
                         namespace=node.namespace))
def get_events(i):
  """Fetch the i-th of `self.parallelism` equal time slices of the node's
  stream (closure over node, delta, self)."""
  from pykronos import KronosClient
  client = KronosClient(node.host, blocking=True)
  slice_start = node.start_time + (i * delta)
  # The last slice absorbs any remainder up to node.end_time; earlier
  # slices end one unit before the next slice starts.
  if i == self.parallelism - 1:
    slice_end = node.end_time
  else:
    slice_end = slice_start + delta - 1
  return list(client.get(node.stream, slice_start, slice_end,
                         namespace=node.namespace))
def load_test_data(args):
  """Download the zipped donations CSV and load every row into the
  'donations' stream on the given Kronos server."""
  archive = ZipFile(StringIO(urllib2.urlopen(DONATIONS_FILE_URL).read()))
  csv_data = StringIO(archive.read('%s.csv' % DONATIONS_FILE_NAME))
  events = []
  for row in csv.DictReader(csv_data):
    # Timestamp each event from the donation's receipt date.
    row[TIMESTAMP_FIELD] = parse(row['contb_receipt_dt'])
    events.append(row)
  KronosClient(args.kronos_url).put({'donations': events})
def main(args):
  # Summarize every stream on a Kronos server over [args.start, args.end]:
  # event counts, per-day/per-second rates, and payload-size statistics.
  # Output goes to a CSV file when args.csv is set, otherwise it is
  # pretty-printed to stdout with '-' separators.
  client = KronosClient(args.kronos_url)
  headers = [
    'stream',
    'total_events',
    'events_per_day',
    'events_per_sec',
    'payload_total_bytes',
    'payload_avg_bytes',
    'payload_med_bytes',
    'payload_95_bytes',
    'payload_99_bytes',
    'schema',
  ]
  if args.csv:
    csv_file = open(args.csv, 'w')
    writer = csv.DictWriter(csv_file, headers)
    writer.writeheader()
  else:
    print '-' * 79
  for stream in client.get_streams():
    total_events = 0
    payloads = []
    # Serialized JSON length is used as a proxy for each event's payload
    # size.
    for event in client.get(stream, args.start, args.end):
      payloads.append(len(ujson.dumps(event)))
      total_events += 1
    if total_events == 0:
      indent('%s has no events' % stream, 2)
      print '-' * 79
      continue
    timeframe_sec = (args.end - args.start).total_seconds()
    schema = client.infer_schema(stream)['schema']
    # Pair each header with the corresponding statistic for this stream.
    context = dict(
      zip(headers, [
        stream,
        total_events,
        (float(total_events) / timeframe_sec) * 60 * 60 * 24,
        float(total_events) / timeframe_sec,
        np.sum(payloads),
        np.mean(payloads),
        np.median(payloads),
        np.percentile(payloads, 95),
        np.percentile(payloads, 99),
        schema,
      ]))
    if args.csv:
      writer.writerow(context)
    else:
      # `output` is presumably a module-level format template keyed by
      # `headers` -- not visible in this chunk; verify before editing.
      indent(output % context, 2)
      print '-' * 79
def main(args): client = KronosClient(args.kronos_url, namespace=args.namespace, blocking=False) increment = timedelta(microseconds=args.microseconds_between_events) event = {'property%s' % (idx): idx for idx in xrange(args.properties_per_event)} start_time = time.time() for idx in xrange(args.num_events): event[TIMESTAMP_FIELD] = args.start + (idx * increment) client.put({args.stream: [event]}) if (idx % args.chunk_size) == 0: print 'Completed', idx, 'events', time.time() - start_time client.flush() client.flush()
def main(args): client = KronosClient(args.kronos_url) if args.fetch_timeout: start = args.start end = args.end limit = None else: start = 0 end = datetime.utcnow() limit = 1000 for stream in client.get_streams(namespace=args.namespace): if not (args.read_latency or args.fetch_timeout): print stream elif check_stream(client, args.namespace, stream, start, end, limit, args.fetch_timeout, args.read_latency): print stream
def infer_schema(stream_name=None):
  """Infer and return the schema of `stream_name` in the configured
  namespace."""
  namespace = current_app.config['KRONOS_NAMESPACE']
  client = KronosClient(current_app.config['KRONOS_URL'],
                        namespace=namespace)
  return client.infer_schema(stream_name, namespace=namespace)
def __init__(self, query, timeframe, bucket_width=None, untrusted_time=None,
             metis=False):
  """Set up a (possibly cached) Jia query computation.

  :param query: A string of python code to execute as a Jia query.
  :param timeframe: A timeframe dictionary with a 'mode' of 'recent' or
    'range'. In 'recent' mode only 'value' and 'scale' are used; in
    'range' mode only 'from' and 'to' are used (the unused keys carry
    frontend defaults/previous values). Example:
      timeframe = {
        'mode': 'recent',
        'value': 1,
        'scale': 'days',
        'from': 'Sat Jun 10 2014 00:00:00',
        'to': 'Sun Jun 11 2014 00:00:00',
      }
  :param bucket_width: Optional bucket width in seconds.
  :param untrusted_time: Optional untrusted time interval in seconds.
  :param metis: Send `query` to metis for computation.
  :raises ValueError: if `metis` is falsy and ALLOW_PYCODE is disabled.
  """
  try:
    self._app = current_app
    # Binding current_app succeeds even outside a request context; it is
    # this attribute access that raises RuntimeError.
    self._app.config
  except RuntimeError:
    # Not inside Flask (e.g. running under the scheduler).
    from scheduler import get_app
    self._app = get_app()
  config = self._app.config
  self._query = query
  self._bucket_width = bucket_width
  self._untrusted_time = untrusted_time
  self._metis = metis
  self._start_time, self._end_time = self._get_timeframe_bounds(
    timeframe, bucket_width)
  self._cache_client = KronosClient(
    config['CACHE_KRONOS_URL'],
    namespace=config['CACHE_KRONOS_NAMESPACE'],
    blocking=False,
    sleep_block=0.2)
  # The query is sent through as an unused unique_id argument so that the
  # QueryCache hash can properly uniquely identify it.
  cache_kwargs = {'unique_id': self._query}
  if self._metis:
    query_func = self._run_metis
  elif config['ALLOW_PYCODE']:
    query_func = self._run_query
  else:
    raise ValueError("`metis` must be `True` if ALLOW_PYCODE is not enabled")
  if self._bucket_width:
    self._query_cache = QueryCache(
      self._cache_client,
      query_func,
      datetime.timedelta(seconds=bucket_width),
      config['CACHE_KRONOS_NAMESPACE'],
      query_function_kwargs=cache_kwargs)
def setUp(self):
  """Create one blocking and one non-blocking client against a local
  Kronos server."""
  url = 'http://localhost:9191/'
  self.blocking_client = KronosClient(url, blocking=True)
  self.nonblocking_client = KronosClient(url, blocking=False,
                                         sleep_block=0.2)
def infer_schema(stream_name=None):
  """Infer and return the schema of `stream_name` in the configured
  namespace."""
  namespace = app.config['KRONOS_NAMESPACE']
  client = KronosClient(app.config['KRONOS_URL'], namespace=namespace)
  return client.infer_schema(stream_name, namespace=namespace)
def main(args): client1 = KronosClient(args.kronos_url1, namespace=args.namespace1) client2 = KronosClient(args.kronos_url2, namespace=args.namespace2) if args.streams_file: streams = map( lambda s: (s, s), # Use same stream name for both. filter(lambda s: len(s), open(args.streams_file).read().split('\n'))) else: streams = [(args.stream1, args.stream2)] for stream1_name, stream2_name in streams: if args.num_samples: samples = [] for _ in xrange(args.num_samples): start = random.randint(args.start, args.end - args.sample_interval) samples.append((start, start + args.sample_interval)) else: samples = [(args.start, args.end)] total_stream1 = 0 extra_stream1 = 0 total_stream2 = 0 extra_stream2 = 0 for start, end in samples: stream1 = client1.get(stream1_name, start, end) stream2 = client2.get(stream2_name, start, end) # Sorting of events with the same timestamp may vary across backends, # hence we can't do a simple loop comparison. We need to aggregate all # events with the same timestamp from both streams and then compare the # two sets. stream1_hashes = set() stream2_hashes = set() current_timestamp = None while True: event1 = get_next(stream1) event2 = get_next(stream2) # Are both streams exhausted? if not (event1 or event2): break # Pick the smaller timestamp from the two events. min_timestamp = min(event1.get(TIMESTAMP_FIELD, sys.maxint), event2.get(TIMESTAMP_FIELD, sys.maxint)) if current_timestamp is None: current_timestamp = min_timestamp # If min_timestamp is greater than current_timestamp, then aggregate # stats for current_timestamp and roll over. 
if min_timestamp > current_timestamp: total_stream1 += len(stream1_hashes) total_stream2 += len(stream2_hashes) extra_stream1 += len(stream1_hashes - stream2_hashes) extra_stream2 += len(stream2_hashes - stream1_hashes) stream1_hashes.clear() stream2_hashes.clear() current_timestamp = min_timestamp if event1: assert event1[TIMESTAMP_FIELD] >= current_timestamp if event1[TIMESTAMP_FIELD] == current_timestamp: del event1[ID_FIELD] stream1_hashes.add( hashlib.sha224(json.dumps( event1, sort_keys=True)).hexdigest()) else: stream1 = push_back(event1, stream1) if event2: assert event2[TIMESTAMP_FIELD] >= current_timestamp if event2[TIMESTAMP_FIELD] == current_timestamp: del event2[ID_FIELD] stream2_hashes.add( hashlib.sha224(json.dumps( event2, sort_keys=True)).hexdigest()) else: stream2 = push_back(event2, stream2) print 'Diff: [%s/%s], [%s/%s]' % (args.namespace1, stream1_name, args.namespace2, stream2_name) print '< total: %d' % total_stream1 print '> total: %d' % total_stream2 print '< extra: %d' % extra_stream1 print '> extra: %d' % extra_stream2 print
def execute(self, node, executor):
  """Synchronously fetch the node's stream over its configured time range
  and namespace."""
  kc = KronosClient(node._host, blocking=True)
  return kc.get(node.stream, node.start_time, node.end_time,
                namespace=node.namespace)
import time import uuid from kronos.common.time import epoch_time_to_kronos_time from kronos.conf.constants import TIMESTAMP_FIELD from pykronos import KronosClient kronos = KronosClient('http://localhost:9191/', blocking=True) def timeit(desc, func, *args, **kwargs): start = time.time() func(*args, **kwargs) print ' - %s took %ss' % (desc, time.time() - start) def _make_event(t): return { TIMESTAMP_FIELD: epoch_time_to_kronos_time(t), 'property1': str(uuid.uuid4()), 'property2': str(uuid.uuid4()), 'property3': str(uuid.uuid4()) } def insert(stream, n, chunk_size=10000): for _ in xrange(0, n, chunk_size): events = [] for t in xrange(0, chunk_size): events.append(_make_event(t % 1000)) kronos.put({stream: events})
def setUp(self):
  """Create a Kronos client and the HTTP paths these tests hit."""
  self.kronos_client = KronosClient('http://localhost:9191')
  self.server_url = 'http://localhost:9192/%s'
  self.index_path = '1.0/index'
  self.query_path = '1.0/query'
  self.executor = None