Exemplo n.º 1
0
 def infer_schema(self, stream, namespace=None):
   """POST an infer-schema request for `stream` and return the decoded reply.

   Sends a marshalled dict with 'stream' and 'namespace' keys to
   `self.infer_schema_path`, asserts that the response status is 200, and
   returns the unmarshalled response data.
   """
   payload = marshal.dumps({'stream': stream, 'namespace': namespace})
   reply = self.http_client.post(self.infer_schema_path,
                                 data=payload,
                                 buffered=True)
   self.assertEqual(reply.status_code, 200)
   return marshal.loads(reply.data)
Exemplo n.º 2
0
def infer_schema(namespace, stream):
    """Infer a schema for `stream` from a sample of its stored events.

    Retrieves up to 100 events between time 0 and the current time, in
    descending order, from the backend the router selects for
    (namespace, stream). Each unmarshalled event's schema type is combined
    into an accumulator that starts as NullType. The combined type is
    returned as a dict with a "$schema" key naming the draft-04 schema URI.
    """
    end_time = epoch_time_to_kronos_time(time.time())
    backend, configuration = router.backend_to_retrieve(namespace, stream)
    sampled_events = backend.retrieve(
        namespace,
        stream,
        0,
        end_time,
        None,
        configuration,
        order=ResultOrder.DESCENDING,
        limit=100,
    )

    combined_type = NullType()
    for raw_event in sampled_events:
        event_type = get_schema_type(marshal.loads(raw_event))
        combined_type = combined_type.combine(event_type)

    schema = combined_type.to_dict()
    schema["$schema"] = "http://json-schema.org/draft-04/schema"
    return schema
Exemplo n.º 3
0
 def delete(self, stream, start_time, end_time, start_id=None, namespace=None):
   """POST a delete request for `stream` and return the decoded reply.

   The request always carries 'stream' and 'end_time'. It carries 'start_id'
   when `start_id` is truthy and 'start_time' otherwise, and 'namespace' only
   when one is given. Asserts a 200 status and a truthy SUCCESS_FIELD in the
   decoded reply.
   """
   payload = {'stream': stream, 'end_time': end_time}
   # Exactly one lower bound is sent: a truthy start_id wins over start_time.
   if start_id:
     bound_key, bound_value = 'start_id', start_id
   else:
     bound_key, bound_value = 'start_time', start_time
   payload[bound_key] = bound_value
   if namespace is not None:
     payload['namespace'] = namespace

   reply = self.http_client.post(path=self.delete_path,
                                 data=marshal.dumps(payload),
                                 buffered=True)
   self.assertEqual(reply.status_code, 200)
   decoded = marshal.loads(reply.data)
   self.assertTrue(decoded[SUCCESS_FIELD])
   return decoded
Exemplo n.º 4
0
 def delete(self, stream, start_time, end_time, start_id=None, namespace=None):
   """Send a delete request for `stream` and return the unmarshalled response.

   'stream' and 'end_time' are always included. The lower bound is sent as
   'start_id' if `start_id` is truthy, otherwise as 'start_time'. 'namespace'
   is included only when it is not None. The response must have status 200
   and a truthy SUCCESS_FIELD, both checked with test assertions.
   """
   request = {'stream': stream, 'end_time': end_time}
   if start_id:
     lower_bound = {'start_id': start_id}
   else:
     lower_bound = {'start_time': start_time}
   request.update(lower_bound)
   if namespace is not None:
     request['namespace'] = namespace

   body = marshal.dumps(request)
   http_response = self.http_client.post(path=self.delete_path,
                                         data=body,
                                         buffered=True)
   self.assertEqual(http_response.status_code, 200)
   result = marshal.loads(http_response.data)
   self.assertTrue(result[SUCCESS_FIELD])
   return result
Exemplo n.º 5
0
 def put(self, stream_or_mapping, events=None, namespace=None):
   """POST events to the put endpoint and return the decoded reply.

   `stream_or_mapping` is either a dict, which is sent as the 'events' value
   unchanged, or a single key that is paired with `events` (asserted to be
   non-None) in a one-entry dict. 'namespace' is included only when given.
   Asserts a 200 status and a truthy SUCCESS_FIELD in the decoded reply.
   """
   if isinstance(stream_or_mapping, dict):
     events_by_stream = stream_or_mapping
   else:
     self.assertTrue(events is not None)
     events_by_stream = {stream_or_mapping: events}

   payload = {'events': events_by_stream}
   if namespace is not None:
     payload['namespace'] = namespace

   reply = self.http_client.post(path=self.put_path,
                                 data=marshal.dumps(payload),
                                 buffered=True)
   self.assertEqual(reply.status_code, 200)
   decoded = marshal.loads(reply.data)
   self.assertTrue(decoded[SUCCESS_FIELD])
   return decoded
Exemplo n.º 6
0
 def put(self, stream_or_mapping, events=None, namespace=None):
   """Send events to the put endpoint and return the unmarshalled response.

   When `stream_or_mapping` is a dict it is used directly as the 'events'
   value. Otherwise `events` must not be None (checked with a test
   assertion) and the 'events' value is {stream_or_mapping: events}.
   'namespace' is added only when it is not None. The response must have
   status 200 and a truthy SUCCESS_FIELD.
   """
   request = {}
   given_mapping = isinstance(stream_or_mapping, dict)
   if not given_mapping:
     self.assertTrue(events is not None)
   request['events'] = (stream_or_mapping if given_mapping
                        else {stream_or_mapping: events})
   if namespace is not None:
     request['namespace'] = namespace

   body = marshal.dumps(request)
   http_response = self.http_client.post(path=self.put_path,
                                         data=body,
                                         buffered=True)
   self.assertEqual(http_response.status_code, 200)
   result = marshal.loads(http_response.data)
   self.assertTrue(result[SUCCESS_FIELD])
   return result
Exemplo n.º 7
0
    def test_stream_splitting(self):
        """Check that events are split across time buckets correctly.

        Inserts events whose timestamps span several time buckets, then reads
        each bucket's shards directly and verifies that every bucket holds
        the expected number of events and that each event's timestamp lies
        inside its bucket's time range.
        """
        stream_name = "TestCassandraBackend_test_stream_splitting"
        stream = self.namespace.get_stream(stream_name, self.width, self.shards)
        settings.storage.cassandra.timewidth_seconds = 2
        router.reload()

        # Timestamps cycle through 0..9 seconds. Each bucket interval is 2
        # seconds wide, so the events should be distributed over 5 buckets:
        # [0, 2), [2, 4), [4, 6), [6, 8), [8, 10).
        for index in xrange(100):
            timestamp = epoch_time_to_kronos_time(index % 10)
            self.put(stream_name, [{TIMESTAMP_FIELD: timestamp}])
        all_events = self.get(stream_name, 0, epoch_time_to_kronos_time(10))
        self.assertEqual(len(all_events), 100)

        events_by_bucket = defaultdict(list)
        for bucket_start in (0, 2, 4, 6, 8):
            # Read every shard of this bucket directly.
            for shard in xrange(self.shards):
                shard_reader = StreamShard(
                    stream.namespace,
                    stream_name,
                    epoch_time_to_kronos_time(bucket_start),
                    self.width,
                    shard,
                    False,
                    MAX_LIMIT,
                    100,
                )
                stored_events = shard_reader.iterator(
                    uuid_from_time(bucket_start, UUIDType.LOWEST),
                    uuid_from_time(bucket_start + self.width_seconds),
                )
                decoded = (marshal.loads(stored.json) for stored in stored_events)
                events_by_bucket[bucket_start].extend(decoded)

        total_events = 0
        for bucket_start, bucket_events in events_by_bucket.iteritems():
            # Every bucket should hold 20 events, all of which must fall in
            # the bucket's time range.
            self.assertEqual(len(bucket_events), 20)
            bucket_end = bucket_start + self.width_seconds
            for event in bucket_events:
                event_time = kronos_time_to_epoch_time(event[TIMESTAMP_FIELD])
                self.assertTrue(event_time >= bucket_start)
                self.assertTrue(event_time < bucket_end)
            total_events += len(bucket_events)
        self.assertEqual(total_events, 100)
Exemplo n.º 8
0
def infer_schema(namespace, stream):
    """Build a schema dict for `stream` by combining its events' schema types.

    Asks the router for the backend (and its configuration) that serves
    (namespace, stream), retrieves up to 100 events from time 0 up to the
    current time in descending order, and combines the schema type of every
    unmarshalled event, starting from NullType. Returns the combined type's
    dict form with '$schema' set to the draft-04 schema URI.
    """
    current_time = epoch_time_to_kronos_time(time.time())
    backend, configuration = router.backend_to_retrieve(namespace, stream)
    retrieve_options = {'order': ResultOrder.DESCENDING, 'limit': 100}
    event_sample = backend.retrieve(namespace, stream, 0, current_time, None,
                                    configuration, **retrieve_options)

    merged_type = NullType()
    for serialized_event in event_sample:
        decoded_event = marshal.loads(serialized_event)
        merged_type = merged_type.combine(get_schema_type(decoded_event))

    schema = merged_type.to_dict()
    schema['$schema'] = 'http://json-schema.org/draft-04/schema'
    return schema
Exemplo n.º 9
0
  def test_stream_splitting(self):
    ''' Verifies that inserted events end up in the correct time buckets.

    Events with timestamps spanning several time buckets are inserted, then
    each bucket's shards are read directly to check that every bucket holds
    the expected number of events and that each event's timestamp falls
    inside the bucket's time range. '''

    stream_name = 'TestCassandraBackend_test_stream_splitting'
    stream = self.namespace.get_stream(stream_name, self.width, self.shards)
    settings.storage.cassandra.timewidth_seconds = 2
    router.reload()

    # Timestamps cycle through 0..9 seconds. Each bucket interval is 2 seconds
    # wide, so the events should be distributed over 5 buckets:
    # [0, 2), [2, 4), [4, 6), [6, 8), [8, 10).
    for index in xrange(100):
      timestamp = epoch_time_to_kronos_time(index % 10)
      self.put(stream_name, [{TIMESTAMP_FIELD: timestamp}])
    all_events = self.get(stream_name, 0, epoch_time_to_kronos_time(10))
    self.assertEqual(len(all_events), 100)

    events_by_bucket = defaultdict(list)
    for bucket_start in (0, 2, 4, 6, 8):
      # Read every shard of this bucket directly.
      for shard in xrange(self.shards):
        shard_reader = StreamShard(stream.namespace, stream_name,
                                   epoch_time_to_kronos_time(bucket_start),
                                   self.width, shard, False,
                                   MAX_LIMIT, 100)
        stored_events = shard_reader.iterator(
          uuid_from_time(bucket_start, UUIDType.LOWEST),
          uuid_from_time(bucket_start + self.width_seconds))
        decoded = (marshal.loads(stored.json) for stored in stored_events)
        events_by_bucket[bucket_start].extend(decoded)

    total_events = 0
    for bucket_start, bucket_events in events_by_bucket.iteritems():
      # Every bucket should hold 20 events, all of which must fall in the
      # bucket's time range.
      self.assertEqual(len(bucket_events), 20)
      bucket_end = bucket_start + self.width_seconds
      for event in bucket_events:
        event_time = kronos_time_to_epoch_time(event[TIMESTAMP_FIELD])
        self.assertTrue(event_time >= bucket_start)
        self.assertTrue(event_time < bucket_end)
      total_events += len(bucket_events)
    self.assertEqual(total_events, 100)
Exemplo n.º 10
0
    def wrapper(environment, start_response):
      """WSGI callable that wraps `function` with shared endpoint handling.

      In order, this:
        * returns 403 if `function` is not listed among the endpoints for the
          configured serving mode;
        * returns 405 (with an Allow header) if the request method is not in
          `methods`;
        * answers OPTIONS (CORS preflight) requests with 200 and an empty
          body, adding CORS headers only when the remote origin is allowed;
        * decodes POST bodies into environment['json'], returning 400 when
          decoding raises ValueError;
        * otherwise calls `function(environment, start_response, headers)`
          and, unless it returned a generator, sets TOOK_FIELD on the result
          and returns it serialized.

      Any uncaught exception is logged and reported as a 400 response whose
      error list contains repr() of the exception.
      """
      # Bound before the `try` so the exception handler can always report the
      # elapsed time.
      start_time = time.time()

      def _took():
        # Time elapsed since the request started, formatted in milliseconds.
        return '%fms' % (1000 * (time.time() - start_time))

      def _error_response(status, errors, extra_headers=()):
        # Start a JSON error response and return its serialized body.
        start_response(status,
                       list(extra_headers) +
                       [('Content-Type', 'application/json')])
        return marshal.dumps({
          ERRORS_FIELD: errors,
          SUCCESS_FIELD: False,
          TOOK_FIELD: _took()
        })

      try:
        if function.func_name not in (_serving_mode_endpoints
                                      [settings.serving_mode]):
          return _error_response(
            '403 Forbidden',
            ['kronosd is configured to block access to this '
             'endpoint.'])
        req_method = environment['REQUEST_METHOD']

        # If the request method is not allowed, return 405.
        if req_method not in methods:
          return _error_response(
            '405 Method Not Allowed',
            ['%s method not allowed' % req_method],
            extra_headers=[('Allow', ', '.join(methods))])

        headers = []
        remote_origin = environment.get('HTTP_ORIGIN')

        if req_method == 'OPTIONS':
          # This is a CORS preflight request so check that the remote domain is
          # allowed and respond with appropriate CORS headers.
          # http://www.html5rocks.com/static/images/cors_server_flowchart.png
          if is_remote_allowed(remote_origin):
            headers.extend([
              ('Access-Control-Allow-Origin', remote_origin),
              ('Access-Control-Allow-Credentials', 'true'),
              ('Access-Control-Allow-Headers', ', '.join(
                ('Accept', 'Content-Type', 'Origin', 'X-Requested-With'))),
              ('Access-Control-Allow-Methods', ', '.join(methods))
            ])
          # We just tell the client that CORS is ok. Client will follow up
          # with another request to get the answer.
          start_response('200 OK', headers)
          return ''

        # All POST bodies must be json, so decode it here.
        if req_method == 'POST':
          try:
            environment['json'] = marshal.loads(environment['wsgi.input']
                                                .read())
          except ValueError:
            return _error_response('400 Bad Request',
                                   ['Request body must be valid JSON.'])

        # All responses are JSON.
        headers.append(('Content-Type', 'application/json'))

        # NOTE(review): the origin is echoed here without an
        # is_remote_allowed() check, unlike the preflight branch above --
        # confirm this is intended.
        if remote_origin:
          headers.append(('Access-Control-Allow-Origin', remote_origin))

        response = function(environment, start_response, headers)
        # Generators are returned untouched; anything else is expected to
        # support item assignment and is serialized here.
        if not isinstance(response, types.GeneratorType):
          response[TOOK_FIELD] = _took()
          response = marshal.dumps(response)
        return response
      except Exception as e:
        # NOTE(review): every uncaught error is reported as 400, including
        # ones that may not be the client's fault -- confirm this is intended.
        log.exception('endpoint: uncaught exception!')
        return _error_response('400 Bad Request', [repr(e)])
Exemplo n.º 11
0
 def index(self):
   """GET `self.index_path` and return the decoded reply.

   Asserts that the response status is 200 before unmarshalling its data.
   """
   reply = self.http_client.get(path=self.index_path)
   self.assertEqual(reply.status_code, 200)
   decoded = marshal.loads(reply.data)
   return decoded
Exemplo n.º 12
0
        def wrapper(environment, start_response):
            """WSGI callable that wraps `function` with shared endpoint handling.

            In order, this:
              * returns 403 if `function` is not listed among the endpoints
                for the configured serving mode;
              * returns 405 (with an Allow header) if the request method is
                not in `methods`;
              * answers OPTIONS (CORS preflight) requests with 200 and an
                empty body, adding CORS headers only when the remote origin
                is allowed;
              * decodes POST bodies into environment['json'], returning 400
                when decoding raises ValueError;
              * otherwise calls
                `function(environment, start_response, headers)` and, unless
                it returned a generator, sets TOOK_FIELD on the result and
                returns it serialized.

            Any uncaught exception is logged and reported as a 400 response
            whose error list contains repr() of the exception.
            """
            # Bound before the `try` so the exception handler can always
            # report the elapsed time.
            start_time = time.time()

            def _took():
                # Time elapsed since the request started, in milliseconds.
                return '%fms' % (1000 * (time.time() - start_time))

            def _error_response(status, errors, extra_headers=()):
                # Start a JSON error response and return its serialized body.
                start_response(status,
                               list(extra_headers) +
                               [('Content-Type', 'application/json')])
                return marshal.dumps({
                    ERRORS_FIELD: errors,
                    SUCCESS_FIELD: False,
                    TOOK_FIELD: _took()
                })

            try:
                if function.func_name not in (
                        _serving_mode_endpoints[settings.serving_mode]):
                    return _error_response(
                        '403 Forbidden',
                        [
                            'kronosd is configured to block access to this '
                            'endpoint.'
                        ])
                req_method = environment['REQUEST_METHOD']

                # If the request method is not allowed, return 405.
                if req_method not in methods:
                    return _error_response(
                        '405 Method Not Allowed',
                        ['%s method not allowed' % req_method],
                        extra_headers=[('Allow', ', '.join(methods))])

                headers = []
                remote_origin = environment.get('HTTP_ORIGIN')

                if req_method == 'OPTIONS':
                    # This is a CORS preflight request so check that the remote domain is
                    # allowed and respond with appropriate CORS headers.
                    # http://www.html5rocks.com/static/images/cors_server_flowchart.png
                    if is_remote_allowed(remote_origin):
                        headers.extend([
                            ('Access-Control-Allow-Origin', remote_origin),
                            ('Access-Control-Allow-Credentials', 'true'),
                            ('Access-Control-Allow-Headers', ', '.join(
                                ('Accept', 'Content-Type', 'Origin',
                                 'X-Requested-With'))),
                            ('Access-Control-Allow-Methods',
                             ', '.join(methods))
                        ])
                    # We just tell the client that CORS is ok. Client will follow up
                    # with another request to get the answer.
                    start_response('200 OK', headers)
                    return ''

                # All POST bodies must be json, so decode it here.
                if req_method == 'POST':
                    try:
                        environment['json'] = marshal.loads(
                            environment['wsgi.input'].read())
                    except ValueError:
                        return _error_response(
                            '400 Bad Request',
                            ['Request body must be valid JSON.'])

                # All responses are JSON.
                headers.append(('Content-Type', 'application/json'))

                # NOTE(review): the origin is echoed here without an
                # is_remote_allowed() check, unlike the preflight branch
                # above -- confirm this is intended.
                if remote_origin:
                    headers.append(
                        ('Access-Control-Allow-Origin', remote_origin))

                response = function(environment, start_response, headers)
                # Generators are returned untouched; anything else is
                # expected to support item assignment and is serialized here.
                if not isinstance(response, types.GeneratorType):
                    response[TOOK_FIELD] = _took()
                    response = marshal.dumps(response)
                return response
            except Exception as e:
                # NOTE(review): every uncaught error is reported as 400,
                # including ones that may not be the client's fault --
                # confirm this is intended.
                log.exception('endpoint: uncaught exception!')
                return _error_response('400 Bad Request', [repr(e)])
Exemplo n.º 13
0
 def index(self):
   """Fetch the index endpoint and return its unmarshalled response data.

   Issues a GET to `self.index_path` and asserts a 200 status first.
   """
   http_response = self.http_client.get(path=self.index_path)
   self.assertEqual(http_response.status_code, 200)
   return marshal.loads(http_response.data)