def enqueue(self, items, batch=None, pipe=None):
    """Put items into the queue.

    The items will be pushed into Redis as part of a single (given)
    pipe in batches corresponding to the given batch argument.
    """
    if batch is None:
        batch = self.batch
    if batch == 0:
        batch = len(items)
    if self.json:
        # simplejson.dumps returns Unicode strings
        items = [simplejson.dumps(item, encoding='utf-8').encode('utf-8')
                 for item in items]
    if self.compress:
        items = [util.encode_gzip(item, encoding=None) for item in items]
    if pipe is not None:
        self._push(pipe, items, batch)
    else:
        with redis_pipeline(self.redis_client) as pipe:
            self._push(pipe, items, batch)
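For reference, here is a minimal sketch of what the `util.encode_gzip` / `util.decode_gzip` helpers used throughout these snippets could look like. The `encoding` and `compresslevel` parameters are inferred from the call sites in this section, not taken from the actual ichnaea source; note also that the snippets span several revisions whose defaults differ (compare the roundtrip test asserting u'foo' in one version and b'foo' in another), so this sketch matches only the text-returning variant.

# Hypothetical sketch of util.encode_gzip / util.decode_gzip, inferred from
# the call sites in this section (not the real ichnaea implementation).
import gzip


def encode_gzip(data, encoding="utf-8", compresslevel=6):
    # Accept text or bytes; when an encoding is given, encode text first.
    if encoding is not None and isinstance(data, str):
        data = data.encode(encoding)
    return gzip.compress(data, compresslevel=compresslevel)


def decode_gzip(data, encoding="utf-8"):
    # Decompress, and decode back to text unless encoding is None.
    result = gzip.decompress(data)
    if encoding is not None:
        result = result.decode(encoding)
    return result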
def send(self, queue_items):
    # ignore metadata
    reports = [item["report"] for item in queue_items]
    headers = {
        "Content-Encoding": "gzip",
        "Content-Type": "application/json",
        "User-Agent": "ichnaea",
    }
    response = requests.post(
        self.config.url,
        data=util.encode_gzip(
            json.dumps({"items": reports}).encode(), compresslevel=5
        ),
        headers=headers,
        timeout=60.0,
    )
    # log upload_status and trigger exception for bad responses
    # this causes the task to be re-tried
    METRICS.incr(
        "data.export.upload",
        tags=self.stats_tags + ["status:%s" % response.status_code],
    )
    response.raise_for_status()
def test_truncated_gzip(self, app, celery, raven):
    headers = {"Content-Encoding": "gzip"}
    body = util.encode_gzip(b'{"items": []}')[:-2]
    app.post(
        self.url, body, headers=headers,
        content_type="application/json", status=400,
    )
    assert self.queue(celery).size() == 0
def test_gzip(self):
    cell, query = self._one_cell_query()
    data = {"items": [query]}
    body = util.encode_gzip(dumps(data))
    headers = {"Content-Encoding": "gzip"}
    self.app.post(
        self.url, body, headers=headers,
        content_type="application/json", status=self.status,
    )
    self._assert_queue_size(1)
def send(self, url, data):
    stats_client = self.stats_client
    stats_prefix = 'items.export.%s.' % self.export_name
    headers = {
        'Content-Encoding': 'gzip',
        'Content-Type': 'application/json',
        'User-Agent': 'ichnaea',
    }
    with stats_client.timer(stats_prefix + 'upload'):
        response = requests.post(
            url,
            data=encode_gzip(data),
            headers=headers,
            timeout=60.0,
            verify=False,  # TODO switch this back on
        )
    # log upload_status and trigger exception for bad responses
    # this causes the task to be re-tried
    response_code = response.status_code
    stats_client.incr('%supload_status.%s' % (stats_prefix, response_code))
    response.raise_for_status()
    # only log successful uploads
    stats_client.incr(stats_prefix + 'batches')
    return True
def send(self, url, data):
    year, month, day = util.utcnow().timetuple()[:3]
    # strip away queue prefix again
    api_key = self.queue_key
    queue_prefix = self.export_queue.queue_prefix
    if self.queue_key.startswith(queue_prefix):
        api_key = self.queue_key[len(queue_prefix):]
    key_name = self.path.format(
        api_key=api_key, year=year, month=month, day=day)
    key_name += uuid.uuid1().hex + '.json.gz'
    try:
        with self.stats_client.timed(self.stats_prefix + 'upload',
                                     tags=self.stats_tags):
            conn = boto.connect_s3()
            bucket = conn.get_bucket(self.bucket)
            with closing(boto.s3.key.Key(bucket)) as key:
                key.key = key_name
                key.content_encoding = 'gzip'
                key.content_type = 'application/json'
                key.set_contents_from_string(
                    util.encode_gzip(data, compresslevel=7))
        self.stats_client.incr(
            self.stats_prefix + 'upload',
            tags=self.stats_tags + ['status:success'])
    except Exception:  # pragma: no cover
        self.raven_client.captureException()
        self.stats_client.incr(
            self.stats_prefix + 'upload',
            tags=self.stats_tags + ['status:failure'])
def test_gzip(self, app, celery):
    data = {'items': [{}]}
    body = util.encode_gzip(dumps(data))
    headers = {'Content-Encoding': 'gzip'}
    res = self._call(app, body, headers=headers, method='post')
    assert res.headers['Access-Control-Allow-Origin'] == '*'
    assert res.headers['Access-Control-Max-Age'] == '2592000'
    assert self.queue(celery).size() == 0
def test_gzip(self):
    wifis = WifiShardFactory.build_batch(2)
    query = self.model_query(wifis=wifis)
    body = util.encode_gzip(json.dumps(query))
    headers = {"Content-Encoding": "gzip"}
    res = self._call(body=body, headers=headers, method="post",
                     status=self.not_found.code)
    self.check_response(res, "not_found")
def test_gzip(self, app, data_queues):
    wifis = WifiShardFactory.build_batch(2)
    query = self.model_query(wifis=wifis)
    body = util.encode_gzip(json.dumps(query).encode())
    headers = {"Content-Encoding": "gzip"}
    res = self._call(
        app, body=body, headers=headers, method="post",
        status=self.not_found.code,
    )
    self.check_response(data_queues, res, "not_found")
def test_gzip(self):
    data = {'cell': [{'mcc': FRANCE_MCC, 'mnc': 2, 'lac': 3, 'cid': 4}]}
    body = util.encode_gzip(json.dumps(data))
    headers = {'Content-Encoding': 'gzip'}
    res = self.app.post('/v1/search?key=test', body, headers=headers,
                        content_type='application/json', status=200)
    self.assertEqual(res.content_type, 'application/json')
    self.assertEqual(res.json, {'status': 'not_found'})
def test_gzip(self, app, data_queues, logs):
    """A gzip-encoded body is uncompressed first."""
    wifis = WifiShardFactory.build_batch(2)
    query = self.model_query(wifis=wifis)
    body = util.encode_gzip(json.dumps(query).encode())
    headers = {"Content-Encoding": "gzip"}
    res = self._call(app, body=body, headers=headers, method="post", status=404)
    self.check_response(data_queues, res, "not_found")
    assert logs.only_entry["wifi_valid"] == 2
def test_gzip(self):
    cell, query = self._one_cell_query()
    data = {'items': [query]}
    body = util.encode_gzip(dumps(data))
    headers = {'Content-Encoding': 'gzip'}
    res = self.app.post(
        self.url, body, headers=headers,
        content_type='application/json', status=self.status)
    self.assertEqual(res.headers['Access-Control-Allow-Origin'], '*')
    self.assertEqual(res.headers['Access-Control-Max-Age'], '2592000')
    self._assert_queue_size(1)
def test_gzip(self, app, celery):
    cell, query = self._one_cell_query()
    data = {'items': [query]}
    body = util.encode_gzip(dumps(data))
    headers = {'Content-Encoding': 'gzip'}
    res = app.post(
        self.url, body, headers=headers,
        content_type='application/json', status=self.status)
    assert res.headers['Access-Control-Allow-Origin'] == '*'
    assert res.headers['Access-Control-Max-Age'] == '2592000'
    assert self.queue(celery).size() == 1
def test_gzip(self):
    wifis = WifiShardFactory.build_batch(2)
    query = self.model_query(wifis=wifis)
    body = util.encode_gzip(json.dumps(query))
    headers = {'Content-Encoding': 'gzip'}
    res = self._call(body=body, headers=headers, method='post',
                     status=self.not_found.code)
    self.check_response(res, 'not_found')
def test_gzip(self, app, data_queues):
    wifis = WifiShardFactory.build_batch(2)
    query = self.model_query(wifis=wifis)
    body = util.encode_gzip(json.dumps(query))
    headers = {'Content-Encoding': 'gzip'}
    res = self._call(app, body=body, headers=headers, method='post',
                     status=self.not_found.code)
    self.check_response(data_queues, res, 'not_found')
def test_gzip(self):
    app = self.app
    data = {"cell": [{"mcc": FRANCE_MCC, "mnc": 2, "lac": 3, "cid": 4}]}
    body = util.encode_gzip(dumps(data))
    headers = {'Content-Encoding': 'gzip'}
    res = app.post('/v1/search?key=test', body, headers=headers,
                   content_type='application/json', status=200)
    self.assertEqual(res.content_type, 'application/json')
    self.assertEqual(res.json, {"status": "not_found"})
def send(self, url, data):
    headers = {
        "Content-Encoding": "gzip",
        "Content-Type": "application/json",
        "User-Agent": "ichnaea",
    }
    with self.stats_client.timed(self.stats_prefix + "upload",
                                 tags=self.stats_tags):
        response = requests.post(
            url,
            data=util.encode_gzip(data, compresslevel=5),
            headers=headers,
            timeout=60.0,
        )
    # log upload_status and trigger exception for bad responses
    # this causes the task to be re-tried
    self.stats_client.incr(
        self.stats_prefix + "upload",
        tags=self.stats_tags + ["status:%s" % response.status_code],
    )
    response.raise_for_status()
def test_gzip(self):
    cell = CellFactory.build()
    query = self.model_query(cells=[cell])
    body = util.encode_gzip(json.dumps(query))
    headers = {'Content-Encoding': 'gzip'}
    res = self._call(body=body, headers=headers, method='post',
                     status=self.not_found.code)
    self.check_response(res, 'not_found')
def test_gzip(self):
    app = self.app
    data = {"items": [{"lat": 1.0, "lon": 2.0,
                       "wifi": [{"key": "aaaaaaaaaaaa"}]}]}
    body = util.encode_gzip(dumps(data))
    headers = {'Content-Encoding': 'gzip'}
    res = app.post('/v1/submit?key=test', body, headers=headers,
                   content_type='application/json', status=204)
    self.assertEqual(res.body, '')
def test_truncated_gzip(self, app, data_queues):
    wifis = WifiShardFactory.build_batch(2)
    query = self.model_query(wifis=wifis)
    body = util.encode_gzip(json.dumps(query).encode())[:-2]
    headers = {"Content-Encoding": "gzip"}
    res = self._call(app, body=body, headers=headers, method="post", status=400)
    detail = (
        "GZIPDecodeError(\"EOFError('Compressed file ended before the"
        " end-of-stream marker was reached')\")"
    )
    self.check_response(data_queues, res, "parse_error", details={"decode": detail})
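The two-byte truncation above chops into the gzip trailer (a CRC32 followed by a 4-byte uncompressed-size field), which is why decoding fails with exactly the EOFError quoted in the expected detail string. A standard-library demonstration:

# Truncating a gzip stream removes part of its trailer (CRC32 + ISIZE),
# so decompression fails before the end-of-stream marker is reached.
import gzip

blob = gzip.compress(b'{"items": []}')[:-2]
try:
    gzip.decompress(blob)
except EOFError as exc:
    print(exc)  # Compressed file ended before the end-of-stream marker was reached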
def test_gzip(self):
    cell, query = self._one_cell_query()
    data = {'items': [query]}
    body = util.encode_gzip(dumps(data))
    headers = {'Content-Encoding': 'gzip'}
    res = self.app.post(self.url, body, headers=headers,
                        content_type='application/json', status=self.status)
    self.assertEqual(res.headers['Access-Control-Allow-Origin'], '*')
    self.assertEqual(res.headers['Access-Control-Max-Age'], '2592000')
    self.assertEqual(self.queue.size(), 1)
def test_gzip(self, app, celery):
    cell, query = self._one_cell_query()
    data = {"items": [query]}
    body = util.encode_gzip(dumps(data).encode())
    headers = {"Content-Encoding": "gzip"}
    res = app.post(
        self.url,
        body,
        headers=headers,
        content_type="application/json",
        status=self.status,
    )
    assert res.headers["Access-Control-Allow-Origin"] == "*"
    assert res.headers["Access-Control-Max-Age"] == "2592000"
    assert self.queue(celery).size() == 1
def send(self, queue_items):
    # ignore metadata
    reports = [item['report'] for item in queue_items]
    _, bucketname, path = urlparse(self.config.url)[:3]
    # s3 key names start without a leading slash
    path = path.lstrip('/')
    if not path.endswith('/'):
        path += '/'
    year, month, day = util.utcnow().timetuple()[:3]
    # strip away queue prefix again
    parts = self.queue_key.split(':')
    source = parts[1]
    api_key = parts[2]
    obj_name = path.format(source=source, api_key=api_key,
                           year=year, month=month, day=day)
    obj_name += uuid.uuid1().hex + '.json.gz'
    try:
        data = util.encode_gzip(simplejson.dumps({'items': reports}),
                                compresslevel=7)
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucketname)
        obj = bucket.Object(obj_name)
        obj.put(
            Body=data,
            ContentEncoding='gzip',
            ContentType='application/json',
        )
        self.task.stats_client.incr(
            'data.export.upload',
            tags=self.stats_tags + ['status:success'])
    except Exception:  # pragma: no cover
        self.task.stats_client.incr(
            'data.export.upload',
            tags=self.stats_tags + ['status:failure'])
        raise
def send(self, queue_items):
    # ignore metadata
    reports = [item['report'] for item in queue_items]
    _, bucket, path = urlparse(self.config.url)[:3]
    # s3 key names start without a leading slash
    path = path.lstrip('/')
    if not path.endswith('/'):
        path += '/'
    year, month, day = util.utcnow().timetuple()[:3]
    # strip away queue prefix again
    parts = self.queue_key.split(':')
    if len(parts) == 3:
        source = parts[1]
        api_key = parts[2]
    else:  # pragma: no cover
        # BBB
        source = 'gnss'
        api_key = parts[-1]
    key_name = path.format(
        source=source, api_key=api_key, year=year, month=month, day=day)
    key_name += uuid.uuid1().hex + '.json.gz'
    try:
        conn = boto.connect_s3()
        bucket = conn.get_bucket(bucket, validate=False)
        with closing(boto.s3.key.Key(bucket)) as key:
            key.key = key_name
            key.content_encoding = 'gzip'
            key.content_type = 'application/json'
            key.set_contents_from_string(
                util.encode_gzip(simplejson.dumps({'items': reports}),
                                 compresslevel=7))
        self.task.stats_client.incr(
            'data.export.upload',
            tags=self.stats_tags + ['status:success'])
    except Exception:  # pragma: no cover
        self.task.stats_client.incr(
            'data.export.upload',
            tags=self.stats_tags + ['status:failure'])
        raise
def send(self, queue_items):
    # ignore metadata
    reports = [item["report"] for item in queue_items]
    _, bucketname, path = urlparse(self.config.url)[:3]
    # s3 key names start without a leading slash
    path = path.lstrip("/")
    if not path.endswith("/"):
        path += "/"
    year, month, day = util.utcnow().timetuple()[:3]
    # strip away queue prefix again
    parts = self.queue_key.split(":")
    source = parts[1]
    api_key = parts[2]
    obj_name = path.format(
        source=source, api_key=api_key, year=year, month=month, day=day
    )
    obj_name += uuid.uuid1().hex + ".json.gz"
    try:
        data = util.encode_gzip(
            json.dumps({"items": reports}).encode(), compresslevel=7
        )
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(bucketname)
        obj = bucket.Object(obj_name)
        obj.put(Body=data, ContentEncoding="gzip", ContentType="application/json")
        METRICS.incr(
            "data.export.upload", tags=self.stats_tags + ["status:success"]
        )
    except Exception:
        METRICS.incr(
            "data.export.upload", tags=self.stats_tags + ["status:failure"]
        )
        raise
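For illustration, the S3 object key built above combines the path template from the export URL with the current date and a uuid1 hex suffix. The template string and values below are made-up examples, not taken from any real configuration:

# Hypothetical example of the key layout produced by the send() above;
# the template and values are illustrative only.
path = "backups/{source}/{api_key}/{year}/{month}/{day}/"
obj_name = path.format(source="gnss", api_key="test", year=2023, month=7, day=4)
obj_name += "0123456789abcdef0123456789abcdef" + ".json.gz"
# -> backups/gnss/test/2023/7/4/0123456789abcdef0123456789abcdef.json.gz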
def send(self, url, data):
    headers = {
        'Content-Encoding': 'gzip',
        'Content-Type': 'application/json',
        'User-Agent': 'ichnaea',
    }
    with self.stats_client.timed(self.stats_prefix + 'upload',
                                 tags=self.stats_tags):
        response = requests.post(
            url,
            data=util.encode_gzip(data, compresslevel=5),
            headers=headers,
            timeout=60.0,
        )
    # log upload_status and trigger exception for bad responses
    # this causes the task to be re-tried
    self.stats_client.incr(
        self.stats_prefix + 'upload',
        tags=self.stats_tags + ['status:%s' % response.status_code])
    response.raise_for_status()
def send(self, queue_items):
    # ignore metadata
    reports = [item['report'] for item in queue_items]
    _, bucket, path = urlparse(self.export_queue.url)[:3]
    # s3 key names start without a leading slash
    path = path.lstrip('/')
    if not path.endswith('/'):
        path += '/'
    year, month, day = util.utcnow().timetuple()[:3]
    # strip away queue prefix again
    api_key = self.queue_key.split(':')[-1]
    key_name = path.format(api_key=api_key, year=year, month=month, day=day)
    key_name += uuid.uuid1().hex + '.json.gz'
    try:
        conn = boto.connect_s3()
        bucket = conn.get_bucket(bucket, validate=False)
        with closing(boto.s3.key.Key(bucket)) as key:
            key.key = key_name
            key.content_encoding = 'gzip'
            key.content_type = 'application/json'
            key.set_contents_from_string(
                util.encode_gzip(simplejson.dumps({'items': reports}),
                                 compresslevel=7))
        self.task.stats_client.incr(
            'data.export.upload',
            tags=self.stats_tags + ['status:success'])
    except Exception:  # pragma: no cover
        self.task.stats_client.incr(
            'data.export.upload',
            tags=self.stats_tags + ['status:failure'])
        raise
def send(self, queue_items):
    # ignore metadata
    reports = [item['report'] for item in queue_items]
    headers = {
        'Content-Encoding': 'gzip',
        'Content-Type': 'application/json',
        'User-Agent': 'ichnaea',
    }
    response = requests.post(
        self.config.url,
        data=util.encode_gzip(simplejson.dumps({'items': reports}),
                              compresslevel=5),
        headers=headers,
        timeout=60.0,
    )
    # log upload_status and trigger exception for bad responses
    # this causes the task to be re-tried
    self.task.stats_client.incr(
        'data.export.upload',
        tags=self.stats_tags + ['status:%s' % response.status_code])
    response.raise_for_status()
def test_roundtrip_gzip(self):
    data = util.decode_gzip(util.encode_gzip(b'foo'))
    assert data == u'foo'
def test_no_encoding(self):
    data = util.encode_gzip(b'\x00ab', encoding=None)
    self.assertTrue(isinstance(data, bytes))
    result = util.decode_gzip(data, encoding=None)
    self.assertTrue(isinstance(result, bytes))
    self.assertEqual(result, b'\x00ab')
def test_encode_gzip(self):
    data = util.encode_gzip("foo")
    assert data[:4] == self.gzip_foo[:4]
    assert data[-13:] == self.gzip_foo[-13:]
def test_roundtrip_gzip(self):
    data = util.decode_gzip(util.encode_gzip(b"foo"))
    assert data == b"foo"
def test_encode_gzip(self):
    data = util.encode_gzip(b"foo")
    # Test around the 4-byte timestamp
    assert data[:4] == self.gzip_foo[:4]
    assert data[8:] == self.gzip_foo[8:]
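The comparison above skips bytes 4 through 8 because the gzip header stores a 4-byte modification timestamp (MTIME) at offset 4, so two compressions of the same payload differ only there. With the standard library this can be made deterministic by pinning mtime:

# The gzip header is: 2-byte magic, method byte, flags byte, then the
# 4-byte MTIME. Pinning mtime makes output reproducible byte-for-byte.
import gzip

a = gzip.compress(b"foo", mtime=0)
b = gzip.compress(b"foo", mtime=0)
assert a == b
assert a[:4] == b"\x1f\x8b\x08\x00"  # magic, deflate method, no flags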
def test_roundtrip_gzip(self):
    data = util.decode_gzip(util.encode_gzip(b'foo'))
    self.assertEqual(data, u'foo')
def test_no_encoding(self):
    data = util.encode_gzip(b'\x00ab', encoding=None)
    assert isinstance(data, bytes)
    result = util.decode_gzip(data, encoding=None)
    assert isinstance(result, bytes)
    assert result == b'\x00ab'
def test_encode_gzip_bytes(self):
    data = util.encode_gzip(b'foo')
    assert data[:4] == self.gzip_foo[:4]
    assert data[-13:] == self.gzip_foo[-13:]
def test_encode_gzip_bytes(self):
    data = util.encode_gzip(b'foo')
    self.assertEqual(data[:4], self.gzip_foo[:4])
    self.assertEqual(data[-13:], self.gzip_foo[-13:])