def put_records(stream_name, records):
    """Deliver a list of Firehose records to each configured destination
    (Elasticsearch and/or S3) of the given delivery stream.

    :param stream_name: name of the Firehose delivery stream
    :param records: list of record dicts with base64-encoded 'Data' payloads
    :raises Exception: re-raises any error from the ES/S3 client after logging
    """
    stream = get_stream(stream_name)
    for dest in stream['Destinations']:
        if 'ESDestinationDescription' in dest:
            es_dest = dest['ESDestinationDescription']
            es_index = es_dest['IndexName']
            es_type = es_dest['TypeName']
            es = connect_elasticsearch()
            for record in records:
                # random document id - Firehose does not carry a natural key
                obj_id = uuid.uuid4()
                data = base64.b64decode(record['Data'])
                body = json.loads(data)
                try:
                    es.create(index=es_index, doc_type=es_type, id=obj_id, body=body)
                except Exception as e:
                    LOG.error('Unable to put record to stream: %s %s' % (e, traceback.format_exc()))
                    raise e
        if 'S3DestinationDescription' in dest:
            s3_dest = dest['S3DestinationDescription']
            bucket = bucket_name(s3_dest['BucketARN'])
            # 'Prefix' is optional in S3 destination configs - avoid KeyError
            prefix = s3_dest.get('Prefix', '')
            s3 = get_s3_client()
            for record in records:
                data = base64.b64decode(record['Data'])
                obj_name = str(uuid.uuid4())
                obj_path = '%s%s' % (prefix, obj_name)
                try:
                    s3.Object(bucket, obj_path).put(Body=data)
                except Exception as e:
                    LOG.error('Unable to put record to stream: %s %s' % (e, traceback.format_exc()))
                    raise e
def check_infra_elasticsearch(expect_shutdown=False):
    """Best-effort Elasticsearch liveness probe: query the index aliases.

    A failing check is swallowed and simply leaves `out` as None.
    """
    out = None
    try:
        # check Elasticsearch
        es = aws_stack.connect_elasticsearch()
        out = es.indices.get_aliases().keys()
    # NOTE: was `except Exception, e` - Python 2 syntax, a SyntaxError on
    # Python 3; the bound name was unused, so drop it entirely
    except Exception:
        pass
def check_infra_elasticsearch(expect_shutdown=False, print_error=False):
    """Best-effort Elasticsearch liveness probe: list aliases via the cat API.

    :param print_error: if True, print the failure details instead of
        silently swallowing them
    """
    out = None
    try:
        # check Elasticsearch
        es = aws_stack.connect_elasticsearch()
        out = es.cat.aliases()
    # fixed: `except Exception, e` is Python 2 syntax (SyntaxError on Python 3)
    except Exception as e:
        if print_error:
            print('Elasticsearch health check failed: %s %s' % (e, traceback.format_exc()))
def put_records(stream_name, records):
    """Deliver Firehose records (DirectPut 'Data' or Kinesis-sourced 'data')
    to each configured destination (Elasticsearch and/or S3) of the stream.

    :param stream_name: name of the Firehose delivery stream
    :param records: list of record dicts with base64-encoded payloads
    :raises Exception: re-raises any error from the ES/S3 client after logging
    """
    stream = get_stream(stream_name)
    for dest in stream['Destinations']:
        if 'ESDestinationDescription' in dest:
            es_dest = dest['ESDestinationDescription']
            es_index = es_dest['IndexName']
            es_type = es_dest['TypeName']
            es = connect_elasticsearch()
            for record in records:
                obj_id = uuid.uuid4()
                # default to an empty JSON doc, so a record carrying neither
                # 'Data' nor 'data' no longer raises NameError below
                data = '{}'
                # DirectPut
                if 'Data' in record:
                    data = base64.b64decode(record['Data'])
                # KinesisAsSource
                elif 'data' in record:
                    data = base64.b64decode(record['data'])
                body = json.loads(data)
                try:
                    es.create(index=es_index, doc_type=es_type, id=obj_id, body=body)
                except Exception as e:
                    LOG.error('Unable to put record to stream: %s %s' % (e, traceback.format_exc()))
                    raise e
        if 'S3DestinationDescription' in dest:
            s3_dest = dest['S3DestinationDescription']
            bucket = bucket_name(s3_dest['BucketARN'])
            prefix = s3_dest.get('Prefix', '')
            s3 = get_s3_client()
            for record in records:
                # default to an empty payload to avoid NameError (see above)
                data = b''
                # DirectPut
                if 'Data' in record:
                    data = base64.b64decode(record['Data'])
                # KinesisAsSource
                elif 'data' in record:
                    data = base64.b64decode(record['data'])
                obj_name = str(uuid.uuid4())
                # ensure exactly one '/' between prefix and object name
                obj_path = '%s%s%s' % (prefix, '' if prefix.endswith('/') else '/', obj_name)
                try:
                    s3.Object(bucket, obj_path).put(Body=data)
                except Exception as e:
                    LOG.error('Unable to put record to stream: %s %s' % (e, traceback.format_exc()))
                    raise e
def check_infra_elasticsearch(expect_shutdown=False, print_error=False):
    """Assert the Elasticsearch infrastructure state: up (cat-aliases call
    returns a string) unless `expect_shutdown` is set, in which case the
    probe must have failed and left no result."""
    result = None
    try:
        # use the cat/aliases endpoint as a cheap liveness probe
        client = aws_stack.connect_elasticsearch()
        result = client.cat.aliases()
    except Exception as e:
        if print_error:
            LOGGER.error('Elasticsearch health check failed: %s %s' % (e, traceback.format_exc()))
    if expect_shutdown:
        assert result is None
    else:
        assert isinstance(result, six.string_types)
def put_records(stream_name, records):
    """Deliver Firehose records to each configured destination of the stream
    (Elasticsearch and/or S3; S3 records are batched into a single object).

    :param stream_name: name of the Firehose delivery stream
    :param records: list of record dicts with base64-encoded payloads, keyed
        'Data' (DirectPut) or 'data' (KinesisAsSource)
    :return: dict with a generated 'RecordId', or a not-found error response
    :raises Exception: re-raises any error from the ES/S3 client after logging
    """
    stream = get_stream(stream_name)
    if not stream:
        return error_not_found(stream_name)
    for dest in stream['Destinations']:
        if 'ESDestinationDescription' in dest:
            es_dest = dest['ESDestinationDescription']
            es_index = es_dest['IndexName']
            es_type = es_dest.get('TypeName')
            es = connect_elasticsearch(endpoint=es_dest.get('ClusterEndpoint'), domain=es_dest.get('DomainARN'))
            for record in records:
                obj_id = uuid.uuid4()
                # default to an empty JSON doc, so a record carrying neither
                # 'Data' nor 'data' no longer raises NameError below
                data = '{}'
                # DirectPut
                if 'Data' in record:
                    data = base64.b64decode(record['Data'])
                # KinesisAsSource
                elif 'data' in record:
                    data = base64.b64decode(record['data'])
                body = json.loads(data)
                try:
                    es.create(index=es_index, doc_type=es_type, id=obj_id, body=body)
                except Exception as e:
                    LOG.error('Unable to put record to stream: %s %s' % (e, traceback.format_exc()))
                    raise e
        if 'S3DestinationDescription' in dest:
            s3_dest = dest['S3DestinationDescription']
            bucket = bucket_name(s3_dest['BucketARN'])
            prefix = s3_dest.get('Prefix', '')
            s3 = connect_to_resource('s3')
            # tolerate records without a payload (empty contribution)
            # instead of raising KeyError on r['data']
            batched_data = b''.join([
                base64.b64decode(r.get('Data') or r.get('data') or '') for r in records
            ])
            obj_path = get_s3_object_path(stream_name, prefix)
            try:
                s3.Object(bucket, obj_path).put(Body=batched_data)
            except Exception as e:
                LOG.error('Unable to put record to stream: %s %s' % (e, traceback.format_exc()))
                raise e
    return {'RecordId': str(uuid.uuid4())}
def put_records(stream_name: str, records: List[Dict]) -> Dict:
    """Put a list of records to the firehose stream - either directly from a
    PutRecord API call, or received from an underlying Kinesis stream (if
    'KinesisStreamAsSource' is configured).

    Delivers to every configured destination: Elasticsearch (one document per
    record), S3 (all records batched into one object), and/or an HTTP endpoint.
    Returns a dict with a generated 'RecordId', or a not-found error response
    if the stream does not exist. Re-raises any delivery error after logging.
    """
    stream = get_stream(stream_name)
    if not stream:
        return error_not_found(stream_name)
    # preprocess records, add any missing attributes
    add_missing_record_attributes(records)
    for dest in stream.get("Destinations", []):
        # apply processing steps to incoming items
        proc_config = {}
        for child in dest.values():
            # pick up the ProcessingConfiguration from whichever destination
            # sub-config defines one (keeps the last one found)
            proc_config = (isinstance(child, dict) and child.get("ProcessingConfiguration") or proc_config)
        if proc_config.get("Enabled") is not False:
            for processor in proc_config.get("Processors", []):
                # TODO: run processors asynchronously, to avoid request timeouts on PutRecord API calls
                records = preprocess_records(processor, records)
        if "ESDestinationDescription" in dest:
            es_dest = dest["ESDestinationDescription"]
            es_index = es_dest["IndexName"]
            es_type = es_dest.get("TypeName")
            es = connect_elasticsearch(endpoint=es_dest.get("ClusterEndpoint"), domain=es_dest.get("DomainARN"))
            for record in records:
                # random document id - Firehose records carry no natural key
                obj_id = uuid.uuid4()
                # fall back to an empty JSON doc if the record has no payload
                data = "{}"
                # DirectPut
                if "Data" in record:
                    data = base64.b64decode(record["Data"])
                # KinesisAsSource
                elif "data" in record:
                    data = base64.b64decode(record["data"])
                body = json.loads(data)
                try:
                    es.create(index=es_index, doc_type=es_type, id=obj_id, body=body)
                except Exception as e:
                    LOG.error("Unable to put record to stream: %s %s" % (e, traceback.format_exc()))
                    raise e
        if "S3DestinationDescription" in dest:
            s3_dest = dest["S3DestinationDescription"]
            bucket = s3_bucket_name(s3_dest["BucketARN"])
            prefix = s3_dest.get("Prefix", "")
            s3 = connect_to_resource("s3")
            # all records of this call are concatenated into a single S3 object
            batched_data = b"".join([
                base64.b64decode(r.get("Data") or r["data"]) for r in records
            ])
            obj_path = get_s3_object_path(stream_name, prefix)
            try:
                s3.Object(bucket, obj_path).put(Body=batched_data)
            except Exception as e:
                LOG.error("Unable to put record to stream: %s %s" % (e, traceback.format_exc()))
                raise e
        if "HttpEndpointDestinationDescription" in dest:
            http_dest = dest["HttpEndpointDestinationDescription"]
            end_point = http_dest["EndpointConfiguration"]
            url = end_point["Url"]
            # payload shape follows the Firehose HTTP endpoint delivery format
            record_to_send = {
                "requestId": str(uuid.uuid4()),
                "timestamp": (int(time.time())),
                "records": [],
            }
            for record in records:
                # forward the still-base64-encoded payload as-is
                data = record.get("Data") or record.get("data")
                record_to_send["records"].append({"data": data})
            headers = {
                "Content-Type": "application/json",
            }
            try:
                requests.post(url, json=record_to_send, headers=headers)
            except Exception as e:
                LOG.info(
                    "Unable to put Firehose records to HTTP endpoint %s: %s %s" % (url, e, traceback.format_exc()))
                raise e
    return {"RecordId": str(uuid.uuid4())}
def put_records(stream_name, records):
    """Deliver Firehose records to every configured destination of the stream:
    Elasticsearch (one document per record), S3 (records batched into a single
    object), and/or an HTTP endpoint.

    :param stream_name: name of the Firehose delivery stream
    :param records: list of record dicts with base64-encoded payloads, keyed
        'Data' (DirectPut) or 'data' (KinesisAsSource)
    :return: dict with a generated 'RecordId', or a not-found error response
    :raises Exception: re-raises any delivery error after logging
    """
    stream = get_stream(stream_name)
    if not stream:
        return error_not_found(stream_name)
    for dest in stream.get("Destinations", []):
        if "ESDestinationDescription" in dest:
            es_dest = dest["ESDestinationDescription"]
            es_index = es_dest["IndexName"]
            es_type = es_dest.get("TypeName")
            es = connect_elasticsearch(endpoint=es_dest.get("ClusterEndpoint"), domain=es_dest.get("DomainARN"))
            for record in records:
                obj_id = uuid.uuid4()
                # default to an empty JSON doc, so a record carrying neither
                # 'Data' nor 'data' no longer raises NameError below
                data = "{}"
                # DirectPut
                if "Data" in record:
                    data = base64.b64decode(record["Data"])
                # KinesisAsSource
                elif "data" in record:
                    data = base64.b64decode(record["data"])
                body = json.loads(data)
                try:
                    es.create(index=es_index, doc_type=es_type, id=obj_id, body=body)
                except Exception as e:
                    LOG.error("Unable to put record to stream: %s %s" % (e, traceback.format_exc()))
                    raise e
        if "S3DestinationDescription" in dest:
            s3_dest = dest["S3DestinationDescription"]
            bucket = bucket_name(s3_dest["BucketARN"])
            prefix = s3_dest.get("Prefix", "")
            s3 = connect_to_resource("s3")
            # tolerate records without a payload (empty contribution)
            # instead of raising KeyError on r["data"]
            batched_data = b"".join([
                base64.b64decode(r.get("Data") or r.get("data") or "") for r in records
            ])
            obj_path = get_s3_object_path(stream_name, prefix)
            try:
                s3.Object(bucket, obj_path).put(Body=batched_data)
            except Exception as e:
                LOG.error("Unable to put record to stream: %s %s" % (e, traceback.format_exc()))
                raise e
        if "HttpEndpointDestinationDescription" in dest:
            http_dest = dest["HttpEndpointDestinationDescription"]
            end_point = http_dest["EndpointConfiguration"]
            url = end_point["Url"]
            record_to_send = {
                "requestId": str(uuid.uuid4()),
                "timestamp": (int(time.time())),
                "records": [],
            }
            for record in records:
                # forward the still-base64-encoded payload as-is
                data = record.get("Data") or record.get("data")
                record_to_send["records"].append({"data": data})
            headers = {
                "Content-Type": "application/json",
            }
            try:
                requests.post(url, json=record_to_send, headers=headers)
            except Exception as e:
                LOG.info(
                    "Unable to put Firehose records to HTTP endpoint %s: %s %s" % (url, e, traceback.format_exc()))
                raise e
    return {"RecordId": str(uuid.uuid4())}