def main():
    args = parseArguments()
    host = args.host
    port = args.port

    # Create the REST API client
    engine_client = EngineApiClient(host, BASE_URL, port)

    while True:
        (http_status_code, response) = engine_client.getJobs()
        if http_status_code != 200:
            print(http_status_code, json.dumps(response))
            break

        jobs = response['documents']
        if len(jobs) == 0:
            print "Deleted all jobs"
            break

        print "Deleting %d jobs" % len(jobs),
        for job in jobs:
            (http_status_code, response) = engine_client.delete(job['id'])
            if http_status_code != 200:
                print(http_status_code, json.dumps(response))
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        print

def main():
    args = parseArguments()

    # read the config file
    config = ConfigParser.RawConfigParser()
    try:
        # insert a section header into the config so
        # ConfigParser will read it without complaint
        with open(args.config, "r") as config_file:
            ini_str = '[root]\n' + config_file.read()
            ini_fp = StringIO.StringIO(ini_str)
            config.readfp(ini_fp)
    except IOError:
        print "Error opening file " + args.config
        return

    try:
        region = config.get('root', 'region')
        access_id = config.get('root', 'aws_access_key_id')
        secret_key = config.get('root', 'aws_secret_access_key')
    except ConfigParser.NoOptionError as e:
        print e
        return

    # AWS CloudWatch connection
    cloudwatch_conn = boto.ec2.cloudwatch.connect_to_region(region,
        aws_access_key_id=access_id, aws_secret_access_key=secret_key)
    if cloudwatch_conn == None:
        print "Error unknown region " + region
        return

    # The Prelert REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)

    # If no job ID is supplied create a new job
    job_id = createJob(args.job_id, engine_client)
    if job_id == None:
        return

    # default start date is None meaning run realtime
    start_date = None
    if args.start_date != None:
        start_date = datetime.strptime(args.start_date, "%Y-%m-%d")
        start_date = replaceTimezoneWithUtc(start_date)

    if start_date == None:
        runRealtime(job_id, cloudwatch_conn, engine_client)
    else:
        # historical mode, check for an end date
        end_date = replaceTimezoneWithUtc(datetime.utcnow())
        if args.end_date != None:
            end_date = datetime.strptime(args.end_date, "%Y-%m-%d")
            end_date = replaceTimezoneWithUtc(end_date)

        runHistorical(job_id, start_date, end_date, cloudwatch_conn, engine_client)

    print "Closing job..."
    engine_client.close(job_id)

def main(): setupLogging() args = parseArguments() job_id = args.jobid # Create the REST API client engine_client = EngineApiClient(args.host, BASE_URL, args.port) logging.info("Subscribing to job '" + job_id + "' for alerts") printHeader() while True: try: (http_status_code, response) = engine_client.alerts_longpoll(job_id, normalized_probability_threshold=args.normalizedProbability, anomaly_score_threshold=args.anomalyScore, timeout=args.timeout) if http_status_code != 200: print (http_status_code, json.dumps(response)) break if response['timeout'] == False: printAlert(response) except KeyboardInterrupt: print "Exiting script..."
def main():
    args = parseArguments()
    host = args.host
    port = args.port
    base_url = BASE_URL

    # Create the REST API client
    engine_client = EngineApiClient(host, base_url, port)

    while True:
        (http_status_code, response) = engine_client.getJobs()
        if http_status_code != 200:
            print (http_status_code, json.dumps(response))
            break

        jobs = response['documents']
        if len(jobs) == 0:
            print "Deleted all jobs"
            break

        print "Deleting %d jobs" % len(jobs),
        for job in jobs:
            (http_status_code, response) = engine_client.delete(job['id'])
            if http_status_code != 200:
                print (http_status_code, json.dumps(response))
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        print

def main():
    setupLogging()
    args = parseArguments()
    job_id = args.jobid

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    # Get all the buckets up to now
    logging.info("Get result buckets for job " + job_id)
    (http_status_code, response) = engine_client.getAllBuckets(job_id,
        include_records=False,
        anomaly_score_filter_value=args.anomalyScore,
        normalized_probability_filter_value=args.normalizedProbability)
    if http_status_code != 200:
        print (http_status_code, json.dumps(response))
        return

    printHeader()
    printBuckets(response)

    if args.continue_poll:
        if len(response) > 0:
            next_bucket_id = int(response[-1]['id']) + 1
        else:
            next_bucket_id = None

        while True:
            # Wait POLL_INTERVAL_SECS then query for any new buckets
            time.sleep(POLL_INTERVAL_SECS)

            (http_status_code, response) = engine_client.getBucketsByDate(job_id=job_id,
                start_date=str(next_bucket_id), end_date=None,
                include_records=False,
                anomaly_score_filter_value=args.anomalyScore,
                normalized_probability_filter_value=args.normalizedProbability)
            if http_status_code != 200:
                print (http_status_code, json.dumps(response))
                break

            printBuckets(response)
            if len(response) > 0:
                next_bucket_id = int(response[-1]['id']) + 1

def main():
    setupLogging()
    args = parseArguments()

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    job_config = '{"analysisConfig" : {\
        "bucketSpan":3600,\
        "detectors" :[{"function":"metric","fieldName":"responsetime","byFieldName":"airline"}] },\
        "dataDescription" : {"fieldDelimiter":",", "timeField":"time", "timeFormat":"yyyy-MM-dd HH:mm:ssX"} }'

    logging.info("Creating job")
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        print (http_status_code, json.dumps(response))
        return

    job_id = response['id']

    logging.info("Uploading data to " + job_id)
    file = open(args.file, 'rb')
    (http_status_code, response) = engine_client.upload(job_id, file)
    if http_status_code != 202:
        print (http_status_code, json.dumps(response))
        return

    logging.info("Closing job " + job_id)
    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print (http_status_code, json.dumps(response))
        return

    logging.info("Get result buckets for job " + job_id)
    (http_status_code, response) = engine_client.getAllBuckets(job_id)
    if http_status_code != 200:
        print (http_status_code, json.dumps(response))
    else:
        print "Date,Anomaly Score,Max Normalized Probability"
        for bucket in response:
            print "{0},{1},{2}".format(bucket['timestamp'],
                bucket['anomalyScore'], bucket['maxNormalizedProbability'])

def main():
    setupLogging()
    args = parseArguments()
    host = args.host
    port = args.port
    base_url = BASE_URL
    job_id = args.jobid

    # Create the REST API client
    engine_client = EngineApiClient(host, base_url, port)

    # Get all the buckets up to now
    logging.info("Get result buckets for job " + job_id)
    (http_status_code, response) = engine_client.getAllBuckets(job_id)
    if http_status_code != 200:
        print (http_status_code, json.dumps(response))
        return

    print "Date,BucketId,AnomalyScore"
    for bucket in response:
        print "{0},{1},{2}".format(bucket['timestamp'], bucket['id'], bucket['anomalyScore'])

    if len(response) > 0:
        next_bucket_id = int(response[-1]['id']) + 1
    else:
        next_bucket_id = None

    # Wait POLL_INTERVAL_SECS then query for any new buckets
    while True:
        time.sleep(POLL_INTERVAL_SECS)

        (http_status_code, response) = engine_client.getBucketsByDate(job_id=job_id,
            start_date=str(next_bucket_id), end_date=None)
        if http_status_code != 200:
            print (http_status_code, json.dumps(response))
            break

        for bucket in response:
            print "{0},{1},{2}".format(bucket['timestamp'], bucket['id'], bucket['anomalyScore'])

        if len(response) > 0:
            next_bucket_id = int(response[-1]['id']) + 1

def main():
    setupLogging()
    args = parseArguments()
    job_id = args.jobid

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    # Get all the records up to now
    logging.info("Get records for job " + job_id)

    skip = 0
    take = 200
    (http_status_code, response) = engine_client.getRecords(job_id, skip, take,
        normalized_probability_filter_value=args.normalizedProbability,
        anomaly_score_filter_value=args.anomalyScore)
    if http_status_code != 200:
        print(http_status_code, json.dumps(response))
        return

    hit_count = int(response['hitCount'])

    printHeader()
    printRecords(response['documents'])

    while (skip + take) < hit_count:
        skip += take
        (http_status_code, response) = engine_client.getRecords(job_id, skip, take,
            normalized_probability_filter_value=args.normalizedProbability,
            anomaly_score_filter_value=args.anomalyScore)
        if http_status_code != 200:
            print(http_status_code, json.dumps(response))
            return

        printRecords(response['documents'])

def main():
    setupLogging()
    args = parseArguments()
    job_id = args.jobid

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    # Get all the records up to now
    logging.info("Get records for job " + job_id)

    skip = 0
    take = 200
    (http_status_code, response) = engine_client.getRecords(job_id, skip, take,
        normalized_probability_filter_value=args.normalizedProbability,
        anomaly_score_filter_value=args.anomalyScore)
    if http_status_code != 200:
        print (http_status_code, json.dumps(response))
        return

    hit_count = int(response['hitCount'])

    printHeader()
    printRecords(response['documents'])

    while (skip + take) < hit_count:
        skip += take
        (http_status_code, response) = engine_client.getRecords(job_id, skip, take,
            normalized_probability_filter_value=args.normalizedProbability,
            anomaly_score_filter_value=args.anomalyScore)
        if http_status_code != 200:
            print (http_status_code, json.dumps(response))
            return

        printRecords(response['documents'])

def main():
    setupLogging()
    args = parseArguments()

    # Create the REST API client
    engine_client = EngineApiClient(args.host, BASE_URL, args.port)

    job_config = '{"analysisConfig" : {\
        "bucketSpan":3600,\
        "detectors" :[{"function":"metric","fieldName":"responsetime","byFieldName":"airline"}] },\
        "dataDescription" : {"fieldDelimiter":",", "timeField":"time", "timeFormat":"yyyy-MM-dd HH:mm:ssX"} }'

    logging.info("Creating job")
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        print(http_status_code, json.dumps(response))
        return

    job_id = response['id']

    logging.info("Uploading data to " + job_id)
    file = open(args.file, 'rb')
    (http_status_code, response) = engine_client.upload(job_id, file)
    if http_status_code != 202:
        print(http_status_code, json.dumps(response))
        return

    logging.info("Closing job " + job_id)
    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print(http_status_code, json.dumps(response))
        return

    logging.info("Get result buckets for job " + job_id)
    (http_status_code, response) = engine_client.getAllBuckets(job_id)
    if http_status_code != 200:
        print(http_status_code, json.dumps(response))
    else:
        print "Date,Anomaly Score,Max Normalized Probability"
        for bucket in response:
            print "{0},{1},{2}".format(bucket['timestamp'],
                bucket['anomalyScore'], bucket['maxNormalizedProbability'])

def main(): setupLogging() args = parseArguments() # read the config file try: with open(args.file, "r") as config_file: config = json.load(config_file) except IOError: print "Error opening file " + args.file return # The ElasticSearch client es_client = Elasticsearch(args.es_host + ":" + str(args.es_port)) # The REST API client engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port) job_id = args.job_id if job_id == None: (http_status, response) = engine_client.createJob(json.dumps(config['job_config'])) job_id = response['id'] print "Created job with id " + str(job_id) print "Using job id " + job_id data_type = config['type'] raw_query = insertDateRangeFilter(config['search']) timezone = UTC() doc_count = 0 try: query_end_time = datetime.now(timezone) - timedelta( seconds=args.update_interval) while True: query_start_time = query_end_time query_end_time = datetime.now(timezone) query_str = json.dumps( replaceDateArgs(raw_query, query_start_time, query_end_time)) index_name = logstashIndex(query_start_time, args.update_interval) skip = 0 try: # Query the documents from ElasticSearch and write to the Engine hits = es_client.search(index=index_name, doc_type=data_type, body=query_str, from_=skip, size=MAX_DOC_TAKE) except elasticsearch.exceptions.NotFoundError: print "Error: missing logstash index '" + index_name + "'" # upload to the API content = json.dumps(elasticSearchDocsToDicts( hits['hits']['hits'])) (http_status, response) = engine_client.upload(job_id, content) if http_status != 202: print "Error uploading log content to the Engine" print http_status, json.dumps(response) doc_count += len(hits['hits']['hits']) # get any other docs hitcount = int(hits['hits']['total']) while hitcount > (skip + MAX_DOC_TAKE): skip += MAX_DOC_TAKE hits = es_client.search(index=index_name, doc_type=data_type, body=query_str, from_=skip, size=MAX_DOC_TAKE) content = json.dumps( elasticSearchDocsToDicts(hits['hits']['hits'])) (http_status, response) = engine_client.upload(job_id, content) if http_status != 202: print "Error uploading log content to the Engine" print json.dumps(response) doc_count += len(hits['hits']['hits']) print "Uploaded {0} records".format(str(doc_count)) duration = datetime.now(timezone) - query_end_time sleep_time = max(args.update_interval - duration.seconds, 0) print "sleeping for " + str(sleep_time) + " seconds" if sleep_time > 0.0: time.sleep(sleep_time) except KeyboardInterrupt: print "Interrupt caught closing job..." engine_client.close(job_id)
def main(): setupLogging() args = parseArguments() # read the config file try: with open(args.file, "r") as config_file: config = json.load(config_file) except IOError: print "Error opening file " + args.file return # default start date is None meaning 'all time' start_date = None if args.start_date != None: start_date = datetime.strptime(args.start_date, "%Y-%m-%d") # default end date is today end_date = datetime.today() if args.end_date != None: end_date = datetime.strptime(args.end_date, "%Y-%m-%d") # The ElasticSearch client es_client = Elasticsearch(args.es_host + ":" + str(args.es_port)) data_type = config['type'] search_body = json.dumps(config['search']) # If no start date find the first logstash index containing our docs if start_date == None: start_date = findDateOfFirstIndex(es_client, data_type, search_body) if start_date == None: print "No documents found with the query " + search_body return # The REST API client engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port) (http_status, response) = engine_client.createJob(json.dumps(config['job_config'])) if http_status != 201: print "Error creatting job" print http_status, json.dumps(response) return job_id = response['id'] print "Created job with id " + str(job_id) doc_count = 0 for index_name in nextLogStashIndex(start_date, end_date): print "Reading from index " + index_name skip = 0 try: # Query the documents from ElasticSearch and write to the Engine hits = es_client.search(index=index_name, doc_type=data_type, body=search_body, from_=skip, size=MAX_DOC_TAKE) except elasticsearch.exceptions.NotFoundError: # Index not found try the next one continue # upload to the API content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits'])) (http_status, response) = engine_client.upload(job_id, content) if http_status != 202: print "Error uploading log content to the Engine" print http_status, json.dumps(response) continue doc_count += len(hits['hits']['hits']) # get any other docs hitcount = int(hits['hits']['total']) while hitcount > (skip + MAX_DOC_TAKE): skip += MAX_DOC_TAKE hits = es_client.search(index=index_name, doc_type=data_type, body=search_body, from_=skip, size=MAX_DOC_TAKE) content = json.dumps(elasticSearchDocsToDicts( hits['hits']['hits'])) (http_status, response) = engine_client.upload(job_id, content) if http_status != 202: print json.dumps(response) continue doc_count += len(hits['hits']['hits']) print "Uploaded {0} records".format(str(doc_count)) (http_status, response) = engine_client.close(job_id) if http_status != 202: print "Error closing job" print http_status, json.dumps(response) return print "{0} records successfully written to job {1}".format( str(doc_count), job_id)
def main():
    args = parseArguments()

    start_date = datetime(2014, 5, 18, 0, 0, 0, 0, UtcOffset())
    # interval between the generated timestamps for the records
    interval = timedelta(seconds=300)

    if args.duration <= 0:
        end_date = datetime.now(UtcOffset())
    else:
        duration = timedelta(hours=args.duration)
        end_date = start_date + duration

    job_config = '{\
        "analysisConfig" : {\
            "bucketSpan":3600,\
            "detectors" :[\
                {"fieldName":"In Discards","byFieldName":"host"},\
                {"fieldName":"In Octets","byFieldName":"host"},\
                {"fieldName":"Out Discards","byFieldName":"host"},\
                {"fieldName":"Out Octets","byFieldName":"host"} \
            ]\
        },\
        "dataDescription" : {\
            "fieldDelimiter":",",\
            "timeField":"time",\
            "timeFormat":"yyyy-MM-dd\'T\'HH:mm:ssXXX"\
        }\
    }'

    engine_client = EngineApiClient(args.host, BASE_URL, args.port)
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        print (http_status_code, json.dumps(response))
        return

    job_id = response['id']
    print 'Job created with Id = ' + job_id

    # get the csv header (the first record generated)
    record_generator = generateRecords(args.file, start_date, interval, end_date)
    header = ','.join(next(record_generator))
    header += '\n'

    count = 0
    try:
        # for the results
        next_bucket_id = 1

        print
        print "Date,Anomaly Score,Max Normalized Probability"

        data = header
        for record in record_generator:
            # format as csv and append new line
            csv = ','.join(record) + '\n'
            data += csv
            # print data

            count += 1
            if count == 100:
                (http_status_code, response) = engine_client.upload(job_id, data)
                if http_status_code != 202:
                    print (http_status_code, json.dumps(response))
                    break

                # get the latest results...
                (http_status_code, response) = engine_client.getBucketsByDate(job_id=job_id,
                    start_date=str(next_bucket_id), end_date=None)
                if http_status_code != 200:
                    print (http_status_code, json.dumps(response))
                    break

                # and print them
                for bucket in response:
                    print "{0},{1},{2}".format(bucket['timestamp'],
                        bucket['anomalyScore'], bucket['maxNormalizedProbability'])

                if len(response) > 0:
                    next_bucket_id = int(response[-1]['id']) + 1

                # must send the header every time
                data = header
                count = 0

                # sleep a little while (optional this can be removed)
                #time.sleep(0.1)
    except KeyboardInterrupt:
        print "Keyboard interrupt closing job..."

    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print (http_status_code, json.dumps(response))

def main():
    args = parseArguments()

    # read the config file
    config = ConfigParser.RawConfigParser()
    try:
        # insert a section header into the config so
        # ConfigParser will read it without complaint
        with open(args.config, "r") as config_file:
            ini_str = '[root]\n' + config_file.read()
            ini_fp = StringIO.StringIO(ini_str)
            config.readfp(ini_fp)
    except IOError:
        print "Error opening file " + args.config
        return

    try:
        region = config.get('root', 'region')
        access_id = config.get('root', 'aws_access_key_id')
        secret_key = config.get('root', 'aws_secret_access_key')
    except ConfigParser.NoOptionError as e:
        print e
        return

    # AWS CloudWatch connection
    cloudwatch_conn = boto.ec2.cloudwatch.connect_to_region(region,
        aws_access_key_id=access_id, aws_secret_access_key=secret_key)
    if cloudwatch_conn == None:
        print "Error unknown region " + region
        return

    # The Prelert REST API client
    engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port)

    # If no job ID is supplied create a new job
    job_id = createJob(args.job_id, engine_client)
    if job_id == None:
        return

    # default start date is None meaning run realtime
    start_date = None
    if args.start_date != None:
        start_date = datetime.strptime(args.start_date, "%Y-%m-%d")
        start_date = replaceTimezoneWithUtc(start_date)

    if start_date == None:
        runRealtime(job_id, cloudwatch_conn, engine_client)
    else:
        # historical mode, check for an end date
        end_date = replaceTimezoneWithUtc(datetime.utcnow())
        if args.end_date != None:
            end_date = datetime.strptime(args.end_date, "%Y-%m-%d")
            end_date = replaceTimezoneWithUtc(end_date)

        runHistorical(job_id, start_date, end_date, cloudwatch_conn, engine_client)

    print "Closing job..."
    engine_client.close(job_id)

def main():
    args = parseArguments()

    start_date = datetime(2014, 5, 18, 0, 0, 0, 0, UtcOffset())
    # interval between the generated timestamps for the records
    interval = timedelta(seconds=300)

    if args.duration <= 0:
        end_date = datetime.now(UtcOffset())
    else:
        duration = timedelta(hours=args.duration)
        end_date = start_date + duration

    job_config = '{\
        "analysisConfig" : {\
            "bucketSpan":3600,\
            "detectors" :[\
                {"fieldName":"In Discards","byFieldName":"host"},\
                {"fieldName":"In Octets","byFieldName":"host"},\
                {"fieldName":"Out Discards","byFieldName":"host"},\
                {"fieldName":"Out Octets","byFieldName":"host"} \
            ]\
        },\
        "dataDescription" : {\
            "fieldDelimiter":",",\
            "timeField":"time",\
            "timeFormat":"yyyy-MM-dd\'T\'HH:mm:ssXXX"\
        }\
    }'

    engine_client = EngineApiClient(args.host, BASE_URL, args.port)
    (http_status_code, response) = engine_client.createJob(job_config)
    if http_status_code != 201:
        print(http_status_code, json.dumps(response))
        return

    job_id = response['id']
    print 'Job created with Id = ' + job_id

    # get the csv header (the first record generated)
    record_generator = generateRecords(args.file, start_date, interval, end_date)
    header = ','.join(next(record_generator))
    header += '\n'

    count = 0
    try:
        # for the results
        next_bucket_id = 1

        print
        print "Date,Bucket ID,Anomaly Score,Max Normalized Probability"

        data = header
        for record in record_generator:
            # format as csv and append new line
            csv = ','.join(record) + '\n'
            data += csv
            # print data

            count += 1
            if count == 100:
                (http_status_code, response) = engine_client.upload(job_id, data)
                if http_status_code != 202:
                    print(http_status_code, json.dumps(response))
                    break

                # get the latest results...
                (http_status_code, response) = engine_client.getBucketsByDate(
                    job_id=job_id, start_date=str(next_bucket_id), end_date=None)
                if http_status_code != 200:
                    print(http_status_code, json.dumps(response))
                    break

                # and print them
                for bucket in response:
                    print "{0},{1},{2},{3}".format(bucket['timestamp'], bucket['id'],
                        bucket['anomalyScore'], bucket['maxNormalizedProbability'])

                if len(response) > 0:
                    next_bucket_id = int(response[-1]['id']) + 1

                # must send the header every time
                data = header
                count = 0

                # sleep a little while (optional this can be removed)
                #time.sleep(0.1)
    except KeyboardInterrupt:
        print "Keyboard interrupt closing job..."

    (http_status_code, response) = engine_client.close(job_id)
    if http_status_code != 202:
        print(http_status_code, json.dumps(response))

def main(): setupLogging() args = parseArguments() # read the config file try: with open(args.file, "r") as config_file: config = json.load(config_file) except IOError: print "Error opening file " + args.file return # The ElasticSearch client es_client = Elasticsearch(args.es_host + ":" + str(args.es_port)) # The REST API client engine_client = EngineApiClient(args.api_host, API_BASE_URL, args.api_port) job_id = args.job_id if job_id == None: (http_status, response) = engine_client.createJob(json.dumps(config['job_config'])) job_id = response['id'] print "Created job with id " + str(job_id) print "Using job id " + job_id data_type = config['type'] raw_query = insertDateRangeFilter(config['search']) timezone = UTC() doc_count = 0 try: query_end_time = datetime.now(timezone) - timedelta(seconds=args.update_interval) while True: query_start_time = query_end_time query_end_time = datetime.now(timezone) query_str = json.dumps(replaceDateArgs(raw_query, query_start_time, query_end_time)) index_name = logstashIndex(query_start_time) skip = 0 try: # Query the documents from ElasticSearch and write to the Engine hits = es_client.search(index=index_name, doc_type=data_type, body=query_str, from_=skip, size=MAX_DOC_TAKE) except elasticsearch.exceptions.NotFoundError: print "Error: missing logstash index '" + index_name + "'" # upload to the API content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits'])) (http_status, response) = engine_client.upload(job_id, content) if http_status != 202: print "Error uploading log content to the Engine" print http_status, json.dumps(response) doc_count += len(hits['hits']['hits']) # get any other docs hitcount = int(hits['hits']['total']) while hitcount > (skip + MAX_DOC_TAKE): skip += MAX_DOC_TAKE hits = es_client.search(index=index_name, doc_type=data_type, body=query_str, from_=skip, size=MAX_DOC_TAKE) content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits'])) (http_status, response) = engine_client.upload(job_id, content) if http_status != 202: print "Error uploading log content to the Engine" print json.dumps(response) doc_count += len(hits['hits']['hits']) print "Uploaded {0} records".format(str(doc_count)) duration = datetime.now(timezone) - query_end_time sleep_time = max(args.update_interval - duration.seconds, 0) print "sleeping for " + str(sleep_time) + " seconds" if sleep_time > 0.0: time.sleep(sleep_time) except KeyboardInterrupt: print "Interrupt caught closing job..." engine_client.close(job_id)
def main(): setupLogging() args = parseArguments() # read the config file try: with open(args.file, "r") as config_file: config = json.load(config_file) except IOError: print "Error opening file " + args.file return # default start date is None meaning 'all time' start_date = None if args.start_date != None: start_date = datetime.strptime(args.start_date, "%Y-%m-%d") # default end date is today end_date = datetime.today() if args.end_date != None: end_date = datetime.strptime(args.end_date, "%Y-%m-%d") # The ElasticSearch client es_client = Elasticsearch(args.es_host + ":" + str(args.es_port)) data_type = config['type'] search_body = json.dumps(config['search']) # If no start date find the first logstash index containing our docs if start_date == None: start_date = findDateOfFirstIndex(es_client, data_type, search_body) if start_date == None: print "No documents found with the query " + search_body return # The REST API client engine_client = EngineApiClient(args.api_host, ABI_BASE_URL, args.api_port) (http_status, response) = engine_client.createJob(json.dumps(config['job_config'])) if http_status != 201: print "Error creatting job" print http_status, json.dumps(response) return job_id = response['id'] print "Created job with id " + str(job_id) doc_count = 0 for index_name in nextLogStashIndex(start_date, end_date): print "Reading from index " + index_name skip = 0 try: # Query the documents from ElasticSearch and write to the Engine hits = es_client.search(index=index_name, doc_type=data_type, body=search_body, from_=skip, size=MAX_DOC_TAKE) except elasticsearch.exceptions.NotFoundError: # Index not found try the next one continue # upload to the API content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits'])) (http_status, response) = engine_client.upload(job_id, content) if http_status != 202: print "Error uploading log content to the Engine" print http_status, json.dumps(response) continue doc_count += len(hits['hits']['hits']) # get any other docs hitcount = int(hits['hits']['total']) while hitcount > (skip + MAX_DOC_TAKE): skip += MAX_DOC_TAKE hits = es_client.search(index=index_name, doc_type=data_type, body=search_body, from_=skip, size=MAX_DOC_TAKE) content = json.dumps(elasticSearchDocsToDicts(hits['hits']['hits'])) (http_status, response) = engine_client.upload(job_id, content) if http_status != 202: print json.dumps(response) continue doc_count += len(hits['hits']['hits']) print "Uploaded {0} records".format(str(doc_count)) engine_client.close(job_id) print "{0} records successfully written to job {1}".format(str(doc_count), job_id)