def main(args):
    """Aggregate weighted keyword-search results for a single job.

    Resolves the job identified by allocation/job id in the
    `cast-allocation` index, then runs every mapping in the supplied
    error map against the job's time window and compute nodes,
    printing per-category max score, counts, and aggregations.

    Returns:
        0 on success (including "no matching job"), 2 on argument or
        error-map problems, 3 when the job search did not resolve to
        exactly one hit, 4 on an Elasticsearch request error.
    """
    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description='''A tool which takes a weighted listing of keyword searches and presents aggregations of this data to the user.''')
    parser.add_argument('-a', '--allocationid', metavar='int', type=int,
                        dest='allocation_id', default=-1,
                        help='The allocation ID of the job.')
    parser.add_argument('-j', '--jobid', metavar='int', type=int,
                        dest='job_id', default=-1,
                        help='The job ID of the job.')
    parser.add_argument('-s', '--jobidsecondary', metavar='int', type=int,
                        dest='job_id_secondary', default=0,
                        help='The secondary job ID of the job (default : 0).')
    parser.add_argument('-t', '--target', metavar='hostname:port',
                        dest='target', default=None,
                        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Displays the top --size logs matching the --errormap mappings.')
    parser.add_argument('--size', metavar='size', dest='size', default=10,
                        type=int,
                        help='The number of results to be returned. (default=10)')
    parser.add_argument('-H', '--hostnames', metavar='host', dest='hosts',
                        nargs='*', default=None,
                        help='A list of hostnames to filter the results to.')
    parser.add_argument('--errormap', metavar="file", dest="err_map_file",
                        default=None,
                        help='A map of errors to scan the user jobs for, including weights.')

    args = parser.parse_args()

    # If the target wasn't specified check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    # Load the weighted error mapping; `with` ensures the file is closed
    # (the original leaked the handle from a bare open()).
    error_map = None
    if args.err_map_file:
        with open(args.err_map_file) as err_map_handle:
            error_map = JSONSerializer().loads(err_map_handle.read())

    if error_map is None:
        parser.print_help()
        print("Error map '%s', could not be loaded" % args.err_map_file)
        return 2

    # Open a connection to the elastic cluster, if this fails is wrong on the server.
    es = Elasticsearch(args.target,
                       sniff_on_start=True,
                       sniff_on_connection_fail=True,
                       sniffer_timeout=60)

    # Execute the query on the cast-allocation index.
    try:
        tr_res = cast.search_job(es, args.allocation_id, args.job_id,
                                 args.job_id_secondary)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    total_hits = cast.deep_get(tr_res, "hits", "total")

    # Finding no matches with valid search criteria is a legit case.
    # return 0, not 3
    if total_hits is None:
        print("# Sorry. Could not find any matching results.")
        return 0

    if total_hits != 1:
        print(
            "This implementation only supports queries where the hit count is equal to 1."
        )
        return 3

    # TODO make this code more fault tolerant
    hits = cast.deep_get(tr_res, "hits", "hits")
    tr_data = cast.deep_get(hits[0], "_source", "data")

    # ---------------------------------------------------------------------------------------------
    # Build the hostnames string; default to the job's compute nodes.
    if args.hosts is None:
        args.hosts = tr_data.get("compute_nodes")

    hostnames = {
        "multi_match": {
            "query": " ".join(args.hosts),
            "type": "best_fields",
            "fields": ["hostname", "source"],
            "tie_breaker": 0.3,
            "minimum_should_match": 1
        }
    }

    # ---------------------------------------------------------------------------------------------
    # Constrain every mapping query to the job's run window.
    (ranges, should_match) = cast.build_timestamp_range(
        tr_data.get("begin_time"),
        cast.deep_get(tr_data, "history", "end_time"))
    ranges.append(hostnames)

    # ---------------------------------------------------------------------------------------------
    # Build a body for the mapping query.
    body = {
        "_source": ["@timestamp"],
        "size": args.size,
    }

    # Check the keywords supplied by the json.
    results = {}
    for error in error_map:
        (category, result) = build_mapping_query(es, body.copy(), ranges, error)
        results[category] = result

    print(" ")

    # Print the results, categories ordered by descending max score.
    # dict.items() and index-based lambdas replace the Python 2-only
    # iteritems() / tuple-parameter lambdas.
    for category, response in sorted(
            results.items(),
            key=lambda kv: cast.deep_get(kv[1], "hits", "max_score"),
            reverse=True):
        # Get aggregations.
        aggregations = response.get("aggregations", [])
        total = cast.deep_get(response, "hits", "total")

        print("\"{0}\" Max Score : {1}".format(
            category, cast.deep_get(response, "hits", "max_score")))
        print("\"{0}\" Count : {1}".format(category, total))

        if aggregations is not None:
            # Sort aggregations by document count.
            for (aggregation, value) in sorted(
                    aggregations.items(),
                    key=lambda kv: kv[1].get("doc_count"),
                    reverse=True):
                print(" \"{0}\" : {1}".format(aggregation,
                                              value.get("doc_count")))

        if args.verbose:
            hits = cast.deep_get(response, "hits", "hits")
            print("\nTop {0} \"{1}\" Results:".format(len(hits), category))
            print("-" * 42)
            for hit in hits:
                print(json.dumps(hit["_source"]))

        print("=" * 42)
        print(" ")
def main(args):
    """Search the "message" field of job logs for keywords.

    Resolves the job in the `cast-allocation` index, then issues a
    second query bounded by the job's run window and compute nodes,
    counting (and optionally displaying) log lines matching each
    supplied keyword.

    Returns:
        0 on success, 2 on argument errors, 3 when the job search did
        not resolve to exactly one hit, 4 on an Elasticsearch request
        error.
    """
    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description='''A tool for finding keywords in the "message" field during the run time of a job.''')
    parser.add_argument('-a', '--allocationid', metavar='int', type=int,
                        dest='allocation_id', default=-1,
                        help='The allocation ID of the job.')
    parser.add_argument('-j', '--jobid', metavar='int', type=int,
                        dest='job_id', default=-1,
                        help='The job ID of the job.')
    parser.add_argument('-s', '--jobidsecondary', metavar='int', type=int,
                        dest='job_id_secondary', default=0,
                        help='The secondary job ID of the job (default : 0).')
    parser.add_argument('-t', '--target', metavar='hostname:port',
                        dest='target', default=None,
                        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".')
    parser.add_argument('-k', '--keywords', metavar='key', dest='keywords',
                        nargs='*', default=['.*'],
                        help='A list of keywords to search for in the Big Data Store. Case insensitive regular expressions (default : .*). If your keyword is a phrase (e.g. "xid 13") regular expressions are not supported at this time.')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Displays any logs that matched the keyword search.')
    parser.add_argument('--size', metavar='size', dest='size', default=30,
                        type=int,
                        help='The number of results to be returned. (default=30)')
    parser.add_argument('-H', '--hostnames', metavar='host', dest='hosts',
                        nargs='*', default=None,
                        help='A list of hostnames to filter the results to (filters on the "hostname" field, job independent).')

    args = parser.parse_args()

    # If the target wasn't specified check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    # Open a connection to the elastic cluster, if this fails is wrong on the server.
    es = Elasticsearch(args.target,
                       sniff_on_start=True,
                       sniff_on_connection_fail=True,
                       sniffer_timeout=60)

    # Execute the query on the cast-allocation index.
    try:
        tr_res = cast.search_job(es, args.allocation_id, args.job_id,
                                 args.job_id_secondary)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    total_hits = cast.deep_get(tr_res, "hits", "total")
    print("Got {0} Hit(s) for specified job, searching for keywords.".format(
        total_hits))

    if total_hits != 1:
        print(
            "This implementation only supports queries where the hit count is equal to 1."
        )
        return 3

    # TODO make this code more fault tolerant
    hits = cast.deep_get(tr_res, "hits", "hits")
    tr_data = cast.deep_get(hits[0], "_source", "data")

    # ---------------------------------------------------------------------------------------------
    # TODO Add utility script to do this.
    # Build the hostnames string; default to the job's compute nodes.
    if args.hosts is None:
        args.hosts = tr_data.get("compute_nodes")

    hostnames = {
        "multi_match": {
            "query": " ".join(args.hosts),
            "type": "best_fields",
            "fields": ["hostname", "source"],
            "tie_breaker": 0.3,
            "minimum_should_match": 1
        }
    }

    # ---------------------------------------------------------------------------------------------
    # TODO Add a utility script to manage this.
    date_format = '%Y-%m-%d %H:%M:%S.%f'
    search_format = 'epoch_millis'

    # Determine the timerange:
    start_time = datetime.strptime(tr_data.get("begin_time"), date_format)

    # NOTE(review): strftime('%s') is a non-portable glibc extension
    # (epoch seconds); verify on non-Linux targets.
    timestamp_range = {
        "gte": start_time.strftime('%s000'),
        "format": search_format
    }

    # If a history is present end_time is end_time, otherwise it's now.
    if "history" in tr_data:
        end_time = datetime.strptime(
            tr_data.get("history").get("end_time"), date_format)
        timestamp_range["lte"] = end_time.strftime('%s999')

    timerange = {"range": {"@timestamp": timestamp_range}}

    # ---------------------------------------------------------------------------------------------
    # Build the message query: bare keywords become case-insensitive
    # regexps, phrases (containing a space) become match_phrase queries.
    keywords = {}
    should_keywords = []
    for key in args.keywords:
        if key.find(" ") == -1:
            should = {"regexp": {"message": key.lower()}}
        else:
            should = {"match_phrase": {"message": key}}

        should_keywords.append(should)
        keywords[key] = {"filter": should}

    # ---------------------------------------------------------------------------------------------
    # Submit the query
    body = {
        "query": {
            "bool": {
                "must": [timerange, hostnames, {
                    "exists": {
                        "field": "message"
                    }
                }],
                "should": should_keywords,
                "minimum_should_match": 1
            }
        },
        "sort": ["timestamp"],
        "_source": ["timestamp", "message", "hostname"],
        "size": args.size,
        "aggs": keywords
    }

    try:
        key_res = es.search(index="_all", body=body)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    # Print the count table.
    total = cast.deep_get(key_res, 'hits', 'total')
    print("Got {0} keyword hits:\n".format(total))

    # Guard against a missing aggregations section so the loop below
    # cannot raise TypeError on a degenerate response.
    aggregations = key_res.get("aggregations") or {}

    # Pad the keyword column to the widest keyword (min 7, len("Keyword")).
    max_width = 7
    for key in args.keywords:
        max_width = max(max_width, len(key))

    print('{0: >{1}} | Count'.format("Keyword", max_width))
    for agg in aggregations:
        print('{0: >{1}} | {2}'.format(
            agg, max_width, cast.deep_get(aggregations, agg, "doc_count")))
    print(" ")

    # Verbosely print the hits
    if args.verbose:
        hits = key_res.get('hits', {"hits": []})["hits"]
        print("Displaying {0} of {1} logs:".format(len(hits), total))

        for hit in hits:
            source = hit["_source"]
            print("{0} {1} | {2}".format(source.get("timestamp"),
                                         source.get("hostname"),
                                         source.get("message")))
def main(args):
    """List the jobs belonging to a user, optionally reporting nodes
    shared between those jobs.

    Either --user or --userid must be supplied. When --commonnodes is
    set to a positive threshold, a table of compute nodes appearing in
    more than `threshold` jobs is printed after the job listing.

    Returns:
        0 on success (implicitly, as None), 2 on argument errors.
    """
    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description='''A tool for finding a list of the supplied user's jobs.''')
    parser.add_argument('-u', '--user', metavar='username', dest='user',
                        default=None,
                        help="The user name to perform the query on, either this or -U must be set.")
    parser.add_argument('-U', '--userid', metavar='userid', dest='userid',
                        default=None,
                        help="The user id to perform the query on, either this or -u must be set.")
    parser.add_argument('--size', metavar='size', dest='size', default=1000,
                        type=int,
                        help='The number of results to be returned. (default=1000)')
    parser.add_argument('--state', metavar='state', dest='state', default=None,
                        help='Searches for jobs matching the supplied state.')
    parser.add_argument('--starttime', metavar='YYYY-MM-DDTHH:MM:SS',
                        dest='starttime', default=None,
                        help='A timestamp representing the beginning of the absolute range to look for failed jobs, if not set no lower bound will be imposed on the search.')
    parser.add_argument('--endtime', metavar='YYYY-MM-DDTHH:MM:SS',
                        dest='endtime', default=None,
                        help='A timestamp representing the ending of the absolute range to look for failed jobs, if not set no upper bound will be imposed on the search.')
    # TODO should this be a percentage?
    # type=int so the `> 0` comparisons below are int-vs-int under Python 3.
    parser.add_argument('--commonnodes', metavar='threshold',
                        dest='commonnodes', default=-1, type=int,
                        help='Displays a list of nodes that the user jobs had in common if set. Only nodes with collisions exceeding the threshold are shown. (Default: -1)')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Displays all retrieved fields from the `cast-allocation` index.')
    parser.add_argument('-t', '--target', metavar='hostname:port',
                        dest='target', default=None,
                        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".')

    args = parser.parse_args()

    # If the target wasn't specified check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    if args.user is None and args.userid is None:
        parser.print_help()
        print("Missing user, --user or --userid must be supplied.")
        return 2

    # Open a connection to the elastic cluster, if this fails is wrong on the server.
    es = Elasticsearch(args.target,
                       sniff_on_start=True,
                       sniff_on_connection_fail=True,
                       sniffer_timeout=60)

    # Ammend compute nodes for common node search.
    # Copy first: the original `fields += [...]` appended to the shared
    # module-level cast.USER_JOB_FIELDS list, corrupting it for later calls.
    fields = list(cast.USER_JOB_FIELDS)
    if args.commonnodes > 0:
        fields += ["data.compute_nodes"]
    # NOTE(review): `fields` is built but never passed to search_user_jobs;
    # presumably it should be — verify against cast.search_user_jobs before
    # wiring it in, as doing so would change the query.

    resp = cast.search_user_jobs(es,
                                 user_name=args.user,
                                 user_id=args.userid,
                                 job_state=args.state,
                                 start_time=args.starttime,
                                 end_time=args.endtime,
                                 size=args.size)

    # Parse the response from elasticsearch.
    hits = cast.deep_get(resp, "hits", "hits")
    node_collisions = {}

    print_fmt = "{5: >10} | {0: >5} | {1: >8} | {2: <8} | {3: <26} | {4: <26}"
    print(print_fmt.format("AID", "P Job ID", "S Job ID", "Begin Time",
                           "End Time", "State"))

    # Display jobs in ascending allocation id order.
    hits.sort(key=lambda x: cast.deep_get(x, "_source", "data", "allocation_id"),
              reverse=False)

    # Process hits.
    for hit in hits:
        data = cast.deep_get(hit, "_source", "data")
        if data:
            print(print_fmt.format(
                data.get("allocation_id"),
                data.get("primary_job_id"),
                data.get("secondary_job_id"),
                data.get("begin_time"),
                cast.deep_get(data, "history", "end_time"),
                data.get("state")))

            # Generate a counter.
            if args.commonnodes > 0:
                for node in data.get("compute_nodes"):
                    node_collisions[node] = 1 + node_collisions.get(node, 0)

    # Print out common nodes with collisions above threshold.
    if args.commonnodes > 0:
        collision_found = False

        # get the max width to improve printing.
        max_width = 4
        for key in node_collisions:
            max_width = max(len(key), max_width)

        print("=============================")
        print("Nodes common between jobs:")
        print("=============================")
        print("{0:>{1}} : {2}".format("node", max_width, "common count"))

        node_count = args.commonnodes
        # items() + index lambda replace the Python 2-only iteritems() and
        # tuple-parameter lambda; sort ascending by (count, node name).
        for key, value in sorted(node_collisions.items(),
                                 key=lambda kv: (kv[1], kv[0]),
                                 reverse=False):
            if int(value) > node_count:
                collision_found = True
                print("{0:>{1}} : {2}".format(key, max_width, value))

        if not collision_found:
            print("No nodes exceeded collision threshold: {0}".format(
                args.commonnodes))
def main(args):
    """Find CSM jobs that were running at a specific point in time.

    Parses the --time argument (or "now"), builds a timestamp query via
    cast.build_target_time_search, and lists matching allocations from
    the `cast-allocation` index.

    Returns:
        0 on success, 2 on argument errors.
    """
    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description='''A tool for finding jobs running at the specified time.''')
    parser.add_argument('-t', '--target', metavar='hostname:port',
                        dest='target', default=None,
                        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".')
    parser.add_argument('-T', '--time', metavar='YYYY-MM-DD HH:MM:SS',
                        dest='timestamp', default="now",
                        help='A timestamp representing a point in time to search for all running CSM Jobs. HH, MM, SS are optional, if not set they will be initialized to 0. (default=now)')
    parser.add_argument('-s', '--size', metavar='size', dest='size',
                        default=1000, type=int,
                        help='The number of results to be returned. (default=1000)')
    parser.add_argument('-H', '--hostnames', metavar='host', dest='hosts',
                        nargs='*', default=None,
                        help='A list of hostnames to filter the results to.')

    args = parser.parse_args()

    # If the target wasn't specified check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    # Parse the user's date. Raw string so `\d` is a regex class, not a
    # (deprecated) string escape. Time components are all optional.
    date_format = r'(\d{4})-(\d{1,2})-(\d{1,2})[ \.T]*(\d{0,2}):{0,1}(\d{0,2}):{0,1}(\d{0,2})'
    date_print_format = '%Y-%m-%d %H:%M:%S'

    target_date = args.timestamp
    time_search = re.search(date_format, target_date)

    # Build the target timestamp and verify validity.
    if time_search:
        (year, month, day, hour, minute, second) = time_search.groups()
        date = datetime(
            year=int(year),
            month=int(month),
            day=int(day),
            hour=int(hour if hour else 0),
            minute=int(minute if minute else 0),
            second=int(second if second else 0))
        target_date = datetime.strftime(date, date_print_format)
    elif target_date == "now":
        target_date = datetime.strftime(datetime.now(), date_print_format)
    else:
        parser.print_help()
        print("Invalid timestamp: {0}".format(target_date))
        return 2

    # `time_ranges` avoids shadowing the `range` builtin.
    (time_ranges, match_min) = cast.build_target_time_search(target_date)

    bool_query = {"should": time_ranges, "minimum_should_match": match_min}

    if args.hosts:
        bool_query["must"] = {
            "match": {
                "data.compute_nodes": {
                    "query": " ".join(args.hosts)
                }
            }
        }

    body = {
        "query": {
            "bool": bool_query
        },
        "_source": ["data.allocation_id", "data.primary_job_id",
                    "data.secondary_job_id", "data.begin_time",
                    "data.history.end_time"],
        "size": args.size
    }

    # Open a connection to the elastic cluster.
    es = Elasticsearch(args.target,
                       sniff_on_start=True,
                       sniff_on_connection_fail=True,
                       sniffer_timeout=60)

    # Execute the query on the cast-allocation index.
    tr_res = es.search(index="cast-allocation", body=body)

    # Get Hit Data
    hits = cast.deep_get(tr_res, "hits", "hits")
    total_hits = cast.deep_get(tr_res, "hits", "total")
    hits_displayed = len(hits)

    print("Search found {0} jobs running at '{2}', displaying {1} jobs:\n".format(
        total_hits, len(hits), target_date))

    # Display the results of the search.
    if hits_displayed > 0:
        print_fmt = "{0: >13} | {1: >12} | {2: <14} | {3: <26} | {4: <26}"
        print(print_fmt.format("Allocation ID", "Prim. Job ID",
                               "Second. Job ID", "Begin Time", "End Time"))

        for hit in hits:
            data = cast.deep_get(hit, "_source", "data")
            if data:
                print(print_fmt.format(
                    data.get("allocation_id"),
                    data.get("primary_job_id"),
                    data.get("secondary_job_id"),
                    data.get("begin_time"),
                    cast.deep_get(data, "history", "end_time")))

    return 0
def main(args):
    """Report metric statistics for the nodes that ran a job.

    Resolves the job, then runs extended-stats (and matrix-stats)
    aggregations for each requested field over the job's run window,
    printing a min/max/avg/stddev table and, with --correlation, the
    field correlation matrix.

    Returns:
        0 on success (including "no matching job"), 2 on argument
        errors, 3 when the job search did not resolve to exactly one
        hit, 4 on an Elasticsearch request error.
    """
    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description='''A tool for finding metrics about the nodes participating in the supplied job id.''')
    # type=int so `args.allocation_id > 0` below compares ints (a CLI-supplied
    # value would otherwise be a str and raise TypeError on Python 3).
    parser.add_argument('-a', '--allocationid', metavar='int', type=int,
                        dest='allocation_id', default=-1,
                        help='The allocation ID of the job.')
    parser.add_argument('-j', '--jobid', metavar='int', type=int,
                        dest='job_id', default=-1,
                        help='The job ID of the job.')
    parser.add_argument('-s', '--jobidsecondary', metavar='int', type=int,
                        dest='job_id_secondary', default=0,
                        help='The secondary job ID of the job (default : 0).')
    parser.add_argument('-t', '--target', metavar='hostname:port',
                        dest='target', default=None,
                        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".')
    parser.add_argument('-H', '--hostnames', metavar='host', dest='hosts',
                        nargs='*', default=None,
                        help='A list of hostnames to filter the results to.')
    parser.add_argument('-f', '--fields', metavar='field', dest='fields',
                        nargs='*', default=None,
                        help='A list of fields to retrieve metrics for (REQUIRED).')
    parser.add_argument('-i', '--index', metavar='index', dest='index',
                        default='_all',
                        help='The index to query for metrics records.')
    parser.add_argument('--correlation', action='store_true',
                        help="Displays the correlation between the supplied fields over the job run.")

    args = parser.parse_args()

    # If the target wasn't specified check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    if args.fields is None:
        print("Fields weren't set for metrics analysis.")
        return 2

    # Open a connection to the elastic cluster, if this fails is wrong on the server.
    es = Elasticsearch(args.target,
                       sniff_on_start=True,
                       sniff_on_connection_fail=True,
                       sniffer_timeout=60)

    # Execute the query on the cast-allocation index.
    try:
        tr_res = cast.search_job(es, args.allocation_id, args.job_id,
                                 args.job_id_secondary)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    total_hits = cast.deep_get(tr_res, "hits", "total")

    print("Got {0} Hit(s) for specified job:".format(total_hits))

    if total_hits is None:
        print("# Sorry. Could not find any matching results.")
        return 0

    if total_hits != 1:
        print(
            "This implementation only supports queries where the hit count is equal to 1."
        )
        return 3

    hits = cast.deep_get(tr_res, "hits", "hits")
    allocation = cast.deep_get(hits[0], "_source", "data")

    # ---------------------------------------------------------------------------------------------
    # Build the hostnames string; default to the job's compute nodes.
    if args.hosts is None:
        args.hosts = allocation.get("compute_nodes")

    hostnames = {
        "multi_match": {
            "query": " ".join(args.hosts),
            "type": "best_fields",
            "fields": ["hostname", "source"],
            "tie_breaker": 0.3,
            "minimum_should_match": 1
        }
    }

    # ---------------------------------------------------------------------------------------------
    date_format = '%Y-%m-%d %H:%M:%S.%f'
    search_format = 'epoch_millis'

    # Determine the timerange:
    start_time = datetime.strptime(allocation.get("begin_time"), date_format)

    # NOTE(review): strftime('%s') is a non-portable glibc extension
    # (epoch seconds); verify on non-Linux targets.
    timestamp_range = {
        "gte": "{0}000".format(start_time.strftime('%s')),
        "format": search_format
    }

    # If a history is present end_time is end_time, otherwise it's now.
    if "history" in allocation:
        end_time = datetime.strptime(
            allocation.get("history").get("end_time"), date_format)
        timestamp_range["lte"] = "{0}999".format(end_time.strftime('%s'))

    timerange = {"range": {"@timestamp": timestamp_range}}

    # ---------------------------------------------------------------------------------------------
    # Matrix stats are very interesting..
    stats = {"statistics": {"matrix_stats": {"fields": args.fields}}}
    for field in args.fields:
        stats[field] = {"extended_stats": {"field": field}}

    body = {
        "query": {
            "bool": {
                "must": [hostnames, timerange]
            }
        },
        "aggs": stats,
        "size": 0
    }

    try:
        key_res = es.search(
            index=args.index,  # TODO This should be replaced.
            body=body)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    if args.allocation_id > 0:
        print("\nMetric Analysis for Allocation ID {0} :\n".format(
            args.allocation_id))
    else:
        print("\nMetric Analysis for Job ID {0} - {1} :\n".format(
            args.job_id, args.job_id_secondary))

    # Print the table.
    aggs = cast.deep_get(key_res, "aggregations")
    if aggs is not None:
        # Pad the field column to the widest aggregation name.
        max_width = len("Field")
        for agg in aggs:
            max_width = max(max_width, len(agg))

        print("{0:>{1}} | {2: >14} | {3: >14} | {4: >14} | {5: >14} | Count".
              format("Field", max_width, "Min", "Max", "Average", "Std Dev"))

        print_fmt = "{0: >{1}} | {2:>14.3f} | {3:>14.3f} | {4:>14.3f} | {5:>14.3f} | {6}"
        for agg in aggs:
            # The "statistics" (matrix_stats) entry lacks min/max keys and a
            # field may be non-numeric — skip those rows rather than crash.
            try:
                print(
                    print_fmt.format(agg, max_width, aggs[agg]["min"],
                                     aggs[agg]["max"], aggs[agg]["avg"],
                                     aggs[agg]["std_deviation"],
                                     aggs[agg]["count"]))
            except ValueError:
                continue
            except KeyError:
                continue

        # print matrix stats
        if args.correlation:
            print("\n{0}".format("=" * 80))
            print("Field Correlations:")

            stat_fields = aggs["statistics"].get("fields", [])
            for stat in stat_fields:
                name = stat["name"]
                print("\n{0}:".format(name))

                correlation = stat["correlation"]
                corr_d = sorted(correlation.items(), key=operator.itemgetter(1))
                for field in corr_d:
                    if field[0] != name:
                        print("  {0} : {1}".format(field[0], field[1]))
    else:
        print("No aggregations were found.")

    return 0
def main(args):
    """List jobs that ran during a time range, optionally on given nodes.

    Builds a timestamp query via cast.build_time_range from the
    optional --starttime/--endtime bounds and lists matching
    allocations from the `cast-allocation` index.

    Returns:
        0 on success, 2 on argument errors.
    """
    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description='''A tool for finding jobs running during the specified time range on a specified node.''')
    parser.add_argument('-t', '--target', metavar='hostname:port',
                        dest='target', default=None,
                        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".')
    parser.add_argument('--starttime', metavar='YYYY-MM-DDTHH:MM:SS',
                        dest='starttime', default=None,
                        help='A timestamp representing the beginning of the absolute range to look for failed jobs, if not set no lower bound will be imposed on the search.')
    parser.add_argument('--endtime', metavar='YYYY-MM-DDTHH:MM:SS',
                        dest='endtime', default=None,
                        help='A timestamp representing the ending of the absolute range to look for failed jobs, if not set no upper bound will be imposed on the search.')
    parser.add_argument('-H', '--hostnames', metavar='host', dest='hosts',
                        nargs='*', default=None,
                        help='A list of hostnames to filter the results to.')
    parser.add_argument('-s', '--size', metavar='size', dest='size',
                        default=1000, type=int,
                        help='The number of results to be returned. (default=1000)')

    args = parser.parse_args()

    # If the target wasn't specified check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    # `time_ranges` avoids shadowing the `range` builtin.
    (time_ranges, match_min) = cast.build_time_range(args.starttime,
                                                     args.endtime)

    bool_query = {"should": time_ranges, "minimum_should_match": match_min}

    if args.hosts:
        bool_query["must"] = {
            "match": {
                "data.compute_nodes": {
                    "query": " ".join(args.hosts)
                }
            }
        }

    body = {
        "query": {
            "bool": bool_query
        },
        "_source": ["data.allocation_id", "data.primary_job_id",
                    "data.user_id", "data.user_name",
                    "data.secondary_job_id", "data.begin_time",
                    "data.history.end_time"],
        "size": args.size
    }

    # Open a connection to the elastic cluster.
    es = Elasticsearch(args.target,
                       sniff_on_start=True,
                       sniff_on_connection_fail=True,
                       sniffer_timeout=60)

    # Execute the query on the cast-allocation index.
    tr_res = es.search(index="cast-allocation", body=body)

    # Get Hit Data
    hits = cast.deep_get(tr_res, "hits", "hits")
    total_hits = cast.deep_get(tr_res, "hits", "total")
    hits_displayed = len(hits)

    print("# Search found {0} jobs running, displaying {1} jobs:\n".format(
        total_hits, len(hits)))

    # Display the results of the search.
    if hits_displayed > 0:
        print_fmt = "{5: <10} | {0: >13} | {1: >12} | {2: <14} | {3: <26} | {4: <26}"
        print(print_fmt.format("Allocation ID", "Prim. Job ID",
                               "Second. Job ID", "Begin Time", "End Time",
                               "User Name"))

        # Display jobs in ascending allocation id order.
        hits.sort(key=lambda x: cast.deep_get(x, "_source", "data",
                                              "allocation_id"),
                  reverse=False)

        for hit in hits:
            data = cast.deep_get(hit, "_source", "data")
            if data:
                print(print_fmt.format(
                    data.get("allocation_id"),
                    data.get("primary_job_id"),
                    data.get("secondary_job_id"),
                    data.get("begin_time"),
                    cast.deep_get(data, "history", "end_time"),
                    data.get("user_name")))

    return 0
def main(args):
    """Display the time window (and optionally nodes) of a single job.

    At least one of --allocationid or --jobid is required. Prints the
    job's ids, user, and begin/end times; with --verbose the compute
    nodes are listed as well.

    Returns:
        0 on success (including "no matching job"), 2 on argument
        errors, 3 when the job search did not resolve to exactly one
        hit, 4 on an Elasticsearch request error.
    """
    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description='''A tool for finding when a job was running through use of the big data store.''')
    parser.add_argument('-a', '--allocationid', metavar='int', type=int,
                        dest='allocation_id', default=-1,
                        help='The allocation ID of the job.')
    parser.add_argument('-j', '--jobid', metavar='int', type=int,
                        dest='job_id', default=-1,
                        help='The job ID of the job.')
    parser.add_argument('-s', '--jobidsecondary', metavar='int', type=int,
                        dest='job_id_secondary', default=0,
                        help='The secondary job ID of the job (default : 0).')
    parser.add_argument('-t', '--target', metavar='hostname:port',
                        dest='target', default=None,
                        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".')
    parser.add_argument('-H', '--hostnames', metavar='host', dest='hosts',
                        nargs='*', default=None,
                        help='A list of hostnames to filter the results to ')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Displays additional details about the job in the output.')

    args = parser.parse_args()

    # If allocation_id or job_id wasn't specified, printing help on failure.
    if args.allocation_id == -1 and args.job_id == -1:
        parser.print_help()
        print(
            "Missing either allocationid or jobid. Require 1 of these fields to search."
        )
        return 2

    # If the target wasn't specified check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    # set up the fields for the search operation.
    # Copy first: the original appended directly to the shared module-level
    # cast.SEARCH_JOB_FIELDS list, growing it on every verbose invocation.
    fields = list(cast.SEARCH_JOB_FIELDS)
    if args.verbose:
        fields.append("data.compute_nodes")

    # Open a connection to the elastic cluster, if this fails is wrong on the server.
    es = Elasticsearch(args.target,
                       sniff_on_start=True,
                       sniff_on_connection_fail=True,
                       sniffer_timeout=60)

    # Execute the query on the cast-allocation index.
    try:
        tr_res = cast.search_job(es, args.allocation_id, args.job_id,
                                 args.job_id_secondary, fields=fields)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    total_hits = cast.deep_get(tr_res, "hits", "total")
    print("# Found {0} matches for specified the job.".format(total_hits))

    if total_hits == 0:
        print("# Sorry. Could not find any matching results.")
        return 0

    if total_hits != 1:
        print(
            "# This implementation only supports queries where the hit count is equal to 1."
        )
        return 3

    # TODO make this code more fault tolerant
    hits = cast.deep_get(tr_res, "hits", "hits")
    if len(hits) > 0:
        tr_data = cast.deep_get(hits[0], "_source", "data")

        date_format = '%Y-%m-%d %H:%M:%S.%f'
        print_format = '%Y-%m-%d.%H:%M:%S:%f'

        # Trim microseconds to milliseconds ([:-3]) for display.
        start_time = datetime.strptime(tr_data["begin_time"], date_format)
        start_time = start_time.strftime(print_format)[:-3]

        # If a history is present end_time is end_time, otherwise it's now.
        if "history" in tr_data:
            end_time = datetime.strptime(tr_data["history"]["end_time"],
                                         date_format)
            end_time = end_time.strftime(print_format)[:-3]
        else:
            end_time = "now"

        print("\nallocation-id: {0}".format(tr_data["allocation_id"]))
        print("job-id: {0} - {1}".format(tr_data["primary_job_id"],
                                         tr_data["secondary_job_id"]))
        print("user-name: {0} \nuser-id: {1}".format(tr_data["user_name"],
                                                     tr_data["user_id"]))
        print("begin-time: {0} \nend-time: {1}".format(start_time, end_time))

        if args.verbose:
            nodes = tr_data.get("compute_nodes", [])
            print('hostnames: ')
            for node in nodes:
                print("  - {0}".format(node))