Example #1
    def __init__(self, args):
        ''' Initializes the metrics tool.'''
        logger.debug("Enter ToolJobMetrics init")

        #: The user configurable input to the usecase.
        self.settings = Settings()

        self.settings.help_string = '''A tool for retrieving metrics from nodes that participated in the supplied job id.
   Uses the JobSettings, RemoteServerSettings and StatisticalSettings configuration modules.'''

        # self.settings.time
        self.settings.append_class(TimeSettings)
        # self.settings.job_info
        self.settings.append_class(
            JobSettings,
            ["no_hosts", "target_hostnames", "job_id", "secondary_job_id"])
        # self.settings.remote_server
        self.settings.append_class(RemoteServerSettings)
        # self.settings.statistics
        self.settings.append_class(StatisticalSettings)

        # Parse the arguments and config file.
        self.settings.parse_args(args)

        #: The payload object for connecting to the Log Analysis server (UnityPayload).
        self.payload = UnityPayload()

        #: Exposes the CSM CLI to python without needing to use subprocess.
        self.csmi_tool = CSMITool()

        try:
            self.payload.create_connection(
                self.settings.remote_server.construct_uri(),
                self.settings.remote_server.access["userid"],
                self.settings.remote_server.access["userpass"])
        except Exception as e:
            # If the login fails, exit the system with a 1.
            logger.error(e)
            logger.error(
                "Please verify that the remote_server section was properly configured."
            )

            logger.error("Error Code 1 \nSettings Used:\n{0}".format(
                self.settings.get_values_string()))

            if self.settings.remote_server.access is None:
                logger.error(
                    "The access setting MUST point to a separate configuration file."
                )

            sys.exit(1)

        logger.debug("Exit ToolJobMetrics init")
Example #2
    def __init__(self, args):
        ''' Initializes the tool. '''
        
        logger.debug("Begin ToolJobsRunning init")
        
        #: The user configurable input to the usecase.
        self.settings = Settings()

        self.settings.help_string = '''A tool for retrieving the jobs that were running during the specified time.
    The -v or --verbose flag will query the csm apis to aggregate more information
    about the jobs that were found to be running.'''

        # self.settings.time
        self.settings.append_class(TimeSettings)
        # self.settings.job_info
        self.settings.append_class(JobSettings, ["target_hostnames", "no_hosts"])
        # self.settings.remote_server
        self.settings.append_class(RemoteServerSettings)
        
        # Parse the arguments and config file.
        self.settings.parse_args(args)
         
        #: The Log Analysis payload for this tool (UnityPayload).
        self.payload = UnityPayload()

        #: Exposes the CSM CLI to python without needing to use subprocess.
        self.csmi_tool = csmi_tool.CSMITool()

        # Initialize the unity connection.
        try: 
            self.payload.create_connection(
                self.settings.remote_server.construct_uri(),
                self.settings.remote_server.access["userid"], 
                self.settings.remote_server.access["userpass"]) 
        except Exception as e:
            # If the login fails, exit the system with a 1.
            logger.error(e)
            logger.error("Please verify that the remote_server section was properly configured.")

            logger.error("Error Code 1 \nSettings Used:\n{0}".format(
                self.settings.get_values_string()))

            if self.settings.remote_server.access is None:
                logger.error("The access setting MUST point to a separate configuration file.")

            sys.exit(1)

        #: Tracks the jobs that were running in the specified time range (dict).
        self.job_alloc_map = None

        logger.debug("End ToolJobsRunning init")
Example #3
    def __init__(self, args):
        ''' Initializes the tool. '''

        logger.debug("Begin ToolJobRange init")

        #: The user configurable input to the usecase.
        self.settings = Settings()

        self.settings.help_string = '''A tool for retrieving the start and end time of a job in the LogAnalysis Big Data Store.'''

        # self.settings.time
        self.settings.append_class(TimeSettings)
        # self.settings.job_info
        self.settings.append_class(JobSettings, ["target_hostnames", "no_hosts", "job_id", "secondary_job_id"])
        # self.settings.remote_server
        self.settings.append_class(RemoteServerSettings)
        
        # Parse the arguments and config file.
        self.settings.parse_args(args)
         
        #: The payload for the query (UnityPayload).
        self.payload = UnityPayload()

        #: Exposes the CSM CLI to python without needing to use subprocess.
        self.csmi_tool = CSMITool()

        # Initialize the unity connection. 
        try: 
            self.payload.create_connection(
                self.settings.remote_server.construct_uri(),
                self.settings.remote_server.access["userid"], 
                self.settings.remote_server.access["userpass"]) 
        except Exception as e:
            # If the login fails, exit the system with a 1.
            logger.error(e)
            logger.error("Please verify that the remote_server section was properly configured.")

            logger.error("Error Code 1 \nSettings Used:\n{0}".format(
                self.settings.get_values_string()))

            if self.settings.remote_server.access is None:
                logger.error("The access setting MUST point to a separate configuration file.")

            sys.exit(1)

        logger.debug("Exit ToolJobRange init")
Example #4
    def __init__(self, args):
        ''' Initializes the tool. '''

        logger.debug("Begin ToolJobKeys init")

        #: The user configurable input to the usecase.
        self.settings = Settings()

        self.settings.help_string = '''A tool for determining the occurrence rate of keywords during the run time of a job
     in the syslog and mmfs logs. User supplied time ranges will be overridden by the
     actual time range of the specified job.'''

        # self.settings.time
        self.settings.append_class(TimeSettings)
        # self.settings.job_info
        self.settings.append_class(JobSettings)
        # self.settings.remote_server
        self.settings.append_class(RemoteServerSettings)

        # Parse the arguments and config file.
        self.settings.parse_args(args)

        #: The payload for the query (UnityPayload).
        self.payload = UnityPayload()

        #: Exposes the CSM CLI to python without needing to use subprocess.
        self.csmi_tool = CSMITool()

        # Initialize the unity connection.
        try:
            self.payload.create_connection(
                self.settings.remote_server.construct_uri(),
                self.settings.remote_server.access["userid"],
                self.settings.remote_server.access["userpass"])
        except Exception as e:
            # If the login fails, exit the system with a 1.
            logger.error(e)
            logger.error(
                "Please verify that the remote_server section was properly configured."
            )

            logger.error("Error Code 1 \nSettings Used:\n{0}".format(
                self.settings.get_values_string()))

            if self.settings.remote_server.access is None:
                logger.error(
                    "The access setting MUST point to a separate configuration file."
                )

            sys.exit(1)

        #: Holds the jobs that were running in the specified time range (dict).
        self.job_id_map = None

        #: Overall start time for the job (datetime).
        self.job_start_time = None

        #: Overall end time for the job (datetime).
        self.job_end_time = None

        #: The allocation id, the user will generally not know this id (int).
        self.allocation_id = 0

        logger.debug("Exit ToolJobKeys init")
Example #5
class ToolJobKeys(object):
    ''' Facilitates keyword searching against a Log Analysis Server.'''
    def __init__(self, args):
        ''' Initializes the tool. '''

        logger.debug("Begin ToolJobKeys init")

        #: The user configurable input to the usecase.
        self.settings = Settings()

        self.settings.help_string = '''A tool for determining the occurrence rate of keywords during the run time of a job
     in the syslog and mmfs logs. User supplied time ranges will be overridden by the
     actual time range of the specified job.'''

        # self.settings.time
        self.settings.append_class(TimeSettings)
        # self.settings.job_info
        self.settings.append_class(JobSettings)
        # self.settings.remote_server
        self.settings.append_class(RemoteServerSettings)

        # Parse the arguments and config file.
        self.settings.parse_args(args)

        #: The payload for the query (UnityPayload).
        self.payload = UnityPayload()

        #: Exposes the CSM CLI to python without needing to use subprocess.
        self.csmi_tool = CSMITool()

        # Initialize the unity connection.
        try:
            self.payload.create_connection(
                self.settings.remote_server.construct_uri(),
                self.settings.remote_server.access["userid"],
                self.settings.remote_server.access["userpass"])
        except Exception as e:
            # If the login fails, exit the system with a 1.
            logger.error(e)
            logger.error(
                "Please verify that the remote_server section was properly configured."
            )

            logger.error("Error Code 1 \nSettings Used:\n{0}".format(
                self.settings.get_values_string()))

            if self.settings.remote_server.access is None:
                logger.error(
                    "The access setting MUST point to a separate configuration file."
                )

            sys.exit(1)

        #: Holds the jobs that were running in the specified time range (dict).
        self.job_id_map = None

        #: Overall start time for the job (datetime).
        self.job_start_time = None

        #: Overall end time for the job (datetime).
        self.job_end_time = None

        #: The allocation id, the user will generally not know this id (int).
        self.allocation_id = 0

        logger.debug("Exit ToolJobKeys init")

    def stat_keywords(self):
        ''' Determines the incidence rate of keywords and displays logs containing the
        keyword specified (if the verbose flag is set).'''

        logger.debug("Enter ToolJobKeys stat_keywords")

        # Get the time range
        try:
            self.job_id_map, self.job_start_time, self.job_end_time =  \
                unity_helper.find_job_time_range_csm(
                    self.payload,
                    self.settings.job_info.job_id,
                    self.settings.job_info.secondary_job_id,
                    self.settings.job_info.target_hostnames,
                    self.settings.time.date,
                    self.settings.time.days )
        except ValueError as e:
            logger.error("Unable find the job time range, error was: %s", e)
            return 1

        if self.job_id_map is None or len(self.job_id_map) == 0:
            if self.settings.job_info.target_hostnames is not None:
                logger.warning(
                    "No errors were detected, but jobid '{0}' was not found for targeted hosts: [{1}]. Please consult your settings and try again."
                    .format(self.settings.job_info.job_id,
                            ",".join(self.settings.job_info.target_hostnames)))

            else:
                logger.warning(
                    "No errors were detected, but jobid '{0}' was not found. Please consult your settings and try again."
                    .format(self.settings.job_info.job_id))

            logger.debug("Exit ToolJobKeys stat_keywords")
            return 1

        if self.settings.default.verbose:
            unity_helper.print_job_time_range(self.job_id_map,
                                              self.settings.job_info.job_id,
                                              self.job_start_time,
                                              self.job_end_time)

        # Cache the number of hosts.
        num_hosts = len(self.job_id_map)

        logger.debug("Building ToolJobKeys stat_keywords query")

        self.payload.initialize()
        self.payload.set_logsources("logSource", ["/syslog", "/mmfs"])
        self.payload.set_getAttributes(
            ["timestamp", "syslogHostname", "message"])
        self.payload.set_range_timestamp_filter(self.job_start_time,
                                                self.job_end_time)
        self.payload.set_term_facet("syslogHostname", num_hosts)

        # Build the repeated query string.
        query_string = " AND " + self.payload.scala_host_query(
            'syslogHostname', self.job_id_map)

        # Zero the keyword counts.
        for node in self.job_id_map:
            self.job_id_map[node]["keyword_counts"] = [0] * len(
                self.settings.job_info.keywords)

        logger.debug("stat_keywords query built: %s",
                     self.payload.get_json_payload())

        # Cache the baseline variables for the server communication loop.
        default_start = self.payload.get_start()
        keyword_index = 0

        # Map to store the verbose results for formatting properly.
        if self.settings.default.verbose:
            verbose_map = dict()

        logger.debug("Begin BDS communication")
        # Execute the search for each keyword.
        for keyword in self.settings.job_info.keywords:
            logger.debug("Gathering statistics about \'%s\' keyword", keyword)

            # Finalize the query.
            self.payload.set_query("\"" + keyword + "\"" + query_string)

            while self.payload.get_start() >= 0:

                logger.debug("Executing stat_keywords \'%s\' keyword query",
                             keyword)
                # Execute the actual query.
                json_response = json.loads(self.payload.post())

                # If the total result count in the response exceeds the number
                # of results returned so far, advance the start point for the
                # next iteration; otherwise set start to -1 to stop iterating.
                self.payload.set_start(
                    self.payload.determine_start(json_response,
                                                 self.payload.get_start(),
                                                 self.payload.get_results()))

                # TODO Should facetResults et al. be moved to Constants?
                # get the statistics for each
                if 'facetResults' in json_response and \
                    'term_facet'  in json_response["facetResults"] and \
                    'counts'      in json_response["facetResults"]['term_facet']:

                    logger.debug("Counting for the \'%s\' keyword were found",
                                 keyword)

                    for count in json_response["facetResults"]['term_facet'][
                            'counts']:
                        self.job_id_map[count['term']]['keyword_counts'][keyword_index] = \
                            count['count']

                # XXX Maybe this should be output to a file?
                # If the verbose option is set cache the messages for output.
                if self.settings.default.verbose and "searchResults" in json_response:
                    logger.debug("Search results for the \'%s\' keyword were" +\
                        " found, gathering for verbose output", keyword)

                    verbose_map[keyword] = dict()
                    for entry in json_response["searchResults"]:
                        attributes = entry.get("attributes")

                        if attributes is None:
                            continue

                        hostname = attributes["syslogHostname"]
                        if hostname not in verbose_map[keyword]:
                            verbose_map[keyword][hostname] = []

                        if 'mmfsEventDescription' in attributes:
                            message = attributes['mmfsEventDescription']
                        elif "message" in attributes:
                            message = attributes["message"]
                        else:
                            # Neither message field is present; skip this entry.
                            continue

                        # TODO should this timestamp be formatted?
                        if "timestamp" in attributes:
                            message = attributes["timestamp"] + ": " + message

                        verbose_map[keyword][hostname].append(message)
                    # End for loop.
            # End while loop.

            logger.debug("Done gathering statistics about \'%s\' keyword",
                         keyword)

            # Update the loop sentinels.
            keyword_index += 1
            self.payload.set_start(default_start)
        # End for loop.
        logger.debug("End BDS communication")

        logger.debug(
            "Keyword statistics gathering complete, outputing results")

        # Pretty Print.
        print("\nSearched from \n{0} to {1}".format(
            output_helpers.format_timestamp(self.job_start_time),
            output_helpers.format_timestamp(self.job_end_time)))
        print("\n{0} \nKeyword Statistics\n{1}".format(DIV_0, DIV_1))
        # Print the results to the console.
        for node in self.job_id_map:
            key_index = 0
            keyword_counts = " "
            for count in self.job_id_map[node]['keyword_counts']:
                keyword_counts += self.settings.job_info.keywords[key_index] +\
                    "=" + unicode(count) + " "
                key_index += 1
            print(node + keyword_counts)
        print(DIV_0 + "\n")

        if self.settings.default.verbose:
            for keyword in verbose_map:
                print("\n{0}\nContains Keyword: {1}\n{2}\n".format(
                    DIV_2, keyword, DIV_2))
                # TODO this might need to be tokenized.
                for host in verbose_map[keyword]:
                    print("\n" + DIV_2 + "\n" + host + ":\n" + DIV_2 + "\n")
                    for message in verbose_map[keyword][host]:
                        print(message)
                print(DIV_1)

        logger.debug("Exit ToolJobKeys stat_keywords")

        return 0
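
For reference, the pagination loop above consumes a Log Analysis response whose relevant sections look roughly like the sketch below (shape inferred from the parsing code; all values are hypothetical):

json_response = {
    "facetResults": {
        "term_facet": {
            # One entry per host: hits for the current keyword.
            "counts": [{"term": "node01", "count": 4}]
        }
    },
    "searchResults": [
        {"attributes": {"syslogHostname": "node01",
                        "timestamp": "2018-01-01 00:00:00",
                        "message": "sample log line"}}
    ]
}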
Example #6
class ToolJobsRunning (object):
    ''' Contains the functions necessary to check for jobs running at a specified time on a remote LogAnalysis server.
    '''

    def __init__(self, args):
        ''' Initializes the tool. '''
        
        logger.debug("Begin ToolJobsRunning init")
        
        #: The user configurable input to the usecase.
        self.settings = Settings()

        self.settings.help_string = '''A tool for retrieving the jobs that were running during the specified time.
    The -v or --verbose flag will query the csm apis to aggregate more information
    about the jobs that were found to be running.'''

        # self.settings.time
        self.settings.append_class(TimeSettings)
        # self.settings.job_info
        self.settings.append_class(JobSettings, ["target_hostnames", "no_hosts"])
        # self.settings.remote_server
        self.settings.append_class(RemoteServerSettings)
        
        # Parse the arguments and config file.
        self.settings.parse_args(args)
         
        #: The Log Analysis payload for this tool (UnityPayload).
        self.payload = UnityPayload()

        #: Exposes the CSM CLI to python without needing to use subprocess.
        self.csmi_tool = csmi_tool.CSMITool()

        # Initialize the unity connection.
        try: 
            self.payload.create_connection(
                self.settings.remote_server.construct_uri(),
                self.settings.remote_server.access["userid"], 
                self.settings.remote_server.access["userpass"]) 
        except Exception as e:
            # If the login fails, exit the system with a 1.
            logger.error(e)
            logger.error("Please verify that the remote_server section was properly configured.")

            logger.error("Error Code 1 \nSettings Used:\n{0}".format(
                self.settings.get_values_string()))

            if self.settings.remote_server.access is None:
                logger.error("The access setting MUST point to a separate configuration file.")

            sys.exit(1)

        #: Tracks the jobs that were running in the specified time range (dict).
        self.job_alloc_map = None

        logger.debug("End ToolJobsRunning init")

    def find_jobs( self ):
        ''' Finds the jobs running at the specified time. If the verbose flag was
        specified a secondary query will be run. Please consult jobs_running_during
        for details regarding the query against the big data store.

        :returns int: Return Code
        '''
        rc = self.jobs_running_during()

        if self.settings.default.verbose:
            self.verify_jobs_running()

        self.print_jobs_running()

        return rc

    def jobs_running_during( self ):
        ''' Finds the jobs running at the specified time. Results are output to
        the console. Current iteration of the query works as follows:

        | Key:
        |   a - "--targettime"
        |   b - "--targettime" - "--days"
        |   - - "--days"
        |   | - start/end of scan
        |   ~ - Unexamined days/time
        |
        |    b        a
        | ~~~|<-------|~~~~~~~~~
        |
        | The query first establishes a time range to search for allocation creations/deletions.
        | Using the supplied "--targettime" and "--days" the start and end times are computed with
        | the end time being the target time. Using this time range and any nodes specified a filter
        | is generated to reduce the number of records returned.

        | A query is then sent to the Log Analysis server and the results are parsed.
        | IF create is found
        |     The status of the job (a Boolean) is combined with True on that hostname.
        | ELSE IF delete is found
        |    The status of the job (a Boolean) is combined with False on that hostname.
        | 
        | IF the status of the job cached locally is True
        |     The hostname had that job/allocation running at the target time.

        | Some notes:
        |     1. If the Job was created before the search window and ends after it, it will not be detected.
        |        a. "--days" should be viewed as a heuristic value of sorts (e.g. the maximum run time).
        |     2. If the Job was created, but not properly destroyed this will create a false positive!'''
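        # A worked illustration of the rule above (sketch): a hostname starts
        # as True when first seen; an allocation create combines its status
        # with True (leaving it unchanged) and a delete combines it with False
        # (forcing it to False), so only hosts with no delete inside the
        # search window remain True, i.e. running at the target time.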
        
        logger.debug("Enter ToolJobsRunning jobs_running_during")
        
        # Set the range of time to search in. If no time is specified, use now.
        if self.settings.time.date:
            self.end_time = self.settings.time.date
        else:
            self.end_time = datetime.now()

        self.start_time = self.end_time - relativedelta(days=self.settings.time.days)
        
        # Build the REST POST payload.
        self.payload.initialize()
        self.payload.set_logsources("logSource", "/syslog")
        self.payload.set_getAttributes(["syslogHostname", "message", "timestamp"])
        self.payload.set_range_timestamp_filter(self.start_time, self.end_time)
        self.payload.set_query(unity_helper.BDS_ALLOCATION_QUERY)

        # Build the query string.
        if self.settings.job_info.target_hostnames is not None:
            self.payload.set_query(self.payload.scala_host_query(
                "syslogHostname", self.settings.job_info.target_hostnames), "AND")
        
        # Reset the allocation mapping.
        self.job_alloc_map = dict()
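        # When populated, job_alloc_map has the following shape (derived from
        # the loop below; the fields come from BDS_ALLOCATION_EXTRACTOR):
        #   { alloc_id: { "job_id": str, "sec_id": str,
        #                 "hostnames": { hostname: bool },
        #                 "active": int } }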
        logger.debug("Enter ToolJobsRunning UnityConnection")

        while self.payload.get_start() >= 0:
            # Execute the actual query.
            json_response = json.loads(self.payload.post())

            logger.info("Processing results: %d->%d out of %s",
                self.payload.get_start(),
                self.payload.get_start() + self.payload.get_results(),
                json_response.get("totalResults"))
        
            # Set up the start.        
            self.payload.set_start(
                self.payload.determine_start(
                    json_response,
                    self.payload.get_start(),
                    self.payload.get_results()))

            # If an error was found report it and exit.
            if "result" in json_response and\
                json_response["result"].get("status") == "failure":

                logger.error("Error occured in communication: %s",
                    json_response["result"].get("message"))
                continue

            # If the search results were found in the response payload, we can process the data.
            if UnityResponse.SEARCH_RESULTS in json_response:
                # Iterate over the search.
                for entry in json_response[UnityResponse.SEARCH_RESULTS]:
                    attributes = entry[UnityResponse.ATTRIBUTES]
                    # Cache the reused results.
                    search_result = re.search(unity_helper.BDS_ALLOCATION_EXTRACTOR,
                        attributes["message"])
                    hostname = attributes["syslogHostname"]
                    timestamp = attributes["timestamp"]

                    if search_result is None:
                        logger.debug("Message didn't have allocation details.")
                        continue

                    alloc_id, al_type, job_id, sec_id = search_result.group(1,2,3,4)

                    if alloc_id not in self.job_alloc_map:
                        # If the allocation id hasn't been found yet, create an object for it.
                        self.job_alloc_map[alloc_id] = {
                            "job_id"    : job_id,
                            "sec_id"    : sec_id,
                            "hostnames" : {},
                            "active"    : 0
                        }

                    if hostname not in self.job_alloc_map[alloc_id]["hostnames"]:
                        # If the hostname is not present, add it and assume it is running.
                        self.job_alloc_map[alloc_id]["hostnames"][hostname] = True

                    if al_type == unity_helper.BDS_ALLOCATION_BEGIN_KEY:
                        # An allocation begin was found: combine the status with True.
                        self.job_alloc_map[alloc_id]["hostnames"][hostname] = True and \
                            self.job_alloc_map[alloc_id]["hostnames"][hostname]
                        self.job_alloc_map[alloc_id]["active"] += 1

                    elif al_type == unity_helper.BDS_ALLOCATION_END_KEY:
                        # An allocation end was found: combine the status with False.
                        self.job_alloc_map[alloc_id]["hostnames"][hostname] = False and \
                            self.job_alloc_map[alloc_id]["hostnames"][hostname]
                        self.job_alloc_map[alloc_id]["active"] -= 1
       
        logger.debug("Exit ToolJobsRunning UnityConnection")
        
        logger.debug("Exit ToolJobsRunning jobs_running_during")

        # Clear out the inactive allocations. 
        inactive_allocations = []
        for alloc_id in self.job_alloc_map:
            if self.job_alloc_map[alloc_id]["active"] <= 0:
                inactive_allocations.append(alloc_id)

        for allocation in inactive_allocations:
            del self.job_alloc_map[allocation]
        
        return 0

    def verify_jobs_running( self ):
        ''' Verify that the job was running at the time stamp specified using csmi api
        queries. Determines any other nodes that participated in the job.'''
        logger.debug("Enter ToolJobsRunning verify_jobs_running")

        if self.job_alloc_map is None:
            logger.debug("Exit ToolJobsRunning verify_jobs_running. No jobs to verify")
            return 1

        tz_local = tz.tzlocal()

        for alloc_id in self.job_alloc_map:
            # Initialize the new metadata.
            allocation_map = self.job_alloc_map[alloc_id]
            allocation_map["in_db"]       = False
            allocation_map["verified"]    = False
            allocation_map["run_time"]    = "Not Found"
            allocation_map["start_time"]  = "Not Found"
            allocation_map["end_time"]    = "Not Found"
            allocation_map["other_nodes"] = []
            

            # Query the csm database; if it fails, continue to the next allocation.
            try:
                allocation_dict = self.csmi_tool.allocation_query(alloc_id)

                if allocation_dict is None:
                    logger.debug("Allocation %s was not found in the csm database", alloc_id)
                    continue
                
                allocation_map["in_db"] = True
            except Exception as e:
                logger.error("Allocation %s was not found in the csm database, error: %s",
                    alloc_id, e)
                continue
            
            # Determine start time.
            job_start_time  = allocation_dict.get(csmi_tool.CSM_AL_BEGIN_TIME)

            # Set the end time, revert to now, if no end time is found. 
            history_dict    = allocation_dict.get(csmi_tool.CSM_AL_HISTORY)
            if history_dict is not None:
                job_end_time    = history_dict.get(csmi_tool.CSM_AL_END_TIME)
            else:
                job_end_time = None
            # Set the end time to now and check if the tzinfo is None.
            if job_end_time is None:
                job_end_time = datetime.now()
            
            # The timestamp is assumed to be local time if it's not set.
            if job_end_time.tzinfo is None:
                job_end_time = job_end_time.replace(tzinfo=tz_local)
            
            # Add time metadata to the allocation map.
            if job_start_time is not None:
                
                # The timestamp is assumed to be local time if it's not set.
                if self.end_time.tzinfo  is None:
                    self.end_time = self.end_time.replace(tzinfo=tz_local)

                # The timestamp is assumed to be local time if it's not set.
                if job_start_time.tzinfo is None:
                    job_start_time = job_start_time.replace(tzinfo=tz_local)
                
                allocation_map["verified"] = \
                    self.end_time >= job_start_time and \
                    self.end_time <= job_end_time
                
                allocation_map["run_time"]   = job_end_time - job_start_time 
                allocation_map["start_time"] = job_start_time
                allocation_map["end_time"]   = job_end_time
                    
            # Determine additional nodes that participated.
            found_hostnames = allocation_dict.get(csmi_tool.CSM_AL_COMPUTE_NODES)
            if found_hostnames is not None and \
                "hostnames" in self.job_alloc_map[alloc_id]:

                allocation_hostnames = self.job_alloc_map[alloc_id]["hostnames"]
                
                for hostname in found_hostnames:
                    if hostname not in allocation_hostnames:
                        allocation_map["other_nodes"].append(hostname)

        return 0
        
    def print_jobs_running( self ):
        ''' Print the jobs that were found to be running.
        Isolates the output from the business logic.'''

        logger.debug("Enter ToolJobsRunning print_jobs_running")

        print ("")
        print (DIV_0)
        print ("The following jobs were active on the following nodes at " 
            + output_helpers.format_timestamp(self.end_time))
        print ("AllocationID | JobID | SecondaryID | Active Hostnames")
        print (DIV_0)
        
        if self.job_alloc_map is None:
            logger.debug("Exit ToolJobsRunning print_jobs_running")
            return 

        line_limit = len(DIV_1)
        line_count = 0
        tab_count = 5
        tab = " " * tab_count
        # Print the results to the console.
        for alloc_id in self.job_alloc_map:
            active_hosts = 0
            output = alloc_id +  " | " + self.job_alloc_map[alloc_id]["job_id"] + " | " + \
                self.job_alloc_map[alloc_id]["sec_id"] + " | "
            line_count = len(output)     
 
            for host in self.job_alloc_map[alloc_id]["hostnames"]:
                # TODO simplify code.
                if self.job_alloc_map[alloc_id]["hostnames"][host] : 
                    temp_count =  (len(host) + 2)
                    line_count += temp_count
                    
                    if line_count > line_limit:
                        output += "\n" + tab
                        line_count = temp_count  + tab_count

                    output += host + ", "

            print (output[:output.rfind(', ')])

            if self.settings.default.verbose:
                print(DIV_2)
                print("Found in Database: %s" % \
                        self.job_alloc_map[alloc_id].get("in_db"))
                print("Time Verified    : %s" % \
                        self.job_alloc_map[alloc_id].get("verified"))
                print("Running Time     : %s" % \
                        self.job_alloc_map[alloc_id].get("run_time"))
                print("Start Time       : %s" % \
                    output_helpers.format_timestamp(
                        self.job_alloc_map[alloc_id].get("start_time")))
                print("End Time         : %s" % \
                        output_helpers.format_timestamp(
                            self.job_alloc_map[alloc_id].get("end_time")))

                others = self.job_alloc_map[alloc_id].get("other_nodes")
                if others is not None:
                    print("Additional Nodes : %s" % ", ".join(others))

            print(DIV_1)
            print("")

        print(DIV_0)

        logger.debug("Exit ToolJobsRunning print_jobs_running")
Example #7
class ToolJobRange (object):
    ''' Class collecting Job Time Range utility. '''

    def __init__(self, args):
        ''' Initializes the tool. '''

        logger.debug("Begin ToolJobRange init")

        #: The user configurable input to the usecase.
        self.settings = Settings()

        self.settings.help_string = '''A tool for retrieving the start and end time of a job in the LogAnalysis Big Data Store.'''

        # self.settings.time
        self.settings.append_class(TimeSettings)
        # self.settings.job_info
        self.settings.append_class(JobSettings, ["target_hostnames", "no_hosts", "job_id", "secondary_job_id"])
        # self.settings.remote_server
        self.settings.append_class(RemoteServerSettings)
        
        # Parse the arguments and config file.
        self.settings.parse_args(args)
         
        #: The payload for the query (UnityPayload).
        self.payload = UnityPayload()

        #: Exposes the CSM CLI to python without needing to use subprocess.
        self.csmi_tool = CSMITool()

        # Initialize the unity connection. 
        try: 
            self.payload.create_connection(
                self.settings.remote_server.construct_uri(),
                self.settings.remote_server.access["userid"], 
                self.settings.remote_server.access["userpass"]) 
        except Exception as e:
            # If the login fails, exit the system with a 1.
            logger.error(e)
            logger.error("Please verify that the remote_server section was properly configured.")

            logger.error("Error Code 1 \nSettings Used:\n{0}".format(
                self.settings.get_values_string()))

            if self.settings.remote_server.access is None:
                logger.error("The access setting MUST point to a separate configuration file.")

            sys.exit(1)

        logger.debug("Exit ToolJobRange init")

    def find_job_time_range(self):
        ''' Find the time range of the specified job id.'''
        
        logger.debug("Enter ToolJobRange find_job_time_range")

        # Get the time range and exit if a value error was thrown, 
        # logging the error to the error log.
        try:
            job_id_map, start_time, end_time =  \
                unity_helper.find_job_time_range_csm(
                    self.payload,
                    self.settings.job_info.job_id,
                    self.settings.job_info.secondary_job_id,
                    self.settings.job_info.target_hostnames,
                    self.settings.time.date,
                    self.settings.time.days )
        except ValueError as e:
            logger.error(e)
            logger.debug("Exit ToolJobRange find_job_time_range")
            return 1

        if job_id_map is None or len(job_id_map) == 0:
            if self.settings.job_info.target_hostnames is not None:
                logger.warning("No errors were detected, but jobid '{0}' was not found for targeted hosts: [{1}]. Please consult your settings and try again.".format(
                    self.settings.job_info.job_id,
                    ",".join(self.settings.job_info.target_hostnames)))

            else:
                logger.warning("No errors were detected, but jobid '{0}' was not found. Please consult your settings and try again.".format(
                    self.settings.job_info.job_id))

            logger.debug("Exit ToolJobRange find_job_time_range")
            return 1

        print("")
        rc = unity_helper.print_job_time_range(
            job_id_map, 
            self.settings.job_info.job_id, 
            start_time, 
            end_time)
        
        logger.debug("Exit ToolJobRange find_job_time_range")
        return rc
Example #8
class ToolJobMetrics(object):
    ''' A tool for aggregating metrics from nodes that participated in the specified job id over the job execution time.'''
    def __init__(self, args):
        ''' Initializes the metrics tool.'''
        logger.debug("Enter ToolJobMetrics init")

        #: The user configurable input to the usecase.
        self.settings = Settings()

        self.settings.help_string = '''A tool for retrieving metrics from nodes that participated in the supplied job id.
   Uses the JobSettings, RemoteServerSettings and StatisticalSettings configuration modules.'''

        # self.settings.time
        self.settings.append_class(TimeSettings)
        # self.settings.job_info
        self.settings.append_class(
            JobSettings,
            ["no_hosts", "target_hostnames", "job_id", "secondary_job_id"])
        # self.settings.remote_server
        self.settings.append_class(RemoteServerSettings)
        # self.settings.statistics
        self.settings.append_class(StatisticalSettings)

        # Parse the arguments and config file.
        self.settings.parse_args(args)

        #: The payload object for connecting to the Log Analysis server (UnityPayload).
        self.payload = UnityPayload()

        #: Exposes the CSM CLI to python without needing to use subprocess.
        self.csmi_tool = CSMITool()

        try:
            self.payload.create_connection(
                self.settings.remote_server.construct_uri(),
                self.settings.remote_server.access["userid"],
                self.settings.remote_server.access["userpass"])
        except Exception as e:
            # If the login fails, exit the system with a 1.
            logger.error(e)
            logger.error(
                "Please verify that the remote_server section was properly configured."
            )

            logger.error("Error Code 1 \nSettings Used:\n{0}".format(
                self.settings.get_values_string()))

            if self.settings.remote_server.access is None:
                logger.error(
                    "The access setting MUST point to a separate configuration file."
                )

            sys.exit(1)

        logger.debug("Exit ToolJobMetrics init")

    def get_job_metrics(self):
        ''' Gets the metrics from the Log Analysis server during the job execution on the specified nodes.'''
        logger.debug("Enter ToolJobMetrics get_job_metrics")

        # Get the time range and exit if a value error was thrown, logging the error to the error log.
        try:
            job_id_map, start_time, end_time =  \
                unity_helper.find_job_time_range_csm(
                    self.payload,
                    self.settings.job_info.job_id,
                    self.settings.job_info.secondary_job_id,
                    self.settings.job_info.target_hostnames,
                    self.settings.time.date,
                    self.settings.time.days )
        except ValueError as e:
            logger.error(e)
            return 1

        if job_id_map is None or len(job_id_map) == 0:
            if self.settings.job_info.target_hostnames is not None:
                logger.warning(
                    "No errors were detected, but jobid '{0}' was not found for targeted hosts: [{1}]. Please consult your settings and try again."
                    .format(self.settings.job_info.job_id,
                            ",".join(self.settings.job_info.target_hostnames)))

            else:
                logger.warning(
                    "No errors were detected, but jobid '{0}' was not found. Please consult your settings and try again."
                    .format(self.settings.job_info.job_id))

            logger.debug("Exit ToolJobMetrics get_job_metrics")
            return 1

        if self.settings.default.verbose:
            unity_helper.print_job_time_range(job_id_map,
                                              self.settings.job_info.job_id,
                                              start_time, end_time)

        if job_id_map is not None:
            keys = job_id_map.keys()
        else:
            keys = []

        # Get the "filtered" metrics
        try:
            ip_addrs = self.modify_unity_payload(
                self.payload, start_time, end_time, keys,
                self.settings.statistics.log_sources,
                self.settings.statistics.log_tags)
        except ValueError as e:
            logger.error(e)
            return 1

        metrics, metadata = self.get_metrics_unity(
            self.payload, self.settings.statistics.log_source_details, keys,
            ip_addrs)

        self.print_metrics(metrics, metadata, start_time, end_time,
                           self.settings.job_info.job_id,
                           self.settings.statistics.stat_options)

        # Get the "unfiltered" metrics
        ip_addrs = self.modify_unity_payload(
            self.payload,
            start_time,
            end_time,
            log_sources=self.settings.statistics.log_sources_all,
            tags=self.settings.statistics.log_tags_all)

        metrics, metadata = self.get_metrics_unity(
            self.payload, self.settings.statistics.log_source_details)

        self.print_metrics(metrics, metadata, start_time, end_time,
                           self.settings.job_info.job_id,
                           self.settings.statistics.stat_options)

        return 0

    @staticmethod
    def modify_unity_payload(payload,
                             start_time,
                             end_time,
                             nodes=None,
                             log_sources=None,
                             tags=None):
        ''' Modifies the supplied payload for a metrics query. Executes initialize_payload() before populating the payload for the metrics query.

            :param UnityPayload payload: A payload object with an existing connection to a Log Analysis Server
            :param datetime start_time: The start of the range to aggregate metrics for.
            :param datetime end_time: The end of the range to aggregate metrics for.
            :param list nodes: The nodes to perform the statistical analysis on.
            :param list log_sources: A collection of Log Analysis logSource values to search in the query.
            :param list tags: A collection of Log Analysis tags to search in the query.
            :returns: A mapping of IP address to hostname.
            :rtype: dict'''
        payload.initialize()

        # Set the log sources.
        append = False
        if log_sources is not None:
            payload.set_logsources("logSource", log_sources)
            append = True

        if tags is not None:
            payload.set_logsources("tags", tags, append)

        ip_addrs = None
        if nodes is not None:
            # Get the ip address mapping.
            ip_addrs = ToolJobMetrics.map_ip(nodes)
            if ip_addrs is not None:
                payload.set_query(
                    payload.scala_host_query(None, (nodes + ip_addrs.keys())))

        # Construct the payload.
        payload.set_range_timestamp_filter(start_time, end_time)

        return ip_addrs
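
    # Usage sketch for modify_unity_payload (hypothetical values; see
    # get_job_metrics above for the real call sites):
    #   ip_addrs = ToolJobMetrics.modify_unity_payload(
    #       payload, start_time, end_time,
    #       nodes=["node01"], log_sources=["/syslog"])
    # The returned dict maps a resolved ip address back to its hostname, so
    # search results keyed by ip can be folded into the per-host metrics.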

    @staticmethod
    def get_metrics_unity(payload, sources, nodes=None, ip_addrs=None):
        ''' Queries a LogAnalysis server and performs metrics analysis on the results.

            :param UnityPayload payload: A payload object that has been configured through modify_unity_payload with an active connection to a LogAnalysis server.
            :param dict sources: A dictionary containing the hostname and fields for log sources.
            :param list nodes: A collection of hostnames/ip addresses
            :return: A dictionary of metric data and a MetricsMetadata object.
            :rtype: dict, MetricsMetadata'''

        # Initialize the metadata and metrics objects.
        metadata_objects = dict()
        metrics = {"global": {}}
        filter_on_nodes = nodes is not None

        if filter_on_nodes:
            for node in nodes:
                metrics[node] = {}

        # Query the server
        while payload.get_start() >= 0:
            json_response = json.loads(payload.post())

            payload.set_start(payload.determine_start(json_response))

            if UnityResponse.SEARCH_RESULTS not in json_response:
                continue

            for entry in json_response[UnityResponse.SEARCH_RESULTS]:
                # Get and cache the data source for the entry.
                attributes = entry.get(UnityResponse.ATTRIBUTES)
                data_source = attributes.get(UnityResponse.DATA_SOURCE)

                if data_source not in metadata_objects:
                    keys = dict()

                    if sources is not None and \
                        data_source in sources:
                        keys = sources[data_source]

                    # Parameter expansion is great - John Dunham
                    metadata_objects[data_source] = \
                        MetricsMetadata( attributes, **keys )

                    for metric in metrics:
                        metrics[metric][data_source] = \
                            metadata_objects[data_source].build_metric_dict()

                # Resolve the hostname this entry belongs to.
                hostname = attributes.get(
                    metadata_objects[data_source].hostname)
                if filter_on_nodes and (ip_addrs is not None
                                        and hostname in ip_addrs):
                    hostname = ip_addrs[hostname]

                # Build metrics object for the hostname specified in the entry.
                if hostname not in metrics:
                    metrics[hostname] = {}
                if data_source not in metrics[hostname]:
                    metrics[hostname][data_source] = \
                        metadata_objects[data_source].build_metric_dict()

                try:
                    if metadata_objects[data_source].timestamp is None:
                        logger.debug("%s data source timestamp was not set")
                        continue
                        # FIXME give the user a choice?
                    # Resolve the timestamp.
                    timestamp = attributes.get(
                        metadata_objects[data_source].timestamp)
                    last_index = timestamp.rfind(':')
                    timestamp = timestamp[:last_index] + "." + timestamp[
                        last_index + 1:]
                    metric_time = dateutil.parser.parse(timestamp)

                    # Update the count.
                    metric_index = metrics[hostname][data_source]["count"]
                    metrics[hostname][data_source]["count"] += 1
                    metrics["global"][data_source]["count"] += 1

                    # Cache the timestamp.
                    raw_index = metadata_objects[data_source].get_raw_index(
                        "timestamp")
                    metrics[hostname][data_source]["raw"][raw_index].insert(
                        metric_index, metric_time)

                except Exception as e:
                    logger.warning(
                        "Error detected when caching the timestamp for this entry: %s",
                        e)
                    continue

                for attribute in attributes:
                    if not metadata_objects[data_source].field_exists(
                            attribute):
                        continue

                    raw_index = metadata_objects[data_source].get_raw_index(
                        attribute)
                    if raw_index == 0:
                        continue

                    try:
                        value = float(attributes[attribute])
                    except ValueError:
                        value = 0.0

                    metrics[hostname][data_source]["raw"][raw_index].insert(
                        metric_index, value)

                    metrics[hostname][data_source]["sum"][raw_index] += value
                    metrics["global"][data_source]["sum"][raw_index] += value

                    metrics[hostname][data_source]["max"][raw_index] = \
                        max( value, metrics[hostname][data_source]["max"][raw_index] )

                    metrics[hostname][data_source]["min"][raw_index] = \
                        min( value, metrics[hostname][data_source]["min"][raw_index] )
        #==================================================================================

        # Compute total metrics
        for data_source in metrics["global"]:
            global_count = metrics["global"][data_source]["count"]

            for raw_index in range(1,
                                   metadata_objects[data_source].num_metrics):

                metrics["global"][data_source]["avg"][raw_index] = \
                    metrics["global"][data_source]["sum"][raw_index] / max(global_count, 1)

                variance_sum = 0
                #num_records  = 0
                for hostname in metrics:
                    if hostname == "global":
                        # TODO Is global std useful?
                        metrics[hostname][data_source]["std"][raw_index] = -1.0
                        continue

                    # If the datasource didn't aggregate any data for that metric, continue.
                    if data_source not in metrics[hostname]:
                        continue

                    metrics["global"][data_source]["max"][raw_index] = max(
                        metrics["global"][data_source]["max"][raw_index],
                        metrics[hostname][data_source]["max"][raw_index])

                    metrics["global"][data_source]["min"][raw_index] = min(
                        metrics["global"][data_source]["min"][raw_index],
                        metrics[hostname][data_source]["min"][raw_index])


                    local_avg = metrics[hostname][data_source]["sum"][raw_index]\
                        / max( metrics[hostname][data_source]["count"], 1)

                    metrics[hostname][data_source]["avg"][
                        raw_index] = local_avg
                    metrics[hostname][data_source]["std"][raw_index] = \
                        ToolJobMetrics.std(
                            metrics[hostname][data_source]["raw"][raw_index],
                            local_avg )

                    # For computing the average standard deviation of a field.
                    #num_records  += len(metrics[hostname][data_source]["raw"][raw_index])
                    #variance_sum += pow( metrics[hostname][data_source]["std"][raw_index], 2)

                # XXX I'm not sure if this is "correct" math - John Dunham
                #metrics["global"][data_source]["std"][raw_index] =\
                #   numpy.sqrt( variance_sum / variance_sum )

        return metrics, metadata_objects
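
    # Shape of the returned metrics map (derived from the indexing above;
    # build_metric_dict itself is not shown in this corpus):
    #   metrics[hostname][data_source] = { "count": int, "raw": [...],
    #       "sum": [...], "min": [...], "max": [...], "avg": [...],
    #       "std": [...] }   # one list slot per tracked field (raw_index)
    # where the special hostname "global" holds the cross-host aggregates.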

    @staticmethod
    def print_metrics(metrics,
                      metadata,
                      start_time=None,
                      end_time=None,
                      job_id=None,
                      stat_options=["avg", "min", "max", "std"]):
        ''' Prints out the aggregated metrics.
                
            :param dict metrics: The aggregated metrics.
            :param MetricsMetadata metadata: The metadata object for the metrics map.
            :param datetime start_time: The start time of the supplied job.
            :param datetime end_time: The end time of the supplied job.
            :param int job_id: The job id.
            :param list stat_options: Collection of metrics to display in the output.
        '''
        logger.debug("Enter ToolJobMetrics.print_metrics")
        # Output formatters
        DIV_HEADER = "=" * 5 + " {0} " + "=" * 5
        DIV_0 = "-" * 50
        DIV_1 = "=" * 50
        DIV_2 = "@" * 75

        # Header
        # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
        print("\n" + DIV_2)
        print("\n{0}\nDisplaying the following metrics:\n\t    {1}".format(
            DIV_1, ", ".join(stat_options)))

        if job_id is not None:
            print("Job ID:     {0}".format(job_id))

        if start_time is not None:
            print("Start Time: {0}".format(
                output_helpers.format_timestamp(start_time)))

        if end_time is not None:
            print("End Time:   {0}".format(
                output_helpers.format_timestamp(end_time)))

        print(DIV_1)
        # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

        # Generator expression
        headers = ",".join('{0: >12}'.format(stat)
                           for stat in stat_options) + ",   key"

        # Move the global metric to the end of the list.
        hostnames = [
            hostname for hostname in metrics.keys() if hostname != "global"
        ]
        hostnames.append("global")

        # Print out the metrics
        for hostname in hostnames:
            print("\n{0}\n{1}:\n".format(DIV_0, hostname))

            for data_source in metrics[hostname]:

                print(DIV_HEADER.format(data_source))

                body = [""] * len(metadata[data_source].headers)

                # Build the body
                for header_index, metric in enumerate(stat_options):
                    for body_index, field in enumerate(
                            metadata[data_source].headers):
                        stat_index = metadata[data_source].metrics.get(field)
                        try:
                            value = round(
                                metrics[hostname][data_source][metric]
                                [stat_index], 4)
                            if abs(value) == sys.float_info.max:
                                value = "NaN"
                        except:
                            value = "NaN"

                        body[body_index] += "{0: >12},".format(value)

                # Append the field name to the row.
                for body_index, field_name in enumerate(
                        metadata[data_source].headers):
                    body[body_index] += "   " + field_name

                print(headers)
                for stat in body:
                    print(stat)
                print("")

        logger.debug("Exit ToolJobMetrics.print_metrics")

    @staticmethod
    def std(values, average=None):
        ''' A passthrough to numpy.std.  Performs the standard deviation on the supplied values.

            :param list values: A list of float values to perform the standard deviation on.
            :param float average: The average for the standard deviation, currently unused.
            :returns: The Standard Deviation for the supplied values.
            :rtype: float'''
        logger.debug("Enter unity_metrics.std")
        count = len(values)

        if count < 1 or not (isinstance(values[0], float)
                             or isinstance(values[0], int)):
            logger.debug(
                "Exit unity_metrics.std, standard deviation not computed")
            return

        logger.debug("Exit unity_metrics.std")
        return numpy.std(values)
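
    # Note: std() implicitly returns None rather than raising when the list
    # is empty or its first element is non-numeric, so callers must treat a
    # missing result as "standard deviation not computed".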

    @staticmethod
    def map_ip(hostnames):
        ''' Translate a list of hostnames to a mapping of ip addresses to hostnames.

            :param list hostnames: A list of hostnames to map to ip address.
            :returns: A mapping of ip addresses to hostnames.
            :rtype: dict'''
        logger.debug("Entering unity_metrics.map_ip")

        ip_addrs = dict()
        for hostname in hostnames:
            # If the hostname is not in the /etc/hosts, don't crash the execution,
            # just don't add it to the list.
            try:
                ip = socket.gethostbyname(hostname)
                ip_addrs[ip] = hostname
            except socket.gaierror as e:
                logger.warning(
                    "Hostname \'%s\' was not found in /etc/hosts: %s",
                    hostname, e)

        logger.debug("Exiting unity_metrics.map_ip")
        return ip_addrs