def wrapper(*fargs, **fkwargs):
    # Typically the task folder is the index, so we will create
    # indices that start with decorator-<task>
    result = None

    # The watcher is required, first keyword argument
    if not args:
        bot.error("A watcher name is required for the psutils decorator.")
        return result

    # Get a watcher to save results to
    watcher = get_watcher(args[0], create=kwargs.get("create", False))

    # Start the function
    runner = ProcessRunner(
        seconds=kwargs.get("seconds", 3),
        skip=kwargs.get("skip", []),
        include=kwargs.get("include", []),
        only=kwargs.get("only", []),
    )
    runner.run(func, *fargs, **fkwargs)
    result = runner.wait("monitor_pid_task")

    # Save results (finishing runs) - key is folder created
    name = kwargs.get("name", func.__name__)
    key = "decorator-psutils-%s" % name
    results = {key: runner.timepoints}
    watcher.finish_runs(results)

    # Return function result to the user
    return result
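# Usage sketch (hedged): this wrapper is the inner closure of a decorator
# factory that supplies func, args, and kwargs from its enclosing scopes.
# The factory name and import path below are assumptions for illustration,
# not confirmed API.
from watchme.watchers.psutils.decorators import monitor_resources  # assumed path

@monitor_resources("system", seconds=3)   # "system" names an existing watcher
def long_running_job():
    total = 0
    for i in range(10 ** 7):
        total += i
    return total

value = long_running_job()  # timepoints saved under decorator-psutils-long_running_job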
def export_runs(self, results, exporters):
    '''export data retrieved to the set of exporters defined and active.
       Maybe an export flag could be set to choose to run + export?
    '''
    for name, result in results.items():
        task = self.get_task(name, save=True)

        # Case 1. The result is a list
        if isinstance(result, list):

            # Get rid of Nones, if the user accidentally added
            result = [r for r in result if r]
            if len(result) == 0:
                bot.error('%s returned empty list of results.' % name)

            # for a json, or a list of paths, ignore for now.
            elif not (task.params.get('save_as') == 'json'
                      or os.path.exists(result[0])):
                for exporter in exporters:
                    bot.debug('Exporting list to ' + exporter.name)
                    exporter._save_text_list(name, result)

        # Case 2. The result is a string
        elif isinstance(result, str):

            # if it's a path to a file, ignore it.
            if not os.path.exists(result):
                for exporter in exporters:
                    bot.debug('Exporting text to ' + exporter.name)
                    exporter._save_text(result)
def clone_watcher(repo, base=None, name=None):
    """clone a watcher from GitHub (or other version control with git),
       meaning that we clone to a temporary folder, and then move to a
       new folder. By default, the user gets all tasks associated with
       the watcher, along with the git folder so that removing is also
       done with version control.

       Parameters
       ==========
       repo: the repository to clone
       base: the watchme base, defaults to $HOME/.watchme
       name: a new name for the watcher, if a rename is desired.
    """
    if base is None:
        base = WATCHME_BASE_DIR

    # Validate the repository address
    if not re.search("^git@|http", repo):
        bot.exit("Please provide a valid url to a git repository")

    # if the name is None, use the repo name
    if name is None:
        name = os.path.basename(repo)

    # Ensure we aren't overwriting
    dest = os.path.join(base, name)
    if os.path.exists(dest):
        bot.exit("%s already exists, choose a different watcher name." % name)

    clone_dest = get_tmpdir(prefix="watchme-clone", create=False)
    run_command("git clone %s %s" % (repo, clone_dest))

    # Valid by default - will copy over if valid
    valid = True

    # Iterate over watchers
    watchers = os.listdir(clone_dest)
    for watcher in watchers:
        watcher = os.path.join(clone_dest, watcher)
        tasks = os.listdir(watcher)

        # Check that each task folder includes a watcher.cfg
        for task in tasks:
            if not task.startswith("task"):
                continue
            task_folder = os.path.join(watcher, task)
            content = os.listdir(task_folder)
            if "watcher.cfg" not in content:
                bot.error("%s is missing a watcher.cfg" % task)
                valid = False
                break

    if valid:
        shutil.move(clone_dest, dest)

    if os.path.exists(clone_dest):
        shutil.rmtree(clone_dest)
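# Usage sketch (hedged): clone a published watcher into the local watchme
# base. The repository URL and new name below are illustrative only.
clone_watcher(
    repo="https://www.github.com/vsoch/watchme-air-quality",  # example repo
    name="air-quality",                                       # optional rename
)
# The watcher then lives under $HOME/.watchme/air-quality with its git history.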
def run(self):
    """run an isolated task, meaning no update or communication with
       the watcher. This will return the raw result.
    """
    params = self.export_params()
    func = self.export_func()
    if func is not None:
        return func(**params)
    bot.error("Cannot find function.")
def _validate(self):
    """additional validation function, called by validate() of the
       superclass. Here we assume all required self.params are included.
       If a parameter is found to be invalid, self.valid should be set
       to False.
    """
    # The url must begin with http
    if not self.params["url"].startswith("http"):
        bot.error("%s is not a valid url." % self.params["url"])
        self.valid = False
def _validate(self):
    '''additional validation function, called by validate() of the
       superclass. Here we assume all required self.params are included.
       If a parameter is found to be invalid, self.valid should be set
       to False.
    '''
    # The url must begin with http
    if not self.params['url'].startswith('http'):
        bot.error('%s is not a valid url.' % self.params['url'])
        self.valid = False
def get_url_selection(url, **kwargs):
    """select some content from a page dynamically, using selenium.

       Parameters
       ==========
       kwargs: a dictionary of key, value pairs provided by the user
    """
    results = None
    selector = kwargs.get("selection", None)
    headers = get_headers(kwargs)
    if selector is None:
        bot.error("You must define the selection (e.g., [email protected])")
        return results

    # Does the user want to get text?
    get_text = False
    if kwargs.get("get_text") is not None:
        get_text = True

    # Are we searching for a regular expression in the result?
    regex = kwargs.get("regex")

    # Does the user want to get one or more attributes?
    attributes = kwargs.get("attributes", None)
    if attributes is not None:
        attributes = attributes.split(",")

    # User can pass a parameter like url_param_<name>
    # url_param_page=1,2,3,4,5,6,7,8,9
    paramsets = get_params(kwargs)

    # Each is a dictionary of values
    results = []
    for params in paramsets:

        # Get the page
        results += get_results(
            url=url,
            selector=selector,
            headers=headers,
            attributes=attributes,
            params=params,
            get_text=get_text,
            regex=regex,
        )

    # No results
    if not results:
        results = None
    return results
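# Usage sketch (hedged): scrape a CSS selection from each page of a
# parameterized URL. The URL, selector, and page values are illustrative;
# url_param_<name> expands into one request per comma-separated value, as
# noted in the comment inside the function above.
rows = get_url_selection(
    "https://example.com/listing",
    selection=".price",       # CSS selector to extract
    get_text="true",          # any non-None value turns on text extraction
    url_param_page="1,2,3",   # one request per page value
)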
def get_url_selection(url, **kwargs):
    '''select some content from a page dynamically, using selenium.

       Parameters
       ==========
       kwargs: a dictionary of key, value pairs provided by the user
    '''
    results = None
    selector = kwargs.get('selection', None)
    headers = get_headers(kwargs)
    if selector is None:
        bot.error('You must define the selection (e.g., [email protected])')
        return results

    # Does the user want to get text?
    get_text = False
    if kwargs.get('get_text') is not None:
        get_text = True

    # Does the user want to capture a certain value?
    regex = kwargs.get('regex')

    # Does the user want to get one or more attributes?
    attributes = kwargs.get('attributes', None)
    if attributes is not None:
        attributes = attributes.split(',')

    # User can pass a parameter like url_param_<name>
    # url_param_page=1,2,3,4,5,6,7,8,9
    paramsets = get_params(kwargs)

    # Each is a dictionary of values
    results = []
    for params in paramsets:

        # Get the page
        results += get_results(url=url,
                               selector=selector,
                               headers=headers,
                               attributes=attributes,
                               params=params,
                               get_text=get_text,
                               regex=regex)

    # No results
    if len(results) == 0:
        results = None
    return results
def validate(self):
    """validate the parameters set for the Task. Exit if there are any
       errors. Ensure required parameters are defined, and have correct
       values.
    """
    self.valid = True

    for param in self.required_params:
        if param not in self.params:
            bot.error("Missing required parameter: %s" % param)
            self.valid = False

    # Call subclass validation function
    self._validate()
def _write_to_pushgateway(self, result):
    '''writes data to the pushgateway

       Parameters
       ==========
       result: the result object to save
    '''
    g = Gauge(self.name.replace('-', ':'), '', registry=self.registry)
    g.set(result)
    try:
        push_to_gateway(self.params['url'], job='watchme',
                        registry=self.registry)
    except Exception:
        bot.error('An exception occurred while trying to export data using %s'
                  % self.name)
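# Minimal standalone sketch (hedged) of the prometheus_client calls this
# exporter relies on; the gateway address and metric name are assumptions.
from prometheus_client import CollectorRegistry, Gauge, push_to_gateway

registry = CollectorRegistry()
gauge = Gauge("watchme_example_result", "example watchme result", registry=registry)
gauge.set(42.0)
push_to_gateway("localhost:9091", job="watchme", registry=registry)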
def mkdir_p(path):
    """mkdir_p attempts to get the same functionality as mkdir -p

       Parameters
       ==========
       path: the path to create.
    """
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            bot.error("Error creating path %s, exiting." % path)
            sys.exit(1)
def _get_pid(name):
    '''used to get the pid of a process, by name

       Parameters
       ==========
       name: the name of the process to get.
    '''
    try:
        pid = check_output(["pidof", name])
        pid = pid.decode('utf-8').strip('\n').split(' ')
        if len(pid) > 1:
            bot.warning("More than one pid found for %s, using first." % name)
        pid = int(pid[0])
    except CalledProcessError:
        bot.error("%s does not exist." % name)
        pid = None
    return pid
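# Usage sketch (hedged): resolve a process id by name via pidof (Linux only);
# the process name "sshd" is illustrative.
pid = _get_pid("sshd")
if pid is not None:
    print("monitoring pid %s" % pid)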
def getenv(variable_key, default=None, required=False, silent=True):
    """attempt to get an environment variable. If the variable is not
       found, the default (None) is returned.

       Parameters
       ==========
       variable_key: the variable name
       default: value to return if the variable is not found
       required: exit with error if not found
       silent: do not print debugging information for the variable
    """
    variable = os.environ.get(variable_key, default)
    if variable is None and required:
        bot.error("Cannot find environment variable %s, exiting." % variable_key)
        sys.exit(1)

    if not silent and variable is not None:
        bot.verbose("%s found as %s" % (variable_key, variable))

    return variable
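# Usage sketch (hedged): read an optional setting with a fallback, and a
# required one that exits if missing. The variable names are illustrative.
base = getenv("WATCHME_BASE_DIR", default="~/.watchme")
token = getenv("WATCHME_API_TOKEN", required=True, silent=False)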
def _get_params_dict(self, pairs):
    '''iterate through parameters, make keys lowercase, and ensure
       valid format.

       Parameters
       ==========
       pairs: a list of key@value pairs to set.
    '''
    params = {}
    for pair in pairs:
        if "@" not in pair:
            bot.exit('incorrectly formatted param, must be key@value')
        key, value = pair.split('@', 1)
        key = key.lower()

        # Tasks are not allowed to set default (reserved) parameters
        if key in WATCHME_NOTALLOWED_PARAMS:
            bot.error('%s is a default, not allowed setting by task.' % key)
            self.valid = False

        params[key] = value
    return params
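# Usage sketch (hedged): parameters arrive as key@value strings; keys are
# lowercased and only the first "@" is split on, so values may themselves
# contain "@". The values below are illustrative.
pairs = ["URL@https://example.com/feed", "save_as@json"]
# _get_params_dict(pairs) would yield:
# {'url': 'https://example.com/feed', 'save_as': 'json'}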
def post_task(url, **kwargs):
    '''a simple task to use requests to post to. By default, we return json.

       Parameters
       ==========

       REQUIRED:
           url: a url to post to
    '''
    results = []

    # The json params can vary, but headers do not
    jsonlist = get_params(kwargs, key='json_param_')
    headers = get_headers(kwargs)

    # Loop through lists of json and headers
    for params in jsonlist:

        # Get the post response and proceed if successful
        response = requests.post(url, json=params, headers=headers)
        if response.status_code == 200:
            save_as = kwargs.get('save_as', 'json')

            # Returning the result as json will detect dictionary, and save json
            if save_as == "json":
                result = response.json()

            # Otherwise, we return text
            else:
                result = response.text

            # Keep the parsed result
            results.append(result)
        else:
            bot.error("%s: %s" % (response.status_code, response.reason))

    # Return None if no results found
    if len(results) == 0:
        results = None
    return results
def post_task(url, **kwargs):
    """a simple task to use requests to post to. By default, we return json.

       Parameters
       ==========

       REQUIRED:
           url: a url to post to
    """
    results = []

    # The json params can vary, but headers do not
    jsonlist = get_params(kwargs, key="json_param_")
    headers = get_headers(kwargs)

    # Loop through lists of json and headers
    for params in jsonlist:

        # Get the post response and proceed if successful
        response = requests.post(url, json=params, headers=headers)
        if response.status_code == 200:

            # Parse the response per the user's request
            result = parse_success_response(response, kwargs)
            results.append(result)
        else:
            bot.error("%s: %s" % (response.status_code, response.reason))

    results = [x for x in results if x]

    # Return None if no results found
    if not results:
        results = None
    return results
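# Usage sketch (hedged): post to an endpoint, with json_param_<name> values
# assumed to expand into one payload per comma-separated value (mirroring
# the url_param_ convention above). The URL and values are illustrative.
results = post_task(
    "https://httpbin.org/post",
    json_param_sensor="one,two",  # assumed to expand to {"sensor": "one"}, {"sensor": "two"}
)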
def write_results(self, result, repo):
    '''an entrypoint function for a general task. By default, we parse
       results based on the result type. Any particular subclass of the
       TaskBase can modify or extend these functions.

       Parameters
       ==========
       result: the result object to parse
       repo: the repo base (watcher.repo)
    '''
    files = []

    # Case 1. The result is a list
    if isinstance(result, list):

        # Get rid of Nones, if the user accidentally added
        result = [r for r in result if r]
        if len(result) == 0:
            bot.error('%s returned empty list of results.' % self.name)

        # multiple jsons save specified, regardless
        elif self.params.get('save_as') == 'jsons':
            bot.debug('Saving single list as multiple json...')
            files += self._save_json_list(result, repo)

        # json output is specified by the user or we find dict results
        elif self.params.get('save_as') == 'json' or isinstance(result[0], dict):
            bot.debug('Saving single list as one json...')
            files.append(self._save_json(result, repo))

        # Otherwise, sniff for list of paths
        elif os.path.exists(result[0]):
            bot.debug('Found list of paths...')
            files += self._save_files_list(result, repo)

        # Finally, assume just writing text to file
        else:
            bot.debug('Saving content from list to file...')
            files += self._save_text_list(result, repo)

    # Case 2. The result is a string
    elif isinstance(result, str):

        # if it's a path to a file, just save to repository
        if os.path.exists(result):
            files.append(self._save_file(result, repo))

        # Otherwise, it's a string that needs to be saved to file
        else:
            files.append(self._save_text(result, repo))

    # Case 3. The result is a dictionary
    elif isinstance(result, dict):
        files.append(self._save_json(result, repo))

    elif result is None:
        bot.error('Result for task %s is None' % self.name)

    elif hasattr(self, '_write_results'):
        return self._write_results(result)

    else:
        bot.error('Unsupported result format %s' % type(result))

    # Get rid of None results (don't check excessively for None above)
    files = [f for f in files if f]
    return files
def run(self, funcs, tasks):
    '''run will send a list of tasks, a tuple with arguments, through a
       function. The arguments should be ordered correctly.

       Parameters
       ==========
       funcs: the functions to run with multiprocessing.pool, a
              dictionary with lookup by the task name
       tasks: a dict of tasks, each task name (key) with a tuple
              of arguments to process
    '''
    # Number of tasks must == number of functions
    assert len(funcs) == len(tasks)

    # Keep track of some progress for the user
    progress = 1
    total = len(tasks)

    # if we don't have tasks, don't run
    if len(tasks) == 0:
        return

    # results will also have the same key to look up
    finished = dict()
    results = []

    try:
        prefix = "[%s/%s]" % (progress, total)
        if self.show_progress:
            bot.show_progress(0, total, length=35, prefix=prefix)
        pool = multiprocessing.Pool(self.workers, init_worker)

        self.start()
        for key, params in tasks.items():
            func = funcs[key]
            if not self.show_progress:
                bot.info('Processing task %s:%s' % (key, params))
            result = pool.apply_async(multi_wrapper,
                                      multi_package(func, [params]))

            # Store the key with the result
            results.append((key, result))

        while len(results) > 0:
            pair = results.pop()
            key, result = pair
            result.wait()
            if self.show_progress:
                bot.show_progress(progress, total, length=35, prefix=prefix)
            progress += 1
            prefix = "[%s/%s]" % (progress, total)
            finished[key] = result.get()

        self.end()
        pool.close()
        pool.join()

    except (KeyboardInterrupt, SystemExit):
        bot.error("Keyboard interrupt detected, terminating workers!")
        pool.terminate()
        sys.exit(1)

    except Exception as e:
        bot.error(e)

    return finished
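# Usage sketch (hedged): funcs and tasks are parallel dicts keyed by task
# name; each task value is a tuple of ordered arguments for its function.
# The Workers class name and its constructor arguments are assumptions.
def add(a, b):
    return a + b

def shout(word):
    return word.upper()

funcs = {"task-add": add, "task-shout": shout}
tasks = {"task-add": (1, 2), "task-shout": ("hello",)}
finished = Workers(show_progress=False).run(funcs, tasks)
# finished -> {"task-add": 3, "task-shout": "HELLO"}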
def finish_runs(self, results):
    '''finish runs should take a dictionary of results, with keys as the
       folder name, and for each, depending on the result type, write the
       result to file (or update file) and then commit to git.

       Parameters
       ==========
       results: a dictionary of tasks, with keys as the task name, and
                values as the result.
    '''
    for name, result in results.items():
        task_folder = os.path.join(self.repo, name)
        task = self.get_task(name, save=True)

        # Files to be added via Git after
        files = []

        # Ensure that the task folder exists
        if not os.path.exists(task_folder):
            mkdir_p(task_folder)
            git_add(self.repo, task_folder)

        # Case 1. The result is a list
        if isinstance(result, list):

            # Get rid of Nones, if the user accidentally added
            result = [r for r in result if r]
            if len(result) == 0:
                bot.error('%s returned empty list of results.' % name)

            # single json output is specified
            elif task.params.get('save_as') == 'json':
                bot.debug('Saving single list as one json...')
                files.append(task._save_json(result, self.repo))

            # multiple json outputs are specified
            elif task.params.get('save_as') == 'jsons':
                bot.debug('Saving single list as multiple json...')
                files += task._save_json_list(result, self.repo)

            # Otherwise, sniff for list of paths
            elif os.path.exists(result[0]):
                bot.debug('Found list of paths...')
                files += task._save_files_list(result, self.repo)

            # Finally, assume just writing text to file
            else:
                bot.debug('Saving content from list to file...')
                files += task._save_text_list(result, self.repo)

        # Case 2. The result is a string
        elif isinstance(result, str):

            # if it's a path to a file, just save to repository
            if os.path.exists(result):
                files.append(task._save_file(result, self.repo))

            # Otherwise, it's a string that needs to be saved to file
            else:
                files.append(task._save_text(result, self.repo))

        # Case 3. The result is a dictionary
        elif isinstance(result, dict):
            files.append(task._save_json(result, self.repo))

        elif result is None:
            bot.error('Result for task %s is None' % name)

        else:
            bot.error('Unsupported result format %s' % type(result))

        # Get rid of None results (don't check excessively for None above)
        files = [f for f in files if f]

        # Add files to git, and commit
        files.append(write_timestamp(repo=self.repo, task=name))
        git_add(repo=self.repo, files=files)
        git_commit(repo=self.repo, task=self.name,
                   message="ADD results %s" % name)
def write_results(self, result, repo):
    """an entrypoint function for a general task. By default, we parse
       results based on the result type. Any particular subclass of the
       TaskBase can modify or extend these functions.

       Parameters
       ==========
       result: the result object to parse
       repo: the repo base (watcher.repo)
    """
    files = []

    # Case 1. The result is a list
    if isinstance(result, list):

        # Get rid of Nones, if the user accidentally added
        result = [r for r in result if r]
        if len(result) == 0:
            bot.error("%s returned empty list of results." % self.name)

        # multiple jsons save specified, regardless
        elif self.params.get("save_as") == "jsons":
            bot.debug("Saving single list as multiple json...")
            files += self._save_json_list(result, repo)

        # json output is specified by the user or we find dict results
        elif self.params.get("save_as") == "json" or isinstance(result[0], dict):
            bot.debug("Saving single list as one json...")
            files.append(self._save_json(result, repo))

        # Otherwise, sniff for list of paths
        elif os.path.exists(result[0]):
            bot.debug("Found list of paths...")
            files += self._save_files_list(result, repo)

        # Finally, assume just writing text to file
        else:
            bot.debug("Saving content from list to file...")
            files += self._save_text_list(result, repo)

    # Case 2. The result is a string
    elif isinstance(result, str):
        files = self._save_str_result(files, result, repo)

    # Case 3. The result is a dictionary
    elif isinstance(result, dict):
        files.append(self._save_json(result, repo))

    elif result is None:
        bot.error("Result for task %s is None" % self.name)

    elif hasattr(self, "_write_results"):
        return self._write_results(result)

    # If it's unicode, try encoding, and then fail (repetitive)
    else:
        try:
            result = result.encode("utf-8")
            files = self._save_str_result(files, result, repo)
        except Exception:
            bot.error("Unsupported result format %s" % type(result))

    # Get rid of None results (don't check excessively for None above)
    files = [f for f in files if f]
    return files