def get_task(self, name):
    """Look up a configured task by name and build its Task object.

    The "type" entry of the matching config section selects which task
    module the Task class is imported from.

    Parameters
    ==========
    name: the name of the task to load

    Returns
    =======
    The instantiated Task, or None when name is not a section of the
    config or does not start with "task".
    """
    self.load_config()

    # Guard: only existing sections whose name starts with "task" qualify
    if name not in self.config._sections or not name.startswith("task"):
        return None

    # The section is a mapping of key/value entries
    section = self.config._sections[name]

    # A missing type becomes "", which ends up in the bot.exit branch
    kind = section.get("type", "")

    if kind.startswith("url"):
        from .urls import Task
    elif kind == "psutils":
        from .psutils import Task
    elif kind == "results":
        from .results import Task
    elif kind == "gpu":
        from .gpu import Task
    else:
        bot.exit("Type %s not properly set up in get_task" % kind)

    return Task(name, section)
def main(args, extra):
    """Remove tasks from a watcher, or delete the whole watcher.

    Parameters
    ==========
    args: parsed arguments; reads args.watcher, args.base and args.delete
    extra: names of the tasks to remove (not consulted when deleting)
    """
    # Required - will print help if not provided
    watcher_name = args.watcher[0]
    watcher = get_watcher(watcher_name, base=args.base, create=False)

    # With the delete flag the entire watcher goes, so no task list is needed
    if args.delete:
        watcher.delete()
        return

    # Without the flag there must be tasks to remove
    if extra is None:
        bot.exit("Provide tasks to remove, or --delete for entire watcher.")

    for task_name in extra:
        watcher.remove_task(task_name)
def _add_task(self, task, force=False, active='true'):
    """Write a task into the watcher config and commit the change.

    An existing section with the same name is only replaced when force
    is set. The task's exported parameters become the new section, the
    config is saved, a missing task folder is created and added to the
    repository, and the change is committed.

    Parameters
    ==========
    task: the Task object to add; its name and export_params() are used
    force: if task already exists, overwrite
    active: add the task as active, "true" or "false" (default "true")
    """
    self.load_config()

    if active not in ["true", "false"]:
        bot.exit('Active must be "true" or "false"')

    # An existing section is only replaced on request
    exists = task.name in self.config.sections()
    if exists and not force:
        bot.exit('%s exists, use --force to overwrite.' % task.name)
    if exists:
        self.remove_section(task.name, save=False)

    # Store the section, show it, and persist the config
    self.config[task.name] = task.export_params(active=active)
    self.print_section(task.name)
    self.save()

    # Recreate the task folder when it is missing
    folder = os.path.join(self.repo, task.name)
    if not os.path.exists(folder):
        mkdir_p(folder)
        git_add(self.repo, task.name)

    commit_message = "ADD task %s" % task.name
    git_commit(repo=self.repo, task=self.name, message=commit_message)
def get_decorator(self, name):
    """Build a Task object for a decorator.

    Decorator tasks are created from their name alone; no config section
    is read here.

    Parameters
    ==========
    name: the name of the task to load; must start with
          "decorator-psutils"
    """
    # psutils is the only decorator type handled here
    if name.startswith('decorator-psutils'):
        from .psutils import Task
    else:
        bot.exit('Type %s is not recognized in get_decorator' % name)

    return Task(name)
def _get_params_dict(self, pairs):
    """Turn a list of key@value strings into a dict with lowercase keys.

    Parameters
    ==========
    pairs: a list of key@value pairs to set.

    Returns
    =======
    A dict mapping each lowercased key to its value. Only the first "@"
    separates key from value.
    """
    parsed = {}

    for entry in pairs:
        if "@" not in entry:
            bot.exit('incorrectly formatted param, must be key@value')

        # Split once so the value itself may contain "@"
        key, value = entry.split('@', 1)
        key = key.lower()

        # Keys reserved as defaults are reported and mark self invalid.
        # NOTE(review): the reserved key is still stored below - confirm
        # that this is intended.
        if key in WATCHME_NOTALLOWED_PARAMS:
            bot.error('%s is a default, not allowed setting by task.' % key)
            self.valid = False

        parsed[key] = value

    return parsed
def add_task(self, task, task_type, params, force=False, active="true"):
    """Validate a new task's name, type and parameters, then add it.

    Parameters
    ==========
    task: the name of the task to add; must start with "task"
    task_type: must be in WATCHME_TASK_TYPES, meaning a client exists
    params: list of parameters to be validated (key@value)
    force: if task already exists, overwrite
    active: add the task as active (default "true")
    """
    # Check again, in case user calling from client
    if not task.startswith("task"):
        bot.exit('Task name must start with "task" (e.g., task-reddit)')

    # Ensure it's a valid type. Both placeholders need a value, so the
    # arguments are passed as a tuple of (given type, allowed types).
    if task_type not in WATCHME_TASK_TYPES:
        bot.exit(
            "%s is not a valid type: %s" % (task_type, WATCHME_TASK_TYPES)
        )

    # Pick the Task class that validates this type's parameters
    if task_type.startswith("url"):
        from .urls import Task
    elif task_type == "psutils":
        from .psutils import Task
    elif task_type == "results":
        from .results import Task
    elif task_type == "gpu":
        from .gpu import Task
    else:
        bot.exit("task_type %s not properly added to Watcher" % task_type)

    # Convert list to dictionary
    params = self._get_params_dict(params)

    # Creating the task will validate parameters
    newtask = Task(task, params=params)

    # Exit if the new task is not valid
    if not newtask.valid:
        bot.exit("%s is not valid, will not be added." % task)

    # Write to file (all tasks get active = True added, and type)
    self._add_task(newtask, force, active)
def run(self, regexp=None, parallel=True, test=False, show_progress=True):
    """Run the tasks of this watcher and finish or print the results.

    Steps: generate a name for the run, stop when the watcher is not
    active (unless testing), collect the tasks, run them, then either
    hand the results to finish_runs or print them as JSON.

    Parameters
    ==========
    regexp: if supplied, the user wants to run only tasks that match a
            particular pattern.
            NOTE(review): this block does not forward regexp to
            get_tasks - confirm whether filtering happens elsewhere.
    parallel: if True, use multiprocessing to run tasks (True)
    test: run in test mode (results are printed instead of finished)
    show_progress: if True, show progress bar instead of task information
                   (defaults to True)
    """
    # Step 0: each run session is given a generated name
    run_id = RobotNamer().generate()

    # Step 1: an inactive watcher only runs in test mode
    inactive = self.is_active() == False
    if inactive and test is False:
        bot.exit('Watcher %s is not active.' % self.name)

    # Step 2: the tasks associated with the run
    tasks = self.get_tasks()

    # Step 3: run them
    results = self.run_tasks(tasks, parallel, show_progress)

    # Test mode prints the results instead of finishing the runs
    if test is not False:
        print(json.dumps(results, indent=4))
        return

    self.finish_runs(results)
def main(args, extra):
    """List watcher types, installed watchers, a watcher, or one task.

    Parameters
    ==========
    args: parsed arguments; reads args.watchers and args.base
    extra: None, [<watcher>] or [<watcher>, <task>]
    """
    # The watchers flag lists the available watcher types
    if args.watchers is True:
        list_watcher_types()
        return

    # No positional arguments: list the installed watchers
    if extra is None:
        get_watchers(args.base)
        return

    count = len(extra)

    # One argument is the name of a watcher
    if count == 1:
        list_watcher(extra[0], args.base)

    # Two arguments must be a watcher and task
    elif count == 2:
        list_task(extra[0], extra[1], args.base)

    else:
        bot.exit("Please provide none or all of <watcher> <task> to list.")
def _set_base(self, base=None, create=False):
    """Set base, repo and configfile on self and ensure the watcher exists.

    Parameters
    ==========
    base: the base folder of watcher repos; WATCHME_BASE_DIR when None
    create: create the watcher if it doesn't exist (default is False)
    """
    if base is None:
        base = WATCHME_BASE_DIR

    self.base = base
    self.repo = os.path.join(self.base, self.name)
    self.configfile = os.path.join(self.repo, 'watchme.cfg')

    # Both the repo folder and its config file present: nothing more to do
    if os.path.exists(self.repo) and os.path.exists(self.configfile):
        return

    # Otherwise create on request, or give up.
    # NOTE(review): create_watcher receives only the name, not base -
    # confirm it targets the same base folder.
    if create is True:
        create_watcher(self.name)
    else:
        bot.exit('Watcher %s does not exist. Use watchme create.' % self.name)
def remove_task(self, task):
    """Remove a task's config section and folder, then commit.

    A warning is issued when the task has no section. A frozen watcher
    is reported through bot.exit before anything is removed.

    Parameters
    ==========
    task: the name of the task to remove
    """
    # Unknown task: warn and leave everything untouched
    if self.get_section(task) is None:
        bot.warning('Task %s does not exist.' % task)
        return

    if self.is_frozen():
        bot.exit('watcher is frozen, unfreeze first.')

    self.remove_section(task)

    # If the task has a folder, remove the entire thing
    task_folder = os.path.join(self.repo, task)
    if os.path.exists(task_folder):
        shutil.rmtree(task_folder)

    bot.info('%s removed successfully.' % task)
    git_commit(self.repo, self.name, "REMOVE task %s" % task)
def get_decorator(self, name):
    """Build a Task object for a decorator.

    Decorator tasks are created from their name alone; no config section
    is read here.

    Parameters
    ==========
    name: the name of the task to load; must start with
          "decorator-psutils" or "decorator-gpu"
    """
    # The name prefix selects the module that provides Task
    if name.startswith("decorator-psutils"):
        from .psutils import Task
    elif name.startswith("decorator-gpu"):
        from .gpu import Task
    else:
        bot.exit("Type %s is not recognized in watchers.get_decorator" % name)

    return Task(name)
def main(args, extra):
    """Edit the configuration for a watcher task.

    Parameters
    ==========
    args: parsed arguments; reads args.watcher, args.action, args.task
          and args.base
    extra: [<key>] for a removal, [<key>, <value>] for add and update
    """
    # Required - will print help if not provided
    name = args.watcher[0]
    action = args.action[0]
    task = args.task[0]

    # Get the watcher (exits if doesn't exist)
    watcher = get_watcher(name, base=args.base)

    # At least a key is needed. An empty list is rejected like None, so
    # the extra[0] lookup below cannot raise IndexError.
    if not extra:
        bot.exit("Please provide one or more items to %s" % action)

    key = extra[0]
    value = None

    # add and update need exactly a key and a value
    if action in ["add", "update"]:
        if len(extra) != 2:
            bot.exit("You must do watchme <watcher> add <key> <value>")
        value = extra[1]

    # edit_task checks that the task exists and applies the action
    watcher.edit_task(task, action, key, value)
def get_task(self, name, save=False):
    """Look up a configured task by name and build its Task object.

    The "type" entry of the matching config section selects which task
    module the Task class is imported from.

    Parameters
    ==========
    name: the name of the task to load
    save: passed to the Task as _save (if saving, will be True)

    Returns
    =======
    The instantiated Task, or None when name is not a section of the
    config or does not start with "task".
    """
    self.load_config()

    # Guard: only existing sections whose name starts with "task" qualify
    if name not in self.config._sections or not name.startswith('task'):
        return None

    # The section is a mapping of key/value entries
    section = self.config._sections[name]

    # A missing type becomes "", which ends up in the bot.exit branch
    kind = section.get('type', '')

    if kind.startswith("url"):
        from .urls import Task
    elif kind == 'psutils':
        from .psutils import Task
    else:
        bot.exit('Type %s not properly set up in get_task' % kind)

    return Task(name, section, _save=save)
def _active_status(self, status='true', name=None):
    """Set the "active" setting of the watcher or of one task.

    Used by activate and deactivate.

    Parameters
    ==========
    status: must be one of true, false
    name: if not None, the status of that task is changed; otherwise
          the "watcher" section is changed

    Returns
    =======
    The message for the commit: "ACTIVE" or "DEACTIVATE", followed by
    "task <name>" only when a task name was given.
    """
    # Load the configuration, if not loaded
    self.load_config()

    # Remember whether a task was named BEFORE defaulting the section.
    # Testing the defaulted name later is always true and would label
    # the watcher-level change as "task watcher".
    is_task = name is not None
    section = name if is_task else 'watcher'

    # Cut out early if section not in config
    if section not in self.config._sections:
        bot.exit('%s is not a valid task or section' % section)

    if status not in ['true', 'false']:
        bot.exit('status must be true or false.')

    # Update the status and persist it
    self.set_setting(section, 'active', status)
    self.save()

    # Build the message for the commit
    message = "ACTIVE"
    if status == "false":
        message = "DEACTIVATE"

    # Add the task name only for task-level changes
    if is_task:
        message = "%s task %s" % (message, name)

    bot.info('[%s|%s] active: %s' % (section, self.name, status))
    return message
def main(args, extra):
    """Export temporal data for a watcher task.

    The result is printed as JSON, or written to args.out when given.

    Parameters
    ==========
    args: parsed arguments; reads args.watcher, args.task, args.filename,
          args.out, args.force, args.json and args.base
    extra: additional command line arguments (not used here)
    """
    # Required - will print help if not provided
    name = args.watcher[0]
    task = args.task[0]
    filename = args.filename[0]

    # Only task and decorator names can be exported
    if not task.startswith(("task", "decorator")):
        example = "watchme export watcher task-reddit result.txt"
        bot.exit('Task name must start with "task" or "decorator": %s' % example)

    # Optional output file
    out = args.out

    # Get the watcher to interact with, must already exist
    watcher = get_watcher(name, base=args.base, create=False)

    # An existing output file is only replaced with force
    if out is not None and os.path.exists(out) and args.force is False:
        bot.exit("%s exists! Use --force to overwrite." % out)

    result = watcher.export_dict(
        task=task,
        filename=filename,
        name=name,
        export_json=args.json,
        base=args.base,
    )

    if result is None:
        return

    # No output file: show the result on the screen
    if out is None:
        print(json.dumps(result, indent=4))
        return

    write_json(result, out)
    bot.info("Result written to %s" % out)
def main(args, extra):
    """Export temporal data for a watcher task.

    The result is printed as JSON, or written to args.out when given.

    Parameters
    ==========
    args: parsed arguments; reads args.watcher, args.task, args.filename,
          args.out, args.force, args.json and args.base
    extra: additional command line arguments (not used here)
    """
    # Required - will print help if not provided
    name = args.watcher[0]
    task = args.task[0]
    filename = args.filename[0]

    # Task and decorator names are both accepted. The example shows the
    # export command, since that is the command being run.
    if not task.startswith(('task', 'decorator')):
        example = 'watchme export watcher task-reddit result.txt'
        bot.exit('Task name must start with "task" or "decorator": %s' % example)

    # Optional output file
    out = args.out

    # Get the watcher to interact with, must already exist
    watcher = get_watcher(name, base=args.base, create=False)

    # An existing output file is only replaced with force
    if out is not None:
        if os.path.exists(out) and args.force is False:
            bot.exit('%s exists! Use --force to overwrite.' % out)

    # Export the data
    result = watcher.export_dict(task=task,
                                 filename=filename,
                                 name=name,
                                 export_json=args.json,
                                 base=args.base)

    # Identity checks: an empty but valid result must not be confused
    # with None by an overloaded equality.
    if result is not None:
        if out is None:
            print(json.dumps(result, indent=4))
        else:
            write_json(result, out)
            bot.info('Result written to %s' % out)
def main(args, extra):
    """Add a task to a watcher.

    Parameters
    ==========
    args: parsed arguments; reads args.watcher, args.task,
          args.watcher_type, args.base, args.force and args.active
    extra: the key@value parameters for the task; a "type@" entry
           overrides args.watcher_type
    """
    # Required - will print help if not provided
    name = args.watcher[0]
    task = args.task[0]

    if not task.startswith("task"):
        example = "watchme add-task watcher task-cpu func@cpu_task type@psutils"
        bot.exit('Task name must start with "task", e.g., %s' % example)

    # Parameters are mandatory
    if extra is None:
        bot.exit(
            "Please provide parameters to add to your watcher (key@value)")

    # Separate a type@ entry from the real task parameters
    task_type = args.watcher_type
    task_params = []
    for item in extra:
        if item.startswith("type@"):
            task_type = item.replace("type@", "")
        else:
            task_params.append(item)

    # Get the watcher to interact with, must already exist
    watcher = get_watcher(name, base=args.base, create=False)

    # add_task validates the type and the parameters
    watcher.add_task(
        task=task,
        task_type=task_type,
        params=task_params,
        force=args.force,
        active=args.active,
    )
def delete(self):
    """Delete the watcher folder, unless it is frozen or protected.

    The folder removed is the one that contains the config file.
    Cannot be undone.
    """
    self.load_config()

    # Frozen or protected watchers are reported instead of removed
    if self.is_frozen():
        bot.exit('watcher %s is frozen, unfreeze to delete.' % self.name)
    elif self.is_protected():
        bot.exit('watcher %s is protected, turn off protection to delete.' % self.name)

    repo = os.path.dirname(self.configfile)

    # Only an existing folder can be removed
    if not os.path.exists(repo):
        bot.exit("%s:%s doesn't exist" % (self.name, repo))
    else:
        bot.info('Removing watcher %s' % self.name)
        shutil.rmtree(repo)
def schedule(self, minute=12, hour=0, month='*', day='*', weekday='*',
             job=None, force=False):
    """Schedule the watcher to run at some frequency via cron.

    With the defaults the job runs at 12 minutes past midnight, daily
    (12 0 * * *). See https://crontab.guru/ to get a setting that works
    for you. Examples:

        Hourly:  0 * * * *
        Daily:   0 0 * * *   (midnight)
        Weekly:  0 0 * * 0
        Monthly: 0 0 1 * *
        Yearly:  0 0 1 1 *

    Parameters
    ==========
    minute: must be within 0 and 59, or "*" for every minute
    hour: must be within 0 and 23, or "*"
    month: must be within 1 and 12, or "*"
    day: must be within 1 and 31, or "*"
    weekday: must be within 0 and 6, or "*"
    job: an existing cron job to update; a new one is created when None
    force: replace a schedule that already exists

    Returns
    =======
    The cron job that was written.
    """
    cron = self.get_crontab()

    # Cut out early if the job already exists, and force is false
    if self.has_schedule() and not force:
        bot.exit('%s already has a schedule. Use --force to update.' % self.name)

    # Remove any previous schedules
    cron = self.remove_schedule(quiet=True)

    # Each field is either "*" or a number from its range. The checks
    # run in this order so the first failing field is the one reported.
    checks = [
        (minute, range(60), 'minute must be in [0..59] or equal to *'),
        (hour, range(24), 'hour must be in [0..23] or equal to *'),
        (day, range(1, 32), 'day must be in [1..31] or equal to *'),
        (month, range(1, 13), 'month must be in [1..12] or equal to *'),
        (weekday, range(7), 'weekday must be in [0..6] or equal to *'),
    ]
    for value, allowed, message in checks:
        if value not in ['*'] + list(allowed):
            bot.exit(message)

    # The command will run the watcher, watcher.cfg controls what happens
    whereis = which('watchme')
    command = '%s run %s' % (whereis, self.name)
    comment = 'watchme-%s' % self.name

    # Identity check: equality could be overloaded by the job object
    if job is None:
        job = cron.new(command=command, comment=comment)

    # Set the time, and then write the job to file
    job.setall(minute, hour, day, month, weekday)
    job.enable()
    cron.write_to_user(user=True)
    bot.info(job)

    return job
def export_dict(self, task, filename, name=None, export_json=False,
                from_commit=None, to_commit=None, base=None):
    """Export the history of one result file as lists of commits, dates
    and content.

    Commits are looked up with the message pattern "ADD results <task>"
    for the given file, and the file content at each commit is collected.

    Parameters
    ==========
    task: the task folder for the watcher to look in
    filename: the file inside the task folder to export
    name: the name of the watcher, defaults to the client's.
          NOTE(review): only used in messages here; paths are built
          from self.name - confirm that is intended.
    export_json: if True, parse each content with json.loads
    from_commit: the commit to start at
    to_commit: the commit to go to
    base: the base of watchme to look for the task folder, defaults to
          the client's

    Returns
    =======
    A dict with the keys "commits", "dates" and "content". Content that
    is a list is merged into "content" item by item, so "content" can be
    longer than "commits" and "dates".
    """
    if name is None:
        name = self.name

    if base is None:
        base = self.base

    # Quit early if the task isn't there
    if not self.has_task(task) and not task.startswith('decorator'):
        bot.exit('%s is not a valid task or decorator for %s' % (task, name))

    repo = os.path.join(base, self.name)
    if not os.path.exists(repo):
        bot.exit('%s does not exist.' % repo)

    # Ensure that the filename exists in the repository
    filepath = os.path.join(base, self.name, task, filename)
    if not os.path.exists(filepath):
        bot.exit('%s does not exist for watcher %s' % (filepath, name))

    # Now filepath must be relative to the repo
    filepath = os.path.join(task, filename)

    commits = get_commits(repo=repo,
                          from_commit=from_commit,
                          to_commit=to_commit,
                          grep="ADD results %s" % task,
                          filename=filepath)

    # Keep lists of commits, dates, content
    result = {'commits': [], 'dates': [], 'content': []}

    for commit in commits:
        content = git_show(repo=repo, commit=commit, filename=filepath)

        if export_json is True:
            content = json.loads(content)

        # A list is merged in, anything else is appended as one item
        if isinstance(content, list):
            result['content'] += content
        else:
            result['content'].append(content)

        result['dates'].append(git_date(repo=repo, commit=commit))
        result['commits'].append(commit)

    return result
def edit_task(self, name, action, key, value=None):
    """Add, update or remove one parameter of a task, then save.

    "add" requires the key to be absent, "update" and "remove" require
    it to be present. add and update also require a value.

    Parameters
    ==========
    name: the name of the task to update
    action: the action to take (update, add, remove) a parameter
    key: the key to update
    value: the value to update
    """
    if not self.has_task(name):
        bot.exit('%s is not a task defined by %s' % (name, self.name))

    if action not in ['update', 'add', 'remove']:
        bot.exit('Action must be update, add, or remove')

    if action in ['update', 'add'] and value is None:
        bot.exit('A value must be provided for the %s action' % action)

    if action == "add":
        # Adding is only allowed for a key that is not there yet
        if key not in self.config[name]:
            bot.info('Adding %s:%s to %s' % (key, value, name))
            self.set_setting(name, key, value)
        else:
            bot.exit('%s already exists. Use "update" action to change.' % key)

    elif action == 'update':
        # Updating needs an existing key
        if key in self.config[name]:
            bot.info('Updating %s to %s in %s' % (key, value, name))
            self.set_setting(name, key, value)
        else:
            bot.exit('%s is not found in config, cannot be updated.' % key)

    elif action == "remove":
        # Removing needs an existing key
        if key in self.config[name]:
            bot.info('Removing %s' % key)
            del self.config[name][key]
        else:
            bot.exit('%s is not found in config, cannot be removed.' % key)

    self.save()
def check_exists(filename):
    """Call bot.exit with a "Cannot find" message when filename is not
    an existing path; do nothing otherwise.
    """
    if os.path.exists(filename):
        return
    bot.exit("Cannot find %s" % filename)
def check_exists(filename):
    """Helper that checks a path for existence and calls bot.exit with a
    "Cannot find" message when it is missing.
    """
    found = os.path.exists(filename)
    if not found:
        bot.exit('Cannot find %s' % filename)
def run(self, *args, **kwargs):
    """Placeholder for the function or process being monitored.

    Subclasses should implement run; this version only reports through
    bot.exit that it is missing. All arguments are ignored.
    """
    message = 'run function must be implemented by subclass.'
    bot.exit(message)
def run(self, funcs, tasks):
    """Run each task's function in a multiprocessing pool and collect
    the results by task name.

    Parameters
    ==========
    funcs: the functions to run with multiprocessing.pool, a dictionary
           with lookup by the task name
    tasks: a dict of tasks, each task name (key) with a tuple of
           arguments to process

    Returns
    =======
    A dict of finished results keyed by task name, or None when there
    are no tasks to run.
    """
    # Number of tasks must == number of functions
    assert len(funcs) == len(tasks)

    # if we don't have tasks, don't run
    if not tasks:
        return

    # Keep track of some progress for the user
    progress = 1
    total = len(tasks)

    # results will also have the same key to look up
    finished = dict()
    results = []

    # Bound before the try so the handlers can tell whether there are
    # workers to terminate, even when creating the pool itself fails.
    pool = None

    try:
        prefix = "[%s/%s]" % (progress, total)
        if self.show_progress:
            bot.show_progress(0, total, length=35, prefix=prefix)
        pool = multiprocessing.Pool(self.workers, init_worker)

        self.start()
        for key, params in tasks.items():
            func = funcs[key]
            if not self.show_progress:
                bot.info('Processing task %s:%s' % (key, params))
            result = pool.apply_async(multi_wrapper,
                                      multi_package(func, [params]))

            # Store the key with the result
            results.append((key, result))

        # Wait for each submitted task and collect its value
        while results:
            key, result = results.pop()
            result.wait()
            if self.show_progress:
                bot.show_progress(progress, total, length=35, prefix=prefix)
            progress += 1
            prefix = "[%s/%s]" % (progress, total)
            finished[key] = result.get()

        self.end()
        pool.close()
        pool.join()

    except (KeyboardInterrupt, SystemExit):
        bot.error("Keyboard interrupt detected, terminating workers!")
        if pool is not None:
            pool.terminate()
        sys.exit(1)

    except Exception as error:
        # Report the cause and stop the workers before exiting, instead
        # of discarding the error behind a generic message.
        bot.error('Task failed: %s' % error)
        if pool is not None:
            pool.terminate()
        bot.exit('Error running task.')

    return finished
def wait(self, *args, **kwargs):
    """Placeholder for monitoring the process after run.

    Subclasses should implement wait; this version only reports through
    bot.exit that it is missing. All arguments are ignored.
    """
    message = 'wait function must be implemented by subclass.'
    bot.exit(message)