def __init__(self, log_file=None, schedules=None):
    """
    Prepare storage access, the logger and the schedules file reference.

    :param log_file: file to log to; DEFAULT_LOG_FILE is used when this is
        None or otherwise falsy
    :param schedules: the schedule settings file, stored as-is on the instance
    """
    # storage access
    self.psql = PSQLDBAccess()

    # logger: fall back to the default log file when none is supplied
    log_target = log_file or DEFAULT_LOG_FILE
    self.logger = DITLogger(filename=log_target)

    # schedule settings file
    self.schedule_settings_file = schedules
class Scheduler:
    """
    Main scheduler implementation.

    Builds controller instances from a YAML settings file and executes them
    one after another, logging every step and keeping whatever each
    controller returns.
    """

    total = 0  # total controllers
    # Class-level default kept for backward compatibility only: __init__ gives
    # every instance its own dict so results are not shared between schedulers.
    results = {}  # results of each module, if any
    prev_comment_id = 0  # comment ID from previous schedule
    date_start = 0  # start date of schedule

    def __init__(self, log_file=None, schedules=None):
        """
        :param log_file: file to log to; DEFAULT_LOG_FILE is used when falsy
        :param schedules: the schedule settings (yaml) file
        """
        # init storage
        self.psql = PSQLDBAccess()
        # init logger
        self.logger = DITLogger(filename=log_file if log_file else DEFAULT_LOG_FILE)
        # schedules file
        self.schedule_settings_file = schedules
        # per-instance results: the class attribute is a single dict object
        # and would otherwise be mutated by (and leak between) all instances
        self.results = {}

    def execute_pipeline(self, first=False):
        """
        Execute the schedule, as stated in the yaml file

        :param first: flag to define first execution; when set, the previous
            comment id is taken as 0 instead of being read from storage
        """
        # mark started
        self.date_start = datetime.now()

        # get all modules to execute
        modules = Scheduler.get_modules(self.schedule_settings_file)
        self.total = len(modules)

        # get previous comment ID
        if not first:
            self.prev_comment_id = self.psql.get_latest_comment_id()
        else:
            self.prev_comment_id = 0
        self._store({"prev_comment_id": self.prev_comment_id}, LOCAL_TEMP_FILE)

        # log initialization
        self.logger.info("Initializing schedule for %d modules. \n"
                         "Last comment id: %d" % (self.total, self.prev_comment_id))

        # execute pipeline; `step` is pre-set so that the final log call below
        # does not fail on an unbound name when there are no modules to run
        step = 0
        for step, controller in modules.items():
            self._execute_controller(step, controller)

        # finalized
        self.logger.schedule_step(step_num=step, total_steps=self.total,
                                  date_start=self.date_start,
                                  date_end=datetime.now())

    def _execute_controller(self, step, controller):
        """
        Execute the controller passed, and if this controller returns smth,
        store it

        :param step: number of this step, used for logging
        :param controller: object exposing ``execute``; its result is stored
            under the part of ``repr(controller)`` preceding the first ":"
        """
        # log step
        self.logger.schedule_step(step_num=step, total_steps=self.total,
                                  date_start=self.date_start)
        # applied custom hack to pass consultations to wordcloud: every
        # controller receives the stored 'ControllerCrawl' result (or None)
        result = controller.execute(self.results.get('ControllerCrawl'))
        if result:
            self.results[repr(controller).split(":")[0]] = result

    def get_previous_comment_id(self):
        """
        :return: the previous comment id saved in LOCAL_TEMP_FILE, or 0 when
            the stored state does not contain one
        """
        return self._load(LOCAL_TEMP_FILE).get("prev_comment_id", 0)

    @staticmethod
    def get_modules(schedules_file_path):
        """
        :param schedules_file_path: the path to the yaml file
        :return: a dict containing the instances to be executed, keyed by
            their 1-based position in the settings file
        """
        # inject class instances, with parameters from settings file
        with open(schedules_file_path, 'r') as inp:
            # NOTE(review): yaml.load without an explicit Loader can construct
            # arbitrary Python objects and is rejected by recent PyYAML
            # releases; consider yaml.safe_load if the settings file only
            # holds plain data -- confirm before switching.
            scheduler_settings = yaml.load(inp)

        modules = {}
        # an empty yaml file loads as None: treat it as "no modules"
        for index, setting in enumerate(scheduler_settings or [], 1):
            pack = importlib.import_module(setting[PACKAGE_LABEL])
            cl = getattr(pack, setting[CLASS_LABEL])
            # a null parameter entry means "no keyword arguments"
            modules[index] = cl(**(setting[PARAM_LABEL] or {}))
        return modules

    def _store(self, dict, storage):
        """
        store data to file: custom hack to override issue with class
        inheritance

        :param dict: the json-serializable data to store (the name shadows
            the builtin but is kept so keyword callers keep working)
        :param storage: the file to store data; created when missing,
            overwritten otherwise
        """
        # mode 'w' both creates a missing file and replaces an existing one
        with open(storage, mode='w') as f:
            json.dump(dict, f)

    def _load(self, storage):
        """
        :param storage: the file to read the stored json data from
        :return: the stored data, or ``{"prev_comment_id": 0}`` when the file
            is missing, unreadable or does not contain valid json
        """
        if os.path.isfile(storage):
            try:
                with open(storage, mode='r') as f:
                    return json.load(f)
            except (IOError, OSError, ValueError) as err:
                # we do not want schedule to terminate
                self.logger.info("Could not load %s (%s), using default state"
                                 % (storage, err))
        return {"prev_comment_id": 0}