def __init__(self, log_file=None, schedules=None):
    """
    Attach the storage accessor, the logger and the schedules file to ``self``
    :param log_file: log file to write to; a falsy value selects DEFAULT_LOG_FILE
    :param schedules: value stored as-is in ``schedule_settings_file``
    """
    # NOTE(review): this module-level function mirrors Scheduler.__init__
    # defined below and looks like a stray copy - confirm before removing.
    # storage accessor is created first, then the logger
    self.psql = PSQLDBAccess()
    self.logger = DITLogger(filename=log_file or DEFAULT_LOG_FILE)
    # remember which schedules file was requested
    self.schedule_settings_file = schedules
class Scheduler:
    """
    Main scheduler implementation

    Builds controller instances from a yaml settings file and executes them
    one after the other, logging every step and keeping whatever each
    controller returns in ``results``.
    """
    total = 0  # total controllers
    # Class-level fallback only: __init__ rebinds ``results`` to a fresh dict
    # per instance so results are not shared between schedulers. The default
    # is kept so code that bypasses __init__ still finds the attribute.
    results = {}  # results of each module, if any
    prev_comment_id = 0  # comment ID from previous schedule
    date_start = 0  # start date of schedule

    def __init__(self, log_file=None, schedules=None):
        """
        :param log_file: log file to write to; a falsy value selects DEFAULT_LOG_FILE
        :param schedules: path to the yaml file describing the modules to execute
        """
        # init storage
        self.psql = PSQLDBAccess()
        # init logger
        self.logger = DITLogger(filename=log_file or DEFAULT_LOG_FILE)
        # schedules file
        self.schedule_settings_file = schedules
        # per-instance results, so one scheduler never sees another one's output
        self.results = {}

    def execute_pipeline(self, first=False):
        """
        Execute the schedule, as stated in the yaml file
        :param first: flag to define first execution; when set, the previous
                      comment id is reset to 0 instead of being read from storage
        """
        # mark started
        self.date_start = datetime.now()
        # get all modules to execute
        modules = Scheduler.get_modules(self.schedule_settings_file)
        self.total = len(modules)
        # get previous comment ID
        if first:
            self.prev_comment_id = 0
        else:
            self.prev_comment_id = self.psql.get_latest_comment_id()
        self._store({"prev_comment_id": self.prev_comment_id}, LOCAL_TEMP_FILE)
        # log initialization
        self.logger.info("Initializing schedule for %d modules. "
                         "Last comment id: %d" % (self.total, self.prev_comment_id))
        # execute pipeline
        # ``step`` is reported after the loop, so it needs a value even when
        # the schedule contains no modules at all
        step = 0
        # iterate by step number: a plain dict does not guarantee key order on
        # every interpreter, and the steps must run in the order of the file
        for step, controller in sorted(modules.items(), key=lambda item: item[0]):
            self._execute_controller(step, controller)

        # finalized
        self.logger.schedule_step(step_num=step, total_steps=self.total, date_start=self.date_start,
                                  date_end=datetime.now())

    def _execute_controller(self, step, controller):
        """
        Execute the controller passed, and if this controller returns smth, store it
        :param step: number of the step being executed (used for logging only)
        :param controller: object exposing ``execute``; it receives the stored
                           result of 'ControllerCrawl' (None when there is none)
        """
        # log step
        self.logger.schedule_step(step_num=step, total_steps=self.total, date_start=self.date_start)
        result = controller.execute(
            self.results.get('ControllerCrawl')
        )  # applied custom hack to pass consultations to wordcloud
        if result:
            # results are keyed by the part of repr(controller) before the first ':'
            self.results[repr(controller).split(":")[0]] = result

    def get_previous_comment_id(self):
        """
        :return: the "prev_comment_id" value read back from LOCAL_TEMP_FILE
        """
        return self._load(LOCAL_TEMP_FILE)["prev_comment_id"]

    @staticmethod
    def get_modules(schedules_file_path):
        """
        :param schedules_file_path: the path to the yaml file
        :return: a dict containing the instances to be executed, keyed by
                 their 1-based position in the file
        """
        with open(schedules_file_path, 'r') as inp:
            # NOTE(review): yaml.load without an explicit Loader may build
            # arbitrary Python objects on older PyYAML releases and is
            # rejected by newer ones - consider yaml.safe_load if the
            # schedules file only needs plain mappings. Left unchanged here.
            scheduler_settings = yaml.load(inp)
        modules = {}
        # inject class instances, with parameters from settings file
        # (an empty yaml document loads as None, which means "no modules")
        for position, setting in enumerate(scheduler_settings or [], 1):
            package = importlib.import_module(setting[PACKAGE_LABEL])
            controller_class = getattr(package, setting[CLASS_LABEL])
            modules[position] = controller_class(**setting[PARAM_LABEL])
        return modules

    def _store(self, dict, storage):
        """
        store data to file: custom hack to override issue with class inheritance
        :param dict: the JSON-serializable data to store (name kept for
                     backward compatibility although it shadows the builtin)
        :param storage: the file to store data
        """
        # 'w' creates the file when it is missing and replaces its content
        # when it exists, so no existence check is needed
        with open(storage, mode='w') as f:
            json.dump(dict, f)

    def _load(self, storage):
        """
        load data previously written by _store
        :param storage: the file to read data from
        :return: the decoded JSON content, or {"prev_comment_id": 0} when the
                 file does not exist
        """
        if os.path.isfile(storage):
            with open(storage, mode='r') as f:
                return json.load(f)
        # we do not want schedule to terminate
        return {"prev_comment_id": 0}