def __init__(self):
    """Set up the logger and cache the entries of the ``jobs`` property.

    The logger is named after the runtime class. The ``package``,
    ``module`` and ``list`` entries of the ``jobs`` configuration property
    are stored on the instance; the config file path starts out unset.
    """
    logger_name = type(self).__name__
    self._logger = logging.getLogger(logger_name)

    # NOTE(review): assumes the "jobs" property is a dict-like object
    # exposing .get() -- confirm against Configuration.getProperty.
    job_settings = Configuration().getProperty('jobs')
    self._package, self._module, self._job_list = (
        job_settings.get(key) for key in ("package", "module", "list")
    )

    self._config_file_path = None
def __init__(self):
    """Create one semaphore per cache type found under ``traffic_limit``.

    For every key of the ``traffic_limit`` property, the initial semaphore
    value is read from ``traffic_limit.<cache_type>.size`` and the
    resulting semaphore is stored in ``self._mutexes`` under that key.
    """
    settings = Configuration()
    self._mutexes = {
        cache_type: threading.Semaphore(
            settings.getProperty(f"traffic_limit.{cache_type}.size")
        )
        for cache_type in settings.getProperty("traffic_limit").keys()
    }
def __init__(self):
    """Create one thread pool per job declared under ``jobs.list``.

    Jobs whose ``type`` is ``'one-off'`` get a single worker; every other
    job gets as many workers as its ``pool_size`` entry says. Pools are
    stored in ``self._pools`` keyed by job name.
    """
    self._logger = logging.getLogger(type(self).__name__)
    self._pools = dict()

    job_table = Configuration().getProperty("jobs.list")
    for job_name, job_config in job_table.items():
        # "pool_size" is only looked up for jobs that are not one-off.
        worker_count = (
            job_config["pool_size"]
            if 'one-off' != job_config["type"]
            else 1
        )
        self._pools[job_name] = ThreadPoolExecutor(
            thread_name_prefix=f'task_executor_{job_name}',
            max_workers=worker_count,
        )
def __init__(self):
    """Prepare browser options and resolve the driver path and builder.

    Reads the Chrome driver path and the client temp directory from
    ``Configuration``, builds a ``ChromeOptions`` object, looks up the
    driver path and builder for the configured browser type, and makes
    the driver path reachable through the ``PATH`` environment variable.

    The driver path is appended to ``PATH`` only when it is not already
    one of its entries, so constructing several instances does not keep
    growing the process environment with duplicates.

    NOTE(review): ``_safe_raise_exception`` is presumably expected to
    raise when its first argument is missing -- confirm against its
    definition.
    """
    self._use_headless = False
    self._browser_type = CONSTANT.driver_name()

    settings = Configuration()
    self._driver_path_mapping = {
        CONSTANT.chrome_name(): settings.getProperty("driver_path.chrome")
    }
    self._driver_builder_mapping = {
        CONSTANT.chrome_name(): self._build_chrome_driver
    }
    self._clear_process_cmd_mapping = {
        CONSTANT.chrome_name(): self._clear_chrome_process
    }
    self._tmp_dir = settings.getProperty("client.tmp_dir")

    options = webdriver.ChromeOptions()
    if self._use_headless:
        options.add_argument('headless')
    # Kept under its own name so the prefs dict is not confused with the
    # Configuration handle above.
    download_prefs = {'download.default_directory': self._tmp_dir}
    options.add_experimental_option('prefs', download_prefs)
    # NOTE(review): this passes the download directory as a command-line
    # argument in addition to the 'prefs' entry above; kept as-is, verify
    # that the browser actually honours it.
    options.add_argument(f"download.default_directory={self._tmp_dir}")
    options.add_argument("--start-maximized")
    options.add_argument("--disable-infobars")
    options.add_argument("--disable-extensions")
    options.add_argument('--disable-gpu')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--hide-scrollbars')
    self._options = options

    self._driver_path = self._driver_path_mapping.get(self._browser_type)
    self._safe_raise_exception(
        self._driver_path,
        f"Driver for browser {self._browser_type} not configured! ")

    # Extend PATH only once per process for a given driver path.
    path_entries = os.environ['PATH'].split(os.pathsep)
    if self._driver_path not in path_entries:
        os.environ['PATH'] += os.pathsep + self._driver_path

    self._builder = self._driver_builder_mapping.get(self._browser_type)
    self._safe_raise_exception(
        self._builder, f"Browser {self._browser_type} not supported")
def __init__(self):
    """Override the D-Bus session address and load the selenium timeout.

    Sets ``DBUS_SESSION_BUS_ADDRESS`` to ``/dev/null`` in the process
    environment, then stores the ``client.selenium.timeout`` property in
    ``self._timeout``.
    """
    # NOTE(review): presumably keeps the browser from trying to reach a
    # real D-Bus session -- confirm the motivation.
    os.environ["DBUS_SESSION_BUS_ADDRESS"] = "/dev/null"

    self._timeout = Configuration().getProperty("client.selenium.timeout")
def sec(self):
    """Return the ``sec`` setting configured for this job class.

    The value is looked up under ``jobs.list.<ClassName>.sec``, where
    ``<ClassName>`` is the name of the instance's runtime class.
    """
    property_key = f"jobs.list.{type(self).__name__}.sec"
    return Configuration().getProperty(property_key)
def cron(self):
    """Return the ``cron`` setting configured for this job class.

    The value is looked up under ``jobs.list.<ClassName>.cron``, where
    ``<ClassName>`` is the name of the instance's runtime class.
    """
    property_key = f"jobs.list.{type(self).__name__}.cron"
    return Configuration().getProperty(property_key)
def schedule_type(self):
    """Return the ``type`` setting configured for this job class.

    The value is looked up under ``jobs.list.<ClassName>.type``, where
    ``<ClassName>`` is the name of the instance's runtime class.
    """
    property_key = f"jobs.list.{type(self).__name__}.type"
    return Configuration().getProperty(property_key)
import sys

from ibranch.scraping_scheduler.configuration.Configurator import Configuration
from ibranch.scraping_scheduler.engine.Scraper import ScraperEngine

# Script entry point: hand the command-line arguments (everything after the
# script name) to Configuration, then start the scraper engine.
if __name__ == "__main__":
    # NOTE(review): the Configuration instance is not kept here; presumably
    # constructing it registers shared settings that ScraperEngine reads
    # later -- confirm against Configurator.
    Configuration(sys.argv[1:])
    ScraperEngine().start()