def _load_modules_from_file(self, filepath, safe_mode):
    """Import a single Python source file and hand back the resulting module.

    The module is registered in ``sys.modules`` under a synthetic name that
    embeds the SHA-1 hex digest of ``filepath`` followed by the file's base
    name (without extension).

    :param filepath: path of the source file to import
    :param safe_mode: passed through to ``might_contain_dag``, which decides
        whether the file is imported at all
    :return: ``[module]`` when the import succeeds; ``[]`` when the file is
        skipped or the import raises, in which case the failure is stored in
        ``self.import_errors[filepath]``
    """
    if not might_contain_dag(filepath, safe_mode):
        # Report only the first skipped file so the log is not flooded.
        if not self.has_logged:
            self.has_logged = True
            self.log.info("File %s assumed to contain no DAGs. Skipping.", filepath)
        return []

    self.log.debug("Importing %s", filepath)

    file_name = os.path.split(filepath)[-1]
    base_name = os.path.splitext(file_name)[0]
    digest = hashlib.sha1(filepath.encode('utf-8')).hexdigest()
    mod_name = f'unusual_prefix_{digest}_{base_name}'

    # Forget any earlier import of this file so its code is executed again.
    sys.modules.pop(mod_name, None)

    timeout_msg = f"DagBag import timeout for {filepath} after {self.DAGBAG_IMPORT_TIMEOUT}s"
    with timeout(self.DAGBAG_IMPORT_TIMEOUT, error_message=timeout_msg):
        try:
            loader = importlib.machinery.SourceFileLoader(mod_name, filepath)
            spec = importlib.util.spec_from_loader(mod_name, loader)
            new_module = importlib.util.module_from_spec(spec)
            # Register before executing, as the import machinery itself does.
            sys.modules[spec.name] = new_module
            loader.exec_module(new_module)
            return [new_module]
        except Exception as exc:  # pylint: disable=broad-except
            self.log.exception("Failed to import: %s", filepath)
            if self.dagbag_import_error_tracebacks:
                # A negative limit keeps only the innermost frames.
                error_text = traceback.format_exc(
                    limit=-self.dagbag_import_error_traceback_depth
                )
            else:
                error_text = str(exc)
            self.import_errors[filepath] = error_text
    return []
def _load_modules_from_file(self, filepath, safe_mode):
    """Import a single Python source file, optionally under a time limit.

    The limit comes from ``settings.get_dagbag_import_timeout(filepath)``.
    A value less than or equal to zero means the file is imported without any
    timeout; otherwise the import runs inside the ``timeout`` context manager.

    :param filepath: path of the source file to import
    :param safe_mode: passed through to ``might_contain_dag``, which decides
        whether the file is imported at all
    :return: ``[module]`` when the import succeeds; ``[]`` when the file is
        skipped or the import raises, in which case the failure is stored in
        ``self.import_errors[filepath]``
    :raises TypeError: if the configured timeout is neither ``int`` nor
        ``float``
    """
    if not might_contain_dag(filepath, safe_mode):
        # Report only the first skipped file so the log is not flooded.
        if not self.has_logged:
            self.has_logged = True
            self.log.info("File %s assumed to contain no DAGs. Skipping.", filepath)
        return []

    self.log.debug("Importing %s", filepath)

    file_name = os.path.split(filepath)[-1]
    base_name = os.path.splitext(file_name)[0]
    digest = hashlib.sha1(filepath.encode('utf-8')).hexdigest()
    mod_name = f'unusual_prefix_{digest}_{base_name}'

    # Forget any earlier import of this file so its code is executed again.
    sys.modules.pop(mod_name, None)

    def import_module_file():
        """Execute the file as ``mod_name``; record the error on failure."""
        try:
            loader = importlib.machinery.SourceFileLoader(mod_name, filepath)
            spec = importlib.util.spec_from_loader(mod_name, loader)
            new_module = importlib.util.module_from_spec(spec)
            sys.modules[spec.name] = new_module
            loader.exec_module(new_module)
            return [new_module]
        except Exception as exc:
            self.log.exception("Failed to import: %s", filepath)
            if self.dagbag_import_error_tracebacks:
                # A negative limit keeps only the innermost frames.
                error_text = traceback.format_exc(
                    limit=-self.dagbag_import_error_traceback_depth)
            else:
                error_text = str(exc)
            self.import_errors[filepath] = error_text
            return []

    dagbag_import_timeout = settings.get_dagbag_import_timeout(filepath)
    if not isinstance(dagbag_import_timeout, (int, float)):
        raise TypeError(
            f'Value ({dagbag_import_timeout}) from get_dagbag_import_timeout must be int or float'
        )

    if dagbag_import_timeout <= 0:
        # Zero or negative disables the parsing timeout entirely.
        return import_module_file()

    timeout_msg = (
        f"DagBag import timeout for {filepath} after {dagbag_import_timeout}s.\n"
        "Please take a look at these docs to improve your DAG import time:\n"
        f"* {get_docs_url('best-practices.html#top-level-python-code')}\n"
        f"* {get_docs_url('best-practices.html#reducing-dag-complexity')}")
    with timeout(dagbag_import_timeout, error_message=timeout_msg):
        return import_module_file()
def _load_modules_from_zip(self, filepath, safe_mode):
    """Import every candidate module found at the root of a zip archive.

    Only ``.py`` / ``.pyc`` entries that sit directly at the archive root are
    considered; entries inside sub-directories are ignored. Each candidate is
    imported by temporarily putting the archive at the front of ``sys.path``.

    :param filepath: path of the zip archive
    :param safe_mode: passed through to ``might_contain_dag``, which decides
        whether an entry is imported at all
    :return: list of the modules that imported successfully; failures are
        stored in ``self.import_errors`` keyed by ``<archive>/<entry>``
    """
    mods = []
    with zipfile.ZipFile(filepath) as current_zip_file:
        for zip_info in current_zip_file.infolist():
            head, _ = os.path.split(zip_info.filename)
            mod_name, ext = os.path.splitext(zip_info.filename)
            if ext not in (".py", ".pyc"):
                continue
            if head:
                # Entry lives in a sub-directory of the archive.
                continue

            if mod_name == '__init__':
                # ``ext`` already carries its leading dot, so it must not be
                # preceded by another one in the message.
                self.log.warning("Found __init__%s at root of %s", ext, filepath)

            self.log.debug("Reading %s from %s", zip_info.filename, filepath)

            if not might_contain_dag(zip_info.filename, safe_mode, current_zip_file):
                # todo: create ignore list
                # Report only the first skipped entry so the log is not flooded.
                if not self.has_logged:
                    self.has_logged = True
                    self.log.info(
                        "File %s:%s assumed to contain no DAGs. Skipping.",
                        filepath, zip_info.filename)
                continue

            # Forget any earlier import so the entry's code is executed again.
            if mod_name in sys.modules:
                del sys.modules[mod_name]

            try:
                sys.path.insert(0, filepath)
                current_module = importlib.import_module(mod_name)
                mods.append(current_module)
            except Exception as e:
                fileloc = os.path.join(filepath, zip_info.filename)
                self.log.exception("Failed to import: %s", fileloc)
                if self.dagbag_import_error_tracebacks:
                    # A negative limit keeps only the innermost frames.
                    self.import_errors[fileloc] = traceback.format_exc(
                        limit=-self.dagbag_import_error_traceback_depth)
                else:
                    self.import_errors[fileloc] = str(e)
            finally:
                # Undo the sys.path change, unless something else has since
                # been placed in front of the archive.
                if sys.path[0] == filepath:
                    del sys.path[0]
    return mods
def _refresh_dag_dir(self):
    """Re-scan the DAG directory once the listing interval has elapsed.

    When more than ``self.dag_dir_list_interval`` seconds have passed since
    ``self.last_dag_dir_refresh_time``, the file list is rebuilt, stale import
    errors are cleared (best effort), and serialized DAGs, DAG rows and DAG
    code belonging to files that no longer exist are cleaned up. Otherwise
    nothing happens.
    """
    now = timezone.utcnow()
    seconds_since_refresh = (now - self.last_dag_dir_refresh_time).total_seconds()
    if not seconds_since_refresh > self.dag_dir_list_interval:
        return

    # Collect the Python files that could contain DAGs.
    self.log.info("Searching for files in %s", self._dag_directory)
    self._file_paths = list_py_file_paths(self._dag_directory)
    self.last_dag_dir_refresh_time = now
    self.log.info("There are %s files in %s", len(self._file_paths), self._dag_directory)
    self.set_file_paths(self._file_paths)

    try:
        self.log.debug("Removing old import errors")
        self.clear_nonexistent_import_errors()
    except Exception:
        # Failing to tidy up import errors must not abort the refresh.
        self.log.exception("Error removing old import errors")

    # For zip archives, expand to the full path of each contained Python file;
    # otherwise the removal calls below would treat zipped DAGs as deleted.
    dag_filelocs = []
    for path in self._file_paths:
        is_archive = not path.endswith(".py") and zipfile.is_zipfile(path)
        if not is_archive:
            dag_filelocs.append(path)
            continue
        with zipfile.ZipFile(path) as archive:
            members = []
            for member in archive.infolist():
                if might_contain_dag(member.filename, True, archive):
                    members.append(os.path.join(path, member.filename))
            dag_filelocs.extend(members)

    SerializedDagModel.remove_deleted_dags(dag_filelocs)
    DagModel.deactivate_deleted_dags(self._file_paths)

    from airflow.models.dagcode import DagCode

    DagCode.remove_deleted_code(dag_filelocs)
def _load_modules_from_zip(self, filepath, safe_mode):
    """Import every candidate module found at the root of a zip archive.

    Only ``.py`` / ``.pyc`` entries that sit directly at the archive root are
    considered; entries inside sub-directories are ignored. Each candidate is
    imported by temporarily putting the archive at the front of ``sys.path``.

    :param filepath: path of the zip archive
    :param safe_mode: passed through to ``might_contain_dag``, which decides
        whether an entry is imported at all
    :return: list of the modules that imported successfully; a failure is
        stored in ``self.import_errors[filepath]``
    """
    mods = []
    # The context manager guarantees the archive handle is released.
    with zipfile.ZipFile(filepath) as current_zip_file:
        for zip_info in current_zip_file.infolist():
            head, _ = os.path.split(zip_info.filename)
            mod_name, ext = os.path.splitext(zip_info.filename)
            if ext not in (".py", ".pyc"):
                continue
            if head:
                # Entry lives in a sub-directory of the archive.
                continue

            if mod_name == '__init__':
                # ``ext`` already carries its leading dot, so it must not be
                # preceded by another one in the message.
                self.log.warning("Found __init__%s at root of %s", ext, filepath)

            self.log.debug("Reading %s from %s", zip_info.filename, filepath)

            if not might_contain_dag(zip_info.filename, safe_mode, current_zip_file):
                # todo: create ignore list
                # Report only the first skipped entry so the log is not flooded.
                if not self.has_logged:
                    self.has_logged = True
                    self.log.info(
                        "File %s:%s assumed to contain no DAGs. Skipping.",
                        filepath, zip_info.filename)
                continue

            # Forget any earlier import so the entry's code is executed again.
            if mod_name in sys.modules:
                del sys.modules[mod_name]

            try:
                sys.path.insert(0, filepath)
                current_module = importlib.import_module(mod_name)
                mods.append(current_module)
            except Exception as e:  # pylint: disable=broad-except
                # NOTE(review): errors are keyed by the archive path, so a
                # later failing entry in the same archive overwrites an
                # earlier one. Kept as-is for callers that read this key.
                self.log.exception("Failed to import: %s", filepath)
                self.import_errors[filepath] = str(e)
            finally:
                # Undo the sys.path change so repeated calls do not keep
                # growing it; leave it alone if something else is now first.
                if sys.path[0] == filepath:
                    del sys.path[0]
    return mods