def execute_command(command_to_exec):
    """
    Run a command in a subprocess on behalf of a Celery worker.

    The child process receives a copy of the current environment and has its
    stderr redirected to its stdout.

    :param command_to_exec: the command to run; passed unchanged to
        ``subprocess.check_call``
    :raises AirflowException: if the command exits with a non-zero status
    """
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command_to_exec)
    env = os.environ.copy()
    try:
        subprocess.check_call(command_to_exec, stderr=subprocess.STDOUT,
                              close_fds=True, env=env)
    except subprocess.CalledProcessError as e:
        log.exception('execute_command encountered a CalledProcessError')
        # check_call does not capture the child's output, so ``e.output`` is
        # always None here; the exit status is the only detail we have.
        log.error('Command exited with return code %s', e.returncode)
        raise AirflowException('Celery command failed')
def execute_command(command):
    """
    Run a command in a subprocess on behalf of a Celery worker.

    The child process receives a copy of the current environment and has its
    stderr redirected to its stdout.

    :param command: the command to run; passed unchanged to
        ``subprocess.check_call``
    :raises AirflowException: if the command exits with a non-zero status
    """
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command)
    env = os.environ.copy()
    try:
        subprocess.check_call(command, stderr=subprocess.STDOUT,
                              close_fds=True, env=env)
    except subprocess.CalledProcessError as e:
        log.exception('execute_command encountered a CalledProcessError')
        # check_call does not capture the child's output, so ``e.output`` is
        # always None here; the exit status is the only detail we have.
        log.error('Command exited with return code %s', e.returncode)
        raise AirflowException('Celery command failed')
def list_py_file_paths(directory, safe_mode=True):
    """
    Traverse a directory and look for Python files.

    Files are kept when they end in ``.py`` or are zip archives, and when
    their path does not match any regular expression read from an
    ``.airflowignore`` file encountered so far during the walk.

    :param directory: the directory to traverse
    :type directory: unicode
    :param safe_mode: whether to use a heuristic to determine whether a file
        contains Airflow DAG definitions
    :return: a list of paths to Python files in the specified directory
    :rtype: list[unicode]
    """
    if directory is None:
        return []
    if os.path.isfile(directory):
        return [directory]

    file_paths = []
    if not os.path.isdir(directory):
        return file_paths

    # NOTE: patterns accumulate over the whole walk, so an .airflowignore
    # found in one directory also applies to every directory visited later.
    patterns = []
    for root, dirs, files in os.walk(directory, followlinks=True):
        if '.airflowignore' in files:
            # ``with`` guarantees the handle is closed even if read() fails.
            with open(os.path.join(root, '.airflowignore'), 'r') as ignore_fp:
                patterns += [p for p in ignore_fp.read().split('\n') if p]

        for file_name in files:
            try:
                file_path = os.path.join(root, file_name)
                if not os.path.isfile(file_path):
                    continue
                _, file_ext = os.path.splitext(os.path.split(file_path)[-1])
                if file_ext != '.py' and not zipfile.is_zipfile(file_path):
                    continue
                if any(re.search(p, file_path) for p in patterns):
                    continue

                # Heuristic that guesses whether a Python file contains an
                # Airflow DAG definition.
                might_contain_dag = True
                if safe_mode and not zipfile.is_zipfile(file_path):
                    with open(file_path, 'rb') as fp:
                        content = fp.read()
                    might_contain_dag = all(
                        s in content for s in (b'DAG', b'airflow'))

                if not might_contain_dag:
                    continue

                file_paths.append(file_path)
            except Exception:
                # ``file_name`` is never rebound, so the log always names the
                # offending file rather than an open file object.
                log = LoggingMixin().log
                log.exception("Error while examining %s", file_name)
    return file_paths
def list_py_file_paths(directory, safe_mode=True):
    """
    Traverse a directory and look for Python files.

    Files are kept when they end in ``.py`` or are zip archives, and when
    their path does not match any regular expression read from an
    ``.airflowignore`` file encountered so far during the walk.

    :param directory: the directory to traverse
    :type directory: unicode
    :param safe_mode: whether to use a heuristic to determine whether a file
        contains Airflow DAG definitions
    :return: a list of paths to Python files in the specified directory
    :rtype: list[unicode]
    """
    if directory is None:
        return []
    if os.path.isfile(directory):
        return [directory]

    file_paths = []
    if not os.path.isdir(directory):
        return file_paths

    # NOTE: patterns accumulate over the whole walk, so an .airflowignore
    # found in one directory also applies to every directory visited later.
    patterns = []
    for root, dirs, files in os.walk(directory, followlinks=True):
        if '.airflowignore' in files:
            # ``with`` guarantees the handle is closed even if read() fails.
            with open(os.path.join(root, '.airflowignore'), 'r') as ignore_fp:
                patterns += [p for p in ignore_fp.read().split('\n') if p]

        for file_name in files:
            try:
                file_path = os.path.join(root, file_name)
                if not os.path.isfile(file_path):
                    continue
                _, file_ext = os.path.splitext(os.path.split(file_path)[-1])
                if file_ext != '.py' and not zipfile.is_zipfile(file_path):
                    continue
                if any(re.search(p, file_path) for p in patterns):
                    continue

                # Heuristic that guesses whether a Python file contains an
                # Airflow DAG definition.
                might_contain_dag = True
                if safe_mode and not zipfile.is_zipfile(file_path):
                    with open(file_path, 'rb') as fp:
                        content = fp.read()
                    might_contain_dag = all(
                        s in content for s in (b'DAG', b'airflow'))

                if not might_contain_dag:
                    continue

                file_paths.append(file_path)
            except Exception:
                # ``file_name`` is never rebound, so the log always names the
                # offending file rather than an open file object.
                log = LoggingMixin().log
                log.exception("Error while examining %s", file_name)
    return file_paths
def list_py_file_paths(directory,
                       safe_mode=conf.getboolean('core', 'DAG_DISCOVERY_SAFE_MODE', fallback=True),
                       include_examples=None):
    """
    Traverse a directory and look for Python files.

    :param directory: the directory to traverse
    :type directory: unicode
    :param safe_mode: whether to use a heuristic to determine whether a file
        contains Airflow DAG definitions. If not provided, use the
        core.DAG_DISCOVERY_SAFE_MODE configuration setting. If not set, default
        to safe.
    :param include_examples: whether to also list the files of the bundled
        ``airflow.example_dags`` package. If None, use the core.LOAD_EXAMPLES
        configuration setting.
    :return: a list of paths to Python files in the specified directory
    :rtype: list[unicode]
    """
    if include_examples is None:
        include_examples = conf.getboolean('core', 'LOAD_EXAMPLES')

    if directory is None:
        return []
    if os.path.isfile(directory):
        return [directory]

    file_paths = []
    if os.path.isdir(directory):
        patterns_by_dir = {}
        for root, dirs, files in os.walk(directory, followlinks=True):
            patterns = patterns_by_dir.get(root, [])
            ignore_file = os.path.join(root, '.airflowignore')
            if os.path.isfile(ignore_file):
                with open(ignore_file, 'r') as ignore_fp:
                    lines_no_comments = [
                        COMMENT_PATTERN.sub("", line)
                        for line in ignore_fp.read().split("\n")
                    ]
                # Build a NEW list rather than extending in place: the list
                # fetched above is shared with sibling directories, and
                # mutating it would leak this directory's patterns to them.
                patterns = patterns + [
                    re.compile(line) for line in lines_no_comments if line
                ]

            # If we can ignore any subdirs entirely we should - fewer paths
            # to walk is better. We have to modify the ``dirs`` array in
            # place for this to affect os.walk
            dirs[:] = [
                d for d in dirs
                if not any(p.search(os.path.join(root, d)) for p in patterns)
            ]

            # We want patterns defined in a parent folder's .airflowignore to
            # apply to subdirs too
            for d in dirs:
                patterns_by_dir[os.path.join(root, d)] = patterns

            for f in files:
                try:
                    file_path = os.path.join(root, f)
                    if not os.path.isfile(file_path):
                        continue
                    _, file_ext = os.path.splitext(
                        os.path.split(file_path)[-1])
                    if file_ext != '.py' and not zipfile.is_zipfile(file_path):
                        continue
                    if any(p.search(file_path) for p in patterns):
                        continue

                    # Heuristic that guesses whether a Python file contains an
                    # Airflow DAG definition.
                    might_contain_dag = True
                    if safe_mode and not zipfile.is_zipfile(file_path):
                        with open(file_path, 'rb') as fp:
                            content = fp.read()
                        might_contain_dag = all(
                            s in content for s in (b'DAG', b'airflow'))

                    if not might_contain_dag:
                        continue

                    file_paths.append(file_path)
                except Exception:
                    log = LoggingMixin().log
                    log.exception("Error while examining %s", f)

    if include_examples:
        import airflow.example_dags
        example_dag_folder = airflow.example_dags.__path__[0]
        # Pass include_examples=False so the recursion stops after one level.
        file_paths.extend(
            list_py_file_paths(example_dag_folder, safe_mode, False))
    return file_paths
continue log.debug('Importing plugin module %s', filepath) # normalize root path as namespace namespace = '_'.join([re.sub(norm_pattern, '__', root), mod_name]) m = imp.load_source(namespace, filepath) for obj in list(m.__dict__.values()): if (inspect.isclass(obj) and issubclass(obj, AirflowPlugin) and obj is not AirflowPlugin): obj.validate() if obj not in plugins: plugins.append(obj) except Exception as e: log.exception(e) log.error('Failed to import plugin %s', filepath) import_errors[filepath] = str(e) def make_module(name, objects): log.debug('Creating module %s', name) name = name.lower() module = imp.new_module(name) module._name = name.split('.')[-1] module._objects = objects module.__dict__.update((o.__name__, o) for o in objects) return module # Plugin components to integrate as modules
def list_py_file_paths(directory, safe_mode=True):
    """
    Traverse a directory and look for Python files.

    Regular expressions listed in an ``.airflowignore`` file apply to the
    directory that contains it and to all of its subdirectories; matching
    subdirectories are not walked at all.

    :param directory: the directory to traverse
    :type directory: unicode
    :param safe_mode: whether to use a heuristic to determine whether a file
        contains Airflow DAG definitions
    :return: a list of paths to Python files in the specified directory
    :rtype: list[unicode]
    """
    if directory is None:
        return []
    if os.path.isfile(directory):
        return [directory]

    file_paths = []
    if not os.path.isdir(directory):
        return file_paths

    patterns_by_dir = {}
    for root, dirs, files in os.walk(directory, followlinks=True):
        patterns = patterns_by_dir.get(root, [])
        ignore_file = os.path.join(root, '.airflowignore')
        if os.path.isfile(ignore_file):
            with open(ignore_file, 'r') as ignore_fp:
                # If we have new patterns create a copy so we don't change
                # the previous list (which would affect other subdirs)
                patterns = patterns + [
                    p for p in ignore_fp.read().split('\n') if p
                ]

        # If we can ignore any subdirs entirely we should - fewer paths
        # to walk is better. We have to modify the ``dirs`` array in
        # place for this to affect os.walk
        dirs[:] = [
            d for d in dirs
            if not any(
                re.search(p, os.path.join(root, d)) for p in patterns)
        ]

        # We want patterns defined in a parent folder's .airflowignore to
        # apply to subdirs too
        for d in dirs:
            patterns_by_dir[os.path.join(root, d)] = patterns

        for file_name in files:
            try:
                file_path = os.path.join(root, file_name)
                if not os.path.isfile(file_path):
                    continue
                _, file_ext = os.path.splitext(os.path.split(file_path)[-1])
                if file_ext != '.py' and not zipfile.is_zipfile(file_path):
                    continue
                if any(re.search(p, file_path) for p in patterns):
                    continue

                # Heuristic that guesses whether a Python file contains an
                # Airflow DAG definition.
                might_contain_dag = True
                if safe_mode and not zipfile.is_zipfile(file_path):
                    with open(file_path, 'rb') as fp:
                        content = fp.read()
                    might_contain_dag = all(
                        s in content for s in (b'DAG', b'airflow'))

                if not might_contain_dag:
                    continue

                file_paths.append(file_path)
            except Exception:
                # ``file_name`` is never rebound, so the log always names the
                # offending file rather than an open file object.
                log = LoggingMixin().log
                log.exception("Error while examining %s", file_name)
    return file_paths
mod_name, file_ext = os.path.splitext( os.path.split(filepath)[-1]) if file_ext != '.py': continue log.debug('Importing plugin module %s', filepath) # normalize root path as namespace namespace = '_'.join([re.sub(norm_pattern, '__', root), mod_name]) m = imp.load_source(namespace, filepath) for obj in list(m.__dict__.values()): if is_valid_plugin(obj, plugins): plugins.append(obj) except Exception as e: log.exception(e) log.error('Failed to import plugin %s', filepath) import_errors[filepath] = str(e) plugins = load_entrypoint_plugins( pkg_resources.iter_entry_points('airflow.plugins'), plugins ) def make_module(name, objects): log.debug('Creating module %s', name) name = name.lower() module = imp.new_module(name) module._name = name.split('.')[-1] module._objects = objects
def list_py_file_paths(directory, safe_mode=True, include_examples=None):
    """
    Traverse a directory and look for Python files.

    Regular expressions listed in an ``.airflowignore`` file apply to the
    directory that contains it and to all of its subdirectories; matching
    subdirectories are not walked at all.

    :param directory: the directory to traverse
    :type directory: unicode
    :param safe_mode: whether to use a heuristic to determine whether a file
        contains Airflow DAG definitions
    :param include_examples: whether to also list the files of the bundled
        ``airflow.example_dags`` package. If None, use the core.LOAD_EXAMPLES
        configuration setting.
    :return: a list of paths to Python files in the specified directory
    :rtype: list[unicode]
    """
    if include_examples is None:
        include_examples = conf.getboolean('core', 'LOAD_EXAMPLES')

    if directory is None:
        return []
    if os.path.isfile(directory):
        return [directory]

    file_paths = []
    if os.path.isdir(directory):
        patterns_by_dir = {}
        for root, dirs, files in os.walk(directory, followlinks=True):
            patterns = patterns_by_dir.get(root, [])
            ignore_file = os.path.join(root, '.airflowignore')
            if os.path.isfile(ignore_file):
                with open(ignore_file, 'r') as ignore_fp:
                    # Build a NEW list rather than extending in place: the
                    # list fetched above is shared with sibling directories,
                    # and mutating it would leak this directory's patterns
                    # to them.
                    patterns = patterns + [
                        re.compile(p)
                        for p in ignore_fp.read().split('\n') if p
                    ]

            # If we can ignore any subdirs entirely we should - fewer paths
            # to walk is better. We have to modify the ``dirs`` array in
            # place for this to affect os.walk
            dirs[:] = [
                d for d in dirs
                if not any(p.search(os.path.join(root, d)) for p in patterns)
            ]

            # We want patterns defined in a parent folder's .airflowignore to
            # apply to subdirs too
            for d in dirs:
                patterns_by_dir[os.path.join(root, d)] = patterns

            for f in files:
                try:
                    file_path = os.path.join(root, f)
                    if not os.path.isfile(file_path):
                        continue
                    _, file_ext = os.path.splitext(
                        os.path.split(file_path)[-1])
                    if file_ext != '.py' and not zipfile.is_zipfile(file_path):
                        continue
                    if any(p.search(file_path) for p in patterns):
                        continue

                    # Heuristic that guesses whether a Python file contains an
                    # Airflow DAG definition.
                    might_contain_dag = True
                    if safe_mode and not zipfile.is_zipfile(file_path):
                        with open(file_path, 'rb') as fp:
                            content = fp.read()
                        might_contain_dag = all(
                            s in content for s in (b'DAG', b'airflow'))

                    if not might_contain_dag:
                        continue

                    file_paths.append(file_path)
                except Exception:
                    log = LoggingMixin().log
                    log.exception("Error while examining %s", f)

    if include_examples:
        import airflow.example_dags
        example_dag_folder = airflow.example_dags.__path__[0]
        # Pass include_examples=False so the recursion stops after one level.
        file_paths.extend(
            list_py_file_paths(example_dag_folder, safe_mode, False))
    return file_paths