Пример #1
0
    def _patch_imports(self, resolver, output_path: Path) -> int:
        """Rewrite imports of the libraries found in ``output_path``.

        Every module path of the project's packages is selected for patching;
        ``output_path`` itself is added when it does not live below the first
        package. For each entry of ``output_path`` that is not excluded by the
        config, an import renaming from the entry name to its dotted path
        relative to the project root is registered, then all renamings are
        executed in place.

        :param resolver: object exposing ``graph.metainfo.package.packages``.
        :param output_path: directory whose entries are the libraries to rename.
        :return: number of paths that were handed to the query.
        """
        # select modules to patch imports
        query = Query()
        query.paths = [
            str(module_path)
            for package in resolver.graph.metainfo.package.packages
            for module_path in package
        ]

        # the vendor directory is patched as well when it is outside of the main package
        main_package_path = resolver.graph.metainfo.package.packages[0].path
        vendored_inside_package = main_package_path.resolve() in output_path.resolve().parents
        if not vendored_inside_package:
            query.paths.append(str(output_path))

        # register one renaming per library that is not excluded
        project_root = Path(self.config['project'])
        for library in output_path.iterdir():
            if library.name in self.config['vendor']['exclude']:
                continue
            relative_library = library.resolve().relative_to(str(project_root))
            dotted_name = '.'.join(relative_library.parts)
            self.logger.debug(
                'patch imports',
                extra={'old_name': library.name, 'new_name': dotted_name},
            )
            query = transform_imports(
                query=query,
                old_name=library.name,
                new_name=dotted_name,
            )

        # apply all registered renamings without prompting
        query.execute(interactive=False, write=True, silent=True)
        return len(query.paths)
Пример #2
0
    def run_bowler_modifier(
        self,
        input_text,
        selector=None,
        modifier=None,
        selector_func=None,
        modifier_func=None,
        in_process=True,
    ):
        """Run a query over ``input_text`` and return the rewritten text.

        The text is written to a temporary ``.py`` file, the query is built
        from either ``selector``/``modifier`` or the ``*_func`` factories,
        executed with ``write=True``, and the file content is read back with
        trailing whitespace stripped.

        :param input_text: source text to transform.
        :param selector: selector passed to ``Query.select``.
        :param modifier: callback ``(node, capture, filename)`` applied to matches.
        :param selector_func: callable receiving the list of paths and
            returning a query; takes precedence over ``selector``.
        :param modifier_func: callable receiving the query and returning the
            modified query; takes precedence over ``modifier``.
        :param in_process: forwarded to ``query.execute``.
        :raises ValueError: when no selector or no modifier is supplied.
        :raises AssertionError: chained from an exception raised by ``modifier``.
        """
        if not selector and not selector_func:
            raise ValueError("Pass selector")
        if not modifier and not modifier_func:
            raise ValueError("Pass modifier")

        # Exceptions raised by the modifier are shipped back through this queue.
        errors = multiprocessing.Queue()

        def local_modifier(node, capture, filename):
            # With in_process=False this executes in another process, hence the queue.
            try:
                return modifier(node, capture, filename)
            except Exception as exc:
                errors.put(exc)

        with tempfile.NamedTemporaryFile(suffix=".py") as tmp:
            # TODO: this most likely does not work on Windows, because
            # NamedTemporaryFile already holds the file open for writing.
            # Consider using mktemp directly?
            with open(tmp.name, "w") as writer:
                writer.write(input_text + "\n")

            targets = [tmp.name]
            query = selector_func(targets) if selector_func else Query(targets).select(selector)

            # N.b. exceptions may not work when a modifier_func is used
            query = modifier_func(query) if modifier_func else query.modify(local_modifier)

            # in_process is needed to record coverage properly; it also helps
            # exceptions bubble up and lets tests read state set by modifiers.
            query.execute(
                interactive=False,
                write=True,
                silent=False,
                in_process=in_process,
            )

            # With in_process=False (mirroring normal use of the tool) the queue
            # carries back exceptions from local_modifier so they can fail the
            # test; normally modifier exceptions are only printed with --debug.
            if not errors.empty():
                raise AssertionError from errors.get()

            with open(tmp.name, "r") as reader:
                return reader.read().rstrip()
Пример #3
0
def change_import_paths_to_deprecated():
    """Rewrite sources to use deprecated import paths and drop ``tags`` from DAGs.

    All transformations are registered on one query which is executed once at
    the end with ``write=True``:

    * module imports are renamed from the new paths to the deprecated ones,
    * ``python_virtualenv.py`` and ``process_utils.py`` are copied under
      ``airflow/providers/google/cloud/utils`` and their imports are pointed
      at the copies,
    * the ``tags`` argument is removed from ``DAG`` calls,
    * the KubernetesPodOperator module is renamed back to its contrib path,
    * bare ``airflow.models`` imports in ``bigquery.py`` / ``mlengine.py`` are
      renamed to ``airflow.models.baseoperator``.
    """
    from bowler import LN, TOKEN, Capture, Filename, Query
    from fissix.pytree import Leaf

    def remove_tags_modifier(node: LN, capture: Capture,
                             filename: Filename) -> None:
        """Remove the ``tags`` argument together with the comma following it."""
        for candidate in capture['function_arguments'][0].post_order():
            if not isinstance(candidate, Leaf):
                continue
            if candidate.value != "tags" or candidate.type != TOKEN.NAME:
                continue
            argument = candidate.parent
            separator = argument.next_sibling
            if separator and separator.value == ",":
                separator.remove()
            argument.remove()

    def pure_airflow_models_filter(node: LN, capture: Capture,
                                   filename: Filename) -> bool:
        """Check if select is exactly [airflow, . , models]"""
        return len(list(node.children[1].leaves())) == 3

    qry = Query()

    # (new path, deprecated path)
    deprecated_paths = (
        ("airflow.operators.bash", "airflow.operators.bash_operator"),
        ("airflow.operators.python", "airflow.operators.python_operator"),
        ("airflow.utils.session", "airflow.utils.db"),
    )
    for new_path, old_path in deprecated_paths:
        qry.select_module(new_path).rename(old_path)

    # Move and refactor imports for Dataflow:
    # (file name, current module, module of the copy)
    relocated_utils = (
        ("python_virtualenv.py",
         "airflow.utils.python_virtualenv",
         "airflow.providers.google.cloud.utils.python_virtualenv"),
        ("process_utils.py",
         "airflow.utils.process_utils",
         "airflow.providers.google.cloud.utils.process_utils"),
    )
    here = dirname(__file__)
    for file_name, current_module, relocated_module in relocated_utils:
        copyfile(
            os.path.join(here, os.pardir, "airflow", "utils", file_name),
            os.path.join(here, "airflow", "providers", "google", "cloud",
                         "utils", file_name),
        )
        qry.select_module(current_module).rename(relocated_module)

    # Remove tags
    dag_calls = qry.select_method("DAG").is_call()
    dag_calls.modify(remove_tags_modifier)

    # Fix KubernetesPodOperator imports to use old path
    kubernetes_pod = qry.select_module(
        "airflow.providers.cncf.kubernetes.operators.kubernetes_pod")
    kubernetes_pod.rename("airflow.contrib.operators.kubernetes_pod_operator")

    # Fix BaseOperatorLinks imports
    files = r"bigquery\.py|mlengine\.py"  # noqa
    models_imports = qry.select_module("airflow.models").is_filename(include=files)
    models_imports.filter(pure_airflow_models_filter).rename(
        "airflow.models.baseoperator")

    qry.execute(write=True, silent=False, interactive=False)
def change_import_paths_to_deprecated():
    """Rewrite sources to use deprecated import paths and drop ``tags`` from DAGs.

    Registers every transformation on a single query and executes it once with
    ``write=True``. Two utility modules are copied under
    ``airflow/providers/google/cloud/utils`` and their imports are redirected
    to the copies. Relies on ``remove_tags_modifier`` and
    ``pure_airflow_models_filter`` being available in the enclosing scope.
    """
    qry = Query()

    # (new path, deprecated path)
    for new_path, old_path in (
        ("airflow.operators.bash", "airflow.operators.bash_operator"),
        ("airflow.operators.python", "airflow.operators.python_operator"),
        ("airflow.utils.session", "airflow.utils.db"),
    ):
        qry.select_module(new_path).rename(old_path)

    # Move and refactor imports for Dataflow
    base_dir = dirname(__file__)

    source_file = os.path.join(base_dir, os.pardir, "airflow", "utils", "python_virtualenv.py")
    target_file = os.path.join(base_dir, "airflow", "providers", "google", "cloud", "utils",
                               "python_virtualenv.py")
    copyfile(source_file, target_file)
    virtualenv_imports = qry.select_module("airflow.utils.python_virtualenv")
    virtualenv_imports.rename("airflow.providers.google.cloud.utils.python_virtualenv")

    source_file = os.path.join(base_dir, os.pardir, "airflow", "utils", "process_utils.py")
    target_file = os.path.join(base_dir, "airflow", "providers", "google", "cloud", "utils",
                               "process_utils.py")
    copyfile(source_file, target_file)
    process_utils_imports = qry.select_module("airflow.utils.process_utils")
    process_utils_imports.rename("airflow.providers.google.cloud.utils.process_utils")

    # Remove tags
    dag_calls = qry.select_method("DAG").is_call()
    dag_calls.modify(remove_tags_modifier)

    # Fix KubernetesPodOperator imports to use old path
    kubernetes_pod = qry.select_module("airflow.providers.cncf.kubernetes.operators.kubernetes_pod")
    kubernetes_pod.rename("airflow.contrib.operators.kubernetes_pod_operator")

    # Fix BaseOperatorLinks imports
    files = r"bigquery\.py|mlengine\.py"  # noqa
    models_imports = qry.select_module("airflow.models").is_filename(include=files)
    models_imports.filter(pure_airflow_models_filter).rename("airflow.models.baseoperator")

    qry.execute(write=True, silent=False, interactive=False)
Пример #5
0
def main():
    """Command-line entry point.

    Parses the arguments, configures logging, registers every selected
    refactor from ``refactor.__all__`` on one ``Query`` over ``--inpath`` and
    executes it. With ``--write`` the input is backed up first and files are
    modified in place; otherwise only the diff is printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--log-level",
        dest="log_level",
        type=str,
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        help="set log level, default is INFO",
    )
    parser.add_argument(
        "--no-log-file",
        dest="no_log_file",
        action='store_true',
        default=False,
        help="don't log to file",
    )
    parser.add_argument(
        "--log-filepath",
        dest="log_filepath",
        type=str,
        help='set log file path, default is "report.log"',
    )
    parser.add_argument(
        "--inpath",
        required=True,
        type=str,
        help='the file or directory path you want to upgrade.',
    )
    parser.add_argument(
        "--backup",
        type=str,
        nargs='?',
        default=None,
        const=None,
        help='backup directory, default is the "~/.paddle1to2/".',
    )
    parser.add_argument(
        "--write",
        action='store_true',
        default=False,
        help='modify files in-place.',
    )
    parser.add_argument(
        "--no-confirm",
        dest="no_confirm",
        action='store_true',
        default=False,
        help='write files in-place without confirm, ignored without --write.',
    )
    parser.add_argument(
        "--refactor",
        action='append',
        choices=refactor.__all__,
        help='this is a debug option. Specify refactor you want to run. If none, all refactors will be run.',
    )
    parser.add_argument(
        "--print-match",
        action='store_true',
        default=False,
        help='this is a debug option. Print matched code and node for each file.',
    )

    args = parser.parse_args()
    if args.refactor:
        args.refactor = set(args.refactor)
    # fall back to ~/.paddle1to2 when no backup directory is given
    args.backup = (
        os.path.join(os.path.expanduser('~'), '.paddle1to2')
        if args.backup is None
        else os.path.expanduser(args.backup)
    )

    if args.log_level:
        logger.setLevel(args.log_level)
    if not args.no_log_file:
        log_to_file(args.log_filepath)
    if not should_convert(args.inpath):
        logger.error("convert abort!")
        sys.exit(1)

    # refactor code via "Query" step by step.
    q = Query(args.inpath)
    for fn in refactor.__all__:
        refactor_func = getattr(refactor, fn)
        if args.refactor and fn not in args.refactor:
            continue
        assert callable(refactor_func), "{} is not callable.".format(fn)
        logger.debug("run refactor: {}".format(fn))
        registered = refactor_func(q, change_spec)
        if args.print_match:
            registered.filter(filters.print_match)

    if args.write:
        # backup args.inpath, then print diff to stdout and modify files in place.
        backup = backup_inpath(args.inpath, args.backup)
        execute_options = {
            "write": True,
            "silent": False,
            "need_confirm": not args.no_confirm,
            "backup": backup,
        }
    else:
        # print diff to stdout
        execute_options = {"write": False, "silent": False}

    # in_process=True is passed on Windows only
    if utils.is_windows():
        execute_options["in_process"] = True
    q.execute(**execute_options)

    if not args.write:
        click.secho('Refactor finished without touching source files, add "--write" to modify source files in-place if everything is ok.', fg="red", bold=True)
Пример #6
0
class RefactorBackportPackages:
    """
    Refactors the code of providers, so that it works in 1.10.

    """
    def __init__(self) -> None:
        """Create the single ``Query`` on which all refactoring steps are registered."""
        self.qry = Query()

    def remove_class(self, class_name) -> None:
        """
        Registers the removal of every node selected for the given class name.

        Example diff generated:

        .. code-block:: diff

            --- ./airflow/providers/google/cloud/operators/kubernetes_engine.py
            +++ ./airflow/providers/google/cloud/operators/kubernetes_engine.py
            @@ -179,86 +179,3 @@
            -
            -class GKEStartPodOperator(KubernetesPodOperator):
            -
            - ...

        :param class_name: name of the class to remove
        """
        def _remover(node: LN, capture: Capture, filename: Filename) -> None:
            # detach the matched node from its tree
            node.remove()

        class_selection = self.qry.select_class(class_name)
        class_selection.modify(_remover)

    def rename_deprecated_modules(self) -> None:
        """
        Renames back to deprecated modules imported. Example diff generated:

        .. code-block:: diff

            --- ./airflow/providers/dingding/operators/dingding.py
            +++ ./airflow/providers/dingding/operators/dingding.py
            @@ -16,7 +16,7 @@
             # specific language governing permissions and limitations
             # under the License.

            -from airflow.operators.bash import BaseOperator
            +from airflow.operators.bash_operator import BaseOperator
             from airflow.providers.dingding.hooks.dingding import DingdingHook
             from airflow.utils.decorators import apply_defaults

        """
        changes = [
            ("airflow.operators.bash", "airflow.operators.bash_operator"),
            ("airflow.operators.python", "airflow.operators.python_operator"),
            ("airflow.utils.session", "airflow.utils.db"),
            ("airflow.providers.cncf.kubernetes.operators.kubernetes_pod",
             "airflow.contrib.operators.kubernetes_pod_operator"),
        ]
        for new, old in changes:
            self.qry.select_module(new).rename(old)

    def add_provide_context_to_python_operators(self) -> None:
        """
        Adds ``provide_context=True`` to calls of PythonOperator and BranchPythonOperator.

        These calls are mostly found in example_dags, so the change applies to example
        DAGs rather than to operators/hooks etc. The example DAGs are packaged together
        with the provider classes and should serve as examples independently of the
        Airflow version they are installed in. The "Core" operators are taken from the
        Airflow version the provider packages are installed in - they do not have
        (for now) their own provider package.

        The core operators are:

            * Python
            * BranchPython
            * Bash
            * Branch
            * Dummy
            * LatestOnly
            * ShortCircuit
            * PythonVirtualEnv

        Example diff generated:

        .. code-block:: diff

            --- ./airflow/providers/amazon/aws/example_dags/example_google_api_to_s3_transfer_advanced.py
            +++ ./airflow/providers/amazon/aws/example_dags/example_google_api_to_s3_transfer_advanced.py
            @@ -105,7 +105,8 @@
                         task_video_ids_to_s3.google_api_response_via_xcom,
                         task_video_ids_to_s3.task_id
                     ],
            -        task_id='check_and_transform_video_ids'
            +        task_id='check_and_transform_video_ids',
            +        provide_context=True
                 )

        """
        def add_provide_context_to_python_operator(node: LN, capture: Capture,
                                                   filename: Filename) -> None:
            arguments = capture['function_arguments'][0]

            # make sure the existing arguments end with a comma before appending
            if arguments.children:
                trailing = arguments.children[-1]
                ends_with_comma = isinstance(trailing, Leaf) and trailing.type == token.COMMA
                if not ends_with_comma:
                    arguments.append_child(Comma())

            new_argument = KeywordArg(Name('provide_context'), Name('True'))
            # reuse the whitespace/indentation of the first argument
            new_argument.prefix = arguments.children[0].prefix
            arguments.append_child(new_argument)

        for operator_name in ("PythonOperator", "BranchPythonOperator"):
            operator_calls = self.qry.select_function(operator_name).is_call()
            operator_calls.modify(add_provide_context_to_python_operator)

    def remove_super_init_call(self):
        r"""
        Removes the super().__init__() call from Hooks (subclasses of BaseHook).

        In Airflow 1.10 almost none of the Hooks call super().init(). It was always broken
        in Airflow 1.10 - BaseHook has its own __init__() which is wrongly implemented and
        requires the source parameter to be passed:

        .. code-block:: python

            def __init__(self, source):
                pass

        This was fixed in 2.0, but for the entire 1.10 line calling super().init() is not a
        good idea - it basically does nothing even if you do, and it does not initialize
        LoggingMixin (BaseHook derives from LoggingMixin), which is the main reason why Hook
        logs sometimes do not work as they are supposed to:

        .. code-block:: python

            class LoggingMixin(object):
                \"\"\"
                Convenience super-class to have a logger configured with the class name
                \"\"\"
                def __init__(self, context=None):
                    self._set_context(context)

        There are two Hooks in 1.10 that call super.__init__ :

        .. code-block:: python

               super(CloudSqlDatabaseHook, self).__init__(source=None)
               super(MongoHook, self).__init__(source='mongo')

        That does not help with anything because init in BaseHook does nothing, so the
        super().init() call is removed from Hooks when backporting to 1.10.

        Example diff generated:

        .. code-block:: diff

            --- ./airflow/providers/apache/druid/hooks/druid.py
            +++ ./airflow/providers/apache/druid/hooks/druid.py
            @@ -49,7 +49,7 @@
                         timeout=1,
                         max_ingestion_time=None):

            -        super().__init__()
            +
                     self.druid_ingest_conn_id = druid_ingest_conn_id
                     self.timeout = timeout
                     self.max_ingestion_time = max_ingestion_time

        """
        def remove_super_init_call_modifier(node: LN, capture: Capture,
                                            filename: Filename) -> None:
            for descendant in node.post_order():
                is_super_name = isinstance(descendant, Leaf) and descendant.value == "super"
                if not is_super_name:
                    continue
                # the parent of the ``super`` leaf is removed when any of its leaves has text
                has_text = any(
                    leaf.value
                    for leaf in descendant.parent.post_order()
                    if isinstance(leaf, Leaf)
                )
                if has_text:
                    descendant.parent.remove()

        hook_classes = self.qry.select_subclass("BaseHook")
        hook_classes.modify(remove_super_init_call_modifier)

    def remove_tags(self):
        """
        Removes the ``tags`` argument from ``DAG`` calls (in example_dags).

        Note that those changes apply to example DAGs, not to the operators/hooks etc.
        The example DAGs are packaged together with the provider classes and should serve
        as examples independently of the Airflow version they are installed in. Tags were
        added in 1.10.10 and occasionally the example DAGs are run as system tests on
        pre-1.10.10 versions, so the tags are removed here.

        Example diff generated:

        .. code-block:: diff


            -- ./airflow/providers/amazon/aws/example_dags/example_datasync_2.py
            +++ ./airflow/providers/amazon/aws/example_dags/example_datasync_2.py
            @@ -83,8 +83,7 @@
             with models.DAG(
                 "example_datasync_2",
                 default_args=default_args,
            -    schedule_interval=None,  # Override to match your needs
            -    tags=['example'],
            +    schedule_interval=None,
             ) as dag:

        """
        def remove_tags_modifier(_: LN, capture: Capture,
                                 filename: Filename) -> None:
            for candidate in capture['function_arguments'][0].post_order():
                if not isinstance(candidate, Leaf):
                    continue
                if candidate.value != "tags" or candidate.type != TOKEN.NAME:
                    continue
                argument = candidate.parent
                separator = argument.next_sibling
                # drop the comma following the argument, then the argument itself
                if separator and separator.value == ",":
                    separator.remove()
                argument.remove()

        # Remove tags
        dag_calls = self.qry.select_method("DAG").is_call()
        dag_calls.modify(remove_tags_modifier)

    def remove_poke_mode_only_decorator(self):
        r"""
        Removes @poke_mode_only decorator. The decorator is only available in Airflow 2.0.

        For every subclass of ``BaseSensorOperator`` whose sibling node is a
        ``@poke_mode_only`` decorator, the decorator is removed and the whole tree is
        searched for ``poke_mode_only`` names, which are removed as well. When the name
        is one of several comma-separated names, only the name and one adjacent comma
        are removed; otherwise the node holding the name is removed entirely.

        Example diff generated:

        .. code-block:: diff

            --- ./airflow/providers/google/cloud/sensors/gcs.py
            +++ ./airflow/providers/google/cloud/sensors/gcs.py
            @@ -189,7 +189,6 @@
                 return datetime.now()


            -@poke_mode_only
             class GCSUploadSessionCompleteSensor(BaseSensorOperator):
                 \"\"\"
                Checks for changes in the number of objects at prefix in Google Cloud Storage

        """
        def find_and_remove_poke_mode_only_import(node: LN):
            # Iterate over a snapshot: the loop body removes entries from node.children.
            for child in list(node.children):
                # type 1 is the NAME token
                if isinstance(
                        child, Leaf
                ) and child.type == 1 and child.value == 'poke_mode_only':
                    import_node = child.parent
                    # Capture both neighbours up front, before the tree is modified.
                    previous_leaf = child.prev_sibling
                    following_leaf = child.next_sibling
                    # remove the import by default
                    skip_import_remove = False
                    if isinstance(previous_leaf,
                                  Leaf) and previous_leaf.value == ",":
                        # name is preceded by other names: remove the comma before it
                        # (a comma after it, if any, stays to separate the remaining names)
                        previous_leaf.remove()
                        # do not remove if there are other imports
                        skip_import_remove = True
                    elif isinstance(following_leaf,
                                    Leaf) and following_leaf.value == ",":
                        # name is the first of several: remove the comma after it so that
                        # the remaining names do not start with a dangling comma
                        following_leaf.remove()
                        skip_import_remove = True
                    # remove the name itself
                    child.remove()
                    if not skip_import_remove:
                        # remove the whole import if there were no siblings
                        import_node.remove()
                else:
                    find_and_remove_poke_mode_only_import(child)

        def find_root_remove_import(node: LN):
            # walk up to the root of the tree and clean it from there
            current_node = node
            while current_node.parent:
                current_node = current_node.parent
            find_and_remove_poke_mode_only_import(current_node)

        def is_poke_mode_only_decorator(node: LN) -> bool:
            return len(node.children) >= 2 and \
                isinstance(node.children[0], Leaf) and node.children[0].value == '@' and \
                isinstance(node.children[1], Leaf) and node.children[1].value == 'poke_mode_only'

        def remove_poke_mode_only_modifier(node: LN, capture: Capture,
                                           filename: Filename) -> None:
            # Snapshot the siblings: decorators are removed while looping.
            for child in list(capture['node'].parent.children):
                if is_poke_mode_only_decorator(child):
                    find_root_remove_import(child)
                    child.remove()

        self.qry.select_subclass("BaseSensorOperator").modify(
            remove_poke_mode_only_modifier)

    def refactor_amazon_package(self):
        """
        Fixes to "amazon" providers package.

        Copies some of the modules used from core Airflow into the "common.utils"
        package of the provider and registers import renames so that files of the
        amazon provider use the copies.

        typing_compat.py and email.py are copied; the import changes as in this diff:

        .. code-block:: diff

            --- ./airflow/providers/amazon/aws/operators/ecs.py
            +++ ./airflow/providers/amazon/aws/operators/ecs.py
            @@ -24,7 +24,7 @@
             from airflow.models import BaseOperator
             from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
             from airflow.providers.amazon.aws.hooks.logs import AwsLogsHook
            -from airflow.typing_compat import Protocol, runtime_checkable
            +from airflow.providers.amazon.common.utils.typing_compat import Protocol, runtime_checkable
             from airflow.utils.decorators import apply_defaults

        """
        def amazon_package_filter(node: LN, capture: Capture,
                                  filename: Filename) -> bool:
            return filename.startswith("./airflow/providers/amazon/")

        def source_path(*parts):
            # path below the "airflow" folder of the source tree
            return os.path.join(get_source_airflow_folder(), "airflow", *parts)

        def target_path(*parts):
            # path below the "common" folder of the amazon provider package
            return os.path.join(get_target_providers_package_folder("amazon"),
                                "common", *parts)

        os.makedirs(target_path("utils"), exist_ok=True)

        # package markers for "common" and "common.utils"
        copyfile(source_path("utils", "__init__.py"),
                 target_path("__init__.py"))
        copyfile(source_path("utils", "__init__.py"),
                 target_path("utils", "__init__.py"))

        copyfile(source_path("typing_compat.py"),
                 target_path("utils", "typing_compat.py"))
        typing_compat_imports = self.qry.select_module("airflow.typing_compat")
        typing_compat_imports.filter(callback=amazon_package_filter).rename(
            "airflow.providers.amazon.common.utils.typing_compat")

        copyfile(source_path("utils", "email.py"),
                 target_path("utils", "email.py"))
        email_imports = self.qry.select_module("airflow.utils.email")
        email_imports.filter(callback=amazon_package_filter).rename(
            "airflow.providers.amazon.common.utils.email")

    def refactor_google_package(self):
        r"""
        Fixes to "google" providers package.

        Copies some of the modules used from core Airflow into the "common.utils" package
        of the provider and registers import renames so that files of the google provider
        use the copies. The imports inside the copied files are renamed as well.

        For example python_virtualenv.py and process_utils.py are copied and the import
        changes as in this diff:

        .. code-block:: diff

            --- ./airflow/providers/google/cloud/operators/kubernetes_engine.py
            +++ ./airflow/providers/google/cloud/operators/kubernetes_engine.py
            @@ -28,11 +28,11 @@

             from airflow.exceptions import AirflowException
             from airflow.models import BaseOperator
            -from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator
            +from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
             from airflow.providers.google.cloud.hooks.kubernetes_engine import GKEHook
             from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
             from airflow.utils.decorators import apply_defaults
            -from airflow.utils.process_utils import execute_in_subprocess, patch_environ
            +from airflow.providers.google.common.utils.process_utils import execute_in_subprocess


        In the copied python_virtualenv.py the import of process_utils.py changes too. This
        happens automatically and is solved by Pybowler.


        .. code-block:: diff

            --- ./airflow/providers/google/common/utils/python_virtualenv.py
            +++ ./airflow/providers/google/common/utils/python_virtualenv.py
            @@ -21,7 +21,7 @@
             \"\"\"
            from typing import List, Optional

            -from airflow.utils.process_utils import execute_in_subprocess
            +from airflow.providers.google.common.utils.process_utils import execute_in_subprocess


            def _generate_virtualenv_cmd(tmp_dir: str, python_bin: str, system_site_packages: bool)


        Base operator links are renamed to deprecated names:


        .. code-block:: diff

            --- ./airflow/providers/google/cloud/operators/mlengine.py
            +++ ./airflow/providers/google/cloud/operators/mlengine.py
            @@ -24,7 +24,7 @@
             from typing import List, Optional

             from airflow.exceptions import AirflowException
            -from airflow.models import BaseOperator, BaseOperatorLink
            +from airflow.models.baseoperator import BaseOperator, BaseOperatorLink
             from airflow.models.taskinstance import TaskInstance
             from airflow.providers.google.cloud.hooks.mlengine import MLEngineHook
             from airflow.utils.decorators import apply_defaults


        GKEStartPodOperator is removed (example in remove_class method).


        The helpers are copied (google.common.utils) and their imports renamed:

        .. code-block:: diff

            --- ./airflow/providers/google/cloud/example_dags/example_datacatalog.py
            +++ ./airflow/providers/google/cloud/example_dags/example_datacatalog.py
            @@ -37,7 +37,7 @@
                 CloudDataCatalogUpdateTagTemplateOperator,
             )
             from airflow.utils.dates import days_ago
            -from airflow.utils.helpers import chain
            +from airflow.providers.google.common.utils.helpers import chain

             default_args = {"start_date": days_ago(1)}

        And also module_loading, which is used by helpers:

        .. code-block:: diff

            --- ./airflow/providers/google/common/utils/helpers.py
            +++ ./airflow/providers/google/common/utils/helpers.py
            @@ -26,7 +26,7 @@
             from jinja2 import Template

             from airflow.exceptions import AirflowException
            -from airflow.utils.module_loading import import_string
            +from airflow.providers.google.common.utils.module_loading import import_string

             KEY_REGEX = re.compile(r'^[\\w.-]+$')

        """
        def google_package_filter(node: LN, capture: Capture,
                                  filename: Filename) -> bool:
            return filename.startswith("./airflow/providers/google/")

        def pure_airflow_models_filter(node: LN, capture: Capture,
                                       filename: Filename) -> bool:
            """Check if select is exactly [airflow, . , models]"""
            return len(list(node.children[1].leaves())) == 3

        def source_utils(file_name):
            # file inside "airflow/utils" of the source tree
            return os.path.join(get_source_airflow_folder(), "airflow", "utils",
                                file_name)

        def target_utils(*parts):
            # path below "common/utils" of the google provider package
            return os.path.join(get_target_providers_package_folder("google"),
                                "common", "utils", *parts)

        os.makedirs(target_utils(), exist_ok=True)
        copyfile(source_utils("__init__.py"), target_utils("__init__.py"))
        copyfile(source_utils("python_virtualenv.py"),
                 target_utils("python_virtualenv.py"))

        copy_helper_py_file(target_utils("helpers.py"))

        copyfile(source_utils("module_loading.py"),
                 target_utils("module_loading.py"))
        virtualenv_imports = self.qry.select_module("airflow.utils.python_virtualenv")
        virtualenv_imports.filter(callback=google_package_filter).rename(
            "airflow.providers.google.common.utils.python_virtualenv")

        copyfile(source_utils("process_utils.py"),
                 target_utils("process_utils.py"))

        # (core module, module of the copy inside the provider)
        relocated_modules = (
            ("airflow.utils.process_utils",
             "airflow.providers.google.common.utils.process_utils"),
            ("airflow.utils.helpers",
             "airflow.providers.google.common.utils.helpers"),
            ("airflow.utils.module_loading",
             "airflow.providers.google.common.utils.module_loading"),
        )
        for core_module, provider_module in relocated_modules:
            module_imports = self.qry.select_module(core_module)
            module_imports.filter(callback=google_package_filter).rename(
                provider_module)

        # Fix BaseOperatorLinks imports
        models_imports = self.qry.select_module("airflow.models").is_filename(
            include=r"bigquery\.py|mlengine\.py")
        models_imports = models_imports.filter(callback=google_package_filter)
        models_imports.filter(pure_airflow_models_filter).rename(
            "airflow.models.baseoperator")

        self.remove_class("GKEStartPodOperator")

        start_pod_classes = self.qry.select_class("GKEStartPodOperator")
        start_pod_classes = start_pod_classes.filter(callback=google_package_filter)
        start_pod_classes.is_filename(
            include=r"example_kubernetes_engine\.py").rename("GKEPodOperator")

    def refactor_odbc_package(self):
        """
        Fixes to "odbc" providers package.

        Creates the "utils" package of the provider, copies helpers.py into it and
        registers an import rename so that files of the odbc provider use the copy.

        The import changes as in this example:

        .. code-block:: diff

            -from airflow.utils.helpers import chain
            +from airflow.providers.odbc.utils.helpers import chain

        """
        def odbc_package_filter(node: LN, capture: Capture,
                                filename: Filename) -> bool:
            return filename.startswith("./airflow/providers/odbc/")

        def target_utils(file_name):
            # file inside the "utils" folder of the odbc provider package
            return os.path.join(get_target_providers_package_folder("odbc"),
                                "utils", file_name)

        utils_folder = os.path.join(get_target_providers_folder(), "odbc", "utils")
        os.makedirs(utils_folder, exist_ok=True)

        # package marker for the new "utils" package
        init_source = os.path.join(get_source_airflow_folder(), "airflow",
                                   "utils", "__init__.py")
        copyfile(init_source, target_utils("__init__.py"))
        copy_helper_py_file(target_utils("helpers.py"))

        helpers_imports = self.qry.select_module("airflow.utils.helpers")
        helpers_imports.filter(callback=odbc_package_filter).rename(
            "airflow.providers.odbc.utils.helpers")

    def do_refactor(self, in_process: bool = False) -> None:  # noqa
        """
        Registers every refactoring step on the query and executes it, writing changes.

        :param in_process: forwarded to the query execution; set it to True in order
            to debug Bowler
        """
        registration_steps = (
            self.rename_deprecated_modules,
            self.refactor_amazon_package,
            self.refactor_google_package,
            self.refactor_odbc_package,
            self.remove_tags,
            self.remove_super_init_call,
            self.add_provide_context_to_python_operators,
            self.remove_poke_mode_only_decorator,
        )
        for register_step in registration_steps:
            register_step()

        # In order to debug Bowler - set in_process to True
        self.qry.execute(
            write=True,
            silent=False,
            interactive=False,
            in_process=in_process,
        )
def change_import_paths_to_deprecated():
    """Rewrite sources so that they import from the deprecated (old) module paths.

    A single Bowler ``Query`` is assembled from the steps below and then executed
    with ``write=True`` and ``interactive=False``:

    * rename every module pair listed in ``changes`` from the new to the old name;
    * copy ``python_virtualenv.py`` and ``process_utils.py`` from ``airflow/utils``
      (one directory above this file) into ``airflow/providers/google/cloud/utils``
      (next to this file) and rename the matching module references;
    * drop ``tags`` arguments from ``DAG(...)`` calls;
    * point the AWS hook references in ``cloud_storage_transfer_service.py`` at
      ``airflow.contrib.hooks.aws_hook`` / ``AwsHook``;
    * rename pure ``airflow.models`` references in ``bigquery.py`` / ``mlengine.py``
      to ``airflow.models.baseoperator``;
    * remove ``super`` calls from ``BaseHook`` subclasses;
    * add ``provide_context=True`` to selected operator calls;
    * remove ``GKEStartPodOperator`` and rename its usages to ``GKEPodOperator``.

    After the query has run, the old ``GKEPodOperator`` import is appended to
    ``kubernetes_engine.py``.
    """
    from bowler import LN, TOKEN, Capture, Filename, Query
    from fissix.pytree import Leaf
    from fissix.fixer_util import KeywordArg, Name, Comma

    def remove_tags_modifier(node: LN, capture: Capture, filename: Filename) -> None:
        """Remove the ``tags`` argument (and a directly following comma) from a call."""
        # A dedicated loop name avoids shadowing the ``node`` callback parameter.
        for leaf in capture['function_arguments'][0].post_order():
            if isinstance(leaf, Leaf) and leaf.value == "tags" and leaf.type == TOKEN.NAME:
                if leaf.parent.next_sibling and leaf.parent.next_sibling.value == ",":
                    leaf.parent.next_sibling.remove()
                leaf.parent.remove()

    def pure_airflow_models_filter(node: LN, capture: Capture, filename: Filename) -> bool:
        """Check if select is exactly [airflow, . , models]"""
        return len(list(node.children[1].leaves())) == 3

    def remove_super_init_call(node: LN, capture: Capture, filename: Filename) -> None:
        """Remove the parent node of every ``super`` leaf found below ``node``."""
        for ch in node.post_order():
            if isinstance(ch, Leaf) and ch.value == "super":
                # NOTE(review): this check looks always true, because ``ch`` itself is
                # a leaf of its parent - confirm before simplifying.
                if any(c.value for c in ch.parent.post_order() if isinstance(c, Leaf)):
                    ch.parent.remove()

    def add_provide_context_to_python_operator(node: LN, capture: Capture, filename: Filename) -> None:
        """Append ``, provide_context=True`` to the captured call arguments."""
        fn_args = capture['function_arguments'][0]
        fn_args.append_child(Comma())

        provide_context_arg = KeywordArg(Name('provide_context'), Name('True'))
        # Reuse the prefix (leading whitespace) of the first argument for the new one.
        provide_context_arg.prefix = fn_args.children[0].prefix
        fn_args.append_child(provide_context_arg)

    def remove_class(qry, class_name) -> None:
        """Register a modifier on ``qry`` that removes matches of ``class_name``."""
        def _remover(node: LN, capture: Capture, filename: Filename) -> None:
            # 300 and 311 are raw grammar symbol numbers; presumably the import and
            # usage node types that must be kept - TODO confirm against the grammar.
            if node.type not in (300, 311):  # remove only definition
                node.remove()

        qry.select_class(class_name).modify(_remover)

    # (new module path, deprecated module path)
    changes = [
        ("airflow.operators.bash", "airflow.operators.bash_operator"),
        ("airflow.operators.python", "airflow.operators.python_operator"),
        ("airflow.utils.session", "airflow.utils.db"),
        (
            "airflow.providers.cncf.kubernetes.operators.kubernetes_pod",
            "airflow.contrib.operators.kubernetes_pod_operator"
        ),
    ]

    qry = Query()
    for new, old in changes:
        qry.select_module(new).rename(old)

    # Move and refactor imports for Dataflow
    copyfile(
        os.path.join(dirname(__file__), os.pardir, "airflow", "utils", "python_virtualenv.py"),
        os.path.join(
            dirname(__file__), "airflow", "providers", "google", "cloud", "utils", "python_virtualenv.py"
        )
    )
    (
        qry
        .select_module("airflow.utils.python_virtualenv")
        .rename("airflow.providers.google.cloud.utils.python_virtualenv")
    )
    copyfile(
        os.path.join(dirname(__file__), os.pardir, "airflow", "utils", "process_utils.py"),
        os.path.join(
            dirname(__file__), "airflow", "providers", "google", "cloud", "utils", "process_utils.py"
        )
    )
    (
        qry
        .select_module("airflow.utils.process_utils")
        .rename("airflow.providers.google.cloud.utils.process_utils")
    )

    # Remove tags
    qry.select_method("DAG").is_call().modify(remove_tags_modifier)

    # Fix AWS import in Google Cloud Transfer Service
    (
        qry
        .select_module("airflow.providers.amazon.aws.hooks.base_aws")
        .is_filename(include=r"cloud_storage_transfer_service\.py")
        .rename("airflow.contrib.hooks.aws_hook")
    )

    (
        qry
        .select_class("AwsBaseHook")
        .is_filename(include=r"cloud_storage_transfer_service\.py")
        .filter(lambda n, c, f: n.type == 300)
        .rename("AwsHook")
    )

    # Fix BaseOperatorLinks imports
    files = r"bigquery\.py|mlengine\.py"  # noqa
    qry.select_module("airflow.models").is_filename(include=files).filter(pure_airflow_models_filter).rename(
        "airflow.models.baseoperator")

    # Fix super().__init__() call in hooks
    qry.select_subclass("BaseHook").modify(remove_super_init_call)

    (
        qry.select_function("PythonOperator")
        .is_call()
        .is_filename(include=r"mlengine_operator_utils.py$")
        .modify(add_provide_context_to_python_operator)
    )

    (
        qry.select_function("BranchPythonOperator")
        .is_call()
        .is_filename(include=r"example_google_api_to_s3_transfer_advanced.py$")
        .modify(add_provide_context_to_python_operator)
    )

    # Remove new class and rename usages of old
    remove_class(qry, "GKEStartPodOperator")
    (
        qry
        .select_class("GKEStartPodOperator")
        .is_filename(include=r"example_kubernetes_engine\.py")
        .rename("GKEPodOperator")
    )

    qry.execute(write=True, silent=False, interactive=False)

    # Add old import to GKE
    gke_path = os.path.join(
        dirname(__file__), "airflow", "providers", "google", "cloud", "operators", "kubernetes_engine.py"
    )
    # Newlines are written explicitly (``writelines`` adds none): the leading one
    # keeps the import on its own line even when the file does not end with a
    # newline, the trailing one terminates the file.
    with open(gke_path, "a") as f:
        f.write("\nfrom airflow.contrib.operators.gcp_container_operator import GKEPodOperator\n")
Пример #8
0
def change_import_paths_to_deprecated():
    """Switch the sources back to the deprecated import paths.

    One Bowler ``Query`` collects all of the transformations below and is then
    executed with ``write=True``, ``silent=False`` and ``interactive=False``:

    * module renames from ``module_renames`` (new path -> old path);
    * copies of ``python_virtualenv.py`` and ``process_utils.py`` into the Google
      provider ``utils`` directory, plus the matching module renames;
    * removal of ``tags`` arguments from ``DAG(...)`` calls;
    * the old import path for the Kubernetes pod operator module;
    * ``airflow.models`` -> ``airflow.models.baseoperator`` in selected files;
    * removal of ``super`` calls in ``BaseHook`` subclasses;
    * ``provide_context=True`` for ``PythonOperator`` calls in one file;
    * removal of ``GKEStartPodOperator``.
    """
    from bowler import LN, TOKEN, Capture, Filename, Query
    from fissix.pytree import Leaf
    from fissix.fixer_util import KeywordArg, Name, Comma

    def remove_tags_modifier(node: LN, capture: Capture,
                             filename: Filename) -> None:
        """Strip the ``tags`` argument and a directly following comma."""
        for leaf in capture['function_arguments'][0].post_order():
            if not isinstance(leaf, Leaf):
                continue
            if leaf.value != "tags" or leaf.type != TOKEN.NAME:
                continue
            trailing = leaf.parent.next_sibling
            if trailing and trailing.value == ",":
                trailing.remove()
            leaf.parent.remove()

    def pure_airflow_models_filter(node: LN, capture: Capture,
                                   filename: Filename) -> bool:
        """Check if select is exactly [airflow, . , models]"""
        module_leaves = list(node.children[1].leaves())
        return len(module_leaves) == 3

    def remove_super_init_call(node: LN, capture: Capture,
                               filename: Filename) -> None:
        """Remove the parent node of each ``super`` leaf below ``node``."""
        for candidate in node.post_order():
            if not isinstance(candidate, Leaf) or candidate.value != "super":
                continue
            has_valued_leaf = any(
                inner.value for inner in candidate.parent.post_order()
                if isinstance(inner, Leaf))
            if has_valued_leaf:
                candidate.parent.remove()

    def add_provide_context_to_python_operator(node: LN, capture: Capture,
                                               filename: Filename) -> None:
        """Append ``, provide_context=True`` to the captured call arguments."""
        call_args = capture['function_arguments'][0]
        call_args.append_child(Comma())

        extra_kwarg = KeywordArg(Name('provide_context'), Name('True'))
        # The new argument reuses the prefix of the first existing argument.
        extra_kwarg.prefix = call_args.children[0].prefix
        call_args.append_child(extra_kwarg)

    def remove_class(qry, class_name) -> None:
        """Register a modifier on ``qry`` that removes matches of ``class_name``."""
        def _remover(node: LN, capture: Capture, filename: Filename) -> None:
            # 300 and 311 are raw grammar symbol numbers; presumably an import
            # statement and a usage node respectively - TODO confirm.
            if node.type == 311:
                node.parent.remove()
                return
            if node.type != 300:
                node.remove()
                return
            # Type 300: drop only the class name and a directly following comma.
            for leaf in node.post_order():
                if not (isinstance(leaf, Leaf) and leaf.value == class_name):
                    continue
                follower = leaf.next_sibling
                if follower and follower.value == ",":
                    follower.remove()
                leaf.remove()

        qry.select_class(class_name).modify(_remover)

    # (new module path, deprecated module path)
    module_renames = [
        ("airflow.operators.bash", "airflow.operators.bash_operator"),
        ("airflow.operators.python", "airflow.operators.python_operator"),
        ("airflow.utils.session", "airflow.utils.db"),
    ]

    qry = Query()
    for new_path, old_path in module_renames:
        qry.select_module(new_path).rename(old_path)

    # Move and refactor imports for Dataflow
    here = dirname(__file__)
    moved_utils = (
        (
            "python_virtualenv.py",
            "airflow.utils.python_virtualenv",
            "airflow.providers.google.cloud.utils.python_virtualenv",
        ),
        (
            "process_utils.py",
            "airflow.utils.process_utils",
            "airflow.providers.google.cloud.utils.process_utils",
        ),
    )
    for file_name, core_module, provider_module in moved_utils:
        source_file = os.path.join(here, os.pardir, "airflow", "utils",
                                   file_name)
        target_file = os.path.join(here, "airflow", "providers", "google",
                                   "cloud", "utils", file_name)
        copyfile(source_file, target_file)
        qry.select_module(core_module).rename(provider_module)

    # Remove tags
    dag_calls = qry.select_method("DAG").is_call()
    dag_calls.modify(remove_tags_modifier)

    # Fix KubernetesPodOperator imports to use old path
    pod_module = qry.select_module(
        "airflow.providers.cncf.kubernetes.operators.kubernetes_pod")
    pod_module.rename("airflow.contrib.operators.kubernetes_pod_operator")

    # Fix BaseOperatorLinks imports
    link_files = r"bigquery\.py|mlengine\.py"  # noqa
    models_refs = qry.select_module("airflow.models")
    models_refs = models_refs.is_filename(include=link_files)
    models_refs = models_refs.filter(pure_airflow_models_filter)
    models_refs.rename("airflow.models.baseoperator")

    # Fix super().__init__() call in hooks
    qry.select_subclass("BaseHook").modify(remove_super_init_call)

    python_operator_calls = qry.select_function("PythonOperator").is_call()
    python_operator_calls = python_operator_calls.is_filename(
        include=r"mlengine_operator_utils.py$")
    python_operator_calls.modify(add_provide_context_to_python_operator)

    remove_class(qry, "GKEStartPodOperator")

    qry.execute(write=True, silent=False, interactive=False)
Пример #9
0
def main():
    """Parse the command line, build the Bowler query and execute it.

    Each entry of ``steps`` maps a step number to a callable that takes the
    query, adds one selector/modifier pair (two for step 4) and returns the
    query. With ``--step N`` only step ``N`` is added; without it every step
    is added in ascending order.

    :raises KeyError: if ``--step`` names a step that does not exist.
    """
    parser = argparse.ArgumentParser(
        description="Converts GDAL's test assertions to be pytest-style where possible."
    )
    parser.add_argument(
        "--no-input",
        dest="interactive",
        default=True,
        action="store_false",
        help="Non-interactive mode",
    )
    parser.add_argument(
        "--no-write",
        dest="write",
        default=True,
        action="store_false",
        help="Don't write the changes to the source file, just output a diff to stdout",
    )
    parser.add_argument(
        "--debug",
        default=False,
        action="store_true",
        help="Spit out debugging information",
    )
    parser.add_argument(
        "--silent",
        default=False,
        action="store_true",
        help="Don't spit out a diff, just write changes to files",
    )
    # The default must be None (not False): ``False is not None`` is true and
    # ``steps[False]`` is ``steps[0]``, so a False default would silently run
    # only step 0 whenever --step is omitted.
    parser.add_argument(
        "--step",
        default=None,
        action="store",
        type=int,
        help="Which step to run",
    )
    parser.add_argument(
        "files",
        nargs="+",
        help="The python source file(s) to operate on.",
    )
    args = parser.parse_args()

    # No way to pass this to .modify() callables, so we just set it at module level
    flags["debug"] = args.debug

    query = Query(*args.files)

    steps = {
        # Rename all tests `test_*`
        0: lambda q: q.select("""
            expr_stmt< "gdaltest_list" "=" atom< "["
                testnames=( listmaker | NAME )
            "]" > >
            """).modify(rename_tests),
        # `if x() != 'success'` --> `x()` (the 'success' return value gets removed further down)
        1: lambda q: q.select("""
            if_stmt<
                "if" comparison<
                    x=any "!=" ( "'success'" | '"success"' )
                > ":"
                suite<
                    any any
                    [
                        simple_stmt<
                            power<
                                (
                                    "gdaltest" trailer< "." "post_reason" >
                                |
                                    "post_reason"
                                )
                                trailer< "(" reason=( "'failure'" | "'fail'" | "'failed'" | '"fail"' | '"failed"' | '"failure"' ) ")" >
                            >
                            any
                        >
                    ]
                    simple_stmt<
                        return_stmt< "return" returntype=any >
                        any
                    >
                    dedent=any
                >
            >
            """).modify(callback=remove_success_expectations),
        # Remove useless `post_reason('fail')` calls
        2: lambda q: q.select("""
            simple_stmt<
                power<
                    (
                        "gdaltest" trailer< "." "post_reason" >
                    |
                        "post_reason"
                    )
                    trailer< "(" reason=STRING ")" >
                >
                any
            >
        """).modify(callback=remove_useless_post_reason_calls),
        # Turn basic if/post_reason clauses into assertions
        # (the pattern is a plain string: it contains no placeholders to fill in)
        3: lambda q: q.select("""
            if_stmt<
                "if" condition=any ":"
                suite<
                    any any
                    reason_candidates=(
                        simple_stmt<
                            (
                                power<
                                    (
                                        "gdaltest" trailer< "." "post_reason" >
                                    |
                                        "post_reason"
                                    )
                                    trailer
                                >
                            |
                                power<
                                    "print"
                                    trailer
                                >
                            )
                            any
                        >
                    )*
                    simple_stmt<
                        return_call=return_stmt< "return" returntype=STRING >
                        any
                    >
                    dedent=any
                >
            >
        """).modify(callback=gdaltest_fail_reason_to_assert),
        # Replace further post_reason calls and skip/fail returns
        4: lambda q: (
            q.select("""
                    any<
                        any*
                        post_reason_call=simple_stmt<
                            power<
                                (
                                    "gdaltest" trailer< "." "post_reason" >
                                |
                                    "post_reason"
                                )
                                trailer< "(" reason=any ")" >
                            >
                            any
                        >
                        any*
                        return_stmt=simple_stmt<
                            return_call=return_stmt< "return" returntype=STRING >
                            any
                        >
                        any*
                    >
                """).modify(callback=gdaltest_other_skipfails)
            # (still part of step 4)
            # same as above, but get the reason from `print(reason)`
            # if we didn't find a post_reason clause.
            # (and, now, the reason is optional)
            .select("""
                    any<
                        any*
                        [
                            post_reason_call=simple_stmt<
                                power<
                                    (
                                        "print"
                                    )
                                    trailer< "(" reason=any ")" >
                                >
                                any
                            >
                            any*
                        ]
                        return_stmt=simple_stmt<
                            return_call=return_stmt< "return" returntype=STRING >
                            any
                        >
                        any*
                    >
                """).modify(callback=gdaltest_other_skipfails)),
        # Remove all `return 'success'`, or convert ternary ones to asserts.
        5: lambda q: q.select("""
            simple_stmt<
                return_call=return_stmt< "return"
                    (
                        test<
                            true_result=STRING "if" comparison=any "else" false_result=STRING
                        >
                    |
                        returnvalue=STRING
                    )
                >
                any
            >
            """).modify(callback=remove_return_success),
        # Remove gdaltest_list from each test module
        6: lambda q: q.select("""
            simple_stmt<
                expr_stmt< "gdaltest_list" "=" atom< "["
                    [ testnames=( listmaker | NAME ) ]
                "]" > >
                any
            >
            """).modify(remove_test_lists),
        # Remove the __main__ block from each test module
        7: lambda q: q.select("""
            if_stmt<
                "if"
                comparison< "__name__" "==" "'__main__'" >
                any*
            >
            """).modify(remove_main_block),
        # Find pytest.fail() inside `try` blocks
        # where the 'except' bit is just "pass",
        # and turn them into `with pytest.raises(...)` blocks
        8: lambda q: q.select("""
            try_stmt<
                "try" ":"
                try_suite=suite<
                    any any
                    any*
                    fail_stmt=simple_stmt<
                        power<
                            "pytest"
                            trailer< "." "fail" >
                            trailer< "(" reason=any* ")" >
                        >
                        any
                    >
                    any
                >
                ("except" | except_clause< "except" exc_class=NAME any* > ) ":"
                suite<
                    any any
                    simple_stmt<
                        "pass"
                        any
                    >
                    dedent=any
                >
            >
            """).modify(make_pytest_raises_blocks),
    }

    if args.step is not None:
        # A single, explicitly requested step.
        query = steps[args.step](query)
    else:
        # No step requested: chain all of them in ascending order.
        for i in sorted(steps):
            query = steps[i](query)

    query.execute(
        # interactive diff implies write (for the bits the user says 'y' to)
        interactive=(args.interactive and args.write),
        write=args.write,
        silent=args.silent,
    )