Example #1
    def update_datajob_stack_resources(self, resource: object) -> None:
        """add  a DataJob resource to the DataJob stack resources variable.

        Args:
            resource: A DataJobBase implementation. we cannot reference it here explicitly
            in the typing, because then we  have a circular dependency conflict.

        Returns: None
        """
        logger.info(f"adding job {self} to stack workflow resources")
        self.resources.append(resource)
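The method above only appends to a list; the sketch below is a self-contained, hypothetical illustration of the same registration pattern (FakeStack and FakeJob are made-up names, not the datajob classes), showing why the stack collects resources: so it can create them all later.

# Self-contained sketch of the registration pattern above, with hypothetical
# FakeStack/FakeJob classes (not the actual datajob constructs).
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class FakeStack:
    def __init__(self) -> None:
        self.resources = []

    def update_datajob_stack_resources(self, resource: object) -> None:
        # same behaviour as Example #1: collect every resource added to the stack
        logger.info(f"adding job {resource} to stack workflow resources")
        self.resources.append(resource)

    def create_resources(self) -> None:
        # hypothetical step: the stack later iterates the registered resources
        for resource in self.resources:
            resource.create()


class FakeJob:
    def __init__(self, stack: FakeStack, name: str) -> None:
        self.name = name
        stack.update_datajob_stack_resources(self)  # register on construction

    def create(self) -> None:
        logger.info(f"creating {self.name}")


stack = FakeStack()
FakeJob(stack, "my-glue-job")
stack.create_resources()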
Example #2
    def __init__(self, datajob_stack, name, **kwargs):
        super().__init__(datajob_stack, name, **kwargs)
        assert isinstance(
            datajob_stack, DataJobStack
        ), f"we expect the datajob_stack argument to be of type {DataJobStack}"
        self.name = name
        self.project_root = datajob_stack.project_root
        self.stage = datajob_stack.stage
        self.unique_name = f"{datajob_stack.unique_stack_name}-{self.name}"
        self.context = datajob_stack.context
        logger.info(f"adding job {self} to stack workflow resources")
        datajob_stack.resources.append(self)
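As a small worked illustration of the naming convention above (values are made up), the construct's unique_name is the stack's unique name joined with the construct name:

unique_stack_name = "datajob-demo-dev"
name = "extract"
unique_name = f"{unique_stack_name}-{name}"
assert unique_name == "datajob-demo-dev-extract"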
Example #3
    def __init__(
        self,
        datajob_stack: core.Construct,
        name: str,
        job_path: str,
        job_type: str = GlueJobType.PYTHONSHELL.value,
        glue_version: str = None,
        max_capacity: int = None,
        arguments: dict = None,
        python_version: str = "3",
        role: iam.Role = None,
        worker_type: str = None,
        number_of_workers: int = None,
        *args,
        **kwargs,
    ):
        """
        :param datajob_stack: aws cdk core construct object.
        :param name: a name for this glue job (will appear on the glue console).
        :param job_path: the path to the glue job relative to the project root.
        :param job_type: choose "pythonshell" for plain python or "glueetl" for a spark cluster; "pythonshell" is the default.
        :param glue_version: at the time of writing, choose 1.0 for pythonshell and 2.0 for glueetl (spark).
        :param max_capacity: the maximum number of nodes we want to run.
        :param arguments: the arguments for this glue job as a dict.
        :param python_version: the python version to use; 3 is the default.
        :param role: you can provide a cdk iam role object. if not provided, this class will instantiate a role.
        :param worker_type: you can provide a worker type: Standard / G.1X / G.2X.
        :param number_of_workers: for pythonshell this is 0.0625 or 1; for glueetl this is minimum 2.
        :param args: any extra args for the glue.CfnJob.
        :param kwargs: any extra kwargs for the glue.CfnJob.
        """
        logger.info(f"creating glue job {name}")
        super().__init__(datajob_stack, name, **kwargs)
        self.job_path = GlueJob._get_job_path(self.project_root, job_path)
        self.arguments = arguments or {}
        self.job_type = GlueJob._get_job_type(job_type=job_type)
        self.python_version = python_version
        self.glue_version = GlueJob._get_glue_version(
            glue_version=glue_version, job_type=job_type
        )
        self.max_capacity = max_capacity
        self.role = self._get_role(role, self.unique_name)
        self.worker_type = worker_type
        self.number_of_workers = number_of_workers
        self.args = args
        self.kwargs = kwargs
        logger.info(f"glue job {name} created.")
Example #4
def _execute_packaging_logic(project_root: str, config_file: str, cmd: str) -> None:
    """
    check if the config file exists in the project root and execute the command to
    create a wheel.
    :param project_root: the path to the root of your project.
    :param config_file: the confgi file to package the project as a wheel (setup.py or pyproject.toml)
    :param cmd: the command to execute to create a wheel.
    :return: None
    """
    config_file_full_path = Path(project_root, config_file)
    logger.info(f"expecting {config_file_full_path}")
    if not config_file_full_path.is_file():
        raise DatajobPackageWheelError(
            f"no {config_file} file detected in project root {project_root}. "
            f"Hence we cannot create a python wheel for this project"
        )

    logger.debug(f"found a {config_file} file in {project_root}")
    call_subprocess(cmd=cmd)
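A hedged example call of the helper in Example #4 follows; the project path and the wheel-building command string are illustrative assumptions, not the command datajob actually uses.

# Hypothetical invocation of _execute_packaging_logic from Example #4.
# The path and the command string are made-up, illustrative values.
_execute_packaging_logic(
    project_root="/home/user/my-project",
    config_file="setup.py",
    cmd="cd /home/user/my-project && python setup.py bdist_wheel",
)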
Example #5
    def __init__(
        self,
        scope: core.Construct,
        unique_stack_name: str,
        project_root: str = None,
        include_folder: str = None,
        **kwargs,
    ) -> None:
        """
        :param scope: aws cdk core construct object.
        :param unique_stack_name: a unique name for this stack. like this the name of our resources will not collide with other deployments.
        :param project_root: the path to the root of this project
        :param include_folder: specify the name of the folder we would like to include in the deployment bucket.
        """
        logger.info("creating datajob context.")
        super().__init__(scope, unique_stack_name, **kwargs)
        self.project_root = project_root
        self.unique_stack_name = unique_stack_name
        (
            self.deployment_bucket,
            self.deployment_bucket_name,
        ) = self._create_deployment_bucket(self.unique_stack_name)
        (self.data_bucket, self.data_bucket_name) = self._create_data_bucket(
            self.unique_stack_name)
        self.s3_url_wheel = None
        if self.project_root:
            self.s3_url_wheel = self._deploy_wheel(
                self.unique_stack_name,
                self.project_root,
                self.deployment_bucket,
                self.deployment_bucket_name,
            )

        if include_folder:
            self._deploy_local_folder(include_folder)
        logger.info("datajob context created.")
Example #6
    def __init__(
        self,
        scope: core.Construct,
        project_root: str = None,
        include_folder: str = None,
        **kwargs,
    ) -> None:
        """
        :param scope: aws cdk core construct object.
        :param stage: stage from DataJobStack.
        :param project_root: the path to the root of this project
        :param include_folder: specify the name of the folder we would like to include in the deployment bucket.
        """
        logger.info("creating datajob context.")
        self.unique_stack_name = scope.unique_stack_name
        super().__init__(scope, self.unique_stack_name, **kwargs)
        self.stage = scope.stage
        self.bucket_suffix = None
        self.project_root = project_root
        (
            self.deployment_bucket,
            self.deployment_bucket_name,
        ) = self._create_deployment_bucket()
        (self.data_bucket, self.data_bucket_name) = self._create_data_bucket()
        self.s3_url_wheel = None
        if self.project_root:
            self.s3_url_wheel = self._deploy_wheel(
                self.unique_stack_name,
                self.project_root,
                self.deployment_bucket,
                self.deployment_bucket_name,
            )

        if include_folder:
            self._deploy_local_folder(include_folder)
        logger.info("datajob context created.")

    def __enter__(self):
        """first steps we have to do when entering the context manager."""
        logger.info(f"creating step functions workflow for {self.unique_name}")
        _set_workflow(self)
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """steps we have to do when exiting the context manager."""
        self.build_workflow()
        _set_workflow(None)
        logger.info(f"step functions workflow {self.unique_name} created")
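The __enter__/__exit__ pair above turns the workflow construct into a context manager: entering registers it as the current workflow, exiting builds it. The sketch below is a self-contained, hypothetical illustration of that pattern (FakeWorkflow and the module-level _current_workflow are made-up names, not the datajob implementation).

# Self-contained sketch of the context-manager pattern above.
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

_current_workflow = None


def _set_workflow(workflow) -> None:
    global _current_workflow
    _current_workflow = workflow


class FakeWorkflow:
    def __init__(self, unique_name: str) -> None:
        self.unique_name = unique_name

    def build_workflow(self) -> None:
        logger.info(f"building workflow {self.unique_name}")

    def __enter__(self):
        logger.info(f"creating step functions workflow for {self.unique_name}")
        _set_workflow(self)
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.build_workflow()
        _set_workflow(None)
        logger.info(f"step functions workflow {self.unique_name} created")


with FakeWorkflow("demo-workflow") as workflow:
    pass  # tasks would be chained here while the workflow is "current"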