def execute(self, context, **kwargs):
     """
     Create the configured bucket unless it is already present.

     :param context: Airflow task context (not read by this method)
     :param kwargs: Extra keyword arguments passed on to ``create_bucket``
     :return: None
     """
     hook = OCIObjectStorageHook(
         compartment_id=self.compartment_id,
         bucket_name=self.bucket_name,
         oci_conn_id=self.oci_conn_id,
         namespace_name=self.namespace_name)
     self._oci_hook = hook
     client = hook.get_client(self.oci_client)
     self.log.info("Validating OCI Config")
     hook.validate_config()
     # No namespace supplied by the caller: ask the hook for one.
     if not self.namespace_name:
         self.namespace_name = hook.get_namespace()
     bucket = self.bucket_name
     details = oci.object_storage.models.CreateBucketDetails(
         compartment_id=self.compartment_id, name=bucket)
     self.log.info("Checking if Bucket {} exists".format(bucket))
     already_there = hook.check_for_bucket(
         namespace_name=self.namespace_name, bucket_name=bucket)
     if already_there is True:
         self.log.info("Bucket {0} exists, skipping creation".format(bucket))
         return
     self.log.info("Creating Bucket {0} in {1}".format(
         bucket, self.namespace_name))
     client.create_bucket(namespace_name=self.namespace_name,
                          create_bucket_details=details,
                          **kwargs)
     self.log.info("Create bucket complete")
 def execute(self, context, **kwargs):
     """
     Read an object from Object Storage and write it through a DB connection.

     Builds an ``OCIDBHook`` and an ``OCIObjectStorageHook``, relocalizes
     ``sqlnet.ora``, opens a SQLAlchemy connection, reads ``object_name``
     from ``bucket_name`` and hands the contents to
     ``pd.DataFrameFactory(conn).write`` with ``name=object_name`` and
     ``if_exists='replace'``.

     :param context: Airflow task context (not read by this method)
     :param kwargs: Accepted for interface compatibility; not used
     :raises AirflowException: logged and then re-raised unchanged
     """
     try:
         self._oci_hook = OCIDBHook(compartment_ocid=self.compartment_id,
                                    db_name=self.db_name,
                                    db_workload=self.db_workload,
                                    tns_admin_root=self.tns_admin_root,
                                    wallet_location=self.wallet_location)
         self._oci_storage_hook = OCIObjectStorageHook(
             compartment_id=self.compartment_id,
             bucket_name=self.bucket_name)
         self.log.info("Relocalizing sqlnet.ora")
         self._oci_hook.relocalize_sqlnet()
         self.log.info("Sqlnet.ora relocalized to {0}".format(
             self.tns_admin_root))
         self.log.info("Establishing DB Connection")
         with self._oci_hook.connect_sqlalchemy(
                 dsn=self.dsn, user=self.user_id,
                 password=self.password) as conn:
             namespace = self._oci_storage_hook.get_namespace(
                 compartment_id=self.compartment_id)
             object_contents = self._oci_storage_hook.read_from_bucket(
                 bucket_name=self.bucket_name,
                 namespace_name=namespace,
                 object_name=self.object_name)
             dff = pd.DataFrameFactory(conn)
             dff.write(object_contents,
                       name=self.object_name,
                       if_exists='replace')
     except AirflowException as e:
         # AirflowException has no boto-style ``response`` mapping; the old
         # ``e.response["Error"]["Message"]`` lookup raised AttributeError
         # inside the handler and masked the real failure.  Log the actual
         # error and let it propagate so the task is marked failed.
         self.log.error(str(e))
         raise
 def execute(self, context, **kwargs):
     """
     Fetch ``object_name`` from ``bucket_name``.

     :param context: Airflow task context (not read by this method)
     :param kwargs: Extra keyword arguments passed to both
         ``check_for_object`` and ``get_object``
     :return: whatever ``client.get_object`` returns
     :raises AirflowException: when the existence check does not return True
     """
     hook = OCIObjectStorageHook(
         compartment_id=self.compartment_id,
         bucket_name=self.bucket_name,
         oci_conn_id=self.oci_conn_id)
     self._oci_hook = hook
     client = hook.get_client(self.oci_client)
     self.log.info("Validating OCI Config")
     hook.validate_config()
     # No namespace supplied by the caller: ask the hook for one.
     if not self.namespace_name:
         self.namespace_name = hook.get_namespace()
     self.log.info("Checking if {0} exists in {1}".format(
         self.object_name, self.bucket_name))
     found = hook.check_for_object(
         namespace_name=self.namespace_name,
         bucket_name=self.bucket_name,
         object_name=self.object_name,
         **kwargs)
     # Guard clause: anything other than a literal True counts as missing.
     if found is not True:
         raise AirflowException("{0} does not exist in {1}".format(
             self.object_name, self.bucket_name))
     self.log.info("Reading {0} from {1}".format(
         self.object_name, self.bucket_name))
     return client.get_object(namespace_name=self.namespace_name,
                              object_name=self.object_name,
                              bucket_name=self.bucket_name,
                              **kwargs)
Exemplo n.º 4
0
class CopyFromOCIObjectStorage(BaseOperator):
    """
    Read an object from OCI object store

    :param bucket_name: Name of the bucket holding the object
    :type bucket_name: str
    :param compartment_id: Compartment ID
    :type compartment_id: str
    :param object_name: Name of the object to read
    :type object_name: str
    :param oci_conn_id: Airflow connection ID
    :type oci_conn_id: str
    """
    @apply_defaults
    def __init__(self,
                 bucket_name: str,
                 compartment_id: str,
                 object_name: str,
                 oci_conn_id: Optional[str] = "oci_default",
                 *args,
                 **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # The hook is built lazily in execute().
        self._oci_hook = None
        self.oci_client = oci.object_storage.ObjectStorageClient
        self.oci_conn_id = oci_conn_id
        self.compartment_id = compartment_id
        self.bucket_name = bucket_name
        self.object_name = object_name

    def execute(self, context, **kwargs):
        """
        Fetch ``object_name`` from ``bucket_name``.

        :param context: Airflow task context (not read by this method)
        :param kwargs: Extra keyword arguments passed to both
            ``check_for_object`` and ``get_object``
        :return: whatever ``client.get_object`` returns
        :raises AirflowException: when the existence check does not return True
        """
        hook = OCIObjectStorageHook(
            compartment_id=self.compartment_id,
            bucket_name=self.bucket_name,
            oci_conn_id=self.oci_conn_id)
        self._oci_hook = hook
        client = hook.get_client(self.oci_client)
        self.log.info("Validating OCI Config")
        hook.validate_config()
        namespace = hook.get_namespace()
        self.log.info("Checking if {0} exists in {1}".format(
            self.object_name, self.bucket_name))
        found = hook.check_for_object(
            namespace_name=namespace,
            bucket_name=self.bucket_name,
            object_name=self.object_name,
            **kwargs)
        # Guard clause: anything other than a literal True counts as missing.
        if found is not True:
            raise AirflowException("{0} does not exist in {1}".format(
                self.object_name, self.bucket_name))
        self.log.info("Reading {0} from {1}".format(
            self.object_name, self.bucket_name))
        return client.get_object(namespace_name=namespace,
                                 object_name=self.object_name,
                                 bucket_name=self.bucket_name,
                                 **kwargs)
class MakeBucket(BaseOperator):
    """
    Create a Bucket in OCI object store

    :param bucket_name: Name of bucket
    :type bucket_name: str
    :param compartment_ocid: Compartment ID
    :type compartment_ocid: str
    :param namespace_name: Object storage namespace; looked up through the
        hook at execution time when not given
    :type namespace_name: str
    :param oci_conn_id: Airflow connection ID
    :type oci_conn_id: str
    """
    @apply_defaults
    def __init__(self,
                 bucket_name: str,
                 compartment_ocid: str,
                 namespace_name: Optional[str] = None,
                 oci_conn_id: Optional[str] = "oci_default",
                 *args,
                 **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # The hook is built lazily in execute().
        self._oci_hook = None
        self.oci_client = oci.object_storage.ObjectStorageClient
        self.oci_conn_id = oci_conn_id
        self.namespace_name = namespace_name
        self.compartment_id = compartment_ocid
        self.bucket_name = bucket_name

    def execute(self, context, **kwargs):
        """
        Create the configured bucket unless it is already present.

        :param context: Airflow task context (not read by this method)
        :param kwargs: Extra keyword arguments passed on to ``create_bucket``
        :return: None
        """
        hook = OCIObjectStorageHook(
            compartment_id=self.compartment_id,
            bucket_name=self.bucket_name,
            oci_conn_id=self.oci_conn_id,
            namespace_name=self.namespace_name)
        self._oci_hook = hook
        client = hook.get_client(self.oci_client)
        self.log.info("Validating OCI Config")
        hook.validate_config()
        # No namespace supplied by the caller: ask the hook for one.
        if not self.namespace_name:
            self.namespace_name = hook.get_namespace()
        bucket = self.bucket_name
        details = oci.object_storage.models.CreateBucketDetails(
            compartment_id=self.compartment_id, name=bucket)
        self.log.info("Checking if Bucket {} exists".format(bucket))
        already_there = hook.check_for_bucket(
            namespace_name=self.namespace_name, bucket_name=bucket)
        if already_there is True:
            self.log.info(
                "Bucket {0} exists, skipping creation".format(bucket))
            return
        self.log.info("Creating Bucket {0} in {1}".format(
            bucket, self.namespace_name))
        client.create_bucket(namespace_name=self.namespace_name,
                             create_bucket_details=details,
                             **kwargs)
        self.log.info("Create bucket complete")
 def execute(self, context, **kwargs):
     """
     Upload a local file to the bucket, creating the bucket if needed.

     The bucket is created when ``check_for_bucket`` does not return True.
     If the object already exists nothing is uploaded.  Otherwise the file
     found at ``local_file_path`` + ``object_name`` is opened in binary mode
     and passed to the hook's ``copy_to_bucket``.  A missing directory or
     file is logged as an error; no exception is raised for it.

     :param context: Airflow task context (not read by this method)
     :param kwargs: Extra keyword arguments passed on to ``copy_to_bucket``
     :return: None
     """
     self._oci_hook = OCIObjectStorageHook(
         compartment_id=self.compartment_id,
         bucket_name=self.bucket_name,
         oci_conn_id=self.oci_conn_id)
     client = self._oci_hook.get_client(self.oci_client)
     self.log.info("Validating OCI Config")
     self._oci_hook.validate_config()
     if not self.namespace_name:
         self.namespace_name = self._oci_hook.get_namespace()
     details = oci.object_storage.models.CreateBucketDetails(
         compartment_id=self.compartment_id, name=self.bucket_name)
     self.log.info("Checking if Bucket {} exists".format(self.bucket_name))
     bucket_exists = self._oci_hook.check_for_bucket(
         namespace_name=self.namespace_name, bucket_name=self.bucket_name)
     if bucket_exists is True:
         self.log.info("Bucket {0} exists, skipping creation".format(
             self.bucket_name))
     else:
         self.log.info("Creating Bucket {0} in {1}".format(
             self.bucket_name, self.namespace_name))
         client.create_bucket(namespace_name=self.namespace_name,
                              create_bucket_details=details)
         self.log.info("Create bucket complete")
     self.log.info("Checking if {0} exists in {1}".format(
         self.object_name, self.bucket_name))
     object_exists = self._oci_hook.check_for_object(
         namespace_name=self.namespace_name,
         bucket_name=self.bucket_name,
         object_name=self.object_name)
     if object_exists is True:
         self.log.info("Object {0} exists already in {1}".format(
             self.object_name, self.bucket_name))
         return
     self.log.info("Validating local file {0} exists".format(
         self.object_name))
     if not path.exists(self.local_file_path):
         self.log.error("Local file path {0} does not exist".format(
             self.local_file_path))
         return
     # join(dir, "") appends a separator only when one is missing, so a
     # directory given without a trailing slash no longer produces
     # "/some/dirfile"; paths that already end in a separator are unchanged.
     self.local_file = path.join(self.local_file_path, "") + self.object_name
     if not path.exists(self.local_file):
         self.log.error("Local file {0} does not exist".format(
             self.local_file))
         return
     self.log.info("Copying {0} to {1}".format(
         self.local_file, self.bucket_name))
     # Context manager closes the handle even if the upload raises; the
     # original left the file open.
     with open(self.local_file, 'rb') as body:
         # Attribute kept for backward compatibility; the handle is closed
         # once the upload returns.
         self.put_object_body = body
         self._oci_hook.copy_to_bucket(
             bucket_name=self.bucket_name,
             namespace_name=self.namespace_name,
             object_name=self.object_name,
             put_object_body=body,
             **kwargs)
Exemplo n.º 7
0
 def get_oci_hook(self):
     """
     Return the cached OCI hook, building it on first use.

     :return: the hook stored on ``self._oci_hook``
     """
     # Fast path: a hook was already built (or injected) earlier.
     if self._oci_hook:
         return self._oci_hook
     self._oci_hook = OCIObjectStorageHook(
         bucket_name=self.bucket_name,
         compartment_id=self.compartment_id,
         oci_conn_id=self.oci_conn_id,
         verify=self.verify)
     return self._oci_hook
Exemplo n.º 8
0
class OCIDBCopyFromObject(BaseOperator):
    """
    Copy data from a file in Object Storage into OCI ADB/ADW

    :param compartment_ocid: Target compartment OCID
    :type compartment_ocid: str
    :param bucket_name: Name of the bucket holding the object
    :type bucket_name: str
    :param object_name: Name of the object to read; also passed as ``name``
        when the contents are written
    :type object_name: str
    :param tns_admin_root: The wallet root directory.  The wallet will be loaded from $TNS_ADMIN/sqlnet.ora.
        If you do not set tns_admin_root, it is assumed to be in your environment.
    :type tns_admin_root: str
    :param database_ocid: Database ID
    :type database_ocid: str
    :param db_workload: DB Workload type, valid options are DW or OLTP
    :type db_workload: str
    :param db_name: Database Name (Not display)
    :type db_name: str
    :param debug: Whether to display debug output
    :type debug: bool
    :param dsn: DSN (TNS Name) for connection
    :type dsn: str
    :param oci_conn_id: Airflow connection ID
    :type oci_conn_id: str
    :param oci_region: Target OCI Region
    :type oci_region: str
    :param password: Database password for user_id
    :type password: str
    :param user_id: User ID for Database login
    :type user_id: str
    :param wallet_location: Filesystem location for wallet files
    :type wallet_location: str
    """

    @apply_defaults
    def __init__(self,
                 compartment_ocid: str,
                 bucket_name: str,
                 object_name: str,
                 tns_admin_root: Optional[str] = None,
                 database_ocid: Optional[str] = None,
                 db_workload: Optional[str] = None,
                 db_name: Optional[str] = None,
                 debug: Optional[bool] = False,
                 dsn: Optional[str] = None,
                 oci_conn_id: Optional[str] = "oci_default",
                 oci_region: Optional[str] = None,
                 password: Optional[str] = None,
                 user_id: Optional[str] = None,
                 wallet_location: Optional[str] = None,
                 *args,
                 **kwargs):
        super(OCIDBCopyFromObject, self).__init__(*args, **kwargs)
        self.compartment_id = compartment_ocid
        self.bucket_name = bucket_name
        self.object_name = object_name
        self.tns_admin_root = tns_admin_root
        self.database_id = database_ocid
        self.db_workload = db_workload
        self.db_name = db_name
        self.debug = debug
        self.dsn = dsn
        self.oci_conn_id = oci_conn_id
        self.oci_region = oci_region
        self.password = password
        self.user_id = user_id
        self.wallet_location = wallet_location
        # Both hooks are built in execute().
        self._oci_hook = None
        self._oci_storage_hook = None
        self.oci_client = oci.database.DatabaseClient

    def execute(self, context, **kwargs):
        """
        Read the object from Object Storage and write it through a DB connection.

        Builds an ``OCIDBHook`` and an ``OCIObjectStorageHook``, relocalizes
        ``sqlnet.ora``, opens a SQLAlchemy connection, reads ``object_name``
        from ``bucket_name`` and hands the contents to
        ``pd.DataFrameFactory(conn).write`` with ``name=object_name`` and
        ``if_exists='replace'``.

        :param context: Airflow task context (not read by this method)
        :param kwargs: Accepted for interface compatibility; not used
        :raises AirflowException: logged and then re-raised unchanged
        """
        try:
            self._oci_hook = OCIDBHook(compartment_ocid=self.compartment_id, db_name=self.db_name,
                                       db_workload=self.db_workload, tns_admin_root=self.tns_admin_root,
                                       wallet_location=self.wallet_location)
            self._oci_storage_hook = OCIObjectStorageHook(compartment_id=self.compartment_id,
                                                          bucket_name=self.bucket_name)
            self.log.info("Relocalizing sqlnet.ora")
            self._oci_hook.relocalize_sqlnet()
            self.log.info("Sqlnet.ora relocalized to {0}".format(self.tns_admin_root))
            self.log.info("Establishing DB Connection")
            with self._oci_hook.connect_sqlalchemy(dsn=self.dsn, user=self.user_id, password=self.password) as conn:
                namespace = self._oci_storage_hook.get_namespace(compartment_id=self.compartment_id)
                object_contents = self._oci_storage_hook.read_from_bucket(bucket_name=self.bucket_name,
                                                                          namespace_name=namespace,
                                                                          object_name=self.object_name)
                dff = pd.DataFrameFactory(conn)
                dff.write(object_contents, name=self.object_name, if_exists='replace')
        except AirflowException as e:
            # AirflowException has no boto-style ``response`` mapping; the old
            # ``e.response["Error"]["Message"]`` lookup raised AttributeError
            # inside the handler and masked the real failure.  Log the actual
            # error and let it propagate so the task is marked failed.
            self.log.error(str(e))
            raise
class CopyFileToOCIObjectStorageOperator(BaseOperator):
    """
    Copy local file to OCI object store

    :param bucket_name: Name of bucket
    :type bucket_name: str
    :param compartment_ocid: Compartment ID
    :type compartment_ocid: str
    :param object_name: Object name - must match local file
    :type object_name: str
    :param local_file_path: Directory containing the local file
    :type local_file_path: str
    :param namespace_name: Object storage namespace; looked up through the
        hook at execution time when not given
    :type namespace_name: str
    :param oci_conn_id: Airflow connection ID
    :type oci_conn_id: str
    :param overwrite: Overwrite files if they exist
    :type overwrite: bool
    """
    @apply_defaults
    def __init__(self,
                 bucket_name: str,
                 compartment_ocid: str,
                 object_name: str,
                 local_file_path: str,
                 namespace_name: Optional[str] = None,
                 oci_conn_id: Optional[str] = "oci_default",
                 overwrite: Optional[bool] = False,
                 *args,
                 **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.bucket_name = bucket_name
        self.compartment_id = compartment_ocid
        self.namespace_name = namespace_name
        self.object_name = object_name
        self.local_file_path = local_file_path
        self.oci_conn_id = oci_conn_id
        self.overwrite = overwrite
        # The hook is built in execute().
        self._oci_hook = None
        self.oci_client = oci.object_storage.ObjectStorageClient

    def execute(self, context, **kwargs):
        """
        Upload the local file to the bucket, creating the bucket if needed.

        The bucket is created when ``check_for_bucket`` does not return True.
        The file is uploaded when the object is absent, or when it exists and
        ``overwrite`` is True; otherwise the existing object is left alone.

        :param context: Airflow task context (not read by this method)
        :param kwargs: Extra keyword arguments passed on to ``copy_to_bucket``
        :return: None
        """
        self._oci_hook = OCIObjectStorageHook(
            compartment_id=self.compartment_id,
            bucket_name=self.bucket_name,
            oci_conn_id=self.oci_conn_id)
        client = self._oci_hook.get_client(self.oci_client)
        self.log.info("Validating OCI Config")
        self._oci_hook.validate_config()
        if not self.namespace_name:
            self.namespace_name = self._oci_hook.get_namespace()
        details = oci.object_storage.models.CreateBucketDetails(
            compartment_id=self.compartment_id, name=self.bucket_name)
        self.log.info("Checking if Bucket {} exists".format(self.bucket_name))
        bucket_exists = self._oci_hook.check_for_bucket(
            namespace_name=self.namespace_name, bucket_name=self.bucket_name)
        if bucket_exists is True:
            self.log.info("Bucket {0} exists, skipping creation".format(
                self.bucket_name))
        else:
            self.log.info("Creating Bucket {0} in {1}".format(
                self.bucket_name, self.namespace_name))
            client.create_bucket(namespace_name=self.namespace_name,
                                 create_bucket_details=details)
            self.log.info("Create bucket complete")
        self.log.info("Checking if {0} exists in {1}".format(
            self.object_name, self.bucket_name))
        object_exists = self._oci_hook.check_for_object(
            namespace_name=self.namespace_name,
            bucket_name=self.bucket_name,
            object_name=self.object_name)
        # Only an existing object with overwrite disabled skips the upload.
        if object_exists is True and self.overwrite is not True:
            self.log.info("Object {0} exists already in {1}".format(
                self.object_name, self.bucket_name))
            return
        self._upload_local_file(**kwargs)

    def _upload_local_file(self, **kwargs):
        """Upload ``local_file_path`` + ``object_name``; log an error if it is missing."""
        self.log.info("Validating local file {0} exists".format(
            self.object_name))
        if not path.exists(self.local_file_path):
            self.log.error("Local file path {0} does not exist".format(
                self.local_file_path))
            return
        # join(dir, "") appends a separator only when one is missing, so a
        # directory given without a trailing slash no longer produces
        # "/some/dirfile"; paths that already end in a separator are unchanged.
        self.local_file = path.join(self.local_file_path, "") + self.object_name
        if not path.exists(self.local_file):
            self.log.error("Local file {0} does not exist".format(
                self.local_file))
            return
        self.log.info("Copying {0} to {1}".format(
            self.local_file, self.bucket_name))
        # Context manager closes the handle even if the upload raises; the
        # original left the file open.
        with open(self.local_file, 'rb') as body:
            # Attribute kept for backward compatibility; the handle is closed
            # once the upload returns.
            self.put_object_body = body
            self._oci_hook.copy_to_bucket(
                bucket_name=self.bucket_name,
                namespace_name=self.namespace_name,
                object_name=self.object_name,
                put_object_body=body,
                **kwargs)
Exemplo n.º 10
0
 def execute(self, context):
     """
     Submit a Data Flow run and wait for it to reach a terminal state.

     Unset options are filled with defaults (no timeout, 30 second check
     interval, ``VM.Standard2.1`` executors, one executor, driver shape equal
     to the executor shape).  The warehouse bucket URI and application OCID
     are looked up through hooks when not set.  When ``runtime_callback`` is
     set, the mapping it returns for ``context`` overrides the run details.

     :param context: Airflow task context, passed to ``runtime_callback``
     :raises AirflowException: when the run ends in any state other than
         ``SUCCEEDED``
     :raises oci.exceptions.CompositeOperationError: re-raised after its
         cause is logged
     """
     self._oci_hook = OCIDataFlowHook(compartment_ocid=self.compartment_id,
                                      oci_conn_id=self.oci_conn_id,
                                      display_name=self.display_name)
     client = self._oci_hook.get_client(oci.data_flow.DataFlowClient)
     self.log.info("Validating OCI Config")
     self._oci_hook.validate_config()
     if not self.timeout:
         self.timeout = float('inf')
     if not self.check_interval:
         self.check_interval = 30
     if not self.executor_shape:
         self.executor_shape = 'VM.Standard2.1'
     if not self.num_executors:
         self.num_executors = 1
     if not self.driver_shape:
         self.driver_shape = self.executor_shape
     if not self.warehouse_bucket_uri:
         self.namespace = OCIObjectStorageHook(
             compartment_id=self.compartment_id,
             oci_conn_id=self.oci_conn_id,
             bucket_name=self.bucket_name).get_namespace()
         self.warehouse_bucket_uri = "oci://" + str(
             self.bucket_name) + "@" + str(self.namespace) + "/"
     if not self.application_id:
         self.application_id = OCIDataFlowHook(
             compartment_ocid=self.compartment_id,
             oci_conn_id=self.oci_conn_id,
             display_name=self.display_name).get_application_ocid()
     run_details = {
         "application_id": self.application_id,
         "compartment_id": self.compartment_id,
         "display_name": self.display_name,
         "executor_shape": self.executor_shape,
         "num_executors": self.num_executors,
         "driver_shape": self.driver_shape,
         "warehouse_bucket_uri": self.warehouse_bucket_uri,
         "logs_bucket_uri": self.logs_bucket_uri,
         "arguments": self.arguments,
         "parameters": self.parameters,
     }
     if self.runtime_callback is not None:
         # Callback values win over the statically configured ones.
         callback_settings = self.runtime_callback(context)
         run_details = {**run_details, **callback_settings}
     dataflow_run = oci.data_flow.models.CreateRunDetails(**run_details)
     try:
         submit_run = DataFlowClientCompositeOperations(client)
         response = submit_run.create_run_and_wait_for_state(
             create_run_details=dataflow_run,
             wait_for_states=["CANCELED", "SUCCEEDED", "FAILED"],
             waiter_kwargs={
                 "max_interval_seconds": self.check_interval,
                 "max_wait_seconds": self.timeout
             })
         if response.data.lifecycle_state != "SUCCEEDED":
             self.log.error(response.data.lifecycle_details)
             raise AirflowException(response.data.lifecycle_details)
         if self.log_run_output:
             # Fetching the run log is best-effort: a failure here must not
             # fail a run that already succeeded.
             try:
                 log_contents = client.get_run_log(
                     run_id=response.data.id,
                     name="spark_application_stdout.log.gz")
                 self.log.info("Data Flow Run Output:")
                 self.log.info(log_contents.data.text)
             except Exception:
                 # Was a bare ``except:``, which also swallowed
                 # KeyboardInterrupt and SystemExit.
                 self.log.info(
                     "Unable to fetch Run logs. This can be due to a missing IAM policy"
                 )
                 self.log.info(
                     "Data Flow needs a policy like \"allow service dataflow to read objects in tenancy where target.bucket.name='<bucket>'\" to read your logs"
                 )
                 self.log.info(
                     "See https://docs.cloud.oracle.com/en-us/iaas/data-flow/using/dfs_getting_started.htm#set_up_admin for more information"
                 )
     except oci.exceptions.CompositeOperationError as e:
         self.log.error(str(e.cause))
         # Bare raise keeps the original traceback intact.
         raise
Exemplo n.º 11
0
 def execute(self, context):
     """
     Create a Data Flow application unless one with this name exists.

     Unset options are filled with defaults (no timeout, 30 second check
     interval, ``VM.Standard2.1`` executors, one executor, driver shape equal
     to the executor shape, Spark ``2.4.4``).  When ``file_uri`` is not set
     it is built from the bucket name, the namespace reported by the Object
     Storage hook, and ``object_name``.

     A missing ``language`` is only logged as an error; creation is still
     attempted.

     :param context: Airflow task context (not read by this method)
     :raises AirflowException: logged and then re-raised unchanged
     """
     self._oci_hook = OCIDataFlowHook(compartment_ocid=self.compartment_id,
                                      oci_conn_id=self.oci_conn_id,
                                      display_name=self.display_name)
     client = self._oci_hook.get_client(oci.data_flow.DataFlowClient)
     self.log.info("Validating OCI Config")
     self._oci_hook.validate_config()
     if not self.timeout:
         self.timeout = float('inf')
     if not self.check_interval:
         self.check_interval = 30
     if not self.executor_shape:
         self.executor_shape = 'VM.Standard2.1'
     if not self.num_executors:
         self.num_executors = 1
     if not self.driver_shape:
         self.driver_shape = self.executor_shape
     if not self.file_uri:
         self.namespace = OCIObjectStorageHook(
             compartment_id=self.compartment_id,
             oci_conn_id=self.oci_conn_id,
             bucket_name=self.bucket_name).get_namespace()
         self.file_uri = "oci://" + str(self.bucket_name) + "@" + str(
             self.namespace) + "/" + str(self.object_name)
         self.log.info("File URI: {0}".format(self.file_uri))
     if not self.language:
         self.log.error("Application Language must be set")
     if not self.spark_version:
         self.spark_version = '2.4.4'
     # Pass the attributes straight through; the original built a dict and
     # then unpacked it key by key.
     dataflow_create = oci.data_flow.models.CreateApplicationDetails(
         compartment_id=self.compartment_id,
         display_name=self.display_name,
         driver_shape=self.driver_shape,
         executor_shape=self.executor_shape,
         file_uri=self.file_uri,
         language=self.language,
         num_executors=self.num_executors,
         spark_version=self.spark_version)
     try:
         # Use the task logger like the rest of the operator, not print().
         self.log.info("Checking if Application {0} exists".format(
             self.display_name))
         appcheck = self._oci_hook.check_for_application_by_name()
         if appcheck is True:
             self.log.error("Application {0} already exists".format(
                 self.display_name))
         else:
             self.log.info("Creating DataFlow Application {0}".format(
                 self.display_name))
             create_app = DataFlowClientCompositeOperations(client)
             create_app.create_application_and_wait_for_state(
                 create_application_details=dataflow_create,
                 wait_for_states=["ACTIVE"],
                 waiter_kwargs={
                     "max_interval_seconds": self.check_interval,
                     "max_wait_seconds": self.timeout
                 })
     except AirflowException as e:
         # AirflowException has no boto-style ``response`` mapping; the old
         # ``e.response["Error"]["Message"]`` lookup raised AttributeError
         # inside the handler and masked the real failure.  Log the actual
         # error and let it propagate so the task is marked failed.
         self.log.error(str(e))
         raise
Exemplo n.º 12
0
 def execute(self, context):
     """
     Submit a Data Flow run for an existing application and wait for it.

     Unset options are filled with defaults (no timeout, 30 second check
     interval, ``VM.Standard2.1`` executors, one executor, driver shape equal
     to the executor shape).  The warehouse bucket URI and application OCID
     are looked up through hooks when not set.  If no application with this
     display name exists, an error is logged and nothing is submitted.

     :param context: Airflow task context (not read by this method)
     :raises AirflowException: when the run ends in any state other than
         ``SUCCEEDED``, or when one is raised by the hook; logged and
         re-raised
     """
     self._oci_hook = OCIDataFlowHook(compartment_ocid=self.compartment_id,
                                      oci_conn_id=self.oci_conn_id,
                                      display_name=self.display_name)
     client = self._oci_hook.get_client(oci.data_flow.DataFlowClient)
     self.log.info("Validating OCI Config")
     self._oci_hook.validate_config()
     if not self.timeout:
         self.timeout = float('inf')
     if not self.check_interval:
         self.check_interval = 30
     if not self.executor_shape:
         self.executor_shape = 'VM.Standard2.1'
     if not self.num_executors:
         self.num_executors = 1
     if not self.driver_shape:
         self.driver_shape = self.executor_shape
     if not self.warehouse_bucket_uri:
         self.namespace = OCIObjectStorageHook(
             compartment_id=self.compartment_id,
             oci_conn_id=self.oci_conn_id,
             bucket_name=self.bucket_name).get_namespace()
         self.warehouse_bucket_uri = "oci://" + str(
             self.bucket_name) + "@" + str(self.namespace) + "/"
     if not self.application_id:
         self.application_id = OCIDataFlowHook(
             compartment_ocid=self.compartment_id,
             oci_conn_id=self.oci_conn_id,
             display_name=self.display_name).get_application_ocid()
     # Pass the attributes straight through; the original built a dict and
     # then unpacked it key by key.
     dataflow_run = oci.data_flow.models.CreateRunDetails(
         application_id=self.application_id,
         compartment_id=self.compartment_id,
         display_name=self.display_name,
         executor_shape=self.executor_shape,
         num_executors=self.num_executors,
         driver_shape=self.driver_shape,
         warehouse_bucket_uri=self.warehouse_bucket_uri)
     try:
         appcheck = self._oci_hook.check_for_application_by_name()
         if appcheck is True:
             # Use the task logger like the rest of the operator, not print().
             self.log.info("Submitting Data Flow Run")
             submit_run = DataFlowClientCompositeOperations(client)
             response = submit_run.create_run_and_wait_for_state(
                 create_run_details=dataflow_run,
                 wait_for_states=["CANCELED", "SUCCEEDED", "FAILED"],
                 waiter_kwargs={
                     "max_interval_seconds": self.check_interval,
                     "max_wait_seconds": self.timeout
                 })
             # The waiter also returns on CANCELED and FAILED; without this
             # check a failed run was reported as a successful task.
             if response.data.lifecycle_state != "SUCCEEDED":
                 raise AirflowException(response.data.lifecycle_details)
         else:
             self.log.error("Application {0} does not exist".format(
                 self.display_name))
     except AirflowException as e:
         # AirflowException has no boto-style ``response`` mapping; the old
         # ``e.response["Error"]["Message"]`` lookup raised AttributeError
         # inside the handler and masked the real failure.  Log the actual
         # error and let it propagate so the task is marked failed.
         self.log.error(str(e))
         raise
Exemplo n.º 13
0
class CopyToOCIObjectStorageOperator(BaseOperator):
    """
    Copy data to OCI object store

    :param bucket_name: Name of target bucket
    :type bucket_name: str
    :param compartment_ocid: Compartment ID
    :type compartment_ocid: str
    :param object_name: Object name to create in object store
    :type object_name: str
    :param put_object_body: Contents of object_name, handed unchanged to the
        hook's ``copy_to_bucket``
    :type put_object_body: str
    :param oci_conn_id: Airflow connection ID
    :type oci_conn_id: str
    """
    @apply_defaults
    def __init__(self,
                 bucket_name: str,
                 compartment_ocid: str,
                 object_name: str,
                 put_object_body: str,
                 oci_conn_id: Optional[str] = "oci_default",
                 *args,
                 **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # The hook is built lazily in execute().
        self._oci_hook = None
        self.oci_client = oci.object_storage.ObjectStorageClient
        self.oci_conn_id = oci_conn_id
        self.put_object_body = put_object_body
        self.object_name = object_name
        self.compartment_id = compartment_ocid
        self.bucket_name = bucket_name

    def execute(self, context, **kwargs):
        """
        Store ``put_object_body`` as ``object_name``, creating the bucket if needed.

        The bucket is created when ``check_for_bucket`` does not return True.
        Nothing is uploaded when the object is already present.

        :param context: Airflow task context (not read by this method)
        :param kwargs: Extra keyword arguments passed on to ``copy_to_bucket``
        :return: None
        """
        hook = OCIObjectStorageHook(
            compartment_id=self.compartment_id,
            bucket_name=self.bucket_name,
            oci_conn_id=self.oci_conn_id)
        self._oci_hook = hook
        client = hook.get_client(self.oci_client)
        self.log.info("Validating OCI Config")
        hook.validate_config()
        namespace = hook.get_namespace()
        details = oci.object_storage.models.CreateBucketDetails(
            compartment_id=self.compartment_id, name=self.bucket_name)

        # Step 1: make sure the bucket is there.
        self.log.info("Checking if Bucket {} exists".format(self.bucket_name))
        have_bucket = hook.check_for_bucket(
            namespace_name=namespace, bucket_name=self.bucket_name)
        if have_bucket is not True:
            self.log.info("Creating Bucket {0} in {1}".format(
                self.bucket_name, namespace))
            client.create_bucket(namespace_name=namespace,
                                 create_bucket_details=details)
            self.log.info("Create bucket complete")
        else:
            self.log.info("Bucket {0} exists, skipping creation".format(
                self.bucket_name))

        # Step 2: upload only when the object is not already present.
        self.log.info("Checking if {0} exists in {1}".format(
            self.object_name, self.bucket_name))
        have_object = hook.check_for_object(
            namespace_name=namespace,
            bucket_name=self.bucket_name,
            object_name=self.object_name)
        if have_object is True:
            self.log.info("Object {0} exists already in {1}".format(
                self.object_name, self.bucket_name))
            return
        self.log.info("Copying {0} to {1}".format(self.object_name,
                                                  self.bucket_name))
        hook.copy_to_bucket(bucket_name=self.bucket_name,
                            namespace_name=namespace,
                            object_name=self.object_name,
                            put_object_body=self.put_object_body,
                            **kwargs)