def __init__(self, subscription_id, rg_name, df_name, client_id, secret, tenant,
             mapping, sap_source, ir_reference, sap_con_string, sql_con_string):
    self.subscription_id = subscription_id
    self.rg_name = rg_name
    self.df_name = df_name
    self.client_id = client_id
    self.secret = secret
    self.tenant = tenant
    self.mapping = mapping
    self.schema, self.source = sap_source.split('.')
    self.rg_params = {'location': 'centralus'}
    self.df_params = {'location': 'centralus'}
    self.ds_name = 'RelationalTable1'
    self.dsOut_name = 'sap_hana_db'
    # Use the runtime passed by the caller, falling back to the previous hard-coded default.
    self.ir_reference = ir_reference or 'integrationRuntime3'
    # Expected shape: '{"host":"example.com","port":30015,"user":"******","password":"******"}'
    self.sap_con = json.loads(sap_con_string)
    self.sap_host = self.sap_con['host']
    self.sap_username = self.sap_con['user']
    self.sap_password = self.sap_con['password']
    self.sap_port = int(self.sap_con['port'])
    # 'Server=tcp:<<fqdbservername>>;Database=<<dbname>>;Uid=<<username>>@<<dbservername>>;Pwd=<<password>>;Encrypt=yes;Connection Timeout=30;'
    self.sql_con = sql_con_string
    credentials = ServicePrincipalCredentials(client_id=self.client_id,
                                              secret=self.secret,
                                              tenant=self.tenant)
    resource_client = ResourceManagementClient(credentials, subscription_id)
    self.adf_client = DataFactoryManagementClient(credentials, subscription_id)
def authToken(cId, cSecret, ten):
    try:
        credentials = ServicePrincipalCredentials(client_id=cId, secret=cSecret, tenant=ten)
    except Exception as e:
        print('Auth Token error: ', e)
        raise
    # The credentials object lazily fetches a management-plane token.
    return credentials.token['access_token']
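# A minimal sketch of the same token fetch with the newer azure-identity library, for comparison
# with authToken above. This is an assumption-labelled alternative, not the snippet's own method.
from azure.identity import ClientSecretCredential

def auth_token_v2(client_id, client_secret, tenant_id):
    credential = ClientSecretCredential(tenant_id=tenant_id,
                                        client_id=client_id,
                                        client_secret=client_secret)
    # Request a token for the Azure Resource Manager scope used by Data Factory management calls.
    return credential.get_token("https://management.azure.com/.default").token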
def __init__(
    self,
    specs_file: str,
    subscription_name: str,
    resource_group: str,
    factory_name: str,
    name: str,
    environment: str,
):
    self.resource_group = resource_group
    self.factory_name = factory_name
    self.name = name
    self.environment = environment
    self.credential = DefaultAzureCredential(
        exclude_visual_studio_code_credential=True)
    self.sub_client = SubscriptionClient(self.credential)
    self.subscription_id = self._get_subscription_id(subscription_name)
    self.adf_client = DataFactoryManagementClient(
        self.credential, subscription_id=self.subscription_id)
    self._specs = self._read_specs(specs_file, environment)
    self._config = self._get_config()
    self._verify_adf_setup()
def get_conn(self):
    """Return a cached DataFactoryManagementClient, creating it on first use."""
    if self._adf_client:
        return self._adf_client
    key_path = os.environ.get('AZURE_AUTH_LOCATION', False)
    if not key_path:
        conn = self.get_connection(self.conn_id)
        key_path = conn.extra_dejson.get('key_path', False)
    if key_path:
        self.log.info('Getting connection using a JSON key file.')
        self._adf_client = get_client_from_auth_file(
            DataFactoryManagementClient, key_path)
        return self._adf_client
    self.log.info('Getting connection using a service principal.')
    credentials = ServicePrincipalCredentials(
        client_id=conn.login,
        secret=conn.password,
        tenant=conn.extra_dejson['tenantId'])
    self._adf_client = DataFactoryManagementClient(
        credentials, conn.extra_dejson['subscriptionId'])
    return self._adf_client
def adf_client(adf_config):
    """Creates a DataFactoryManagementClient object."""
    credentials = ServicePrincipalCredentials(
        client_id=adf_config["AZ_SERVICE_PRINCIPAL_ID"],
        secret=adf_config["AZ_SERVICE_PRINCIPAL_SECRET"],
        tenant=adf_config["AZ_SERVICE_PRINCIPAL_TENANT_ID"])
    return DataFactoryManagementClient(credentials, adf_config["AZ_SUBSCRIPTION_ID"])
def adf_client(adf_config):
    """Creates a DataFactoryManagementClient object."""
    if adf_config["AZ_SERVICE_PRINCIPAL_ID"] is None:
        # Fall back to the Azure CLI login when no service principal is configured.
        credentials = AzureCliCredential()
    else:
        credentials = ClientSecretCredential(
            client_id=adf_config["AZ_SERVICE_PRINCIPAL_ID"],
            client_secret=adf_config["AZ_SERVICE_PRINCIPAL_SECRET"],
            tenant_id=adf_config["AZ_SERVICE_PRINCIPAL_TENANT_ID"])
    return DataFactoryManagementClient(credentials, adf_config["AZ_SUBSCRIPTION_ID"])
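# A possible usage sketch for the factory function above, assuming adf_config also carries the
# resource group, factory, and pipeline names under the (hypothetical) keys shown here.
def run_pipeline_once(adf_config, pipeline_name="my_pipeline"):
    client = adf_client(adf_config)
    run = client.pipelines.create_run(adf_config["AZ_RESOURCE_GROUP"],
                                      adf_config["AZ_DATA_FACTORY"],
                                      pipeline_name,
                                      parameters={})
    # Return the current status of the run we just started.
    return client.pipeline_runs.get(adf_config["AZ_RESOURCE_GROUP"],
                                    adf_config["AZ_DATA_FACTORY"],
                                    run.run_id).status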
def __init__(self): self.schema = '_SYS_BIC' self.source = 'Temp/VBAP_DEMO' self.source_type = 'VIEW' self.sap_con = '{"host" : "40.87.84.72","port" : 30215,"user" : "system","password" : "Metro#123"}' self.sql_con = 'Server=tcp:yashtesting.database.windows.net;Database=test;Uid=yash@yashtesting;Pwd=Myageis@20;Encrypt=yes;Connection Timeout=30;' self.sql_odbc_con = 'Driver={ODBC Driver 17 for SQL Server};' + self.sql_con self.subscription_id = '938ace66-9598-4029-b6bb-429929b03761' self.rg_name = 'celebal_rnd' self.df_name = 'celebaladf' self.client_id = 'b628371b-654f-4848-b214-c8553f2fc665' self.secret = '/JCA4now2LAn1/L4aa+ICfmTumPRryW.' self.tenant = 'e4e34038-ea1f-4882-b6e8-ccd776459ca0' self.rg_params = {'location': 'eastus'} self.df_params = {'location': 'eastus'} self.credentials = ServicePrincipalCredentials( client_id=self.client_id, secret=self.secret, tenant=self.tenant) self.resource_client = ResourceManagementClient( self.credentials, self.subscription_id) self.adf_client = DataFactoryManagementClient(self.credentials, self.subscription_id) self.blob_dataset = 'AzureBlob1' self.input_dataset = 'RelationalTable2' self.output_dataset = 'AzureSqlTable3' self.staging_ls = 'LS_Sap_Hana' self.staging_path = 'testazure' self.creation_date = "ERDAT" self.change_date = "AEDAT" self.timestamp_staging_table = '[dbo].[Sap_hana_db_timestamp_staging]' self.timestamp_table = '[dbo].[Sap_hana_db_timestamp]' self.mapping = { "VBELN": "SalesDocument", "POSNR": "SalesDocumentItem", "MATNR": "Material", "MATKL": "MaterialGroup", "PSTYV": "SalesDocumentItemCat", "FKREL": "RelevantforBilling", "NETWR": "NetValue", "WAERK": "Currency", "KWMENG": "OrderQuantity", "LSMENG": "RequiredDelQuantity", "KBMENG": "ConfirmedDelQuantiy", "WERKS": "Plant", "PRCTR": "ProfitCenter", "ABSTA": "RejectionStatus", "GBSTA": "OverallStatus", "LFSTA": "DeliveryStatus", "ERDAT": "CreatedDate", "WAVWR": "Cost", "AEDAT": "UpdateDate" } self.translator = self.make_translator()
def get_adfclient():
    subscription_id = 'a9645a3e-7a1d-4f88-9704-6e9f2e7b5d90'
    # Specify your Active Directory client ID, client secret, and tenant ID
    # appid: a03a061c-865b-40ad-b10b-b51996e1dc34
    # key: RYi1miuBnif/r73Xr+7AiD68sq/5PUE+azTp1N1uS00=
    # directory id: 5c2f5846-dd75-4228-be41-51a280645298
    credentials = ServicePrincipalCredentials(
        client_id='a03a061c-865b-40ad-b10b-b51996e1dc34',
        secret='RYi1miuBnif/r73Xr+7AiD68sq/5PUE+azTp1N1uS00=',
        tenant='5c2f5846-dd75-4228-be41-51a280645298')
    resource_client = ResourceManagementClient(credentials, subscription_id)
    adf_client = DataFactoryManagementClient(credentials, subscription_id)
    return adf_client
def createDataFactory(credentials):
    adf_client = DataFactoryManagementClient(credentials, SUBSCRIPTION_ID)
    # Create a data factory
    df_resource = Factory(location=DEPLOYMENT_REGION)
    df = adf_client.factories.create_or_update(RESOURCE_GROUP, DATA_FACTORY_NAME, df_resource)
    # Poll until provisioning finishes
    while df.provisioning_state != 'Succeeded':
        df = adf_client.factories.get(RESOURCE_GROUP, DATA_FACTORY_NAME)
        time.sleep(1)
    print("Created Data Factory")
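# A minimal driver for createDataFactory above, assuming the module-level constants
# (SUBSCRIPTION_ID, RESOURCE_GROUP, DATA_FACTORY_NAME, DEPLOYMENT_REGION) are defined elsewhere.
# The environment variable names used for the service principal are illustrative only.
import os

def build_credentials():
    return ServicePrincipalCredentials(client_id=os.environ["AZURE_CLIENT_ID"],
                                       secret=os.environ["AZURE_CLIENT_SECRET"],
                                       tenant=os.environ["AZURE_TENANT_ID"])

if __name__ == "__main__":
    createDataFactory(build_credentials())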
def get_conn(self) -> DataFactoryManagementClient:
    if self._conn is not None:
        return self._conn
    conn = self.get_connection(self.conn_id)
    self._conn = DataFactoryManagementClient(
        credential=ClientSecretCredential(
            client_id=conn.login,
            client_secret=conn.password,
            tenant_id=conn.extra_dejson.get("tenantId")),
        subscription_id=conn.extra_dejson.get("subscriptionId"),
    )
    return self._conn
def get_conn(self) -> DataFactoryManagementClient:
    if self._conn is not None:
        return self._conn
    conn = self.get_connection(self.conn_id)
    tenant = conn.extra_dejson.get('extra__azure_data_factory__tenantId')
    subscription_id = conn.extra_dejson.get('extra__azure_data_factory__subscriptionId')
    self._conn = DataFactoryManagementClient(
        credential=ClientSecretCredential(
            client_id=conn.login,
            client_secret=conn.password,
            tenant_id=tenant,
        ),
        subscription_id=subscription_id,
    )
    return self._conn
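# Illustrative only: how a hook exposing the get_conn() above might be used to trigger a run.
# The hook class name, connection id, and resource names here are hypothetical.
hook = AzureDataFactoryHook(conn_id="azure_data_factory_default")
client = hook.get_conn()
run = client.pipelines.create_run("my-resource-group", "my-data-factory",
                                  "my_pipeline", parameters={})
print(run.run_id)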
def main():
    # Azure subscription ID
    subscription_id = '<Specify your Azure Subscription ID>'
    # This program creates this resource group. If it's an existing resource group,
    # comment out the code that creates the resource group.
    rg_name = 'ADFTutorialResourceGroup'
    # The data factory name. It must be globally unique.
    df_name = '<Specify a name for the data factory. It must be globally unique>'
    # Specify your Active Directory client ID, client secret, and tenant ID
    credentials = ServicePrincipalCredentials(
        client_id='<Active Directory application/client ID>',
        secret='<client secret>',
        tenant='<Active Directory tenant ID>')
    resource_client = ResourceManagementClient(credentials, subscription_id)
    adf_client = DataFactoryManagementClient(credentials, subscription_id)
    rg_params = {'location': 'eastus'}
    df_params = {'location': 'eastus'}
def monitorAdf():
    try:
        # Get subscription and service principal data from the config file
        subscription_id = configMap['connections']['subscription_id']
        rg_name = configMap['connections']['adf']['rg_name']
        df_name = configMap['connections']['adf']['df_name']
        df_pipeline_name = configMap['connections']['adf']['pipeline_name']
        ad_client_id = configMap['connections']['service_principal']['ad_clientid']
        ad_client_secret = configMap['connections']['service_principal']['ad_client_secret']
        ad_tenantid = configMap['connections']['service_principal']['ad_tenantid']

        # Make the credential object
        credentials = ServicePrincipalCredentials(client_id=ad_client_id,
                                                  secret=ad_client_secret,
                                                  tenant=ad_tenantid)
        adf_client = DataFactoryManagementClient(credentials, subscription_id)
        print('adf access success!')

        # Create a pipeline run
        run_response = adf_client.pipelines.create_run(rg_name, df_name,
                                                       df_pipeline_name,
                                                       parameters={})

        # Monitor the pipeline run
        time.sleep(30)
        pipeline_run = adf_client.pipeline_runs.get(rg_name, df_name,
                                                    run_response.run_id)
        print("\n\tPipeline run status: {}".format(pipeline_run.status))
        filter_params = RunFilterParameters(
            last_updated_after=datetime.now() - timedelta(1),
            last_updated_before=datetime.now() + timedelta(1))
        query_response = adf_client.activity_runs.query_by_pipeline_run(
            rg_name, df_name, pipeline_run.run_id, filter_params)
        activity_output = query_response.value[0].output

        # Upload the activity output to Blob storage
        createBlob(activity_output)
    except Exception as e:
        print('Error occurred while accessing adf', e)
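# Sketch of a polling loop that could replace the fixed time.sleep(30) in monitorAdf above,
# waiting until the run reaches a terminal state. Interval and helper name are assumptions.
def wait_for_run(adf_client, rg_name, df_name, run_id, poll_seconds=15):
    terminal = {"Succeeded", "Failed", "Cancelled"}
    run = adf_client.pipeline_runs.get(rg_name, df_name, run_id)
    while run.status not in terminal:
        time.sleep(poll_seconds)
        run = adf_client.pipeline_runs.get(rg_name, df_name, run_id)
    return run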
def create_datafactory_and_run(files_and_tokens: Dict[str, str], connection_string: str, location: str, is_unittest: bool = False) -> None: """ Builds an Azure Data Factory to download the FastMRI dataset from AWS, and places them in Azure Blob Storage. :param location: The Azure location in which the Data Factory should be created (for example, "westeurope") :param files_and_tokens: A mapping from file name (like knee.tar.gz) to AWS access token. :param is_unittest: If True, download a small tar.gz file from github. If False, download the "real" fastMRI datafiles from AWS. :param connection_string: The connection string of the Azure storage where the downloaded data should be stored. """ azure_config = AzureConfig.from_yaml( yaml_file_path=fixed_paths.SETTINGS_YAML_FILE, project_root=fixed_paths.repository_root_directory()) # The data factory name. It must be globally unique. data_factory_name = "fastmri-copy-data-" + uuid.uuid4().hex[:8] # Get either the Service Principal authentication, if those are set already, or use interactive auth in the browser azureid_auth = get_azure_auth(azure_config) # Create a data factory adf_client = DataFactoryManagementClient(azureid_auth, azure_config.subscription_id) df_resource = Factory(location=location) print(f"Creating data factory {data_factory_name}") df = adf_client.factories.create_or_update(azure_config.resource_group, data_factory_name, df_resource) while df.provisioning_state != 'Succeeded': df = adf_client.factories.get(azure_config.resource_group, data_factory_name) time.sleep(1) print("Data factory created") # Create a linked service pointing to where the downloads come from if is_unittest: http_service = LinkedServiceResource(properties=HttpLinkedService( url="https://github.com", enable_server_certificate_validation=True, authentication_type="Anonymous")) else: http_service = LinkedServiceResource(properties=HttpLinkedService( url="https://fastmri-dataset.s3.amazonaws.com/", enable_server_certificate_validation=True, authentication_type="Anonymous")) http_name = "AwsHttp" adf_client.linked_services.create_or_update( resource_group_name=azure_config.resource_group, factory_name=data_factory_name, linked_service_name=http_name, linked_service=http_service) # Create a linked service that represents the sink (Azure blob storage) blob_storage_name = "AzureBlob" blob_storage = AzureBlobStorageLinkedService( connection_string=SecureString(value=connection_string)) blob_storage_service = LinkedServiceResource(properties=blob_storage) adf_client.linked_services.create_or_update( resource_group_name=azure_config.resource_group, factory_name=data_factory_name, linked_service_name=blob_storage_name, linked_service=blob_storage_service) linked_blob_storage = LinkedServiceReference( reference_name=blob_storage_name) linked_http = LinkedServiceReference(reference_name=http_name) def download_and_uncompress(source_file_or_tuple: Union[str, Tuple[str, str]], target_folder: str) -> List[str]: """ Downloads a file from AWS and stores them in blob storage in its compressed form. From the compressed file in blob storage, it is then uncompressed, and written to a new folder in blob storage. For example, if 'target_folder' is 'foo', the uncompressed file will be written to folder 'foo', and the compressed raw data will be written to 'foo_compressed'. :param source_file_or_tuple: The name of the .tar.gz or .tar file to download, without any access tokens. 
If the name is a Tuple[str, str], the second tuple element is the "real" extension, for files where the extension is misleading. :param target_folder: The folder prefix in the target storage account. :return: A list of pipelines that this method created. """ if isinstance(source_file_or_tuple, str): source_file = source_file_or_tuple file_extension = "".join(Path(source_file).suffixes) correct_extension = file_extension elif isinstance(source_file_or_tuple, tuple): source_file, correct_extension = source_file_or_tuple file_extension = "".join(Path(source_file).suffixes) else: raise ValueError( f"Type of source_file_or_tuple not recognized: {type(source_file_or_tuple)}" ) source_file_with_correct_extension = source_file[:source_file.rfind( file_extension)] + correct_extension target_folder_compressed = target_folder + COMPRESSED_DATASET_SUFFIX if is_unittest: http_source = HttpServerLocation( relative_url="gulpjs/gulp/archive/v3.9.1.tar.gz") else: http_source = HttpServerLocation( relative_url=f"{source_file}{files_and_tokens[source_file]}") source_file_cleaned = source_file.replace(".", "_") # A dataset that reads the files from AWS as-is, no decompression source_compressed = BinaryDataset(linked_service_name=linked_http, location=http_source) source_compressed_name = f"{source_file_cleaned} on AWS" adf_client.datasets.create_or_update( resource_group_name=azure_config.resource_group, factory_name=data_factory_name, dataset_name=source_compressed_name, dataset=DatasetResource(properties=source_compressed)) # The sink for downloading the datasets as-is (compressed) blob_storage_compressed = AzureBlobStorageLocation( file_name=source_file_with_correct_extension, container=TARGET_CONTAINER, folder_path=target_folder_compressed) dest_compressed = BinaryDataset( linked_service_name=linked_blob_storage, location=blob_storage_compressed) dest_compressed_name = f"{source_file_cleaned} on Azure" adf_client.datasets.create_or_update( resource_group_name=azure_config.resource_group, factory_name=data_factory_name, dataset_name=dest_compressed_name, dataset=DatasetResource(properties=dest_compressed)) # A dataset that reads the files from blob storage and uncompresses on-the-fly if correct_extension == ".tar.gz": compression = DatasetTarGZipCompression() # By default, a folder gets created for each .tar.gzip file that is read. Disable that. compression_properties = TarGZipReadSettings( preserve_compression_file_name_as_folder=False) elif correct_extension == ".tar": compression = DatasetTarCompression() # By default, a folder gets created for each .tar file that is read. Disable that. 
compression_properties = TarReadSettings( preserve_compression_file_name_as_folder=False) else: raise ValueError( f"Unable to determine compression for file {source_file}") source_uncompressed = BinaryDataset( linked_service_name=linked_blob_storage, location=blob_storage_compressed, compression=compression) source_uncompressed_name = f"read {source_file_cleaned} and uncompress" adf_client.datasets.create_or_update( resource_group_name=azure_config.resource_group, factory_name=data_factory_name, dataset_name=source_uncompressed_name, dataset=DatasetResource(properties=source_uncompressed)) # The sink for downloading the datasets uncompressed final_dataset = BinaryDataset(linked_service_name=linked_blob_storage, location=AzureBlobStorageLocation( container=TARGET_CONTAINER, folder_path=target_folder)) final_name = f"save {source_file_cleaned} uncompressed" adf_client.datasets.create_or_update( resource_group_name=azure_config.resource_group, factory_name=data_factory_name, dataset_name=final_name, dataset=DatasetResource(properties=final_dataset)) # Copying from compressed source to compressed destination on blob storage download = CopyActivity( name=f"download {source_file_cleaned}", inputs=[DatasetReference(reference_name=source_compressed_name)], outputs=[DatasetReference(reference_name=dest_compressed_name)], source=HttpSource(), sink=BlobSink()) # Read the compressed file from blob storage and create an uncompressed dataset. # This should not create extra folder structure beyond what is already in the tar file - this is specified # in compression_properties binary_source = BinarySource(format_settings=BinaryReadSettings( compression_properties=compression_properties)) uncompress = CopyActivity( name=f"uncompress {source_file_cleaned}", inputs=[DatasetReference(reference_name=source_uncompressed_name)], outputs=[DatasetReference(reference_name=final_name)], source=binary_source, sink=BlobSink(), # Add a dependent activity: We first need to download depends_on=[ ActivityDependency(activity=download.name, dependency_conditions=["Succeeded"]) ]) # Create a pipeline that first downloads from AWS to blob storage, and then decompresses from blob storage # to another blob storage location pipeline = f"{source_file_cleaned} to folder {target_folder}" adf_client.pipelines.create_or_update( resource_group_name=azure_config.resource_group, factory_name=data_factory_name, pipeline_name=pipeline, pipeline=PipelineResource(activities=[download, uncompress])) return [pipeline] file_list: FolderAndFileList = \ [("antonsctest", ["foo.tar.gz", "bar.tar"])] if is_unittest else files_to_download all_pipelines = [] print("Creating pipelines:") for target_folder, files in file_list: for file in files: pipelines = download_and_uncompress(file, target_folder=target_folder) for p in pipelines: print(f"Created pipeline {p}") all_pipelines.extend(pipelines) print("Starting all pipelines") run_ids_per_pipeline = {} for pipeline in all_pipelines: run_result = adf_client.pipelines.create_run( resource_group_name=azure_config.resource_group, factory_name=data_factory_name, pipeline_name=pipeline) print(f"Started pipeline: {pipeline}") run_ids_per_pipeline[run_result.run_id] = pipeline print("Waiting for pipelines to complete") status_per_run = { run_id: "running" for run_id in run_ids_per_pipeline.keys() } while True: for run_id in run_ids_per_pipeline.keys(): if status_per_run[run_id]: pipeline_run = adf_client.pipeline_runs.get( resource_group_name=azure_config.resource_group, factory_name=data_factory_name, 
run_id=run_id) status = pipeline_run.status if status == "Succeeded" or status == "Failed": print( f"Pipeline '{run_ids_per_pipeline[run_id]}' completed with status {status}" ) status_per_run[run_id] = "" else: status_per_run[run_id] = status remaining_runs = len([v for v in status_per_run.values() if v]) print(f"Remaining pipelines that are running: {remaining_runs}") if remaining_runs == 0: break time.sleep(30) utcnow = datetime.now(timezone.utc) filter_params = RunFilterParameters( last_updated_after=utcnow - timedelta(days=1), last_updated_before=utcnow + timedelta(days=1)) for run_id, pipeline in run_ids_per_pipeline.items(): query_response = adf_client.activity_runs.query_by_pipeline_run( resource_group_name=azure_config.resource_group, factory_name=data_factory_name, run_id=run_id, filter_parameters=filter_params) run_status = query_response.value[0] print(f"Status for pipeline {pipeline}: {run_status.status}") if run_status.status == 'Succeeded': print(f"\tNumber of bytes read: {run_status.output['dataRead']}") print( f"\tNumber of bytes written: {run_status.output['dataWritten']}" ) print(f"\tCopy duration: {run_status.output['copyDuration']}") else: print(f"\tErrors: {run_status.error['message']}") print("All pipelines completed. Deleting data factory.") adf_client.factories.delete(azure_config.resource_group, data_factory_name)
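# A hedged example of invoking create_datafactory_and_run above in unit-test mode, where no AWS
# tokens are needed; the storage connection string is a placeholder, not a real value.
if __name__ == "__main__":
    create_datafactory_and_run(files_and_tokens={},
                               connection_string="<storage account connection string>",
                               location="westeurope",
                               is_unittest=True)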
def main(self): try: # Create the BlockBlockService that is used to call the Blob service for the storage account block_blob_service = BlockBlobService(account_name=accountname, account_key=accountkey) # Create a container called 'quickstartblobs'. container_name = blob_path block_blob_service.create_container(container_name) # Set the permission so the blobs are public. block_blob_service.set_container_acl( container_name, public_access=PublicAccess.Container) # choose file from system path = filepath print(container_name) head, tail = os.path.split(path) print(tail) # Upload the created file, use local_file_name for the blob name block_blob_service.create_blob_from_path(container_name, tail, path) # List the blobs in the container print("\nList blobs in the container") generator = block_blob_service.list_blobs(container_name) for blob in generator: print("\nBlob name: " + blob.name) except Exception as e: print(e) credentials = ServicePrincipalCredentials(client_id=client_id, secret=secret, tenant=tenant) adf_client = DataFactoryManagementClient(credentials, subscription_id) # Create a data factory df_resource = Factory(location='eastus') df = adf_client.factories.create_or_update(rg_name, df_name, df_resource) BlobToBlob.print_item(df) while df.provisioning_state != 'Succeeded': df = adf_client.factories.get(rg_name, df_name) time.sleep(1) # Create an Azure Storage linked service storage_string = SecureString(storage_account_details) ls_azure_storage = AzureStorageLinkedService( connection_string=storage_string) ls = adf_client.linked_services.create_or_update( rg_name, df_name, ls_name, ls_azure_storage) BlobToBlob.print_item(ls) ds_ls = LinkedServiceReference(ls_name) ds_azure_blob = AzureBlobDataset(ds_ls, folder_path=blob_path, file_name=tail) ds = adf_client.datasets.create_or_update(rg_name, df_name, dsIn_name, ds_azure_blob) BlobToBlob.print_item(ds) dsOut_azure_blob = AzureBlobDataset(ds_ls, folder_path=output_blobpath) dsOut = adf_client.datasets.create_or_update(rg_name, df_name, dsOut_name, dsOut_azure_blob) BlobToBlob.print_item(dsOut) # Create a copy activity blob_source = BlobSource() blob_sink = BlobSink() dsin_ref = DatasetReference(dsIn_name) dsOut_ref = DatasetReference(dsOut_name) p = ActivityPolicy() p.timeout = '3.00:00:00' p.retry = 2 p.retry_interval_in_seconds = 50 copy_activity = CopyActivity(name=act_name, description=act_description, enable_staging='false', enable_skip_incompatible_row='false', inputs=[dsin_ref], outputs=[dsOut_ref], source=blob_source, sink=blob_sink, policy=p) params_for_pipeline = {} p_obj = PipelineResource(activities=[copy_activity], parameters=params_for_pipeline) p = adf_client.pipelines.create_or_update(rg_name, df_name, p_name, p_obj) BlobToBlob.print_item(p) # Create a pipeline run run_response = adf_client.pipelines.create_run(rg_name, df_name, p_name, {}) # Monitor the pipeilne run time.sleep(20) pipeline_run = adf_client.pipeline_runs.get(rg_name, df_name, run_response.run_id) print("\nPineLine Id:{}".format(pipeline_run.run_id)) print("\nPipeline run status: {}".format(pipeline_run.status)) activity_runs_paged = list( adf_client.activity_runs.list_by_pipeline_run( rg_name, df_name, pipeline_run.run_id, datetime.now() - timedelta(1), datetime.now() + timedelta(1))) BlobToBlob.print_activity_run_details(activity_runs_paged[0])
def main(): # Azure subscription ID subscription_id = '<Azure subscription ID>' # This program creates this resource group. If it's an existing resource group, comment out the code that creates the resource group rg_name = '<Azure resource group name>' # The data factory name. It must be globally unique. df_name = '<Data factory name>' # Specify your Active Directory client ID, client secret, and tenant ID credentials = ServicePrincipalCredentials( client_id='<AAD application ID>', secret='<AAD app authentication key>', tenant='<AAD tenant ID>') resource_client = ResourceManagementClient(credentials, subscription_id) adf_client = DataFactoryManagementClient(credentials, subscription_id) rg_params = {'location': 'eastus'} df_params = {'location': 'eastus'} # create the resource group # comment out if the resource group already exits resource_client.resource_groups.create_or_update(rg_name, rg_params) # Create a data factory df_resource = Factory(location='eastus') df = adf_client.factories.create_or_update(rg_name, df_name, df_resource) print_item(df) while df.provisioning_state != 'Succeeded': df = adf_client.factories.get(rg_name, df_name) time.sleep(1) # Create an Azure Storage linked service ls_name = 'storageLinkedService' # Specify the name and key of your Azure Storage account storage_string = SecureString( 'DefaultEndpointsProtocol=https;AccountName=<Azure storage account>;AccountKey=<Azure storage authentication key>' ) ls_azure_storage = AzureStorageLinkedService( connection_string=storage_string) ls = adf_client.linked_services.create_or_update(rg_name, df_name, ls_name, ls_azure_storage) print_item(ls) # Create an Azure blob dataset (input) ds_name = 'ds_in' ds_ls = LinkedServiceReference(ls_name) blob_path = 'adftutorial/inputpy' blob_filename = 'input.txt' ds_azure_blob = AzureBlobDataset(ds_ls, folder_path=blob_path, file_name=blob_filename) ds = adf_client.datasets.create_or_update(rg_name, df_name, ds_name, ds_azure_blob) print_item(ds) # Create an Azure blob dataset (output) dsOut_name = 'ds_out' output_blobpath = 'adftutorial/outputpy' dsOut_azure_blob = AzureBlobDataset(ds_ls, folder_path=output_blobpath) dsOut = adf_client.datasets.create_or_update(rg_name, df_name, dsOut_name, dsOut_azure_blob) print_item(dsOut) # Create a copy activity act_name = 'copyBlobtoBlob' blob_source = BlobSource() blob_sink = BlobSink() dsin_ref = DatasetReference(ds_name) dsOut_ref = DatasetReference(dsOut_name) copy_activity = CopyActivity(act_name, inputs=[dsin_ref], outputs=[dsOut_ref], source=blob_source, sink=blob_sink) # Create a pipeline with the copy activity p_name = 'copyPipeline' params_for_pipeline = {} p_obj = PipelineResource(activities=[copy_activity], parameters=params_for_pipeline) p = adf_client.pipelines.create_or_update(rg_name, df_name, p_name, p_obj) print_item(p) # Create a pipeline run run_response = adf_client.pipelines.create_run(rg_name, df_name, p_name, {}) # Monitor the pipeilne run time.sleep(30) pipeline_run = adf_client.pipeline_runs.get(rg_name, df_name, run_response.run_id) print("\n\tPipeline run status: {}".format(pipeline_run.status)) activity_runs_paged = list( adf_client.activity_runs.list_by_pipeline_run( rg_name, df_name, pipeline_run.run_id, datetime.now() - timedelta(1), datetime.now() + timedelta(1))) print_activity_run_details(activity_runs_paged[0]) # Create a trigger tr_name = 'mytrigger' scheduler_recurrence = ScheduleTriggerRecurrence(frequency='Minute', interval='15', start_time=datetime.now(), end_time=datetime.now() + timedelta(1), 
                                                  time_zone='UTC')
    pipeline_parameters = {
        'inputPath': 'adftutorial/inputpy',
        'outputPath': 'adftutorial/outputpy'
    }
    pipelines_to_run = []
    pipeline_reference = PipelineReference('copyPipeline')
    pipelines_to_run.append(
        TriggerPipelineReference(pipeline_reference, pipeline_parameters))
    tr_properties = ScheduleTrigger(description='My scheduler trigger',
                                    pipelines=pipelines_to_run,
                                    recurrence=scheduler_recurrence)
    adf_client.triggers.create_or_update(rg_name, df_name, tr_name, tr_properties)

    # Start the trigger
    adf_client.triggers.start(rg_name, df_name, tr_name)
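# Clean-up sketch (not part of the original tutorial): stop and remove the schedule trigger once
# it is no longer needed. Assumes the same SDK generation as above, where triggers.stop and
# triggers.delete take (resource_group, factory_name, trigger_name).
def remove_trigger(adf_client, rg_name, df_name, tr_name):
    adf_client.triggers.stop(rg_name, df_name, tr_name)
    adf_client.triggers.delete(rg_name, df_name, tr_name)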
source = source[0]
target = target[0]

# This program creates this resource group. If it's an existing resource group,
# comment out the code that creates the resource group.
#rg_name = 'poc-westeurope-gp-data-rg'
rg_name = 'poc-westeurope-gp-data-rg'
# The data factory name. It must be globally unique.
#df_name = 'poc-westeurope-gp-data-df-atradius'
df_name = 'poc-westeurope-gp-data-df'

#credentials = ServicePrincipalCredentials(client_id=client_id, secret=secret, tenant=tenant)
# get_azure_cli_credentials() returns a (credentials, subscription_id) tuple.
credentials, subscription_id = get_azure_cli_credentials()
resource_client = ResourceManagementClient(credentials, subscription_id)
adf_client = DataFactoryManagementClient(credentials, subscription_id)
rg_params = {'location': 'westeurope'}
df_params = {'location': 'westeurope'}

# Create database linked service
ls_tgt_name = 'tgtazuresqldb'
# Create an Azure Storage linked service
ls_src_name = 'srcgrpblob'

# Parameter file with the list of tables
with open('param_tables.json') as json_param_file:
    table_name = json.load(json_param_file)
def main(req: func.HttpRequest) -> func.HttpResponse: target_table = "PipelinePauseData" token = utilities.get_param(req, "token") table_service = utilities.setup_table_service( os.environ["AzureWebJobsStorage"], target_table, ) # Since we can't use authentication for the API we will check as # soon as possible if the token for the pipeline restart is valid. # if it is not we halt execution and return a 500 code. try: paused_pipeline = table_service.get_entity( table_name=target_table, partition_key="PauseData", row_key=token ) except AzureMissingResourceHttpError as e: raise exceptions.HttpError( str(e), func.HttpResponse(str(e), status_code=500) ) # acted_upon monitors if a token has already been used. We use it here to # block the second and further attempts at restarting. acted_upon = paused_pipeline["acted_upon"] has_expired = check_if_expired( paused_pipeline["Timestamp"], paused_pipeline["expiration_time"], ) if not acted_upon and not has_expired: logging.info(token) # DefaultAzureCredential does not work when manipulating ADF. It will # complain about a missing session method. # Remember to give the contributor role to the application. # Azure Portal -> Subscriptions -> IAM roles credentials = ServicePrincipalCredentials( client_id=os.environ["AZURE_CLIENT_ID"], secret=os.environ["AZURE_CLIENT_SECRET"], tenant=os.environ["AZURE_TENANT_ID"], ) subscription_id = os.environ["subscription_id"] adf_client = DataFactoryManagementClient(credentials, subscription_id) logging.info(adf_client) # The restart data is accessed via a lookup activity from within ADF run_response = restart_pipeline( adf_client=adf_client, resource_group=paused_pipeline["resource_group"], factory_name=paused_pipeline["factory_name"], pipeline_name=paused_pipeline["pipeline_name"], token=token, ) logging.info(run_response) # After running acted_upon is set to 1 paused_pipeline["acted_upon"] = 1 table_service.update_entity(target_table, paused_pipeline) # Retrieve and display success webpage. confirmation_site = ( ShareFileClient.from_connection_string( conn_str=os.environ["AzureWebJobsStorage"], share_name=paused_pipeline["share_name"], file_path=paused_pipeline["web_path"], ) .download_file() .readall() .decode("utf-8") ) return func.HttpResponse(confirmation_site, mimetype="text/html") else: # already acted_upon or expired return func.HttpResponse("Invalid token.", status_code=500,)
def datafactory(sourceconnectionstring, sinkconnectionstring): # Azure subscription ID subscription_id = options.subscription # This program creates this resource group. If it's an existing resource group, comment out the code that creates the resource group rg_name = options.resourcegroup # The data factory name. It must be globally unique. df_name = options.datafactory # Specify your Active Directory client ID, client secret, and tenant ID credentials = ServicePrincipalCredentials(client_id=options.clientid, secret=options.clientsecret, tenant=options.tenantid) resource_client = ResourceManagementClient(credentials, subscription_id) adf_client = DataFactoryManagementClient(credentials, subscription_id) rg_params = {'location': 'westeurope'} df_params = {'location': 'westeurope'} #Create a data factory df_resource = Factory(location='westeurope') df = adf_client.factories.create_or_update(rg_name, df_name, df_resource) print_item(df) while df.provisioning_state != 'Succeeded': df = adf_client.factories.get(rg_name, df_name) time.sleep(1) if options.integrationruntime is not None: integrationruntime = IntegrationRuntimeReference( reference_name=options.integrationruntime, parameters=None) source_ls_name = 'sourceLinkedService' if options.integrationruntime is not None: source_ls_azure_cosmos = CosmosDbMongoDbApiLinkedService( connection_string=sourceconnectionstring, database=options.sourcedatabasename, connect_via=integrationruntime) else: source_ls_azure_cosmos = CosmosDbMongoDbApiLinkedService( connection_string=sourceconnectionstring, database=options.sourcedatabasename) source_ls = adf_client.linked_services.create_or_update( rg_name, df_name, source_ls_name, source_ls_azure_cosmos) print_item(source_ls) # Create an Azure blob dataset (input) source_ds_name = 'sourceDS' source_ds_ls = LinkedServiceReference(reference_name=source_ls_name) collection = options.sourcecollectionname sourcedataset = CosmosDbMongoDbApiCollectionDataset( linked_service_name=source_ds_ls, collection=collection, schema=None) source_ds = adf_client.datasets.create_or_update(rg_name, df_name, source_ds_name, sourcedataset) print_item(source_ds) sink_ls_name = 'sinkLinkedService' if options.integrationruntime is not None: sink_ls_azure_blob = AzureStorageLinkedService( connection_string=sinkconnectionstring, connect_via=integrationruntime) else: sink_ls_azure_blob = CosmosDbMongoDbApiLinkedService( connection_string=sinkconnectionstring) sink_ls = adf_client.linked_services.create_or_update( rg_name, df_name, sink_ls_name, sink_ls_azure_blob) print_item(sink_ls) sink_ds_name = 'sinkDS' utc_datetime = datetime.utcnow() blob_filename = utc_datetime.strftime("%Y%m%d-%H%M%SZ") + '.json.gz' sink_ds_ls = LinkedServiceReference(reference_name=sink_ls_name) location = DatasetLocation(type='AzureBlobStorageLocation', folder_path=options.sinkcontainername, file_name=blob_filename) compression = DatasetGZipCompression(level='Optimal') sinkdataset = JsonDataset(linked_service_name=sink_ds_ls, compression=compression, location=location) sink_ds = adf_client.datasets.create_or_update(rg_name, df_name, sink_ds_name, sinkdataset) print_item(sink_ds) if options.incremental is not None: if options.incremental == "yes": # Create a copy activity currentime = datetime.now() currentobjectid = 'ObjectId' + '(' + '"' + format( int(time.mktime(currentime.timetuple())), 'x') + "0000000000000000" + '"' + ')' previoustime = (datetime.now() - timedelta(minutes=240)) previousobjectid = 'ObjectId' + '(' + '"' + format( 
                int(time.mktime(previoustime.timetuple())),
                'x') + "0000000000000000" + '"' + ')'
            filter = '{_id: {$gte:' + previousobjectid + ',$lt:' + currentobjectid + '}}'

    act_name = 'copyCosmostoblob'
    if options.incremental is not None:
        if options.incremental == "yes":
            cosmos_source = CosmosDbMongoDbApiSource(filter=filter)
        else:
            print("Set incremental to yes for this script to work properly")
            sys.exit(1)
    else:
        cosmos_source = CosmosDbMongoDbApiSource()
    blob_sink = BlobSink()
    dsin_ref = DatasetReference(reference_name=source_ds_name)
    dsOut_ref = DatasetReference(reference_name=sink_ds_name)
    copy_activity = CopyActivity(name=act_name,
                                 inputs=[dsin_ref],
                                 outputs=[dsOut_ref],
                                 source=cosmos_source,
                                 sink=blob_sink)

    # Create a pipeline with the copy activity
    p_name = 'copyPipeline'
    params_for_pipeline = {}
    p_obj = PipelineResource(activities=[copy_activity],
                             parameters=params_for_pipeline)
    p = adf_client.pipelines.create_or_update(rg_name, df_name, p_name, p_obj)
    print_item(p)

    run_response = adf_client.pipelines.create_run(rg_name, df_name, p_name,
                                                   parameters={})
    time.sleep(30)
    pipeline_run = adf_client.pipeline_runs.get(rg_name, df_name,
                                                run_response.run_id)
    print("\n\tPipeline run status: {}".format(pipeline_run.status))
    filter_params = RunFilterParameters(
        last_updated_after=datetime.now() - timedelta(1),
        last_updated_before=datetime.now() + timedelta(1))
    query_response = adf_client.activity_runs.query_by_pipeline_run(
        rg_name, df_name, pipeline_run.run_id, filter_params)
    print_activity_run_details(query_response.value[0])
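# Helper sketch for the incremental window used above: a MongoDB ObjectId embeds the creation
# time as an 8-hex-digit Unix timestamp, so padding it with 16 zeros gives the smallest ObjectId
# for that second. The 240-minute lookback mirrors the script; the function names are new.
def objectid_for(dt):
    return 'ObjectId("' + format(int(time.mktime(dt.timetuple())), 'x') + '0000000000000000")'

def incremental_filter(lookback_minutes=240):
    now = datetime.now()
    earlier = now - timedelta(minutes=lookback_minutes)
    return '{_id: {$gte:' + objectid_for(earlier) + ',$lt:' + objectid_for(now) + '}}'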
_SUBSCRIPTION_ID = os.getenv("SUBSCRIPTION_ID")
_CLIENT_ID = os.environ.get("CLIENT_ID")
_CLIENT_KEY = os.environ.get("CLIENT_KEY")
_TENANT_ID = os.environ.get("TENANT_ID")  # assumed: the tenant ID also comes from the environment
_RESOURCE_GROUP = os.environ.get("RESOURCE_GROUP")
_DATA_FACTORY_NAME = os.environ.get("DATA_FACTORY_NAME")
_TIME_ZONE = os.environ.get("TIME_ZONE")
ISO8601_FORMAT = "%Y%m%dT%H:%M:%S"

credentials = ServicePrincipalCredentials(
    client_id=_CLIENT_ID,
    secret=_CLIENT_KEY,
    tenant=_TENANT_ID
)
adf_client = DataFactoryManagementClient(credentials, _SUBSCRIPTION_ID)


def query_activities(run_id, runfilterparam):
    activities = adf_client.activity_runs.query_by_pipeline_run(
        _RESOURCE_GROUP,
        _DATA_FACTORY_NAME,
        run_id=run_id,
        filter_parameters=runfilterparam).value
    pipe_activities = []
    for activity in activities:
        output_activity = {
            "activity_name": activity.activity_name,
            "status": activity.status,
            "input": activity.input,
def main():
    parser = argparse.ArgumentParser(description="Library path in ADF")
    parser.add_argument("-r", "--resource_group", help="Resource group", required=True)
    parser.add_argument("-a", "--adf_name", help="ADF NAME", required=True)
    parser.add_argument("-p", "--adf_pipeline_name", help="ADF pipeline name", required=True)
    parser.add_argument("-o", "--output_file_path", help="Output file path", required=True)
    parser.add_argument("-pa", "--parameters", help="Parameters", required=False)
    args = parser.parse_args()

    resource_group = args.resource_group
    adf_name = args.adf_name
    adf_pipeline_name = args.adf_pipeline_name
    output_file_path = args.output_file_path
    parameters = args.parameters

    print(f"-resource_group is {resource_group}")
    print(f"-adf_name is {adf_name}")
    print(f"-adf_pipeline_name is {adf_pipeline_name}")
    print(f"-output_file_path is {output_file_path}")
    print(f"-parameters is {parameters}")

    # Authenticate with the Azure CLI login; this also returns the subscription ID.
    credentials, subscription_id = get_azure_cli_credentials()
    adf_client = DataFactoryManagementClient(credentials, subscription_id)

    # Create a pipeline run
    run_response = adf_client.pipelines.create_run(resource_group, adf_name,
                                                   adf_pipeline_name,
                                                   parameters=parameters)

    # Monitor the pipeline run
    time.sleep(5)
    pipeline_run = adf_client.pipeline_runs.get(resource_group, adf_name,
                                                run_response.run_id)
    print("\n\tPipeline run status: {}".format(pipeline_run.status))
    filter_params = RunFilterParameters(
        last_updated_after=datetime.utcnow() - timedelta(1),
        last_updated_before=datetime.utcnow() + timedelta(1))
    query_response = adf_client.activity_runs.query_by_pipeline_run(
        resource_group, adf_name, pipeline_run.run_id, filter_params)
    while query_response.value[0].status in ['InProgress']:
        print_activity_run_details(query_response.value[0])
        time.sleep(3)
        query_response = adf_client.activity_runs.query_by_pipeline_run(
            resource_group, adf_name, pipeline_run.run_id, filter_params)
    print_activity_run_details(query_response.value[0])
def _create_client(credential: Credentials, subscription_id: str):
    return DataFactoryManagementClient(
        credential=credential,
        subscription_id=subscription_id,
    )
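# Possible usage of _create_client above with DefaultAzureCredential; the Credentials type alias
# is assumed to cover azure-identity credential objects, and the subscription ID is a placeholder.
from azure.identity import DefaultAzureCredential

client = _create_client(DefaultAzureCredential(), "00000000-0000-0000-0000-000000000000")
# List the data factories visible under the subscription as a quick smoke test.
print([factory.name for factory in client.factories.list()])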
def getAdfClient():
    subscription_id = app.config["SUBSCRIPTION_ID"]
    credentials = getCredentials()
    return DataFactoryManagementClient(credentials, subscription_id)
def main(): # Load parameters params = load_config() # Azure subscription ID subscription_id = params['subscription_id'] # '<subscription ID>' # This program creates this resource group. If it's an existing resource group, comment out the code that creates the resource group rg_name = params['rg_name'] # '<resource group>' # The data factory name. It must be globally unique. df_name = params['df_name'] # '<factory name>' # Specify your Active Directory client ID, client secret, and tenant ID credentials = ClientSecretCredential(client_id=params['client_id'], client_secret=params['client_secret'], tenant_id=params['tenant_id']) resource_client = ResourceManagementClient(credentials, subscription_id) adf_client = DataFactoryManagementClient(credentials, subscription_id) rg_params = {'location': 'westeurope'} df_params = {'location': 'westeurope'} # create the resource group # comment out if the resource group already exits # resource_client.resource_groups.create_or_update(rg_name, rg_params) # Create a data factory df_resource = Factory(location='westeurope') df = adf_client.factories.create_or_update(rg_name, df_name, df_resource) print_item(df) while df.provisioning_state != 'Succeeded': df = adf_client.factories.get(rg_name, df_name) time.sleep(1) # Create an Azure Storage linked service ls_name = 'storageLinkedService001' # IMPORTANT: specify the name and key of your Azure Storage account. storage_string = SecureString(value=params['storage_string']) ls_azure_storage = LinkedServiceResource( properties=AzureStorageLinkedService(connection_string=storage_string)) ls = adf_client.linked_services.create_or_update(rg_name, df_name, ls_name, ls_azure_storage) print_item(ls) # Create an Azure blob dataset (input) ds_name = 'ds_in' ds_ls = LinkedServiceReference(reference_name=ls_name) blob_path = params['blob_path'] # '<container>/<folder path>' blob_filename = params['blob_filename'] # '<file name>' ds_azure_blob = DatasetResource( properties=AzureBlobDataset(linked_service_name=ds_ls, folder_path=blob_path, file_name=blob_filename)) ds = adf_client.datasets.create_or_update(rg_name, df_name, ds_name, ds_azure_blob) print_item(ds) # Create an Azure blob dataset (output) dsOut_name = 'ds_out' output_blobpath = params['blob_path_output'] # <container>/<folder path>' dsOut_azure_blob = DatasetResource(properties=AzureBlobDataset( linked_service_name=ds_ls, folder_path=output_blobpath)) dsOut = adf_client.datasets.create_or_update(rg_name, df_name, dsOut_name, dsOut_azure_blob) print_item(dsOut) # Create a copy activity act_name = 'copyBlobtoBlob' blob_source = BlobSource() blob_sink = BlobSink() dsin_ref = DatasetReference(reference_name=ds_name) dsOut_ref = DatasetReference(reference_name=dsOut_name) copy_activity = CopyActivity(name=act_name, inputs=[dsin_ref], outputs=[dsOut_ref], source=blob_source, sink=blob_sink) # Create a pipeline with the copy activity p_name = 'copyPipeline' params_for_pipeline = {} p_obj = PipelineResource(activities=[copy_activity], parameters=params_for_pipeline) p = adf_client.pipelines.create_or_update(rg_name, df_name, p_name, p_obj) print_item(p) # Create a pipeline run run_response = adf_client.pipelines.create_run(rg_name, df_name, p_name, parameters={}) # Monitor the pipeline run time.sleep(30) pipeline_run = adf_client.pipeline_runs.get(rg_name, df_name, run_response.run_id) print("\n\tPipeline run status: {}".format(pipeline_run.status)) filter_params = RunFilterParameters( last_updated_after=datetime.now() - timedelta(1), 
        last_updated_before=datetime.now() + timedelta(1))
    query_response = adf_client.activity_runs.query_by_pipeline_run(
        rg_name, df_name, pipeline_run.run_id, filter_params)
    print_activity_run_details(query_response.value[0])
def main(): # Specify your Active Directory client ID, client secret, and tenant ID credentials = ServicePrincipalCredentials(client_id=APP_CLIENT_ID, secret=APP_SECRET, tenant=TENANT_ID) adf_client = DataFactoryManagementClient(credentials, SUBSCRIPTION_ID) # Get the data factory df = adf_client.factories.get(RESOURCE_GROUP, DATA_FACTORY_NAME) print_item(df) # Create an Azure Storage linked service ls_name = 'storageLinkedService' # IMPORTANT: specify the name and key of your Azure Storage account. storage_string = SecureString( 'DefaultEndpointsProtocol=https;AccountName={};AccountKey={}'.format(STORAGE_NAME, STORAGE_KEY)) ls_azure_storage = AzureStorageLinkedService(connection_string=storage_string) ls = adf_client.linked_services.create_or_update(RESOURCE_GROUP, DATA_FACTORY_NAME, ls_name, ls_azure_storage) print_item(ls) # Create an Azure blob dataset (input) ds_name = 'ds_in' ds_ls = LinkedServiceReference(ls_name) blob_path= 'input' blob_filename = 'input.txt' ds_azure_blob= AzureBlobDataset(ds_ls, folder_path=blob_path, file_name = blob_filename) ds = adf_client.datasets.create_or_update(RESOURCE_GROUP, DATA_FACTORY_NAME, ds_name, ds_azure_blob) print_item(ds) # Create an Azure blob dataset (output) dsOut_name = 'ds_out' output_blobpath = 'output' dsOut_azure_blob = AzureBlobDataset(ds_ls, folder_path=output_blobpath) dsOut = adf_client.datasets.create_or_update(RESOURCE_GROUP, DATA_FACTORY_NAME, dsOut_name, dsOut_azure_blob) print_item(dsOut) # Create a copy activity act_name = 'copyBlobtoBlob' blob_source = BlobSource() blob_sink = BlobSink() dsin_ref = DatasetReference(ds_name) dsOut_ref = DatasetReference(dsOut_name) copy_activity = CopyActivity(act_name,inputs=[dsin_ref], outputs=[dsOut_ref], source=blob_source, sink=blob_sink) #Create a pipeline with the copy activity p_name = 'copyPipeline' params_for_pipeline = {} p_obj = PipelineResource(activities=[copy_activity], parameters=params_for_pipeline) p = adf_client.pipelines.create_or_update(RESOURCE_GROUP, DATA_FACTORY_NAME, p_name, p_obj) print_item(p) #Create a pipeline run. run_response = adf_client.pipelines.create_run(RESOURCE_GROUP, DATA_FACTORY_NAME, p_name, { } ) # Monitor the pipeline run time.sleep(30) pipeline_run = adf_client.pipeline_runs.get(RESOURCE_GROUP, DATA_FACTORY_NAME, run_response.run_id) print("\n\tPipeline run status: {}".format(pipeline_run.status)) activity_runs_paged = list(adf_client.activity_runs.list_by_pipeline_run(RESOURCE_GROUP, DATA_FACTORY_NAME, pipeline_run.run_id, datetime.now() - timedelta(1), datetime.now() + timedelta(1))) print_activity_run_details(activity_runs_paged[0])
def main(): # Azure subscription ID subscription_id = '86d62b86-1ed2-45c1-8f6c-164c9b3db93a' # This program creates this resource group. If it's an existing resource group, comment out the code that creates the resource group rg_name = 'sshResourceGroup' # The data factory name. It must be globally unique. df_name = 'sshDF' # Specify your Active Directory client ID, client secret, and tenant ID credentials = ServicePrincipalCredentials( client_id='276d4d10-d006-48e0-a360-572267e5d400', secret='aphJTM107vbXsrJLC9Ehsk9S2pLxysvGycWnxVE4pjc=', tenant='da67ef1b-ca59-4db2-9a8c-aa8d94617a16') resource_client = ResourceManagementClient(credentials, subscription_id) adf_client = DataFactoryManagementClient(credentials, subscription_id) rg_params = {'location': 'westus'} df_params = {'location': 'westus'} # Create the resource group # Comment out if the resource group already exits resource_client.resource_groups.create_or_update(rg_name, rg_params) # Create the resource group # Comment out if the resource group already exits resource_client.resource_groups.create_or_update(rg_name, rg_params) # Create a data factory df_resource = Factory(location='westus') df = adf_client.factories.create_or_update(rg_name, df_name, df_resource) print_item(df) while df.provisioning_state != 'Succeeded': df = adf_client.factories.get(rg_name, df_name) time.sleep(1) # Create an Azure Storage linked service ls_name = 'storageLinkedService' # IMPORTANT: specify the name and key of your Azure Storage account storage_string = SecureString( 'DefaultEndpointsProtocol=https;AccountName=sshstorageaccount03;AccountKey=et+CstqRneJos+tjRAZcjubCdc2kdpBhISErdyIG/t94iLcrYAKSdD0txJjvR3C7wCrSz+9mcdjJAl05jGlChw==' ) ls_azure_storage = AzureStorageLinkedService( connection_string=storage_string) ls = adf_client.linked_services.create_or_update(rg_name, df_name, ls_name, ls_azure_storage) print_item(ls) # Create an Azure blob dataset (input) ds_name = 'dset_in' ds_ls = LinkedServiceReference(ls_name) blob_path = 'playerscontainer/myteam' blob_filename = 'astroplayers.txt' ds_azure_blob = AzureBlobDataset(ds_ls, folder_path=blob_path, file_name=blob_filename) ds = adf_client.datasets.create_or_update(rg_name, df_name, ds_name, ds_azure_blob) print_item(ds) # Create an Azure blob dataset (output) dsOut_name = 'dset_out' output_blobpath = 'playerscontainer/output' dsOut_azure_blob = AzureBlobDataset(ds_ls, folder_path=output_blobpath) dsOut = adf_client.datasets.create_or_update(rg_name, df_name, dsOut_name, dsOut_azure_blob) print_item(dsOut) # Create a copy activity act_name = 'copyBlobPleayerstoBlob' blob_source = BlobSource() blob_sink = BlobSink() dsin_ref = DatasetReference(ds_name) dsOut_ref = DatasetReference(dsOut_name) copy_activity = CopyActivity(act_name, inputs=[dsin_ref], outputs=[dsOut_ref], source=blob_source, sink=blob_sink) # Create a pipeline with the copy activity p_name = 'copyPipeliness' params_for_pipeline = {} p_obj = PipelineResource(activities=[copy_activity], parameters=params_for_pipeline) p = adf_client.pipelines.create_or_update(rg_name, df_name, p_name, p_obj) print_item(p) # Create a pipeline run. 
    run_response = adf_client.pipelines.create_run(rg_name, df_name, p_name, {})

    # Monitor the pipeline run
    time.sleep(30)
    pipeline_run = adf_client.pipeline_runs.get(rg_name, df_name,
                                                run_response.run_id)
    print("\n\tPipeline run status: {}".format(pipeline_run.status))
    activity_runs_paged = list(
        adf_client.activity_runs.list_by_pipeline_run(
            rg_name, df_name, pipeline_run.run_id,
            datetime.now() - timedelta(1),
            datetime.now() + timedelta(1)))
    print_activity_run_details(activity_runs_paged[0])
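# The samples above call print_item and print_activity_run_details without defining them.
# This is a plausible reconstruction in the style of the ADF quickstart helpers, not the
# authors' own code.
def print_item(group):
    """Print an Azure resource (factory, linked service, dataset, or pipeline)."""
    print("\tName: {}".format(group.name))
    print("\tId: {}".format(group.id))
    if hasattr(group, 'location'):
        print("\tLocation: {}".format(group.location))
    print("\n")

def print_activity_run_details(activity_run):
    """Print a copy activity run's status, bytes read/written, and any error."""
    print("\n\tActivity run details\n")
    print("\tActivity run status: {}".format(activity_run.status))
    if activity_run.status == 'Succeeded':
        print("\tNumber of bytes read: {}".format(activity_run.output['dataRead']))
        print("\tNumber of bytes written: {}".format(activity_run.output['dataWritten']))
        print("\tCopy duration: {}".format(activity_run.output['copyDuration']))
    else:
        print("\tErrors: {}".format(activity_run.error['message']))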
def main(): #load values project_folder = os.path.expanduser( '/home/admin1/Desktop/AzurePythonScript') # adjust as appropriate load_dotenv(os.path.join(project_folder, '.env')) # Azure subscription ID subscription_id = os.environ.get('AZURE_SUBSCRIPTION_ID') # This program creates this resource group. If it's an existing resource group, comment out the code that creates the resource group rg_name = 'ArunScriptResource' # The data factory name. It must be globally unique. df_name = 'TwitterFactoryPyScript' clientid = '' secretkey = '' tenantid = '' print(type(os.environ.get('AZURE_CLIENT_ID'))) clientid += os.environ.get('AZURE_CLIENT_ID') secretkey += os.environ.get('AZURE_CLIENT_SECRET') tenantid += os.environ.get('AZURE_TENANT_ID') # Specify your Active Directory client ID, client secret, and tenant ID credentials = ServicePrincipalCredentials(client_id=clientid, secret=secretkey, tenant=tenantid) resource_client = ResourceManagementClient(credentials, subscription_id) adf_client = DataFactoryManagementClient(credentials, subscription_id) rg_params = {'location': 'eastus'} df_params = {'location': 'eastus'} # create the resource group # comment out if the resource group already exits resource_client.resource_groups.create_or_update(rg_name, rg_params) # Create a data factory df_resource = Factory(location='eastus') df = adf_client.factories.create_or_update(rg_name, df_name, df_resource) print_item(df) while df.provisioning_state != 'Succeeded': df = adf_client.factories.get(rg_name, df_name) time.sleep(1) # Create an Azure Storage linked service ls_name = 'AzurePyScriptLinkedService' # Specify the name and key of your Azure Storage account #storage_string = SecureString( value= # 'DefaultEndpointsProtocol=https;AccountName=arunkafkastorage;AccountKey=tV6Yx8ngd36I6eu8Ow9Lklq7DDLKeFJuslOLnGaX6jD33zCr7AghPso3lkjXKh0SMNMy83NWoklaGRHJTMk/4A==;EndpointSuffix=core.windows.net') storage_string = SecureString( value= 'DefaultEndpointsProtocol=https;AccountName=arunstorage12;AccountKey=iFCTVZveS/XvhhHfL/Phpf/r3UM3CPwSBkEwiQWePdALeW9hamYc6mAEXQMeSjQVrAdCY19hfFlUBLmKbwsbog==;EndpointSuffix=core.windows.net' ) ls_azure_storage = AzureStorageLinkedService( connection_string=storage_string) ls = adf_client.linked_services.create_or_update(rg_name, df_name, ls_name, ls_azure_storage) print_item(ls) # Create an Azure blob dataset (input) ds_name = 'ds_in' ds_ls = LinkedServiceReference(reference_name=ls_name) blob_path = 'adfv2tutorial/input' blob_filename = 'input.txt' ds_azure_blob = AzureBlobDataset(ds_ls, folder_path=blob_path, file_name=blob_filename) ds = adf_client.datasets.create_or_update(rg_name, df_name, ds_name, ds_azure_blob) print_item(ds) # Create an Azure blob dataset (output) dsOut_name = 'ds_out' output_blobpath = 'adfv2tutorial/output' dsOut_azure_blob = AzureBlobDataset(ds_ls, folder_path=output_blobpath) dsOut = adf_client.datasets.create_or_update(rg_name, df_name, dsOut_name, dsOut_azure_blob) print_item(dsOut) # Create a copy activity act_name = 'copyBlobtoBlob' blob_source = BlobSource() blob_sink = BlobSink() dsin_ref = DatasetReference(ds_name) dsOut_ref = DatasetReference(dsOut_name) copy_activity = CopyActivity(act_name, inputs=[dsin_ref], outputs=[dsOut_ref], source=blob_source, sink=blob_sink) # Create a pipeline with the copy activity p_name = 'copyPipeline' params_for_pipeline = {} p_obj = PipelineResource(activities=[copy_activity], parameters=params_for_pipeline) p = adf_client.pipelines.create_or_update(rg_name, df_name, p_name, p_obj) print_item(p) # Create a 
pipeline run
    run_response = adf_client.pipelines.create_run(rg_name, df_name, p_name, {})

    # Monitor the pipeline run
    time.sleep(30)
    pipeline_run = adf_client.pipeline_runs.get(rg_name, df_name,
                                                run_response.run_id)
    print("\n\tPipeline run status: {}".format(pipeline_run.status))
    # activity_runs_paged = list(adf_client.activity_runs.list_by_pipeline_run(rg_name, df_name, pipeline_run.run_id, datetime.now() - timedelta(1), datetime.now() + timedelta(1)))
    # query_by_pipeline_run takes a RunFilterParameters object and returns a response with a .value list.
    filter_params = RunFilterParameters(
        last_updated_after=datetime.now() - timedelta(1),
        last_updated_before=datetime.now() + timedelta(1))
    query_response = adf_client.activity_runs.query_by_pipeline_run(
        rg_name, df_name, pipeline_run.run_id, filter_params)
    print_activity_run_details(query_response.value[0])
def main():
    # Azure subscription ID
    subscription_id = 'a1b8793b-91d4-42e0-9e7a-f55af294f275'

    # This program creates this resource group.
    # If it's an existing resource group, comment out the code that creates it.
    rg_name = 'newcopyBlobToSqlRg'

    # The data factory name. It must be globally unique.
    df_name = 'newcopyBlobToSqlDf'

    # Specify your Active Directory client ID, client secret, and tenant ID
    credentials = ServicePrincipalCredentials(
        client_id='31fe72da-bb34-4243-a365-288a003d57e9',
        secret='702f129a-cc5e-4b03-9a5d-362ee0a6d4e3',
        tenant='c80b7188-f79b-48e5-8008-f9402f981907')
    resource_client = ResourceManagementClient(credentials, subscription_id)
    adf_client = DataFactoryManagementClient(credentials, subscription_id)

    rg_params = {'location': 'eastus'}
    df_params = {'location': 'eastus'}

    # # Create the resource group
    # # comment out if the resource group already exists
    # resource_client.resource_groups.create_or_update(rg_name, rg_params)

    # # Create a data factory
    # df_resource = Factory(location='eastus')
    # df = adf_client.factories.create_or_update(rg_name, df_name, df_resource)
    # print_item(df)
    # while df.provisioning_state != 'Succeeded':
    #     df = adf_client.factories.get(rg_name, df_name)
    #     time.sleep(1)

    # Create an Azure Storage linked service
    ls_name = 'storageLinkedService'

    # Specify the name and key of your Azure Storage account
    storage_string = SecureString(
        value='DefaultEndpointsProtocol=https;AccountName=copyblobtosqlstorage;AccountKey=WlOWgmkCT9a8FB2phDVEgZhCfsrP1p/ZT8pA9Rg63iHyXB2+cZcQmHb8h0g+d3c6WoLa1aDef4fCJ4szkj0ipg==')
    ls_azure_storage = AzureStorageLinkedService(connection_string=storage_string)
    ls = adf_client.linked_services.create_or_update(rg_name, df_name, ls_name,
                                                     ls_azure_storage)
    print_item(ls)

    # Create an Azure blob dataset (input)
    ds_name = 'salary_details_in'
    ds_ls = LinkedServiceReference(reference_name=ls_name)
    blob_path = 'data-streaming-sync/csv/salaryDetails/'
    # blob_filename = 'input.txt'
    ds_azure_blob = AzureBlobDataset(linked_service_name=ds_ls, folder_path=blob_path)
    ds = adf_client.datasets.create_or_update(rg_name, df_name, ds_name, ds_azure_blob)
    print_item(ds)

    # Create an Azure SQL Database linked service
    ls_sql_name = 'sqlDatabaseLinkedService'
    rg_sql_name = 'cloud-shell-storage-southeastasia'
    df_sql_name = 'datafactoryBlobToSql'
    sql_conn_string = ('Server=tcp:datafactorysync-kpi-server.database.windows.net,1433;'
                       'Database=datafactorysync_kpi')
    ls_azure_sql_storage = AzureSqlDatabaseLinkedService(
        connection_string=sql_conn_string,
        password=SecureString(value='******'),
        service_principal_id='31fe72da-bb34-4243-a365-288a003d57e9')
    ls = adf_client.linked_services.create_or_update(rg_sql_name, df_sql_name,
                                                     ls_sql_name, ls_azure_sql_storage)
    print_item(ls)

    # Create an Azure SQL table dataset (output)
    dsOut_name = 'salary_details_out'
    ds_sql_ls = LinkedServiceReference(reference_name=ls_sql_name)
    ds_sql_table_name = 'dbo.Salary_Details'
    ds_azure_sql = AzureSqlTableDataset(linked_service_name=ds_sql_ls,
                                        table_name=ds_sql_table_name)
    ds = adf_client.datasets.create_or_update(rg_sql_name, df_sql_name, dsOut_name,
                                              ds_azure_sql)
    print_item(ds)

    # Create a copy activity
    act_name = 'copyBlobtoSql'
    blob_source = BlobSource()
    sql_sink = SqlSink()
    dsin_ref = DatasetReference(reference_name=ds_name)
    dsOut_ref = DatasetReference(reference_name=dsOut_name)
    copy_activity = CopyActivity(name=act_name, inputs=[dsin_ref], outputs=[dsOut_ref],
                                 source=blob_source, sink=sql_sink)

    # Create a pipeline with the copy activity
    p_name = 'copyPipeline'
    params_for_pipeline = {}
    p_obj = PipelineResource(activities=[copy_activity], parameters=params_for_pipeline)
    p = adf_client.pipelines.create_or_update(rg_name, df_name, p_name, p_obj)
    print_item(p)

    # Create a pipeline run
    run_response = adf_client.pipelines.create_run(rg_name, df_name, p_name,
                                                   parameters={})

    # Monitor the pipeline run
    time.sleep(30)
    pipeline_run = adf_client.pipeline_runs.get(rg_name, df_name, run_response.run_id)
    print("\n\tPipeline run status: {}".format(pipeline_run.status))
    filter_params = RunFilterParameters(
        last_updated_after=datetime.now() - timedelta(1),
        last_updated_before=datetime.now() + timedelta(1))
    query_response = adf_client.activity_runs.query_by_pipeline_run(
        rg_name, df_name, pipeline_run.run_id, filter_params)
    print_activity_run_details(query_response.value[0])
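# The samples call print_item() and print_activity_run_details() without defining
# them anywhere in this section. A minimal sketch of what such helpers might look
# like: .name, .id, .status, .output and .error are real attributes of the SDK
# models, but the specific output keys printed below (dataRead, dataWritten,
# copyDuration) are assumptions about what a copy activity reports.
def print_item(group):
    """Print a few basic properties of an Azure resource or Data Factory entity."""
    print("\tName: {}".format(group.name))
    print("\tId: {}".format(group.id))
    if hasattr(group, 'location'):
        print("\tLocation: {}".format(group.location))
    print("")


def print_activity_run_details(activity_run):
    """Print the status and copy statistics of a single activity run."""
    print("\n\tActivity run details\n")
    print("\tActivity run status: {}".format(activity_run.status))
    if activity_run.status == 'Succeeded':
        output = activity_run.output or {}
        print("\tNumber of bytes read: {}".format(output.get('dataRead')))
        print("\tNumber of bytes written: {}".format(output.get('dataWritten')))
        print("\tCopy duration: {}".format(output.get('copyDuration')))
    else:
        print("\tErrors: {}".format(activity_run.error))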
def main():
    # Azure subscription ID
    subscription_id = '97427991-cf70-407e-92c8-19ce6406c848'

    # This program creates this resource group.
    # If it's an existing resource group, comment out the code that creates it.
    rg_name = 'ADFTutorialResourceGroup'

    # The data factory name. It must be globally unique.
    df_name = 'MidTermVet'

    # Specify your Active Directory client ID, client secret, and tenant ID
    credentials = ServicePrincipalCredentials(
        client_id='Default Directory/fee31cfc-b74a-4daf-89e9-af494c350705',
        secret='MicroRu$h2112',
        tenant='fee31cfc-b74a-4daf-89e9-af494c350705')
    resource_client = ResourceManagementClient(credentials, subscription_id)
    adf_client = DataFactoryManagementClient(credentials, subscription_id)

    rg_params = {'location': 'eastus'}
    df_params = {'location': 'eastus'}

    # Create the resource group
    # comment out if the resource group already exists
    resource_client.resource_groups.create_or_update(rg_name, rg_params)

    # Create a data factory
    df_resource = Factory(location='eastus')
    df = adf_client.factories.create_or_update(rg_name, df_name, df_resource)
    print_item(df)
    while df.provisioning_state != 'Succeeded':
        df = adf_client.factories.get(rg_name, df_name)
        time.sleep(1)

    # Create an Azure Storage linked service
    ls_name = 'storageLinkedService'

    # IMPORTANT: specify the name and key of your Azure Storage account.
    storage_string = SecureString(
        'DefaultEndpointsProtocol=https;AccountName=<storageaccountname>;AccountKey=<storageaccountkey>')
    ls_azure_storage = AzureStorageLinkedService(connection_string=storage_string)
    ls = adf_client.linked_services.create_or_update(rg_name, df_name, ls_name,
                                                     ls_azure_storage)
    print_item(ls)

    # Create an Azure blob dataset (input)
    ds_name = 'ds_in'
    ds_ls = LinkedServiceReference(ls_name)
    blob_path = 'adfv2tutorial/input'
    blob_filename = 'input.txt'
    ds_azure_blob = AzureBlobDataset(ds_ls, folder_path=blob_path,
                                     file_name=blob_filename)
    ds = adf_client.datasets.create_or_update(rg_name, df_name, ds_name, ds_azure_blob)
    print_item(ds)

    # Create an Azure blob dataset (output)
    dsOut_name = 'ds_out'
    output_blobpath = 'adfv2tutorial/output'
    dsOut_azure_blob = AzureBlobDataset(ds_ls, folder_path=output_blobpath)
    dsOut = adf_client.datasets.create_or_update(rg_name, df_name, dsOut_name,
                                                 dsOut_azure_blob)
    print_item(dsOut)

    # Create a copy activity
    act_name = 'copyBlobtoBlob'
    blob_source = BlobSource()
    blob_sink = BlobSink()
    dsin_ref = DatasetReference(ds_name)
    dsOut_ref = DatasetReference(dsOut_name)
    copy_activity = CopyActivity(act_name, inputs=[dsin_ref], outputs=[dsOut_ref],
                                 source=blob_source, sink=blob_sink)

    # Create a pipeline with the copy activity
    p_name = 'copyPipeline'
    params_for_pipeline = {}
    p_obj = PipelineResource(activities=[copy_activity], parameters=params_for_pipeline)
    p = adf_client.pipelines.create_or_update(rg_name, df_name, p_name, p_obj)
    print_item(p)

    # Create a pipeline run
    run_response = adf_client.pipelines.create_run(rg_name, df_name, p_name, {})

    # Monitor the pipeline run
    time.sleep(30)
    pipeline_run = adf_client.pipeline_runs.get(rg_name, df_name, run_response.run_id)
    print("\n\tPipeline run status: {}".format(pipeline_run.status))
    activity_runs_paged = list(adf_client.activity_runs.list_by_pipeline_run(
        rg_name, df_name, pipeline_run.run_id,
        datetime.now() - timedelta(1), datetime.now() + timedelta(1)))
    print_activity_run_details(activity_runs_paged[0])
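# The samples above wait a fixed time.sleep(30) before reading the run status, which
# can report an in-progress run or waste time on a fast one. A sketch of a polling
# helper instead; the terminal status strings are assumptions based on the values
# Data Factory reports, and adf_client/rg_name/df_name/run_response come from the
# surrounding sample.
def wait_for_pipeline_run(adf_client, rg_name, df_name, run_id, poll_seconds=15):
    """Poll a pipeline run until it reaches a terminal state; return the final run."""
    while True:
        pipeline_run = adf_client.pipeline_runs.get(rg_name, df_name, run_id)
        print("\tPipeline run status: {}".format(pipeline_run.status))
        if pipeline_run.status in ('Succeeded', 'Failed', 'Cancelled'):
            return pipeline_run
        time.sleep(poll_seconds)

# Usage (hypothetical, replacing the fixed sleep above):
# pipeline_run = wait_for_pipeline_run(adf_client, rg_name, df_name, run_response.run_id)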
def main():
    # Azure subscription ID
    subscription_id = 'alxfed'

    # This program creates this resource group.
    # If it's an existing resource group, comment out the code that creates it.
    rg_name = 'lakemichigan'  # <Azure resource group name>

    # The data factory name. It must be globally unique.
    df_name = 'lakemichigan_datafactory'

    # Specify your Active Directory client ID, client secret, and tenant ID
    credentials = ServicePrincipalCredentials(
        client_id='657b13a2-da4d-44e1-9c0e-5763fd2d67a4',   # <AD client ID>
        secret='I23s]f1:.VN*2u+RWmRJZ/xBxe2dwFys',          # <client secret>
        tenant='0263cb8d-97fc-41e7-b762-40e2ea8dc1da')      # <tenant ID>
    resource_client = ResourceManagementClient(credentials, subscription_id)
    adf_client = DataFactoryManagementClient(credentials, subscription_id)

    rg_params = {'location': 'eastus'}
    df_params = {'location': 'eastus'}

    # Create the resource group
    # comment out if the resource group already exists
    # resource_client.resource_groups.create_or_update(rg_name, rg_params)

    # Create a data factory
    df_resource = Factory(location='eastus')
    df = adf_client.factories.create_or_update(rg_name, df_name, df_resource)
    print_item(df)
    while df.provisioning_state != 'Succeeded':
        df = adf_client.factories.get(rg_name, df_name)
        time.sleep(1)

    # Create an Azure Storage linked service
    ls_name = 'storageLinkedService'

    # Specify the name and key of your Azure Storage account
    string = r'DefaultEndpointsProtocol=https;AccountName=lakemichigan;AccountKey={account_key}'
    storage_string = SecureString(string)
    ls_azure_storage = AzureStorageLinkedService(connection_string=storage_string)
    ls = adf_client.linked_services.create_or_update(rg_name, df_name, ls_name,
                                                     ls_azure_storage)
    print_item(ls)

    # Create an Azure blob dataset (input)
    ds_name = 'ds_in'
    ds_ls = LinkedServiceReference(ls_name)
    blob_path = 'factory/input'
    blob_filename = 'input.txt'
    ds_azure_blob = AzureBlobDataset(ds_ls, folder_path=blob_path,
                                     file_name=blob_filename)
    ds = adf_client.datasets.create_or_update(rg_name, df_name, ds_name, ds_azure_blob)
    print_item(ds)

    # Create an Azure blob dataset (output)
    dsOut_name = 'ds_out'
    output_blobpath = 'factory/output'
    dsOut_azure_blob = AzureBlobDataset(ds_ls, folder_path=output_blobpath)
    dsOut = adf_client.datasets.create_or_update(rg_name, df_name, dsOut_name,
                                                 dsOut_azure_blob)
    print_item(dsOut)

    # Create a copy activity
    act_name = 'copyBlobtoBlob'
    blob_source = BlobSource()
    blob_sink = BlobSink()
    dsin_ref = DatasetReference(ds_name)
    dsOut_ref = DatasetReference(dsOut_name)
    copy_activity = CopyActivity(act_name, inputs=[dsin_ref], outputs=[dsOut_ref],
                                 source=blob_source, sink=blob_sink)

    # Create a pipeline with the copy activity
    p_name = 'copyPipeline'
    params_for_pipeline = {}
    p_obj = PipelineResource(activities=[copy_activity], parameters=params_for_pipeline)
    p = adf_client.pipelines.create_or_update(rg_name, df_name, p_name, p_obj)
    print_item(p)

    # Create a pipeline run
    run_response = adf_client.pipelines.create_run(rg_name, df_name, p_name, {})

    # Monitor the pipeline run
    time.sleep(30)
    pipeline_run = adf_client.pipeline_runs.get(rg_name, df_name, run_response.run_id)
    print("\n\tPipeline run status: {}".format(pipeline_run.status))
    activity_runs_paged = list(adf_client.activity_runs.list_by_pipeline_run(
        rg_name, df_name, pipeline_run.run_id,
        datetime.now() - timedelta(1), datetime.now() + timedelta(1)))
    print_activity_run_details(activity_runs_paged[0])
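# The two blob-to-blob samples above query activity runs with the older
# activity_runs.list_by_pipeline_run(..., after, before) call. In newer releases of
# azure-mgmt-datafactory that operation is query_by_pipeline_run, which takes a
# RunFilterParameters object (the blob-to-SQL sample earlier in this section already
# uses this form). A sketch of the equivalent query, assuming adf_client, rg_name,
# df_name and pipeline_run come from the sample above:
filter_params = RunFilterParameters(
    last_updated_after=datetime.now() - timedelta(1),
    last_updated_before=datetime.now() + timedelta(1))
query_response = adf_client.activity_runs.query_by_pipeline_run(
    rg_name, df_name, pipeline_run.run_id, filter_params)
print_activity_run_details(query_response.value[0])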