示例#1
0
def download_gen1_terms(config):
    """Download all glossary terms from Azure Data Catalog Gen 1 to disk.

    Requests the glossary "terms" endpoint and keeps following the
    ``nextLink`` value of each response until a response carries none. The
    accumulated terms are written as JSON to
    ``config["Default"]["ADCTermsPath"]`` and also returned.

    :param config:
        Mapping of sections to settings. Reads ``CATALOG_NAME``,
        ``GLOSSARY_NAME``, ``TENANT_ID``, ``CLIENT_ID`` and ``CLIENT_SECRET``
        from the ``ADCGen1Client`` section and ``ADCTermsPath`` from the
        ``Default`` section.
    :return: The concatenated ``"value"`` entries of every response.
    :rtype: list
    :raises requests.HTTPError:
        If any request comes back with a 4xx/5xx status.
    """
    adc_settings = config["ADCGen1Client"]
    catalog_name = adc_settings["CATALOG_NAME"]
    glossary_name = adc_settings["GLOSSARY_NAME"]
    api_version = "2016-03-30"

    auth = ServicePrincipalAuthentication(
        tenant_id=adc_settings["TENANT_ID"],
        client_id=adc_settings["CLIENT_ID"],
        client_secret=adc_settings["CLIENT_SECRET"]
    )

    # Need to update the resource we're authenticating against
    auth.data.update({"resource": "https://api.azuredatacatalog.com"})

    # This endpoint provides you with all glossary terms in ADC Gen1
    enumerate_uri = f"https://api.azuredatacatalog.com/catalogs/{catalog_name}/glossaries/{glossary_name}/terms?api-version={api_version}"

    output = []
    while enumerate_uri:
        # Headers are requested on every iteration, as in the original loop.
        results = requests.get(
            enumerate_uri,
            headers=auth.get_authentication_headers()
        )
        # Fail loudly on an HTTP error instead of tripping over a missing
        # "value" key (or an unparsable body) a few lines further down.
        results.raise_for_status()
        content = results.json()
        output.extend(content["value"])
        # A missing (or empty) nextLink means this was the last response.
        enumerate_uri = content.get("nextLink")

    with open(config["Default"]["ADCTermsPath"], 'w') as fp:
        json.dump(output, fp, indent=1)

    return output
示例#2
0
def uploadPurview(purview_lineage):
    """Upload ``purview_lineage`` as one batch through an ``AtlasClient``.

    Credentials and the endpoint URL are read from the attributes of the
    ``Azure`` object. The upload response is not returned.
    """
    credentials = ServicePrincipalAuthentication(
        tenant_id=Azure.tenant_id,
        client_id=Azure.client_id,
        client_secret=Azure.client_secret,
    )
    atlas = AtlasClient(
        endpoint_url=Azure.endpoint_url,
        authentication=credentials,
    )

    # The response is intentionally discarded; the function returns None.
    atlas.upload_entities(batch=purview_lineage)
示例#3
0
def test_purview_client_integration():
    """Check that ``get_glossary`` on a ``PurviewClient`` returns something.

    The service principal and account name come from the ``TENANT_ID``,
    ``CLIENT_ID``, ``CLIENT_SECRET`` and ``PURVIEW_NAME`` environment
    variables; each falls back to an empty string when unset.
    """
    env = os.environ

    # Authenticate against your Purview service
    credentials = ServicePrincipalAuthentication(
        tenant_id=env.get("TENANT_ID", ""),
        client_id=env.get("CLIENT_ID", ""),
        client_secret=env.get("CLIENT_SECRET", ""),
    )
    purview = PurviewClient(
        account_name=env.get("PURVIEW_NAME", ""),
        authentication=credentials,
    )

    glossary = purview.get_glossary()

    assert glossary is not None
示例#4
0
def save_entities(atlas_mysql):
    """Upload the entities held by ``atlas_mysql`` and record assigned GUIDs.

    The instance, databases, tables and columns are uploaded in a single
    ``upload_entities`` call. Every value of the response's
    ``guidAssignments`` mapping is then appended, one per line, to a file
    named ``entities.<timestamp>.txt`` in the current working directory.

    :param atlas_mysql:
        Object exposing ``instance`` (a single entity) and the iterables
        ``dbs``, ``db_tables`` and ``table_columns``.
    """
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get('AZURE_TENANT_ID', ''),
        client_id=os.environ.get('AZURE_CLIENT_ID', ''),
        client_secret=os.environ.get('AZURE_CLIENT_SECRET', ''))
    client = PurviewClient(account_name=os.environ.get('PURVIEW_CATALOG_NAME',
                                                       ''),
                           authentication=oauth)

    # Same ordering as before: instance, databases, tables, then columns.
    entities = [atlas_mysql.instance]
    entities.extend(atlas_mysql.dbs)
    entities.extend(atlas_mysql.db_tables)
    entities.extend(atlas_mysql.table_columns)

    assignments = client.upload_entities(entities)['guidAssignments']

    # The context manager guarantees the file is closed even if a write
    # fails part-way through.
    with open(f"entities.{time.time()}.txt", "a") as f:
        for assigned_guid in assignments.values():
            f.write(assigned_guid + "\n")
示例#5
0
import json
import os
import time

from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core.client import PurviewClient
from pyapacheatlas.core import AtlasEntity
from pyapacheatlas.core.typedef import EntityTypeDef

# Service principal credentials are read from the environment; any variable
# that is unset falls back to an empty string.
oauth = ServicePrincipalAuthentication(
    tenant_id=os.environ.get("TENANT_ID", ""),
    client_id=os.environ.get("CLIENT_ID", ""),
    client_secret=os.environ.get("CLIENT_SECRET", "")
)
# Module-level client, constructed when this module is imported. The account
# name comes from the PURVIEW_NAME environment variable.
client = PurviewClient(
    account_name = os.environ.get("PURVIEW_NAME", ""),
    authentication=oauth
)

def test_set_relationship_different_ways():
    """Build a table entity with column entities and link them two ways.

    In the portion visible here, one column is attached from the table side
    (``addRelationship``) and one from the column side (by updating
    ``relationshipAttributes``).
    NOTE(review): ``c3`` and ``c4`` are created but not used in the lines
    shown, and nothing is uploaded or asserted -- confirm against the full
    test.
    """

    # One "hive_table" entity and four "hive_column" entities, each given a
    # distinct negative guid.
    ae = AtlasEntity("rel01","hive_table", "tests://rel01", guid=-1)
    c1 = AtlasEntity("rel01#01", "hive_column", "tests://rel01#c", guid=-2, attributes={"type":"str"})
    c2 = AtlasEntity("rel01#02", "hive_column", "tests://rel02#c", guid=-3, attributes={"type":"str"})
    c3 = AtlasEntity("rel01#03", "hive_column", "tests://rel03#c", guid=-4, attributes={"type":"str"})
    c4 = AtlasEntity("rel01#04", "hive_column", "tests://rel04#c", guid=-5, attributes={"type":"str"})

    # Add c1 as the only relationship
    ae.addRelationship(columns=[c1.to_json(minimum=True)])

    # Link c2 from the column side: set its "table" relationship attribute to
    # the table's minimal JSON form.
    c2.relationshipAttributes.update({"table": ae.to_json(minimum=True) })
示例#6
0
# File locations used by the script. `folder_path`, `output_path` and
# `config` are defined outside this excerpt.
unchanged_path = os.path.join(folder_path, "glossary.json")
glossary_prep_path = os.path.join(output_path, "glossary_prepared.json")

# Further paths taken from the "Default" section of the configuration.
relationships_guid_path = config["Default"]["GlossaryRelationships"]
old_to_new_glossary_guid_path = config["Default"]["GlossaryOldToNew"]

if __name__ == "__main__":
    # Command line: a single optional flag, --skip-download.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--skip-download",
        action="store_true",
        help="Use if you've already written the glossary to disk.")
    args = parser.parse_args()

    # Two independent clients, each with its own service principal and
    # endpoint: one built from the "OldClient" config section, one from the
    # "NewClient" section.
    oauth_old = ServicePrincipalAuthentication(
        tenant_id=config["OldClient"]["TENANT_ID"],
        client_id=config["OldClient"]["CLIENT_ID"],
        client_secret=config["OldClient"]["CLIENT_SECRET"])
    old_client = AtlasClient(endpoint_url=config["OldClient"]["ENDPOINT_URL"],
                             authentication=oauth_old)
    oauth_new = ServicePrincipalAuthentication(
        tenant_id=config["NewClient"]["TENANT_ID"],
        client_id=config["NewClient"]["CLIENT_ID"],
        client_secret=config["NewClient"]["CLIENT_SECRET"])
    new_client = AtlasClient(endpoint_url=config["NewClient"]["ENDPOINT_URL"],
                             authentication=oauth_new)

    gt = GuidTracker()

    # Export the glossary terms
    # (this step is skipped entirely when --skip-download was passed)
    if not args.skip_download:
        print("Exporting the old glossary terms")
示例#7
0
        for row in output:
            # Update the related term if it exists
            if row["related_term"] != "":
                row["related_term"] = term_id_to_name[row["related_term"]]
            importwriter.writerow(list(row.values()))


if __name__ == "__main__":
    # The config path is relative, so it is resolved against the current
    # working directory.
    config = configparser.ConfigParser()
    config.read("./samples/migrateADCGen1/config.ini")

    # Configure your Purview Authentication
    oauth = ServicePrincipalAuthentication(
        tenant_id=config["PurviewClient"]["TENANT_ID"],
        client_id=config["PurviewClient"]["CLIENT_ID"],
        client_secret=config["PurviewClient"]["CLIENT_SECRET"]
    )
    # NOTE(review): `client` is not used in the lines visible here --
    # presumably a later upload step uses it; confirm.
    client = PurviewClient(
        account_name=config["PurviewClient"]["PURVIEW_ACCOUNT_NAME"],
        authentication=oauth
    )

    # Download the Gen 1 Terms to a json document
    print("Downloading ADC Gen 1 Terms...")
    download_gen1_terms(config)
    print("Successfully downloaded ADC Gen 1 Terms.")

    # Convert the json to a csv for import
    print("Converting ADC Gen 1 Terms to be CSV for Purview Upload...")
    convert_gen1_to_purview_terms(config)
示例#8
0
# Databricks notebook source
import argparse
import json
import os

from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient, AtlasEntity, AtlasProcess, TypeCategory
from pyapacheatlas.core.util import GuidTracker
from pyapacheatlas.core.typedef import AtlasAttributeDef, EntityTypeDef, RelationshipTypeDef
from pyapacheatlas.readers import ExcelConfiguration, ExcelReader

# The above cell gets the v_tenant_id,v_client_id etc.

# Authenticate with a service principal. The v_* names are not defined in
# this cell; they must already exist in the notebook session.
auth = ServicePrincipalAuthentication(tenant_id=v_tenant_id,
                                      client_id=v_client_id,
                                      client_secret=v_client_secret)

# Create a client to connect to your service.
client = PurviewClient(account_name=v_data_catalog_name, authentication=auth)

# NOTE(review): `guid` is not used in the lines visible here.
guid = GuidTracker()

# COMMAND ----------

# Search for the entity you want to delete
# (json and os are already imported in the first cell; they are repeated here)
import json
import os
# Run a search for the literal string "loan_risk_data.csv" and pretty-print
# every item the search yields as indented JSON.
search = client.search_entities("loan_risk_data.csv")
for page in search:
    print(json.dumps(page, indent=2))
示例#9
0
import time
import os
import sys
import array

from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient, AtlasEntity, AtlasProcess

# Path of the input file, taken from the first command-line argument
# (raises IndexError when the script is run without one).
filename = sys.argv[1]

# Service principal credentials and the Purview account name are read from
# the environment; any variable that is unset falls back to an empty string.
oauth = ServicePrincipalAuthentication(
    tenant_id=os.environ.get('AZURE_TENANT_ID', ''),
    client_id=os.environ.get('AZURE_CLIENT_ID', ''),
    client_secret=os.environ.get('AZURE_CLIENT_SECRET', ''))
client = PurviewClient(account_name=os.environ.get('PURVIEW_CATALOG_NAME', ''),
                       authentication=oauth)

# Read one GUID per line. The context manager closes the file even if
# reading fails, and blank lines are skipped so that no empty GUID is sent
# to the service.
with open(filename) as infile:
    guids = [line.strip() for line in infile if line.strip()]

# Delete all listed entities in a single call, then remove the input file.
# If the delete call raises, the file is left in place, as before.
client.delete_entity(guids)
os.remove(filename)