# Exemplo n.º 1
# 0
    wb.save(file_path)


if __name__ == "__main__":
    """
    This sample provides an end to end sample of reading an excel file,
    creating a custom type and then uploading an entity of that custom type.
    """

    # Authenticate against your Atlas server
    # Service-principal credentials are read from environment variables;
    # empty-string defaults mean a missing variable fails at request time.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # SETUP: This is just setting up the excel file for you
    file_path = "./demo_custom_type_and_entity_upload.xlsx"
    excel_config = ExcelConfiguration()
    excel_reader = ExcelReader(excel_config)

    # Create an empty excel template to be populated
    excel_reader.make_template(file_path)
    # This is just a helper to fill in some demo data
    fill_in_type_workbook(file_path, excel_config)
    fill_in_entity_workbook(file_path, excel_config)

    # ACTUAL WORK: This parses our excel file and creates a batch to upload
    # NOTE(review): typedefs/entities are parsed but never uploaded in this
    # visible span — the upload step appears to be truncated from the sample.
    typedefs = excel_reader.parse_entity_defs(file_path)
    entities = excel_reader.parse_bulk_entities(file_path)

if __name__ == "__main__":
    """
    This sample provides an end to end sample of reading an excel file and
    generating a set of entities that would create or update a Process entity
    that links (applies lineage to) an input and output entity that already
    exists in your data catalog.
    """

    # Authenticate against your Atlas server
    # Credentials come from environment variables (empty default if unset).
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # SETUP: This is just setting up the excel file for you
    file_path = "./demo_update_lineage_upload.xlsx"
    excel_config = ExcelConfiguration()
    excel_reader = ExcelReader(excel_config)

    # We are going to cheat here and create some entities before
    # we get to parsing the spreadsheet so we have something to work with.
    # This is not necessary if you are working with existing entities.
    # The negative guid is a local placeholder that the server replaces
    # with a real guid on upload.
    # NOTE(review): the sample is truncated right after this statement —
    # the matching outputTable definition is cut off.
    inputTable = AtlasEntity(
        name="demo_hive_source",
        typeName="hive_table",
        qualified_name="pyapacheatlas://demo_update_lineage_input",
        guid=-100)
    outputTable = AtlasEntity(
# Exemplo n.º 3
# 0
import os

from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient, AtlasEntity, AtlasProcess, TypeCategory
from pyapacheatlas.core.util import GuidTracker
from pyapacheatlas.core.typedef import AtlasAttributeDef, EntityTypeDef, RelationshipTypeDef
from pyapacheatlas.readers import ExcelConfiguration, ExcelReader

# The above cell gets the v_tenant_id,v_client_id etc.

# Service-principal auth; the v_* values are expected to be defined in an
# earlier notebook cell (see the comment above) — TODO confirm in the caller.
auth = ServicePrincipalAuthentication(tenant_id=v_tenant_id,
                                      client_id=v_client_id,
                                      client_secret=v_client_secret)

# Create a client to connect to your service.
client = PurviewClient(account_name=v_data_catalog_name, authentication=auth)

# Tracker that hands out unique placeholder guids for entity batches.
guid = GuidTracker()

# COMMAND ----------

# Search for the entity you want to delete
import json
import os
# search_entities returns a paged iterable; print each page as JSON.
search = client.search_entities("loan_risk_data.csv")
for page in search:
    print(json.dumps(page, indent=2))

# COMMAND ----------

# MAGIC %md
# Exemplo n.º 4
# 0
import os
import json

from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient, AtlasEntity, AtlasProcess

# Debug aid: show which tenant the environment points at before connecting.
print(os.environ.get('AZURE_TENANT_ID', ''))

# Authenticate against Purview with a service principal read from
# environment variables (empty-string defaults if unset).
oauth = ServicePrincipalAuthentication(
    tenant_id=os.environ.get('AZURE_TENANT_ID', ''),
    client_id=os.environ.get('AZURE_CLIENT_ID', ''),
    client_secret=os.environ.get('AZURE_CLIENT_SECRET', ''))
client = PurviewClient(account_name=os.environ.get('PURVIEW_CATALOG_NAME', ''),
                       authentication=oauth)
# Load the MySQL type definitions from disk and upload them, overwriting any
# existing definitions of the same name (force_update=True). Use a context
# manager so the file handle is closed deterministically — the original
# called open() inline and never closed it.
with open('./pyapacheatlas_mysql_typedefs_v2.json', 'r') as typedef_file:
    client.upload_typedefs(json.load(typedef_file), force_update=True)
from pyapacheatlas.core.util import GuidTracker
from pyapacheatlas.core.typedef import AtlasAttributeDef, EntityTypeDef, RelationshipTypeDef

# Add your credentials here or set them as environment variables
# Fallback credential values used only when the corresponding environment
# variables below are unset.
tenant_id = ""
client_id = ""
client_secret = ""
# NOTE(review): data_catalog_name is defined but unused in this visible
# span — PURVIEW_NAME falls back to "" instead; confirm intent.
data_catalog_name = ""

# COMMAND ----------

oauth = ServicePrincipalAuthentication(
    tenant_id=os.environ.get("TENANT_ID", tenant_id),
    client_id=os.environ.get("CLIENT_ID", client_id),
    client_secret=os.environ.get("CLIENT_SECRET", client_secret))
client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                       authentication=oauth)
guid = GuidTracker()

# COMMAND ----------

# Set up a few types and relationships
# This is a one time thing but necessary to make the demo work
# It also demonstrates how you can capture different attributes
# for your dataframes, dataframe columns, and jobs.
# schemaElementAttribute makes "columns" show in the Purview schema tab.
type_spark_df = EntityTypeDef(
    name="custom_spark_dataframe",
    attributeDefs=[AtlasAttributeDef(name="format").to_json()],
    superTypes=["DataSet"],
    options={"schemaElementAttribute": "columns"})
type_spark_columns = EntityTypeDef(
    name="custom_spark_dataframe_column",
                                RelationshipTypeDef)
from pyapacheatlas.core.typedef import EntityTypeDef
from pyapacheatlas.core.util import GuidTracker

if __name__ == "__main__":
    """
    This sample provides shows how to create custom type definitions and
    use a relationship to create a table/columns connection. Lastly it
    creates entities to demonstrate using the custom types with the
    relationship.
    """
    # Credentials from environment variables; empty defaults if unset.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Create Type Definitions and Relationship Definition
    # The Relationship defines the table / columns connection between
    # the entities.

    # Column type: a DataSet subtype with one required string attribute.
    column_entity_def = EntityTypeDef(name="pyapacheatlas_demo_column",
                                      superTypes=["DataSet"],
                                      attributeDefs=[
                                          AtlasAttributeDef("data_type",
                                                            typeName="string",
                                                            isOptional=False)
                                      ])

    table_entity_def = EntityTypeDef(
        name="pyapacheatlas_demo_table",
# Exemplo n.º 7
# 0
    wb.save(file_path)


if __name__ == "__main__":
    """
    This sample provides an end to end sample of reading an excel file,
    generating a batch of entities, and then uploading the entities to
    your data catalog.
    """

    # Authenticate against your Atlas server
    # Credentials from environment variables; empty defaults if unset.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # SETUP: This is just setting up the excel file for you
    file_path = "./demo_bulk_entities_upload.xlsx"
    excel_config = ExcelConfiguration()
    excel_reader = ExcelReader(excel_config)

    # Create an empty excel template to be populated
    excel_reader.make_template(file_path)
    # This is just a helper to fill in some demo data
    fill_in_workbook(file_path, excel_config)

    # ACTUAL WORK: This parses our excel file and creates a batch to upload
    # NOTE(review): the upload of `entities` appears truncated from this span.
    entities = excel_reader.parse_bulk_entities(file_path)

    # This is what is getting sent to your Atlas server
# Exemplo n.º 8
# 0
    Custom Lineage requires at least three entities. One 'Process' entity
    and at least two 'DataSet' entities. The process entity takes in the
    two dataset entities ('minified' to be just the guid, qualifiedname,
    and type) as inputs and outputs.

    Then the entities are uploaded to your Data Catalog and resulting json
    is printed.
    """

    # Authenticate against your Atlas server
    # Credentials from environment variables; empty defaults if unset.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Create two entities with AtlasEntity
    # You must provide a name, typeName, qualified_name, and guid
    # the guid must be a negative number and unique in your batch
    # being uploaded.
    input01 = AtlasEntity(name="input01",
                          typeName="DataSet",
                          qualified_name="pyapacheatlas://demoinput01",
                          guid=-100)
    output01 = AtlasEntity(name="output01",
                           typeName="DataSet",
                           qualified_name="pyapacheatlas://demooutput01",
                           guid=-101)

    # The Atlas Process is the lineage component that links the two
# Exemplo n.º 9
# 0
# PyApacheAtlas packages
# Connect to Atlas via a Service Principal
from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient, AtlasClassification, AtlasEntity, AtlasProcess
from pyapacheatlas.core.util import AtlasException

# Builds two placeholder DataSet entities; the qualified names suggest a
# classification demo, but the rest of the sample is truncated — confirm
# against the original sample before relying on this description.
if __name__ == "__main__":
    """
    """

    # Authenticate against your Atlas server
    # Credentials from environment variables; empty defaults if unset.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Create an entity
    # You must provide a name, typeName, qualified_name, and guid
    # the guid must be a negative number and unique in your batch
    # being uploaded.
    input01 = AtlasEntity(
        name="input01",
        typeName="DataSet",
        qualified_name="pyapacheatlas://demoinputclassification01",
        guid=-100)
    input02 = AtlasEntity(
        name="input02",
        typeName="DataSet",
        qualified_name="pyapacheatlas://demoinputclassification02",
        guid=-101)
# Exemplo n.º 10
# 0
    This sample provides an example of updating a custom lineage and an existing
    entity 'manually' through the rest api / pyapacheatlas classes.

    Lineage can be updated for an entity by changing the inputs or outputs
    attributes of a Process entity.

    An existing entity can be updated by uploading a partial update. Only
    attributes that are referenced will be updated, all others stay the same.
    """

    # Authenticate against your Atlas server
    # Credentials from environment variables; empty defaults if unset.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Assuming you want to update all of the inputs, all of the outputs, or
    # all of both, we can create an AtlasProcess object with the minimum
    # requirements of name, typeName, and qualifedName.

    # Start with null/None inputs and outputs and we will fill them in.
    # The commented block below is left as an illustration of the call shape.
    # existing_process = AtlasProcess(
    #     name="sample_process_xyz",
    #     typeName="Process",
    #     qualified_name="pyapacheatlas://democustomprocess",
    #     inputs=None, # Set to None so no update will occur
    #     outputs=None, # We will update this with .outputs below
    #     guid=-101
    # )
# Exemplo n.º 11
# 0
import time

from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient, AtlasEntity, AtlasProcess, TypeCategory
from pyapacheatlas.core.util import GuidTracker
from pyapacheatlas.core.typedef import AtlasAttributeDef, EntityTypeDef, RelationshipTypeDef
from pyapacheatlas.readers import ExcelConfiguration, ExcelReader

# The above cell gets the v_tenant_id,v_client_id etc.

# Service-principal auth; the v_* values are expected from an earlier
# notebook cell (see the comment above) — TODO confirm in the caller.
auth = ServicePrincipalAuthentication(tenant_id=v_tenant_id,
                                      client_id=v_client_id,
                                      client_secret=v_client_secret)

# Create a client to connect to your service.
client = PurviewClient(account_name=v_data_catalog_name, authentication=auth)

# Tracker that hands out unique placeholder guids for entity batches.
guid = GuidTracker()

# COMMAND ----------

# MAGIC %md
# MAGIC ##### 2. Setup Custom Entity Types
# MAGIC Setup custom entities to capture Databricks Tables, Columns and Jobs

# COMMAND ----------

# DBTITLE 0,databricks-table entity type
# Set up the new entity types to capture delta lake tables and databricks jobs

# Databricks Table
from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import AtlasAttributeDef, AtlasEntity, PurviewClient
from pyapacheatlas.core.typedef import EntityTypeDef, TypeCategory

if __name__ == "__main__":
    """
    This sample provides shows how to create custom type definitions and
    how to creates entities using a custom type.
    """

    # Credentials from environment variables; empty defaults if unset.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Create an entity type definition with three columns (column1, 2, 3)
    # with column1 required.
    # column3 is a string array with SET cardinality (unordered, unique).
    edef = EntityTypeDef(name="pyapacheatlas_create_type_def_sample",
                         attributeDefs=[
                             AtlasAttributeDef("column1",
                                               typeName="string",
                                               isOptional=False),
                             AtlasAttributeDef("column2", typeName="int"),
                             AtlasAttributeDef("column3",
                                               typeName="array<string>",
                                               cardinality="SET"),
                         ],
                         superTypes=["DataSet"])
# Exemplo n.º 13
# 0
# Connect to Atlas via a Service Principal
from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient  # Communicate with your Atlas server

if __name__ == "__main__":
    """
    This sample provides an example of removing a classification from a given
    entity.
    """

    # Authenticate against your Atlas server
    # Credentials from environment variables; empty defaults if unset.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # When you know the GUID that you want to delete
    # (the guid here is a sample value — replace with a real entity guid).
    response = client.declassify_entity(
        guid="b58fc81e-a85f-4dfc-aad1-ee33b3421b83",
        classificationName="MICROSOFT.PERSONAL.IPADDRESS")
    print(json.dumps(response, indent=2))

    # When you need to find multiple classifications to delete on an entity.
    # Get all the classifications and then retrieve the "list" attribute.
    classifications = client.get_entity_classifications(
        guid="b58fc81e-a85f-4dfc-aad1-ee33b3421b83")["list"]

    # For every classification, remove it.
    for classification in classifications:
        response = client.declassify_entity(
# Exemplo n.º 14
# 0
    wb.save(file_path)


if __name__ == "__main__":
    """
    This sample provides an end to end sample of reading an excel file,
    generating a table and column lineage set of entities, and then
    uploading the entities to your data catalog.
    """

    # Authenticate against your Atlas server
    # Credentials from environment variables; empty defaults if unset.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Create an empty excel template to be populated
    file_path = "./atlas_excel_template.xlsx"
    excel_config = ExcelConfiguration()
    excel_reader = ExcelReader(excel_config)

    excel_reader.make_template(file_path)

    # Helper (defined elsewhere in the sample) that fills in demo data.
    fill_in_workbook(file_path, excel_config)

    # Generate the base atlas type defs for the demo of table and column lineage
    atlas_type_defs = column_lineage_scaffold("demo",
                                              use_column_mapping=True,
                                              column_attributes=[{
                                                  "name":
# Exemplo n.º 15
# 0
    minimum table and columns scaffolding with a relationship
    definition between the table and column types.
    """
    # Parse the table/column type prefix from the command line.
    # NOTE(review): --prefix has no default and is not required, so
    # args.prefix may be None, producing type names like "None_column" —
    # confirm whether a required=True or default is intended.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--prefix",
        help="The prefix for the table, columns, and relationship types.")
    args = parser.parse_args()
    datasource = args.prefix

    # Authenticate against your Atlas server
    # Credentials from environment variables; empty defaults if unset.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Name of the relationship type that joins the table and column types.
    src_table_columns_typeName = "{}_table_columns".format(datasource)

    # TODO: Create all combinations of datasource
    # Define {datasource}_column
    column_entity = EntityTypeDef(
        name="{}_column".format(datasource),
        superTypes=["DataSet"],
    )
    # Define {datasource}_table
    # schemaElementsAttribute makes the columns show in the schema tab.
    table_entity = EntityTypeDef(
        name="{}_table".format(datasource),
        superTypes=["DataSet"],
        relationshipAttributeDefs=[],
        options={"schemaElementsAttribute": "columns"})
# Exemplo n.º 16
# 0
    * In the example below, we create a batch of entities that includes a
    partial set of columns defined on the hive_table and an additional
    hive_column with the table relationship attribute set.

    Lastly, you can always upload an individual relationship with hive_table
    and hive_columns defined on each end. However, this is the slowest path
    as it can only take one upload at a time whereas entity uploads can be
    many entities at a time.
    """

    # Authenticate against your Atlas server
    # Credentials from environment variables; empty defaults if unset.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Creating the entities that will be used in uploads.
    # Positional args are (name, typeName, qualified_name); negative guids
    # are local placeholders replaced by the server on upload.
    table = AtlasEntity("rel01", "hive_table", "tests://rel01", guid=-1)
    c1 = AtlasEntity("rel01#01",
                     "hive_column",
                     "tests://rel01#c",
                     guid=-2,
                     attributes={"type": "str"})
    # NOTE(review): c2's qualified name says "rel02" while its name and the
    # table say rel01 — verify this isn't a typo in the sample.
    c2 = AtlasEntity("rel01#02",
                     "hive_column",
                     "tests://rel02#c",
                     guid=-3,
                     attributes={"type": "str"})
    c3 = AtlasEntity("rel01#03",
                     "hive_column",
# Exemplo n.º 17
# 0
from pyapacheatlas.core.util import GuidTracker

if __name__ == "__main__":
    """
    This sample demonstrates using the columnMapping feature of Azure Purview.
    You will create a process with two inputs and one output. From there you
    will create a valid column mapping JSON object that will display column
    level lineage in the Purview UI.
    """
    # Credentials from environment variables; empty defaults if unset.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", "")
    )
    client = PurviewClient(
        account_name=os.environ.get("PURVIEW_NAME", ""),
        authentication=oauth
    )

    # We need a custom process entity type that contains the definition for
    # a columnMapping attribute.
    procType = EntityTypeDef(
        "ProcessWithColumnMapping",
        superTypes=["Process"],
        attributeDefs = [
            AtlasAttributeDef("columnMapping")
        ]
    )

    # Upload the type definition
    # force_update=True overwrites an existing type of the same name.
    type_results = client.upload_typedefs(entityDefs=[procType], force_update=True)
    print(json.dumps(type_results,indent=2))
# Exemplo n.º 18
# 0
    You need the Guid of the entity and the .

    The schema of the response follows the /v2/entity/bulk GET operation
    even if you are requesting only one entity by Guid.
    https://atlas.apache.org/api/v2/json_AtlasEntitiesWithExtInfo.html

    The response of get_entity will be a dict that has an "entities" key
    that contains a list of the entities you requested.
    """

    # Authenticate against your Atlas server
    # Credentials from environment variables; empty defaults if unset.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # For a given guid, check if a given classification type is applied
    # If it's not, an AtlasException is thrown.
    try:
        single_class_check = client.get_entity_classification(
            guid="b58fc81e-a85f-4dfc-aad1-ee33b3421b87",
            classificationName="MICROSOFT.PERSONAL.IPADDRESS")
        print(json.dumps(single_class_check, indent=2))
    except AtlasException as e:
        # NOTE(review): "provied" is a typo in this user-facing message
        # ("provided"); left untouched here because it is runtime output.
        print(
            "The provided classification was not found on the provied entity.")
        print(e)

    # You can also get ALL of the classifications from a given entity
    all_class_check = client.get_entity_classifications(
# Exemplo n.º 19
# 0
from pyapacheatlas.core.typedef import AtlasAttributeDef, EntityTypeDef, RelationshipTypeDef

# Add your credentials here or set them as environment variables
# Fallback credential values used only when the corresponding environment
# variables below are unset.
tenant_id = ""
client_id = ""
client_secret = ""
purview_account_name = ""

# COMMAND ----------

oauth = ServicePrincipalAuthentication(
    tenant_id=os.environ.get("TENANT_ID", tenant_id),
    client_id=os.environ.get("CLIENT_ID", client_id),
    client_secret=os.environ.get("CLIENT_SECRET", client_secret))
client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME",
                                                   purview_account_name),
                       authentication=oauth)
guid = GuidTracker()

# COMMAND ----------

# Set up a few types and relationships
# This is a one time thing but necessary to make the demo work
# It also demonstrates how you can capture different attributes
# for your dataframes, dataframe columns, and jobs.
# schemaElementAttribute makes "columns" show in the Purview schema tab.
type_spark_df = EntityTypeDef(name="custom_spark_dataframe",
                              attributeDefs=[AtlasAttributeDef(name="format")],
                              superTypes=["DataSet"],
                              options={"schemaElementAttribute": "columns"})
type_spark_columns = EntityTypeDef(
    name="custom_spark_dataframe_column",
# Exemplo n.º 20
# 0
# PyApacheAtlas packages
# Connect to Atlas via a Service Principal
from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient  # Communicate with your Atlas server

if __name__ == "__main__":
    """
    This sample provides an example of deleting an entity through the Atlas API.
    """

    # Build a service-principal credential from environment variables and
    # connect to the Purview account named by PURVIEW_NAME.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Case 1: the guid to remove is already known up front.
    single_result = client.delete_entity(guid="123-abc-456-def")
    print(json.dumps(single_result, indent=2))

    # Case 2: several entities share a type; resolve their guids by
    # qualified name, then delete each one in turn.
    lookup = client.get_entity(
        qualifiedName=["qualifiedname1", "qualifiedname2", "qualifiedname3"],
        typeName="my_type")

    for found in lookup.get("entities"):
        removal = client.delete_entity(guid=found["guid"])
        print(json.dumps(removal, indent=2))
from pyapacheatlas.core import PurviewClient, AtlasEntity, TypeCategory
from pyapacheatlas.core.typedef import EntityTypeDef, RelationshipTypeDef

if __name__ == "__main__":
    """
    This sample shows how you might create a process that contains a set of
    steps but no intermediate data is produced / being captured. You might
    want to capture all the steps in a process and that can be accomplished
    with creating a custom relationship and a custom 'process_step' type.
    """

    # Credentials from environment variables; empty defaults if unset.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Create a Process Type that will store the steps
    processWithSteps = EntityTypeDef(
        name="process_with_steps",
        superTypes=["Process"],
        options={
            # This makes the step entities appear in the schema tab
            "schemaElementsAttribute": "steps"
        },
    )

    # Create a step in a process type to house the actual types
    processSteps = EntityTypeDef(name="step_in_process",
                                 superTypes=["DataSet"])
# Exemplo n.º 22
# 0
import time
import os
import sys
import array

from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient, AtlasEntity, AtlasProcess

# The file named on the command line holds one entity guid per line.
filename = sys.argv[1]

# Authenticate against Purview with a service principal read from
# environment variables (empty-string defaults if unset).
oauth = ServicePrincipalAuthentication(
    tenant_id=os.environ.get('AZURE_TENANT_ID', ''),
    client_id=os.environ.get('AZURE_CLIENT_ID', ''),
    client_secret=os.environ.get('AZURE_CLIENT_SECRET', ''))
client = PurviewClient(account_name=os.environ.get('PURVIEW_CATALOG_NAME', ''),
                       authentication=oauth)

# Read the guid list with a context manager so the handle is closed even if
# the delete call raises (the original closed it only on success). Blank
# lines (e.g. a trailing newline) are skipped so no empty guid is submitted.
with open(filename) as infile:
    guids = [line.strip() for line in infile if line.strip()]
client.delete_entity(guids)
# Remove the work file only after the bulk delete has succeeded.
os.remove(filename)
from pyapacheatlas.core.util import GuidTracker
from pyapacheatlas.core.typedef import AtlasAttributeDef, EntityTypeDef, RelationshipTypeDef
from pyapacheatlas.readers import ExcelConfiguration, ExcelReader


# The above cell gets the v_tenant_id,v_client_id etc. 

# Service-principal credentials; the v_* values come from an earlier
# notebook cell (see the comment above).
auth = ServicePrincipalAuthentication(tenant_id=v_tenant_id,
                                      client_id=v_client_id,
                                      client_secret=v_client_secret)

# Client bound to the target data-catalog account.
client = PurviewClient(account_name=v_data_catalog_name,
                       authentication=auth)

# Tracker that hands out unique placeholder guids for entity batches.
guid = GuidTracker()


# COMMAND ----------

# MAGIC %md
# MAGIC ##### 4. Collect Notebook Metadata

# COMMAND ----------

# # Path to scan
path = '/Shared'
  
# Exemplo n.º 24
# 0
    """
    # Parse the score threshold from the command line (float, default 3.0).
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--threshold",
        "--t",
        help="The level of the search score to filter to, Defaults to 3.0",
        default=3.0,
        type=float)
    args = parser.parse_args()

    # Authenticate against your Atlas server
    # Credentials from environment variables; empty defaults if unset.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Fetch the default glossary; a glossary with no terms has no "terms"
    # key, in which case we explain the problem and exit with status 3.
    glossary = client.get_glossary()
    try:
        terms = glossary["terms"]
    except KeyError:
        print("Your default glossary appears to be empty.")
        print("Please add a term to your glossary and try this demo again.")
        exit(3)
    # Consists of DisplayText and term
    term_variants = {
        t["displayText"]: {
            'guid': t["termGuid"],
            "variants": []
        }
        for t in terms
# Exemplo n.º 25
# 0
    This sample provides an example of searching for an existing entity
    through the rest api / pyapacheatlas classes.

    NOTE: This example is specific to Azure Purview's Advanced Search.

    The response is a Python generator that allows you to page through the
    search results. For each page in the search results, you have a list
    of search entities that can be iterated over.
    """

    # Authenticate against your Atlas server
    # Credentials from environment variables; empty defaults if unset.
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Assuming you have an entity with the word demo in the name or description
    # search_entities returns a generator that pages through results.
    search = client.search_entities("demo")

    # Alternative search methods include...
    # Searching across a given attribute:
    # Search only the name (or qualifiedName) field and it begins with demo
    # Must include a wildcard character (*) at the end, does not support
    # wildcard at the beginning or middle.

    # search = client.search_entities("name:demo*")
    # search = client.search_entities("qualifiedName:demo*")

    # Searching within a given type and include subtypes...
    # Provide a search filter that specifies the typeName and whether