Example #1
import json
import os

from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient  # Communicate with your Atlas server

if __name__ == "__main__":
    """
    This sample provides an example of retrieving an entity through the Atlas API.

    You need either the Guid of the entity or the qualified name and type name.

    The schema of the response follows the /v2/entity/bulk GET operation
    even if you are requesting only one entity by Guid.
    https://atlas.apache.org/api/v2/json_AtlasEntitiesWithExtInfo.html

    The response of get_entity will be a dict that has an "entities" key
    that contains a list of the entities you requested.
    """

    # Authenticate against your Atlas server
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # When you know the GUID that you want to get
    response = client.get_entity(guid="123-abc-456-def")
    print(json.dumps(response, indent=2))

    # When you need to find multiple entities by qualified name
    # and they are all the same type
    entities = client.get_entity(
        qualifiedName=["qualifiedname1", "qualifiedname2", "qualifiedname3"],
        typeName="my_type")

    for entity in entities.get("entities", []):
        print(json.dumps(entity, indent=2))
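
    # Note (an assumption, not shown in the original sample): get_entity is
    # backed by the /v2/entity/bulk endpoint, so passing a list of guids
    # should also work:
    # response = client.get_entity(guid=["123-abc-456-def", "789-ghi-012-jkl"])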
Example #2
import json
import os

from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient  # Communicate with your Atlas server

if __name__ == "__main__":
    """
    This sample provides an example of deleting an entity through the Atlas API.
    """

    # Authenticate against your Atlas server
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # When you know the GUID that you want to delete
    response = client.delete_entity(guid="123-abc-456-def")
    print(json.dumps(response, indent=2))

    # When you need to find multiple entities to delete by qualified
    # name and they are all the same type
    entities = client.get_entity(
        qualifiedName=["qualifiedname1", "qualifiedname2", "qualifiedname3"],
        typeName="my_type")

    for entity in entities.get("entities", []):
        guid = entity["guid"]
        delete_response = client.delete_entity(guid=guid)
        print(json.dumps(delete_response, indent=2))
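
        # The delete response follows the Atlas EntityMutationResponse shape,
        # roughly (abbreviated sketch):
        # {"mutatedEntities": {"DELETE": [{"guid": "...", "typeName": "..."}]}}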
Example #3
pdf.columns = ['notebook', 'source', 'target']
pdf.to_csv(adls_dir + "/notebook_mapping.csv", index=False)

# COMMAND ----------

# MAGIC %md
# MAGIC ##### 6. Upload Notebook mapping into Purview

# COMMAND ----------

maps = spark.read.option("header","true").csv("/mnt/datafiles/purview/notebook_mapping.csv")
# `client`, `guid` (a pyapacheatlas GuidTracker), and `v_databricks_domain`
# are assumed to be defined in earlier cells of this notebook.
for mapping in maps.rdd.collect():
  nbname = mapping.notebook.split('/')[-1]
  print("Adding: " + nbname)
  InputEntity = client.get_entity(
      qualifiedName=[mapping.source],
      typeName='azure_datalake_gen2_path'
  )
  OutputEntity = client.get_entity(
      qualifiedName=[mapping.target],
      typeName="databricks_table"
  )
  job_process = AtlasProcess(
      name=nbname,
      qualified_name="databricks://" + v_databricks_domain + "/notebooks/" + nbname,
      typeName="databricks_job",
      guid=guid.get_guid(),
      attributes={"job_type": "notebook", "notebook_path": mapping.notebook},
      inputs=[InputEntity.get("entities")[0]],
      outputs=[OutputEntity.get("entities")[0]]
  )

  client.upload_entities(job_process)
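
# COMMAND ----------

# Sketch (an assumption, not part of the original notebook): upload_entities
# also accepts a list of entities, so the per-row uploads above could be
# batched into one call to reduce round trips to Purview:
#
#   processes = []
#   for mapping in maps.rdd.collect():
#       ...  # build job_process as above
#       processes.append(job_process)
#   client.upload_entities(processes)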
Example #4
                     "hive_column",
                     "tests://rel04#c",
                     guid=-5,
                     attributes={"type": "str"})

    # Add c1 as the only relationship to the table
    table.addRelationship(columns=[c1.to_json(minimum=True)])

    c2.relationshipAttributes.update({"table": table.to_json(minimum=True)})
    c3.addRelationship(table=table)

    assignments = client.upload_entities([table, c1, c2, c3,
                                          c4])["guidAssignments"]

    try:
        live_table = client.get_entity(guid=assignments["-1"])["entities"][0]

        # The table should have three columns in its relationshipAttributes:
        # one from the table defining the relationship as an array of columns
        # (c1), and two from the columns defining the table
        # relationshipAttribute on themselves (c2 and c3).
        print("Here's what the upload looks like!")
        print(json.dumps(live_table["relationshipAttributes"], indent=2))
        print("Now we are creating a relationship.")

        relationship = {
            # When creating manually, you have to "know" the typeName
            # and the types of each end.
            "typeName": "hive_table_columns",
            "attributes": {},
            "guid": -100,
            # Ends are either guid or guid + typeName
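            # (Truncated in the source. As a sketch, assuming the standard
            # hive_table_columns ends of hive_table and hive_column, the
            # remainder would look roughly like:)
            # "end1": {"guid": assignments["-1"]},
            # "end2": {"guid": assignments["-5"]}
        # }
        # relationship_response = client.upload_relationship(relationship)
        # print(json.dumps(relationship_response, indent=2))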
Example #5
    # print(json.dumps(results, indent=2))

    print("Starting Append Scenario...")
    # A second scenario would have us appending to an existing process
    # To do that, we need to query for the existing entity
    dummy_existing_process = AtlasProcess(
        name="sample_process_xyz",
        typeName="Process",
        qualified_name="pyapacheatlas://democustomprocess",
        inputs=None,  # Set to None so no update will occur
        outputs=None,  # We will update this with .outputs below
        guid=-104)

    real_existing_process = client.get_entity(
        typeName="Process",
        qualifiedName="pyapacheatlas://democustomprocess")["entities"][0]
    print("Working with process guid: {}".format(
        real_existing_process["guid"]))

    # Get the list of existing outputs from the attributes.
    existing_outputs = real_existing_process["attributes"]["outputs"]

    # Create one more output to be added.
    one_more_output = AtlasEntity(
        name="output_added_later",
        typeName="DataSet",
        qualified_name="pyapacheatlas://demooutput04",
        guid=-103)

    # Add the existing and new output to the dummy process
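    # (Truncated in the source. As a sketch, the append would finish roughly
    # like this, combining the existing outputs with the new one:)
    # dummy_existing_process.outputs = existing_outputs + \
    #     [one_more_output.to_json(minimum=True)]
    # results = client.upload_entities(
    #     [dummy_existing_process, one_more_output])
    # print(json.dumps(results, indent=2))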