import json
import os

from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient

if __name__ == "__main__":
    """
    This sample provides an example of retrieving entities through the
    Atlas API.

    You need either the Guid of the entity or the qualified name and type
    name. The schema of the response follows the /v2/entity/bulk GET
    operation even if you are requesting only one entity by Guid.
    https://atlas.apache.org/api/v2/json_AtlasEntitiesWithExtInfo.html

    The response of get_entity will be a dict that has an "entities" key
    that contains a list of the entities you requested.
    """

    # Authenticate against your Atlas server
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", "")
    )
    client = PurviewClient(
        account_name=os.environ.get("PURVIEW_NAME", ""),
        authentication=oauth
    )

    # When you know the GUID that you want to get
    response = client.get_entity(guid="123-abc-456-def")
    print(json.dumps(response, indent=2))

    # When you need to find multiple Guids and they all are the same type
    entities = client.get_entity(
        qualifiedName=["qualifiedname1", "qualifiedname2", "qualifiedname3"],
        typeName="my_type"
    )
    for entity in entities.get("entities"):
        print(json.dumps(entity, indent=2))
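A possible follow-on, sketching how the bulk-style response above can be flattened into a {qualifiedName: guid} lookup for later calls. The helper name build_guid_lookup is illustrative, not part of pyapacheatlas.

    def build_guid_lookup(get_entity_response):
        # Map each returned entity's qualifiedName to its guid.
        return {
            entity["attributes"]["qualifiedName"]: entity["guid"]
            for entity in get_entity_response.get("entities", [])
        }

    # e.g. lookup = build_guid_lookup(entities)
    #      lookup.get("qualifiedname1")  # returns that entity's guid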
import json
import os

from pyapacheatlas.auth import ServicePrincipalAuthentication
# Communicate with your Atlas server
from pyapacheatlas.core import PurviewClient

if __name__ == "__main__":
    """
    This sample provides an example of deleting an entity through the
    Atlas API.
    """

    # Authenticate against your Atlas server
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", "")
    )
    client = PurviewClient(
        account_name=os.environ.get("PURVIEW_NAME", ""),
        authentication=oauth
    )

    # When you know the GUID that you want to delete
    response = client.delete_entity(guid="123-abc-456-def")
    print(json.dumps(response, indent=2))

    # When you need to find multiple Guids to delete and they all
    # are the same type
    entities = client.get_entity(
        qualifiedName=["qualifiedname1", "qualifiedname2", "qualifiedname3"],
        typeName="my_type"
    )
    for entity in entities.get("entities"):
        guid = entity["guid"]
        delete_response = client.delete_entity(guid=guid)
        print(json.dumps(delete_response, indent=2))
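Since deleting by qualified name is always this two-step pattern (resolve, then delete), it can be wrapped in a small helper. A minimal sketch; delete_by_qualified_name is an illustrative name, not a pyapacheatlas method.

    def delete_by_qualified_name(client, qualified_name, type_name):
        # Resolve the qualified name to guid(s), then delete each match.
        found = client.get_entity(
            qualifiedName=[qualified_name], typeName=type_name)
        return [
            client.delete_entity(guid=entity["guid"])
            for entity in found.get("entities", [])
        ]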
pdf.columns = ['notebook', 'source', 'target']
pdf.to_csv(adls_dir + "/notebook_mapping.csv", index=False)

# COMMAND ----------

# MAGIC %md
# MAGIC ##### 6. Upload Notebook mapping into Purview

# COMMAND ----------

maps = spark.read.option("header", "true").csv("/mnt/datafiles/purview/notebook_mapping.csv")

for map in maps.rdd.collect():
    nbname = map.notebook.split('/')[-1]
    print("Adding: " + nbname)
    InputEntity = client.get_entity(
        qualifiedName=[map.source],
        typeName='azure_datalake_gen2_path'
    )
    OutputEntity = client.get_entity(
        qualifiedName=[map.target],
        typeName="databricks_table"
    )
    job_process = AtlasProcess(
        name=nbname,
        qualified_name="databricks://" + v_databricks_domain + "/notebooks/" + nbname,
        typeName="databricks_job",
        guid=guid.get_guid(),
        attributes={"job_type": "notebook", "notebook_path": map.notebook},
        inputs=[InputEntity.get("entities")[0]],
        outputs=[OutputEntity.get("entities")[0]]
    )
    client.upload_entities(job_process)
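The guid object used above (guid.get_guid()) is presumably a pyapacheatlas GuidTracker created in an earlier notebook cell; a minimal sketch of that setup, assuming default settings:

from pyapacheatlas.core.util import GuidTracker

# GuidTracker hands out unique negative placeholder guids; the service
# replaces them with real guids when the entities are uploaded.
guid = GuidTracker()
print(guid.get_guid())  # e.g. -1001
print(guid.get_guid())  # e.g. -1002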
"hive_column", "tests://rel04#c", guid=-5, attributes={"type": "str"}) # Add c1 as the only relationship to the table table.addRelationship(columns=[c1.to_json(minimum=True)]) c2.relationshipAttributes.update({"table": table.to_json(minimum=True)}) c3.addRelationship(table=table) assignments = client.upload_entities([table, c1, c2, c3, c4])["guidAssignments"] try: live_table = client.get_entity(guid=assignments["-1"])["entities"][0] # Should have two attributes because one is from the table having the # relationship defined as an array of columns and the second two from # the column's having the table relationshipAttribute defined on them. print("Here's what the upload looks like!") print(json.dumps(live_table["relationshipAttributes"], indent=2)) print("Now we are creating a relationship.") relationship = { # When creating manually, you have to "know" the typeName # and the types of each end. "typeName": "hive_table_columns", "attributes": {}, "guid": -100, # Ends are either guid or guid + typeName
# print(json.dumps(results, indent=2))

print("Starting Append Scenario...")
# A second scenario would have us appending to an existing process.
# To do that, we need to query for the existing entity.
dummy_existing_process = AtlasProcess(
    name="sample_process_xyz",
    typeName="Process",
    qualified_name="pyapacheatlas://democustomprocess",
    inputs=None,  # Set to None so no update will occur
    outputs=None,  # We will update this with .outputs below
    guid=-104
)

real_existing_process = client.get_entity(
    typeName="Process",
    qualifiedName="pyapacheatlas://democustomprocess"
)["entities"][0]
print("Working with process guid: {}".format(
    real_existing_process["guid"]))

# Get the list of existing outputs from the attributes.
existing_outputs = real_existing_process["attributes"]["outputs"]

# Create one more output to be added.
one_more_output = AtlasEntity(
    name="output_added_later",
    typeName="DataSet",
    qualified_name="pyapacheatlas://demooutput04",
    guid=-103
)

# Add the existing and new output to the dummy process
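# -- Hedged completion: the excerpt stops at the comment above; the lines
# -- below are an assumed continuation, not the original sample's exact code.
# Combine the existing outputs with the minimal JSON of the new output.
combined_outputs = existing_outputs + [one_more_output.to_json(minimum=True)]
dummy_existing_process.outputs = combined_outputs

# Upload only the changed process and the new output entity; the inputs
# stay untouched because they were set to None on the dummy process.
results = client.upload_entities(
    batch=[dummy_existing_process, one_more_output])
print(json.dumps(results, indent=2))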