def test_batches_entities_with_real_guid():
    """Four entities with one real and one placeholder relationship
    should split into exactly two batches at batch_size=2.
    """
    tracker = GuidTracker()
    ent_a, ent_b, ent_c, ent_d = (
        AtlasEntity(name, "DataSet", name, guid=tracker.get_guid())
        for name in "ABCD"
    )
    # B depends on A via a real entity reference; C references a
    # pre-existing ("real") guid rather than a local entity.
    ent_b.addRelationship(table=ent_a)
    ent_c.addRelationship(tester={"guid": "abc-123"})
    payload = [entity.to_json() for entity in (ent_a, ent_b, ent_c, ent_d)]
    batches = batch_dependent_entities(payload, batch_size=2)
    assert len(batches) == 2
def test_set_relationship_different_ways():
    """Integration test: a table/column relationship can be established
    three different ways (a columns list on the table, a raw
    relationshipAttributes entry on a column, and addRelationship on a
    column), plus a fourth via a direct relationship upload.

    Requires a live Atlas-compatible `client`; entities are cleaned up
    in the `finally` block.
    """
    ae = AtlasEntity("rel01", "hive_table", "tests://rel01", guid=-1)
    c1 = AtlasEntity("rel01#01", "hive_column", "tests://rel01#c",
                     guid=-2, attributes={"type": "str"})
    c2 = AtlasEntity("rel01#02", "hive_column", "tests://rel02#c",
                     guid=-3, attributes={"type": "str"})
    c3 = AtlasEntity("rel01#03", "hive_column", "tests://rel03#c",
                     guid=-4, attributes={"type": "str"})
    c4 = AtlasEntity("rel01#04", "hive_column", "tests://rel04#c",
                     guid=-5, attributes={"type": "str"})
    # Add c1 as the only relationship defined on the table itself.
    ae.addRelationship(columns=[c1.to_json(minimum=True)])
    # c2: set the relationship by mutating relationshipAttributes directly.
    c2.relationshipAttributes.update({"table": ae.to_json(minimum=True)})
    # c3: set the relationship with the addRelationship helper.
    c3.addRelationship(table=ae)
    assignments = client.upload_entities([ae, c1, c2, c3, c4])["guidAssignments"]
    try:
        live_table = client.get_entity(guid=assignments["-1"])["entities"][0]
        # Should have three columns: one from the table defining a columns
        # array, and two from the columns defining the table
        # relationshipAttribute on themselves (c4 is not yet linked).
        assert len(live_table["relationshipAttributes"]["columns"]) == 3
        relationship = {
            "typeName": "hive_table_columns",
            "attributes": {},
            "guid": -100,
            # Ends are either guid or guid + typeName
            # (in case there are ambiguities?)
            "end1": {"guid": assignments["-1"]},
            "end2": {"guid": assignments["-5"]}
        }
        relation_upload = client.upload_relationship(relationship)
        # Check that we have one more relationship.
        # There are caching issues here :-(
        live_table_post_relationship = client.get_entity(
            guid=assignments["-1"])["entities"][0]
        # Bug fix: assert against the re-fetched entity, not the stale
        # pre-upload `live_table` snapshot (which was already asserted
        # to have exactly 3 columns and so could never equal 4).
        assert len(
            live_table_post_relationship["relationshipAttributes"]["columns"]
        ) == 4
    finally:
        # Need to delete all columns BEFORE you delete the table.
        for local_id in [str(s) for s in range(-5, 0)]:
            guid = assignments[local_id]
            _ = client.delete_entity(guid)
def test_batches_entities_dependent():
    """Sixteen entities — two dependency clusters, one small cluster
    built out of creation order, and five singletons — must fold into
    exactly three batches when batch_size=7.
    """
    tracker = GuidTracker()

    def _make(name):
        # Every test entity is a DataSet whose qualified name is its name.
        return AtlasEntity(name, "DataSet", name, guid=tracker.get_guid())

    a, b, c, d, e, f, g, h = (_make(ch) for ch in "ABCDEFGH")
    # Cluster 1: A <- B <- {C, D}, plus E hanging off A.
    b.addRelationship(table=a)
    c.addRelationship(parent=b)
    d.addRelationship(parent=b)
    e.addRelationship(table=a)
    # Cluster 2: F <- G <- H.
    g.addRelationship(table=f)
    h.addRelationship(parent=g)
    # Intentionally out of order: J and K are created before I,
    # which then links to both of them.
    j = _make("J")
    k = _make("K")
    i = _make("I")
    i.addRelationship(colA=j)
    i.addRelationship(colB=k)
    # Five fully independent entities.
    l, m, n, o, p = (_make(ch) for ch in "LMNOP")

    entities = [
        entity.to_json()
        for entity in (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p)
    ]
    results = batch_dependent_entities(entities, batch_size=7)
    # Sixteen entities with batch size 7 means at least three groups:
    # one group has seven connected, one group should have only three,
    # and all others are independent.
    assert len(results) == 3
"description": "This is the first column." }, guid=gt.get_guid()) column02 = AtlasEntity( name="column02", typeName="pyapacheatlas_demo_column", qualified_name= "pyapacheatlas://sample_tablepyapacheatlas_custom_type@column02", attributes={ "data_type": "int", "description": "This is the second column." }, guid=gt.get_guid()) # Add the "table" relationship attribute to your column entities column01.addRelationship(table=table_entity) column02.addRelationship(table=table_entity) # Do the upload and view the entities in the UI upload_results = client.upload_entities( batch=[table_entity, column01, column02]) print(json.dumps(upload_results, indent=2)) # To remove, delete the entity created and then the entity type. # client.delete_entity(guid=["..."]) # delete_relationship = client.delete_type("pyapacheatlas_table_column_relationship") # delete_results = client.delete_type("pyapacheatlas_demo_table") # delete_results = client.delete_type("pyapacheatlas_demo_column") # print(json.dumps(delete_results, indent=2))
"tests://rel02#c", guid=-3, attributes={"type": "str"}) c3 = AtlasEntity("rel01#03", "hive_column", "tests://rel03#c", guid=-4, attributes={"type": "str"}) c4 = AtlasEntity("rel01#04", "hive_column", "tests://rel04#c", guid=-5, attributes={"type": "str"}) # Add c1 as the only relationship to the table table.addRelationship(columns=[c1.to_json(minimum=True)]) c2.relationshipAttributes.update({"table": table.to_json(minimum=True)}) c3.addRelationship(table=table) assignments = client.upload_entities([table, c1, c2, c3, c4])["guidAssignments"] try: live_table = client.get_entity(guid=assignments["-1"])["entities"][0] # Should have two attributes because one is from the table having the # relationship defined as an array of columns and the second two from # the column's having the table relationshipAttribute defined on them. print("Here's what the upload looks like!") print(json.dumps(live_table["relationshipAttributes"], indent=2))
"hive_column", "tests://rel03#c", guid=-4, attributes={"type": "str"}) c4 = AtlasEntity("rel10#04", "hive_column", "tests://rel04#c", guid=-5, attributes={"type": "str"}) # Add relationships to the columns from the table overwriting existing columns # Good if you want to overwrite existing schema or creating a brand new table # and Schema. columns_to_add = [c1, c2, c3] # Use a list comprehension to convert them into dictionaries when adding a list table.addRelationship( columns=[c.to_json(minimum=True) for c in columns_to_add]) # OR Add a table relationship to a column. This lets you essentially APPEND # a column to a table's schema. c4.addRelationship(table=table) # Upload all of the tables and columns that are referenced. assignments = client.upload_entities([table, c1, c2, c3, c4])["guidAssignments"] # Check that we have one more relationship print( "Now we can see that there should be one more relationship attribute.") live_table_post_relationship = client.get_entity( guid=assignments["-1"])["entities"][0] print(