def ingest_schema(communityName, domainName, schemaList, session, parentCommunityName=None):
    """Build a Collibra import payload for Schema assets and POST it.

    Appends the community/domain/schema objects to the module-level
    ``community_list``, ``domain_list``, ``asset_set`` and ``json_obj``
    accumulators, writes the combined payload to ``schema_template.json``,
    and uploads it to the Collibra import REST endpoint.

    Args:
        communityName: Name of the Collibra community to create/target.
        domainName: Name of the 'Physical Data Dictionary' domain.
        schemaList: Iterable of schema names to ingest as Schema assets.
        session: Authenticated ``requests.Session`` used for the upload.
        parentCommunityName: Optional parent community for nesting.

    Side effects: mutates module-level accumulators, writes a local JSON
    file, performs an HTTP POST, and prints the server response.
    """
    community_list.append(communityName)
    domain_list.append((communityName, domainName, 'Physical Data Dictionary'))
    # adds all the schema to the asset list
    for schemaName in schemaList:
        asset_set.add((communityName, domainName, schemaName, 'Schema'))
    # convert asset set to a list
    asset_list = list(asset_set)
    for community in community_list:
        json_obj.append(parser.getCommunityObj(community, parentCommunityName))
    for communityName, domainName, domainType in domain_list:
        json_obj.append(
            parser.getDomainObj(communityName, domainName, domainType))
    for communityName, domainName, assetName, assetType in asset_list:
        json_obj.append(
            parser.getAssetObj(communityName, domainName, assetName,
                               assetType, assetName))
    with open("schema_template.json", "w") as write_file:
        json.dump(json_obj, write_file)
    url = 'https://asu-dev.collibra.com/rest/2.0/import/json-job'
    # url = 'https://asu.collibra.com/rest/2.0/import/json-job'
    # Fix: the original leaked the file handle by passing a bare open()
    # to session.post; a with-block guarantees it is closed after upload.
    with open('schema_template.json', 'rb') as upload_file:
        files = {'file': upload_file}
        payload = {'sendNotification': 'true'}
        response = session.post(url, files=files, data=payload)
    # print(response.request.headers)
    # requests.Response truthiness is True for 2xx/3xx status codes.
    if response:
        print(response.json())
    else:
        print(response.text)
def ingest_table(communityName, domainName, schemaList, tableList, attrList, session, parentCommunityName=None):
    """Build a Collibra import payload for Table assets and POST it.

    Tables may be qualified by schema (``schemaName + '__' + tableName``)
    when ``schemaList`` is given, and may carry attribute payloads when
    ``attrList`` is given. The combined JSON payload is written to
    ``table_template.json`` and uploaded to the Collibra import endpoint.

    Args:
        communityName: Name of the Collibra community to create/target.
        domainName: Name of the 'Physical Data Dictionary' domain.
        schemaList: Optional iterable of schema names, zipped with
            ``tableList`` (and ``attrList`` when present); ``None`` to
            ingest unqualified tables.
        tableList: Iterable of table names to ingest as Table assets.
        attrList: Optional iterable of per-table attribute payloads,
            zipped with ``tableList``; ``None`` when tables have none.
        session: Authenticated ``requests.Session`` used for the upload.
        parentCommunityName: Optional parent community for nesting.

    Side effects: mutates module-level accumulators, writes a local JSON
    file, performs an HTTP POST, and prints the server response.
    """
    community_list.append(communityName)
    domain_list.append((communityName, domainName, 'Physical Data Dictionary'))
    # Fix: compare against None with 'is not None', not '!= None'.
    if schemaList is not None:
        print("Schema List is not null")
        # adds all the table under the given schema to the asset list
        if attrList is not None:
            print("Table attributes is not null")
            for schemaName, tableName, tableAttr in zip(
                    schemaList, tableList, attrList):
                # (relation_type_id:relation_direction, relation_asset_name) - (schema type id , schema name)
                relations = ('00000000-0000-0000-0000-000000007043:SOURCE',
                             schemaName)
                asset_set.add(
                    (communityName, domainName,
                     schemaName + '__' + tableName, 'Table', tableName,
                     tableAttr, relations))
        else:
            print("Table Attributes list is null")
            for schemaName, tableName in zip(schemaList, tableList):
                # (relation_type_id:relation_direction, relation_asset_name) - (schema type id , schema name)
                relations = ('00000000-0000-0000-0000-000000007043:SOURCE',
                             schemaName)
                asset_set.add(
                    (communityName, domainName,
                     schemaName + '__' + tableName, 'Table', tableName,
                     None, relations))
    else:
        print("Schema List is null")
        # adds all the table to the asset list
        if attrList is not None:
            print("Table attributes is not null")
            for tableName, tableAttr in zip(tableList, attrList):
                asset_set.add((communityName, domainName, tableName,
                               'Table', tableName, tableAttr, None))
        else:
            print("Table attributes is null")
            for tableName in tableList:
                asset_set.add((communityName, domainName, tableName,
                               'Table', tableName, None, None))
    asset_list = list(asset_set)
    for community in community_list:
        json_obj.append(parser.getCommunityObj(community, parentCommunityName))
    for communityName, domainName, domainType in domain_list:
        json_obj.append(
            parser.getDomainObj(communityName, domainName, domainType))
    for communityName, domainName, assetName, assetType, tableName, attrList, relation in asset_list:
        json_obj.append(
            parser.getAssetObj(communityName, domainName, assetName,
                               assetType, tableName, attrList, relation))
    with open("table_template.json", "w") as write_file:
        json.dump(json_obj, write_file)
    url = 'https://asu-dev.collibra.com/rest/2.0/import/json-job'
    # url = 'https://asu.collibra.com/rest/2.0/import/json-job'
    # Fix: the original leaked the file handle by passing a bare open()
    # to session.post; a with-block guarantees it is closed after upload.
    with open('table_template.json', 'rb') as upload_file:
        files = {'file': upload_file}
        payload = {'sendNotification': 'true'}
        response = session.post(url, files=files, data=payload)
    # requests.Response truthiness is True for 2xx/3xx status codes.
    if response:
        print(response.json())
    else:
        print(response.text)
def ingest_column(communityName, domainName, schemaList, tableList, colList, attrList, session, parentCommunityName=None):
    """Build Collibra import payloads for Column assets and POST them.

    Columns are related to their parent table (or schema-qualified table
    when ``schemaList`` is given). Because column payloads can be large,
    the accumulated ``json_obj`` is split into multiple
    ``column_template_<n>.json`` files and each is uploaded separately.

    Args:
        communityName: Name of the Collibra community to create/target.
        domainName: Name of the 'Physical Data Dictionary' domain.
        schemaList: Optional iterable of schema names zipped with
            ``tableList``/``colList``/``attrList``; ``None`` to qualify
            columns by table only.
        tableList: Iterable of parent table names.
        colList: Iterable of column names to ingest as Column assets.
        attrList: Iterable of per-column attribute payloads.
        session: Authenticated ``requests.Session`` used for the uploads.
        parentCommunityName: Optional parent community for nesting.

    Side effects: rebinds the module-level ``json_obj`` (hence the
    ``global``), mutates other module accumulators, writes local JSON
    files, performs HTTP POSTs, and prints each server response.
    """
    global json_obj
    community_list.append(communityName)
    domain_list.append((communityName, domainName, 'Physical Data Dictionary'))
    # Fix: compare against None with 'is not None', not '!= None'.
    if schemaList is not None:
        for schemaName, tableName, columnName, columnAttr in zip(
                schemaList, tableList, colList, attrList):
            # (relation_type_id:relation_direction, relation_asset_name)
            relations = ('00000000-0000-0000-0000-000000007042:TARGET',
                         schemaName + '.' + tableName)
            asset_set.add((communityName, domainName,
                           schemaName + '.' + tableName + '.' + columnName,
                           'Column', columnName, columnAttr, relations))
    else:
        for tableName, columnName, columnAttr in zip(tableList, colList,
                                                     attrList):
            # (relation_type_id:relation_direction, relation_asset_name)
            relations = ('00000000-0000-0000-0000-000000007042:TARGET',
                         tableName)
            asset_set.add(
                (communityName, domainName, tableName + '.' + columnName,
                 'Column', columnName, columnAttr, relations))
    asset_list = list(asset_set)
    for community in community_list:
        json_obj.append(parser.getCommunityObj(community, parentCommunityName))
    for communityName, domainName, domainType in domain_list:
        json_obj.append(
            parser.getDomainObj(communityName, domainName, domainType))
    for communityName, domainName, assetName, assetType, columnName, attrList, relation in asset_list:
        # split if file size is roughly around 22MB
        # NOTE(review): sys.getsizeof is shallow (counts only the list
        # object, not its elements), so 400000 is a proxy for element
        # count rather than true payload bytes — confirm the threshold.
        if (sys.getsizeof(json_obj) > 400000):
            list_json_obj.append(json_obj)
            json_obj = []
        json_obj.append(
            parser.getAssetObj(communityName, domainName, assetName,
                               assetType, columnName, attrList, relation))
    list_json_obj.append(json_obj)
    for index, json_obj in enumerate(list_json_obj, 1):
        with open(f"column_template_{index}.json", "w") as write_file:
            json.dump(json_obj, write_file)
        print(
            f"Ingesting data from template file: column_template_{index}.json")
        url = 'https://asu-dev.collibra.com/rest/2.0/import/json-job'
        # url = 'https://asu.collibra.com/rest/2.0/import/json-job'
        # Fix: the original leaked the file handle by passing a bare
        # open() to session.post; a with-block closes it after upload.
        with open(f'column_template_{index}.json', 'rb') as upload_file:
            files = {'file': upload_file}
            payload = {'sendNotification': 'true'}
            response = session.post(url, files=files, data=payload)
        # print(response.request.headers)
        # requests.Response truthiness is True for 2xx/3xx status codes.
        if response:
            print(response.json())
        else:
            print(response.text)