Example #1
        }
        for t in terms
    }

    # TODO: Apply business logic here to massage the terms as needed. For
    # example, the glossary might say "Master Data" while the same concept
    # appears on a table as "m-data".
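    # Hypothetical illustration of such massaging (not in the original):
    # map known glossary spellings to the variants seen on tables before
    # searching.
    # variant_map = {"Master Data": ["m-data", "master_data"]}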

    # For every term, search across all dataset entities (excluding process entities).
    for term in terms:
        primary_display_text = term["displayText"]
        term_guid = term["termGuid"]

        search_query = client.search_entities(query=f"{primary_display_text}*",
                                              search_filter={
                                                  "typeName": "DataSet",
                                                  "includeSubTypes": True
                                              })
        lowest_seen_score = 99
        threshold = args.threshold

        search_intermediate_results = []

        # Iterate over the search results for each term and keep only the
        # entities that are relevant, cutting the search off at a specific
        # relevance threshold (default is 3.0).
        for batch in search_query:
            for entity in batch:
                if entity["typeName"] == "AtlasGlossaryTerm":
                    continue
                search_score = entity['@search.score']
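                # The original example is truncated here. A hedged sketch of
                # the cutoff the comments above describe, assuming results
                # arrive in descending relevance order:
                if search_score < threshold:
                    break
                lowest_seen_score = min(lowest_seen_score, search_score)
                search_intermediate_results.append(entity)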
Example #2
from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient
from pyapacheatlas.core.util import GuidTracker

# Authenticate with a service principal (the v_* variables are defined elsewhere)
auth = ServicePrincipalAuthentication(tenant_id=v_tenant_id,
                                      client_id=v_client_id,
                                      client_secret=v_client_secret)

# Create a client to connect to your service.
client = PurviewClient(account_name=v_data_catalog_name, authentication=auth)

# Tracks temporary (negative) GUIDs for entities to be uploaded
guid = GuidTracker()

# COMMAND ----------

# Search for the entity you want to delete
import json
search = client.search_entities("loan_risk_data.csv")
for page in search:
    print(json.dumps(page, indent=2))

# COMMAND ----------

# MAGIC %md
# MAGIC ##### 3. Bulk delete up to 50 entities

# COMMAND ----------

# Print the GUID of each entity matching the Databricks assets search
search = client.search_entities("databricksassets")
for page in search:
    print(page["id"])
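
# COMMAND ----------

# A hedged sketch of the bulk delete described above (not part of the
# truncated original): gather the GUIDs found by the search and delete
# them in batches of at most 50 per call; delete_entity accepts a single
# GUID or a list of GUIDs.
guids_to_delete = [page["id"] for page in client.search_entities("databricksassets")]
for i in range(0, len(guids_to_delete), 50):
    client.delete_entity(guid=guids_to_delete[i:i + 50])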
Example #3
    The response is a Python generator that allows you to page through the
    search results. For each page in the search results, you have a list
    of search entities that can be iterated over.
    """

    # Authenticate against your Atlas server
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", ""))
    client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME", ""),
                           authentication=oauth)

    # Assuming you have an entity with the word demo in the name or description
    search = client.search_entities("demo")
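
    # A small illustrative sketch of consuming the generator described in
    # the docstring above (mirrors the print loop used in Example #2):
    import json
    for page in search:
        print(json.dumps(page, indent=2))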

    # Alternative search methods include...
    # Searching across a given attribute:
    # Search only the name (or qualifiedName) field for values beginning
    # with "demo". The query must end with a wildcard character (*);
    # wildcards at the beginning or middle are not supported.

    # search = client.search_entities("name:demo*")
    # search = client.search_entities("qualifiedName:demo*")

    # Searching within a given type and including its subtypes...
    # Provide a search filter that specifies the typeName and whether
    # you want to include subtypes of that type or not.

    # filter_setup = {"typeName": "DataSet", "includeSubTypes": True}
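    # Combining the two (a hedged sketch, not part of the original
    # example): pass the filter to restrict results to DataSet subtypes.
    # search = client.search_entities("demo", search_filter=filter_setup)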
# COMMAND ----------

# Keep only the notebooks that contain write operations
# (create / merge / insert / spark.write)
notebook_list = []
for nb in dirs.rdd.collect():
  nbname = nb.notebook_path.split('/')[-1]
  df = spark.read.text(notebook_mnt + "/" + nbname + ".py")
  cnt = df.filter("lower(value) like '%create %' or lower(value) like '%merge %' or lower(value) like '%insert %' or lower(value) like '%spark.write%'").count()
  if cnt > 0:
    notebook_list.append(nb.notebook_path)
notebook_list

# COMMAND ----------

# Get the Databricks tables already registered in Purview
tables = []
search = client.search_entities("databricks_table")
for page in search:
  tname = page["qualifiedName"].split('/')[-1]
  tables.append([tname.lower(), page["qualifiedName"]])


# COMMAND ----------

# Get the ADLS Gen2 files registered in Purview
adlsfiles = []
filter_setup = {"typeName": "azure_datalake_gen2_path"}
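# The original example is truncated here; a hedged sketch of how it likely
# continues, mirroring the tables cell above: run a wildcard search with
# the type filter and collect each file's name and qualifiedName.
search = client.search_entities("*", search_filter=filter_setup)
for page in search:
  fname = page["qualifiedName"].split('/')[-1]
  adlsfiles.append([fname.lower(), page["qualifiedName"]])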