Example #1
def test_add_attributes_later():
    s = AtlasStructDef(name="blah", category=TypeCategory.ENTITY)

    a1 = AtlasAttributeDef(name="test")
    a2 = AtlasAttributeDef(name="test2").to_json()

    s.addAttributeDef(a1, a2)

    assert (len(s.attributeDefs) == 2)
    assert (all([isinstance(e, dict) for e in s.attributeDefs]))
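This and the following test snippet omit their imports; a minimal preamble that should make them runnable, assuming the typedef classes live in pyapacheatlas.core.typedef, is:

from pyapacheatlas.core.typedef import (
    AtlasAttributeDef,
    AtlasStructDef,
    ClassificationTypeDef,
    EntityTypeDef,
    TypeCategory
)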
Example #2
def test_add_attributes_at_start():
    s = AtlasStructDef(name="blah",
                       category=TypeCategory.ENTITY,
                       attributeDefs=[
                           AtlasAttributeDef(name="test"),
                           AtlasAttributeDef(name="test2").to_json(),
                           AtlasAttributeDef(name="test3")
                       ])
    c = ClassificationTypeDef(name="blah",
                              attributeDefs=[
                                  AtlasAttributeDef(name="test"),
                                  AtlasAttributeDef(name="test2").to_json(),
                                  AtlasAttributeDef(name="test3")
                              ])
    ent = EntityTypeDef(name="blah",
                        attributeDefs=[
                            AtlasAttributeDef(name="test"),
                            AtlasAttributeDef(name="test2").to_json(),
                            AtlasAttributeDef(name="test3")
                        ])

    # Base Struct handles this
    assert (len(s.attributeDefs) == 3)
    assert (all([isinstance(e, dict) for e in s.attributeDefs]))

    # ClassificationDefs should also handle this behavior
    assert (len(c.attributeDefs) == 3)
    assert (all([isinstance(e, dict) for e in c.attributeDefs]))

    # EntityDefs should also handle this behavior
    assert (len(ent.attributeDefs) == 3)
    assert (all([isinstance(e, dict) for e in ent.attributeDefs]))
Example #3
def test_parse_entity_defs_extended():
    rc = ReaderConfiguration()
    reader = Reader(rc)
    json_rows = [{
        "Entity TypeName": "generic",
        "name": "attrib1",
        "description": "desc1",
        "isOptional": "True",
        "isUnique": "False",
        "defaultValue": None
    }, {
        "Entity TypeName": "generic",
        "name": "attrib2",
        "description": "desc2",
        "isOptional": "True",
        "isUnique": "False",
        "defaultValue": None,
        "cardinality": "SINGLE"
    }, {
        "Entity TypeName": "demo",
        "name": "attrib3",
        "description": "desc3",
        "isOptional": "False",
        "isUnique": "False",
        "cardinality": "SET"
    }]

    output = reader.parse_entity_defs(json_rows)
    # It is an AtlasTypesDef composite wrapper
    assert ("entityDefs" in output.keys())
    # There are two entity typenames specified so there should be only two entityDefs
    assert (len(output["entityDefs"]) == 2)

    genericEntityDef = None
    demoEntityDef = None

    for entityDef in output["entityDefs"]:
        if entityDef["name"] == "generic":
            genericEntityDef = entityDef
        elif entityDef["name"] == "demo":
            demoEntityDef = entityDef

    # Generic has two attributes
    assert (len(genericEntityDef["attributeDefs"]) == 2)

    # Demo has one attribute
    assert (len(demoEntityDef["attributeDefs"]) == 1)

    assert (demoEntityDef["attributeDefs"][0] == AtlasAttributeDef(
        name="attrib3",
        **{
            "description": "desc3",
            "isOptional": "False",
            "isUnique": "False",
            "cardinality": "SET"
        }).to_json())
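The reader setup itself also needs imports; a rough sketch of what this test assumes (the module path pyapacheatlas.readers.reader is an assumption), along with a hypothetical upload step that mirrors the upload_typedefs usage in Examples #5 and #6:

from pyapacheatlas.readers.reader import Reader, ReaderConfiguration

reader = Reader(ReaderConfiguration())
typedefs = reader.parse_entity_defs(json_rows)  # json_rows as defined in the test above
# Hypothetical: push the parsed AtlasTypesDef-shaped dict to the catalog
# client.upload_typedefs(entityDefs=typedefs["entityDefs"], force_update=True)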
Example #4
oauth = ServicePrincipalAuthentication(
    tenant_id=os.environ.get("TENANT_ID", tenant_id),
    client_id=os.environ.get("CLIENT_ID", client_id),
    client_secret=os.environ.get("CLIENT_SECRET", client_secret))
client = PurviewClient(account_name=os.environ.get("PURVIEW_NAME",
                                                   "purview_account_name"),
                       authentication=oauth)
guid = GuidTracker()

# COMMAND ----------

# Set up a few types and relationships
# This is a one-time thing but necessary to make the demo work
# It also demonstrates how you can capture different attributes
# for your dataframes, dataframe columns, and jobs.
type_spark_df = EntityTypeDef(name="custom_spark_dataframe",
                              attributeDefs=[AtlasAttributeDef(name="format")],
                              superTypes=["DataSet"],
                              options={"schemaElementAttribute": "columns"})
type_spark_columns = EntityTypeDef(
    name="custom_spark_dataframe_column",
    attributeDefs=[AtlasAttributeDef(name="data_type")],
    superTypes=["DataSet"],
)
type_spark_job = EntityTypeDef(name="custom_spark_job_process",
                               attributeDefs=[
                                   AtlasAttributeDef(name="job_type",
                                                     isOptional=False),
                                   AtlasAttributeDef(name="schedule",
                                                     defaultValue="adHoc")
                               ],
                               superTypes=["Process"])
Example #5
    oauth = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID", ""),
        client_id=os.environ.get("CLIENT_ID", ""),
        client_secret=os.environ.get("CLIENT_SECRET", "")
    )
    client = PurviewClient(
        account_name=os.environ.get("PURVIEW_NAME", ""),
        authentication=oauth
    )

    # We need a custom process entity type that contains the definition for
    # a columnMapping attribute.
    procType = EntityTypeDef(
        "ProcessWithColumnMapping",
        superTypes=["Process"],
        attributeDefs=[
            AtlasAttributeDef("columnMapping")
        ]
    )

    # Upload the type definition
    type_results = client.upload_typedefs(entityDefs=[procType], force_update=True)
    print(json.dumps(type_results, indent=2))

    # Set up a guid tracker to make it easier to generate negative guids
    gt = GuidTracker()

    # Now we can create the entities; we will have two inputs and one output
    colMapInput01 = AtlasEntity(
        "Input for Column Mapping",
        "hive_table",
        "pyapacheatlas://colMapInput01",
Example #6
# MAGIC %md
# MAGIC ##### 2. Set Up Custom Entity Types
# MAGIC Set up custom entity types to capture Databricks tables, columns, and jobs

# COMMAND ----------

# DBTITLE 0,databricks-table entity type
# Set up the new entity types to capture Delta Lake tables and Databricks jobs

# Databricks Table
databricks_table_type = EntityTypeDef(
    name="databricks_table",
    attributeDefs=[
        AtlasAttributeDef(name="format",
                          defaultValue="parquet",
                          isOptional=True).to_json(),
        AtlasAttributeDef(name="location", isOptional=True).to_json(),
        AtlasAttributeDef(name="num_files", isOptional=True).to_json(),
        AtlasAttributeDef(name="size", isOptional=True).to_json()
    ],
    superTypes=["DataSet"],
    options={"schemaElementAttribute": "columns"})
typedef_results = client.upload_typedefs(
    {"entityDefs": [databricks_table_type.to_json()]}, force_update=True)
print(typedef_results)

# COMMAND ----------

# DBTITLE 1,databricks-column entity type
# Databricks Column
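The notebook cell is truncated right after the databricks-column title; by analogy with custom_spark_dataframe_column in Example #4, the column type would presumably be defined along these lines (the exact attribute names are an assumption):

databricks_column_type = EntityTypeDef(
    name="databricks_column",
    attributeDefs=[
        AtlasAttributeDef(name="data_type", isOptional=True).to_json()
    ],
    superTypes=["DataSet"]
)
typedef_results = client.upload_typedefs(
    {"entityDefs": [databricks_column_type.to_json()]}, force_update=True)
print(typedef_results)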