示例#1
0
def do_discover_report(sf):
    """Describe a single Salesforce report and print its catalog as JSON.

    The description returned by ``sf.describe()`` is converted into one stream
    entry: a JSON-schema property plus metadata for every detail column of the
    report.  The resulting ``{'streams': [entry]}`` document is written to
    ``sys.stdout`` with an indent of 4; the function itself returns ``None``.

    Args:
        sf: Salesforce client object.  This function calls ``sf.describe()``
            and reads ``sf.source_type``, ``sf.api_type``,
            ``sf.select_fields_by_default`` and ``sf.report_id``.
    """
    report_description = sf.describe()

    report_name = report_description['attributes']['reportName']
    fields = report_description['reportExtendedMetadata']['detailColumnInfo']

    # (field name, reason) pairs for columns that must be flagged unsupported.
    unsupported_fields = set()
    properties = {}
    mdata = metadata.new()

    # Loop over the report's fields
    for field_name, field in fields.items():
        property_schema, mdata = create_report_property_schema(
            field, mdata, sf.source_type)
        data_type = field['dataType']
        breadcrumb = ('properties', field_name)

        # Compound Address fields and geolocations cannot be queried by the
        # Bulk API, so drop any metadata recorded under their breadcrumb and
        # keep them out of the schema entirely.
        if (data_type in ("address", "location")
                and sf.api_type == tap_salesforce.salesforce.BULK_API_TYPE):
            mdata.pop(breadcrumb, None)
            continue

        # we haven't been able to observe any records with a json field, so we
        # are marking it as unavailable until we have an example to work with
        if data_type == "json":
            unsupported_fields.add((
                field_name,
                'do not currently support json fields - please contact support'
            ))

        inclusion = metadata.get(mdata, breadcrumb, 'inclusion')

        if sf.select_fields_by_default and inclusion != 'unsupported':
            mdata = metadata.write(mdata, breadcrumb,
                                   'selected-by-default', True)

        properties[field_name] = property_schema

    # Every name in unsupported_fields was taken from ``fields`` in the loop
    # above, so no cross-check against the field list is needed here.
    if unsupported_fields:
        LOGGER.info(
            "Not syncing the following unsupported fields for report %s: %s",
            sf.report_id,
            ', '.join(sorted(name for name, _ in unsupported_fields)))

    # Unsupported columns stay in the schema, but their metadata is flagged
    # and any selected-by-default marker written in the loop is removed.
    for prop, description in unsupported_fields:
        breadcrumb = ('properties', prop)

        if metadata.get(mdata, breadcrumb, 'selected-by-default'):
            # Called for its effect on mdata; the return value is not used.
            metadata.delete(mdata, breadcrumb, 'selected-by-default')

        mdata = metadata.write(mdata, breadcrumb,
                               'unsupported-description', description)
        mdata = metadata.write(mdata, breadcrumb, 'inclusion', 'unsupported')

    # The entry with the empty breadcrumb is required, otherwise the stream
    # won't be picked up; table-key-properties is also required.
    mdata = metadata.write(mdata, (), 'table-key-properties', [])

    schema = {
        'type': 'object',
        'additionalProperties': False,
        'properties': properties
    }

    entry = {
        'stream': report_name,
        'tap_stream_id': sf.report_id,
        'schema': schema,
        'metadata': metadata.to_list(mdata),
        # Column names in the order they were added to ``properties``.
        'column_order': [str(column) for column in properties]
    }

    result = {'streams': [entry]}
    json.dump(result, sys.stdout, indent=4)
def do_discover(sf):
    """Describe the Salesforce objects and print their catalog as JSON.

    Every object listed by ``sf.describe()`` is described individually and
    turned into a stream entry (JSON schema plus metadata), except for:

    * objects in ``sf.get_blacklisted_objects()`` or whose name ends with
      ``ChangeEvent``;
    * objects without an ``Id`` field;
    * ``__Tag`` objects whose ``Item`` relationship points at an object that
      was described as a custom setting.

    The resulting ``{'streams': entries}`` document is written to
    ``sys.stdout`` with an indent of 4; the function itself returns ``None``.
    Streams are emitted in sorted name order.

    Args:
        sf: Salesforce client object.  This function calls ``sf.describe()``,
            ``sf.get_blacklisted_objects()`` and
            ``sf.get_blacklisted_fields()``, and reads ``sf.api_type`` and
            ``sf.select_fields_by_default``.
    """
    global_description = sf.describe()

    objects_to_discover = {o['name'] for o in global_description['sobjects']}
    key_properties = ['Id']

    # Fetched once up front instead of once per object / twice per field.
    # NOTE(review): assumes both getters return the same value for the whole
    # run (nothing in this function changes ``sf``) - confirm.
    blacklisted_objects = sf.get_blacklisted_objects()
    blacklisted_fields = sf.get_blacklisted_fields()

    sf_custom_setting_objects = []
    object_to_tag_references = {}

    # For each SF Object describe it, loop its fields and build a schema.
    # The names live in a set, whose iteration order can differ between runs;
    # sorting keeps the emitted catalog stable.
    entries = []
    for sobject_name in sorted(objects_to_discover):

        # Skip blacklisted SF objects depending on the api_type in use
        # ChangeEvent objects are not queryable via Bulk or REST (undocumented)
        if sobject_name in blacklisted_objects \
           or sobject_name.endswith("ChangeEvent"):
            continue

        sobject_description = sf.describe(sobject_name)

        # Cache customSetting and Tag objects to check for blacklisting after
        # all objects have been described
        if sobject_description.get("customSetting"):
            sf_custom_setting_objects.append(sobject_name)
        elif sobject_name.endswith("__Tag"):
            relationship_field = next(
                (f for f in sobject_description["fields"]
                 if f.get("relationshipName") == "Item"),
                None)
            if relationship_field:
                # Map {"Object":"Object__Tag"}
                tagged_object = relationship_field["referenceTo"][0]
                object_to_tag_references[tagged_object] = sobject_name

        fields = sobject_description['fields']
        replication_key = get_replication_key(sobject_name, fields)

        # (field name, reason) pairs for fields that must be flagged
        # unsupported.
        unsupported_fields = set()
        properties = {}
        mdata = metadata.new()

        found_id_field = False

        # Loop over the object's fields
        for f in fields:
            field_name = f['name']
            field_type = f['type']
            breadcrumb = ('properties', field_name)

            if field_name == "Id":
                found_id_field = True

            property_schema, mdata = create_property_schema(
                f, mdata)

            # Compound Address fields cannot be queried by the Bulk API
            if field_type == "address" \
               and sf.api_type == tap_salesforce.salesforce.BULK_API_TYPE:
                unsupported_fields.add(
                    (field_name, 'cannot query compound address fields with bulk API'))

            # we haven't been able to observe any records with a json field, so we
            # are marking it as unavailable until we have an example to work with
            if field_type == "json":
                unsupported_fields.add(
                    (field_name, 'do not currently support json fields - please contact support'))

            # Blacklisted fields are dependent on the api_type being used
            field_pair = (sobject_name, field_name)
            if field_pair in blacklisted_fields:
                unsupported_fields.add(
                    (field_name, blacklisted_fields[field_pair]))

            inclusion = metadata.get(mdata, breadcrumb, 'inclusion')

            if sf.select_fields_by_default and inclusion != 'unsupported':
                mdata = metadata.write(
                    mdata, breadcrumb, 'selected-by-default', True)

            properties[field_name] = property_schema

        if replication_key:
            mdata = metadata.write(
                mdata, ('properties', replication_key), 'inclusion', 'automatic')

        # Every name in unsupported_fields was taken from ``fields`` in the
        # loop above, so no cross-check against the field list is needed.
        if unsupported_fields:
            LOGGER.info("Not syncing the following unsupported fields for object %s: %s",
                        sobject_name,
                        ', '.join(sorted(name for name, _ in unsupported_fields)))

        # Salesforce Objects are skipped when they do not have an Id field
        if not found_id_field:
            LOGGER.info(
                "Skipping Salesforce Object %s, as it has no Id field",
                sobject_name)
            continue

        # Unsupported fields stay in the schema, but their metadata is flagged
        # and any selected-by-default marker written in the loop is removed.
        for prop, description in unsupported_fields:
            breadcrumb = ('properties', prop)

            if metadata.get(mdata, breadcrumb, 'selected-by-default'):
                # Called for its effect on mdata; the return value is not used.
                metadata.delete(mdata, breadcrumb, 'selected-by-default')

            mdata = metadata.write(
                mdata, breadcrumb, 'unsupported-description', description)
            mdata = metadata.write(
                mdata, breadcrumb, 'inclusion', 'unsupported')

        if replication_key:
            mdata = metadata.write(
                mdata, (), 'replication-key', replication_key)
        else:
            mdata = metadata.write(
                mdata,
                (),
                'forced-replication-method',
                {
                    'replication-method': 'FULL_TABLE',
                    'reason': 'No replication keys found from the Salesforce API'})

        mdata = metadata.write(mdata, (), 'table-key-properties', key_properties)

        schema = {
            'type': 'object',
            'additionalProperties': False,
            'properties': properties
        }

        entry = {
            'stream': sobject_name,
            'tap_stream_id': sobject_name,
            'schema': schema,
            'metadata': metadata.to_list(mdata)
        }

        entries.append(entry)

    # For each custom setting field, remove its associated tag from entries
    # See Blacklisting.md for more information
    unsupported_tag_objects = [object_to_tag_references[f]
                               for f in sf_custom_setting_objects
                               if f in object_to_tag_references]
    if unsupported_tag_objects:
        LOGGER.info(
            "Skipping the following Tag objects, Tags on Custom Settings Salesforce objects "
            "are not supported by the Bulk API:")
        LOGGER.info(unsupported_tag_objects)
        entries = [e for e in entries
                   if e['stream'] not in unsupported_tag_objects]

    result = {'streams': entries}
    json.dump(result, sys.stdout, indent=4)
示例#3
0
def do_discover(sf, objects_to_discover=None):
    """Describe selected Salesforce objects and return their catalog.

    Each requested object that exists in ``sf.describe()`` is described
    individually and turned into a stream entry (JSON schema plus metadata).
    An object is skipped when it is in ``sf.get_blacklisted_objects()``, its
    name ends with ``ChangeEvent``, it is not listed by the instance, or it
    has no ``Id`` field.  Every emitted stream is marked ``selected``.

    Args:
        sf: Salesforce client object.  This function calls ``sf.describe()``,
            ``sf.get_blacklisted_objects()`` and
            ``sf.get_blacklisted_fields()``, and reads ``sf.api_type`` and
            ``sf.select_fields_by_default``.
        objects_to_discover: Optional iterable of object names to describe,
            in the order the streams should be emitted.  When ``None`` (the
            default) the built-in list below is used.

    Returns:
        dict: ``{"streams": entries}`` where ``entries`` is the list of
        stream entries that were built.

    Raises:
        TapSalesforceBulkAPIDisabledException: if ``sf.api_type`` is
            ``"BULK"`` and ``Bulk(sf).has_permissions()`` is falsy.
    """
    global_description = sf.describe()

    objects_set = {o["name"] for o in global_description["sobjects"]}
    if objects_to_discover is None:
        objects_to_discover = [
            "Account", "Contact", "Lead", "Opportunity", "Campaign",
            "AccountContactRelation", "AccountContactRole",
            "OpportunityContactRole", "CampaignMember", "Task", "Invoice__c",
            "OpportunityHistory", "AccountHistory", "LeadHistory", "User"
        ]
    key_properties = ["Id"]

    sf_custom_setting_objects = []
    object_to_tag_references = {}

    # Check if the user has BULK API enabled
    # NOTE(review): this compares against the literal "BULK" while the field
    # loop below uses tap_salesforce.salesforce.BULK_API_TYPE - confirm the
    # two are the same value.
    if sf.api_type == "BULK" and not Bulk(sf).has_permissions():
        raise TapSalesforceBulkAPIDisabledException(
            'This client does not have Bulk API permissions, received "API_DISABLED_FOR_ORG" error code'
        )

    # Fetched once up front instead of once per object / twice per field.
    # NOTE(review): assumes both getters return the same value for the whole
    # run (nothing in this function changes ``sf``) - confirm.
    blacklisted_objects = sf.get_blacklisted_objects()
    blacklisted_fields = sf.get_blacklisted_fields()

    # For each SF Object describe it, loop its fields and build a schema
    entries = []
    for sobject_name in objects_to_discover:

        # Skip blacklisted SF objects depending on the api_type in use
        # ChangeEvent objects are not queryable via Bulk or REST (undocumented)
        if (sobject_name in blacklisted_objects
                or sobject_name.endswith("ChangeEvent")):
            continue
        # Requested objects that the instance does not list are ignored.
        if sobject_name not in objects_set:
            continue

        sobject_description = sf.describe(sobject_name)

        # Cache customSetting and Tag objects to check for blacklisting after
        # all objects have been described
        if sobject_description.get("customSetting"):
            sf_custom_setting_objects.append(sobject_name)
        elif sobject_name.endswith("__Tag"):
            relationship_field = next(
                (f for f in sobject_description["fields"]
                 if f.get("relationshipName") == "Item"),
                None,
            )
            if relationship_field:
                # Map {"Object":"Object__Tag"}
                tagged_object = relationship_field["referenceTo"][0]
                object_to_tag_references[tagged_object] = sobject_name

        fields = sobject_description["fields"]
        replication_key = get_replication_key(sobject_name, fields)

        # (field name, reason) pairs for fields that must be flagged
        # unsupported.
        unsupported_fields = set()
        properties = {}
        mdata = metadata.new()

        found_id_field = False

        # Loop over the object's fields
        for f in fields:
            field_name = f["name"]
            breadcrumb = ("properties", field_name)

            if field_name == "Id":
                found_id_field = True

            property_schema, mdata = create_property_schema(f, mdata)
            field_type = f["type"]

            # Compound Address fields cannot be queried by the Bulk API
            if (field_type == "address" and sf.api_type
                    == tap_salesforce.salesforce.BULK_API_TYPE):
                unsupported_fields.add(
                    (field_name,
                     "cannot query compound address fields with bulk API"))

            # we haven't been able to observe any records with a json field, so we
            # are marking it as unavailable until we have an example to work with
            if field_type == "json":
                unsupported_fields.add((
                    field_name,
                    "do not currently support json fields - please contact support",
                ))

            # Blacklisted fields are dependent on the api_type being used
            field_pair = (sobject_name, field_name)
            if field_pair in blacklisted_fields:
                unsupported_fields.add(
                    (field_name, blacklisted_fields[field_pair]))

            inclusion = metadata.get(mdata, breadcrumb, "inclusion")

            if sf.select_fields_by_default and inclusion != "unsupported":
                mdata = metadata.write(mdata, breadcrumb,
                                       "selected-by-default", True)

            properties[field_name] = property_schema

        if replication_key:
            mdata = metadata.write(mdata, ("properties", replication_key),
                                   "inclusion", "automatic")

        # Every name in unsupported_fields was taken from ``fields`` in the
        # loop above, so no cross-check against the field list is needed.
        if unsupported_fields:
            LOGGER.info(
                "Not syncing the following unsupported fields for object %s: %s",
                sobject_name,
                ", ".join(sorted(name for name, _ in unsupported_fields)),
            )

        # Salesforce Objects are skipped when they do not have an Id field
        if not found_id_field:
            LOGGER.info("Skipping Salesforce Object %s, as it has no Id field",
                        sobject_name)
            continue

        # Unsupported fields stay in the schema, but their metadata is flagged
        # and any selected-by-default marker written in the loop is removed.
        for prop, description in unsupported_fields:
            breadcrumb = ("properties", prop)

            if metadata.get(mdata, breadcrumb, "selected-by-default"):
                # Called for its effect on mdata; the return value is not used.
                metadata.delete(mdata, breadcrumb, "selected-by-default")

            mdata = metadata.write(mdata, breadcrumb,
                                   "unsupported-description", description)
            mdata = metadata.write(mdata, breadcrumb, "inclusion",
                                   "unsupported")

        if replication_key:
            mdata = metadata.write(mdata, (), "valid-replication-keys",
                                   [replication_key])
        else:
            mdata = metadata.write(
                mdata,
                (),
                "forced-replication-method",
                {
                    "replication-method": "FULL_TABLE",
                    "reason":
                    "No replication keys found from the Salesforce API",
                },
            )

        mdata = metadata.write(mdata, (), "table-key-properties",
                               key_properties)
        mdata = metadata.write(mdata, (), "selected", True)

        schema = {
            "type": "object",
            "additionalProperties": False,
            "properties": properties,
        }

        entry = {
            "stream": sobject_name,
            "tap_stream_id": sobject_name,
            "schema": schema,
            "metadata": metadata.to_list(mdata),
        }

        entries.append(entry)

    # For each custom setting field, remove its associated tag from entries
    # See Blacklisting.md for more information
    unsupported_tag_objects = [
        object_to_tag_references[f] for f in sf_custom_setting_objects
        if f in object_to_tag_references
    ]
    if unsupported_tag_objects:
        LOGGER.info(
            "Skipping the following Tag objects, Tags on Custom Settings Salesforce objects "
            "are not supported by the Bulk API:")
        LOGGER.info(unsupported_tag_objects)
        entries = [
            e for e in entries if e["stream"] not in unsupported_tag_objects
        ]

    return {"streams": entries}