Пример #1
0
def transform_contact(contact):
    '''Flatten the map-style properties of a contact into lists of rows

    Each boolean property that is present becomes a list of
    {"url", "value"} entries, and each timestamp property that is present
    becomes a list of {"id", "timestamp"} entries whose timestamp has been
    passed through _transform_datetime. Absent properties are skipped.

    The contact is modified in place and the same object is returned.
    '''
    url_flag_fields = (
        "anywhere_page_visits",
        "anywhere_form_submits",
        "anywhere_utm",
    )
    mail_event_fields = (
        "mail_received",
        "mail_opened",
        "mail_clicked",
        "mail_bounced",
        "mail_complained",
        "mail_unsubscribed",
        "mail_hardbounced",
    )

    for field in url_flag_fields:
        if field not in contact:
            continue
        entries = contact[field]
        # Index by key (rather than unpacking pairs) to mirror how the
        # values have always been looked up.
        contact[field] = [
            {"url": key, "value": entries[key]} for key in entries
        ]

    for field in mail_event_fields:
        if field not in contact:
            continue
        entries = contact[field]
        contact[field] = [
            {
                "id": key,
                "timestamp": _transform_datetime(
                    entries[key],
                    UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING),
            }
            for key in entries
        ]

    return contact
Пример #2
0
def sync_contacts(STATE, ctx):
    '''Sync the "contacts" stream, bookmarking on versionTimestamp

    Pages through the "contacts_all" endpoint and collects the vid of
    every row that either has no versionTimestamp or has one at or after
    the stored bookmark. Collected vids are handed to _sync_contact_vids
    in batches of 100, with one final call for whatever is left over.
    The newest versionTimestamp seen is written back as the bookmark.

    Params:
    STATE - State dictionary
    ctx - Context used to look up the catalog of the stream being synced

    Returns the updated STATE.
    '''
    stream_name = "contacts"
    bookmark_key = "versionTimestamp"
    batch_size = 100

    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    since = utils.strptime_with_tz(get_start(STATE, stream_name, bookmark_key))
    LOGGER.info("sync_contacts from %s", since)

    schema = load_schema(stream_name)
    singer.write_schema(stream_name, schema, ["vid"], [bookmark_key],
                        catalog.get("stream_alias"))

    contacts_url = get_url("contacts_all")
    newest_seen = since
    pending_vids = []

    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        rows = gen_request(STATE, stream_name, contacts_url,
                           default_contact_params, "contacts", "has-more",
                           ["vid-offset"], ["vidOffset"])
        for row in rows:
            if bookmark_key in row:
                raw_stamp = _transform_datetime(  # pylint: disable=protected-access
                    row[bookmark_key],
                    UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING)
                modified_time = utils.strptime_with_tz(raw_stamp)
            else:
                modified_time = None

            if not modified_time:
                # No usable timestamp: always include the contact.
                pending_vids.append(row["vid"])
            else:
                if modified_time >= since:
                    pending_vids.append(row["vid"])
                if modified_time >= newest_seen:
                    newest_seen = modified_time

            if len(pending_vids) == batch_size:
                _sync_contact_vids(catalog, pending_vids, schema, bumble_bee)
                pending_vids = []

        # Flush the final (possibly empty) partial batch.
        _sync_contact_vids(catalog, pending_vids, schema, bumble_bee)

    STATE = singer.write_bookmark(STATE, stream_name, bookmark_key,
                                  utils.strftime(newest_seen))
    singer.write_state(STATE)
    return STATE
Пример #3
0
def sync_contacts(STATE, stream):
    '''Sync contacts from the Autopilot API

    The API returns data in the following format

    {
        "contacts": [{...},{...}],
        "total_contacts": 400,
        "bookmark": "person_9EAF39E4-9AEC-4134-964A-D9D8D54162E7"
    }

    Rows without an "updated_at" value, or with one at or after the
    stored bookmark, are passed through transform_contact and written as
    records. The newest "updated_at" seen is written back as the bookmark.

    Params:
    STATE - State dictionary
    stream - Stream dictionary from the catalog

    Returns the updated STATE.
    '''
    tap_stream_id = stream['tap_stream_id']
    singer.write_schema(tap_stream_id, stream['schema'], ["contact_id"])

    # NB: Params is modified in gen_request by reference
    params = {}
    start = utils.strptime_with_tz(
        get_start(STATE, tap_stream_id, "updated_at"))

    # Hand the timestamp to the logger as an argument rather than
    # concatenating it, so formatting is left to the logging framework.
    LOGGER.info("Only syncing contacts updated since %s",
                utils.strftime(start))
    max_updated_at = start

    for row in gen_request(STATE, get_url(tap_stream_id), params):
        updated_at = None
        if "updated_at" in row:
            updated_at = utils.strptime_with_tz(
                _transform_datetime(  # pylint: disable=protected-access
                    row["updated_at"],
                    UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING))

        # Rows with no timestamp cannot be filtered, so they are always
        # emitted.
        if not updated_at or updated_at >= start:
            singer.write_record(tap_stream_id, transform_contact(row))

        if updated_at and updated_at > max_updated_at:
            max_updated_at = updated_at

    STATE = singer.write_bookmark(STATE, tap_stream_id, "updated_at",
                                  utils.strftime(max_updated_at))
    singer.write_state(STATE)

    LOGGER.info("Completed Contacts Sync")
    return STATE
Пример #4
0
def sync_contacts(STATE, catalog):
    '''Sync the "contacts" stream, bookmarking on lastmodifieddate

    Pages through the "contacts_all" endpoint and collects the vid of
    every row that either has no lastmodifieddate property or has one at
    or after the stored bookmark. Collected vids are handed to
    _sync_contact_vids in batches of 100, with one final call for the
    remainder. The newest lastmodifieddate seen becomes the new bookmark.

    Params:
    STATE - State dictionary
    catalog - Catalog entry for the stream; its "stream_alias" is read

    Returns the updated STATE.
    '''
    start = utils.strptime_with_tz(
        get_start(STATE, "contacts", 'lastmodifieddate'))
    LOGGER.info("sync_contacts from %s", start)

    max_bk_value = start
    schema = load_schema("contacts")

    # Pass the alias by keyword: in singer-python releases where
    # write_schema takes bookmark_properties as its fourth positional
    # parameter, a positional alias would be taken for bookmark properties.
    singer.write_schema("contacts", schema, ["vid"],
                        stream_alias=catalog.get('stream_alias'))

    url = get_url("contacts_all")

    vids = []
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in gen_request(STATE, 'contacts', url, default_contact_params,
                               'contacts', 'has-more', ['vid-offset'],
                               ['vidOffset']):
            modified_time = None
            if 'lastmodifieddate' in row['properties']:
                modified_time = utils.strptime_with_tz(
                    _transform_datetime(  # pylint: disable=protected-access
                        row['properties']['lastmodifieddate']['value'],
                        UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING))

            # Rows with no timestamp cannot be filtered, so they are
            # always included.
            if not modified_time or modified_time >= start:
                vids.append(row['vid'])

            if modified_time and modified_time >= max_bk_value:
                max_bk_value = modified_time

            if len(vids) == 100:
                _sync_contact_vids(catalog, vids, schema, bumble_bee)
                vids = []

        # Flush the final (possibly empty) partial batch.
        _sync_contact_vids(catalog, vids, schema, bumble_bee)

    STATE = singer.write_bookmark(STATE, 'contacts', 'lastmodifieddate',
                                  utils.strftime(max_bk_value))
    singer.write_state(STATE)
    return STATE
Пример #5
0
def sync_contacts(STATE, catalog):
    '''Sync contacts from the Autopilot API

    The API returns data in the following format

    {
        "contacts": [{...},{...}],
        "total_contacts": 400,
        "bookmark": "person_9EAF39E4-9AEC-4134-964A-D9D8D54162E7"
    }

    Rows without an "updated_at" value, or with one at or after the
    stored bookmark, are passed through transform_contact and written as
    records. The newest "updated_at" seen is written back as the bookmark.

    Params:
    STATE - State dictionary
    catalog - Catalog entry for the stream; its "stream_alias" is read

    Returns the updated STATE.
    '''
    schema = load_schema("contacts")
    # Pass the alias by keyword: in singer-python releases where
    # write_schema takes bookmark_properties as its fourth positional
    # parameter, a positional alias would be taken for bookmark properties.
    singer.write_schema("contacts", schema, ["contact_id"],
                        stream_alias=catalog.get("stream_alias"))

    params = {}
    start = utils.strptime_with_tz(get_start(STATE, "contacts", "updated_at"))

    # Hand the timestamp to the logger as an argument rather than
    # concatenating it, so formatting is left to the logging framework.
    LOGGER.info("Only syncing contacts updated since %s",
                utils.strftime(start))
    max_updated_at = start

    for row in gen_request(STATE, get_url("contacts"), params):
        updated_at = None
        if "updated_at" in row:
            updated_at = utils.strptime_with_tz(
                _transform_datetime(  # pylint: disable=protected-access
                    row["updated_at"],
                    UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING))

        # Rows with no timestamp cannot be filtered, so they are always
        # emitted.
        if not updated_at or updated_at >= start:
            singer.write_record("contacts", transform_contact(row))

        if updated_at and updated_at > max_updated_at:
            max_updated_at = updated_at

    STATE = singer.write_bookmark(STATE, 'contacts', 'updated_at',
                                  utils.strftime(max_updated_at))
    singer.write_state(STATE)

    LOGGER.info("Completed Contacts Sync")
    return STATE
Пример #6
0
def sync_contacts(state: State):
    '''Sync HubSpot contacts, bookmarking on versionTimestamp

    Pages through the "contacts_all" endpoint and collects the vid of
    every row that either has no versionTimestamp or has one at or after
    the stored bookmark. Collected vids are handed to _sync_contact_vids
    in batches of 100, with one final call for the remainder. The schema
    and the bookmark are written under the name "hubspot_contacts".

    Returns the updated state.
    '''
    bookmark_key = 'versionTimestamp'
    output_stream = "hubspot_contacts"
    batch_size = 100

    # NOTE(review): the bookmark is read under "contacts" here but written
    # under "hubspot_contacts" below - confirm get_start resolves both to
    # the same entry.
    since = utils.strptime_with_tz(get_start(state, "contacts", bookmark_key))
    logger.info("sync_contacts from %s", since)

    schema = load_schema("contacts")
    singer.write_schema(output_stream, schema, ["vid"], [bookmark_key])

    contacts_url = get_url("contacts_all")
    newest_seen = since
    pending_vids = []

    rows = gen_request(state, 'contacts', contacts_url,
                       default_contact_params, 'contacts', 'has-more',
                       ['vid-offset'], ['vidOffset'])
    for row in rows:
        if bookmark_key in row:
            raw_stamp = _transform_datetime(  # pylint: disable=protected-access
                row[bookmark_key],
                UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING)
            modified_time = utils.strptime_with_tz(raw_stamp)
        else:
            modified_time = None

        if not modified_time:
            # No usable timestamp: always include the contact.
            pending_vids.append(row['vid'])
        else:
            if modified_time >= since:
                pending_vids.append(row['vid'])
            if modified_time >= newest_seen:
                newest_seen = modified_time

        if len(pending_vids) == batch_size:
            _sync_contact_vids(pending_vids, schema)
            pending_vids = []

    # Flush the final (possibly empty) partial batch.
    _sync_contact_vids(pending_vids, schema)

    state = singer.write_bookmark(state, output_stream, bookmark_key,
                                  utils.strftime(newest_seen))
    singer.write_state(state)
    return state