def transform_contact(contact):
    '''Reshape the map-valued properties of a contact into lists of rows.

    Two fixed groups of properties are rewritten in place when present:

    * url-keyed properties become a list of {"url": key, "value": value}
    * id-keyed timestamp properties become a list of
      {"id": key, "timestamp": <converted value>}

    Every other key is left untouched. The same (mutated) contact object
    is returned.
    '''
    url_keyed_props = (
        "anywhere_page_visits",
        "anywhere_form_submits",
        "anywhere_utm",
    )
    id_keyed_timestamp_props = (
        "mail_received",
        "mail_opened",
        "mail_clicked",
        "mail_bounced",
        "mail_complained",
        "mail_unsubscribed",
        "mail_hardbounced",
    )

    for name in url_keyed_props:
        if name not in contact:
            continue
        mapping = contact[name]
        contact[name] = [
            {"url": key, "value": mapping[key]}
            for key in mapping
        ]

    for name in id_keyed_timestamp_props:
        if name not in contact:
            continue
        mapping = contact[name]
        # The raw values are run through _transform_datetime with the
        # unix-milliseconds parsing mode before being stored.
        contact[name] = [
            {
                "id": key,
                "timestamp": _transform_datetime(
                    (mapping[key]),
                    UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING),
            }
            for key in mapping
        ]

    return contact
def sync_contacts(STATE, ctx):
    '''Sync the "contacts" stream, bookmarking on versionTimestamp.

    Rows yielded by gen_request are compared against the stored bookmark.
    The vid of every row that has no (truthy) timestamp, or whose timestamp
    is at or after the bookmark, is queued and handed to _sync_contact_vids
    in batches of 100; whatever is left over is handed off after the loop.

    Params:
    STATE - State dictionary
    ctx - context object; only get_catalog_from_id is used here

    Returns the state produced by singer.write_bookmark, after it has been
    emitted with singer.write_state.
    '''
    stream_name = "contacts"
    bookmark_key = "versionTimestamp"
    batch_size = 100

    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    start = utils.strptime_with_tz(get_start(STATE, stream_name, bookmark_key))
    LOGGER.info("sync_contacts from %s", start)

    schema = load_schema(stream_name)
    singer.write_schema(
        stream_name,
        schema,
        ["vid"],
        [bookmark_key],
        catalog.get("stream_alias"),
    )
    url = get_url("contacts_all")

    newest_seen = start
    pending_vids = []
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        rows = gen_request(
            STATE,
            stream_name,
            url,
            default_contact_params,
            "contacts",
            "has-more",
            ["vid-offset"],
            ["vidOffset"],
        )
        for row in rows:
            row_time = None
            if bookmark_key in row:
                row_time = utils.strptime_with_tz(
                    _transform_datetime(  # pylint: disable=protected-access
                        row[bookmark_key],
                        UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING))

            if row_time:
                if row_time >= start:
                    pending_vids.append(row["vid"])
                if row_time >= newest_seen:
                    newest_seen = row_time
            else:
                # No usable timestamp: always include the row.
                pending_vids.append(row["vid"])

            if len(pending_vids) == batch_size:
                _sync_contact_vids(catalog, pending_vids, schema, bumble_bee)
                pending_vids = []

        # Hand off the final, possibly partial (or empty), batch.
        _sync_contact_vids(catalog, pending_vids, schema, bumble_bee)

    STATE = singer.write_bookmark(
        STATE, stream_name, bookmark_key, utils.strftime(newest_seen))
    singer.write_state(STATE)
    return STATE
def sync_contacts(STATE, stream):
    '''Sync contacts from the Autopilot API

    The API returns data in the following format

    {
        "contacts": [{...},{...}],
        "total_contacts": 400,
        "bookmark": "person_9EAF39E4-9AEC-4134-964A-D9D8D54162E7"
    }

    Every row that has no (truthy) "updated_at", or whose "updated_at" is at
    or after the stored bookmark, is passed through transform_contact and
    written as a record. The largest "updated_at" seen becomes the new
    bookmark.

    Params:
    STATE - State dictionary
    stream - Stream dictionary from the catalog

    Returns the state produced by singer.write_bookmark, after it has been
    emitted with singer.write_state.
    '''
    tap_stream_id = stream['tap_stream_id']
    singer.write_schema(tap_stream_id, stream['schema'], ["contact_id"])

    # NB: Params is modified in gen_request by reference
    params = {}
    start = utils.strptime_with_tz(
        get_start(STATE, tap_stream_id, "updated_at"))

    # Let the logger interpolate the argument instead of concatenating the
    # message by hand; the emitted text is unchanged.
    LOGGER.info("Only syncing contacts updated since %s", utils.strftime(start))
    max_updated_at = start

    for row in gen_request(STATE, get_url(tap_stream_id), params):
        updated_at = None
        if "updated_at" in row:
            updated_at = utils.strptime_with_tz(
                _transform_datetime(  # pylint: disable=protected-access
                    row["updated_at"],
                    UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING))

        if not updated_at or updated_at >= start:
            singer.write_record(tap_stream_id, transform_contact(row))

        if updated_at and updated_at > max_updated_at:
            max_updated_at = updated_at

    STATE = singer.write_bookmark(
        STATE, tap_stream_id, "updated_at", utils.strftime(max_updated_at))
    singer.write_state(STATE)

    LOGGER.info("Completed Contacts Sync")
    return STATE
def sync_contacts(STATE, catalog):
    '''Sync the "contacts" stream, bookmarking on lastmodifieddate.

    Rows yielded by gen_request are compared against the stored bookmark
    using row['properties']['lastmodifieddate']['value']. The vid of every
    row that has no (truthy) modified time, or whose modified time is at or
    after the bookmark, is queued and handed to _sync_contact_vids in
    batches of 100; whatever is left over is handed off after the loop.

    Params:
    STATE - State dictionary
    catalog - catalog entry; only .get('stream_alias') is used here, the
              object itself is forwarded to _sync_contact_vids

    Returns the state produced by singer.write_bookmark, after it has been
    emitted with singer.write_state.
    '''
    start = utils.strptime_with_tz(
        get_start(STATE, "contacts", 'lastmodifieddate'))
    LOGGER.info("sync_contacts from %s", start)

    max_bk_value = start
    schema = load_schema("contacts")

    # Pass the alias by keyword: the fourth positional parameter of
    # singer.write_schema is bookmark_properties, so a positional alias
    # would be emitted as the stream's bookmark properties instead.
    singer.write_schema("contacts", schema, ["vid"],
                        stream_alias=catalog.get('stream_alias'))

    url = get_url("contacts_all")

    vids = []
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in gen_request(STATE, 'contacts', url, default_contact_params,
                               'contacts', 'has-more',
                               ['vid-offset'], ['vidOffset']):
            modified_time = None
            if 'lastmodifieddate' in row['properties']:
                modified_time = utils.strptime_with_tz(
                    _transform_datetime(  # pylint: disable=protected-access
                        row['properties']['lastmodifieddate']['value'],
                        UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING))

            if not modified_time or modified_time >= start:
                vids.append(row['vid'])

            if modified_time and modified_time >= max_bk_value:
                max_bk_value = modified_time

            if len(vids) == 100:
                _sync_contact_vids(catalog, vids, schema, bumble_bee)
                vids = []

        # Hand off the final, possibly partial (or empty), batch.
        _sync_contact_vids(catalog, vids, schema, bumble_bee)

    STATE = singer.write_bookmark(
        STATE, 'contacts', 'lastmodifieddate', utils.strftime(max_bk_value))
    singer.write_state(STATE)
    return STATE
def sync_contacts(STATE, catalog):
    '''Sync contacts from the Autopilot API

    The API returns data in the following format

    {
        "contacts": [{...},{...}],
        "total_contacts": 400,
        "bookmark": "person_9EAF39E4-9AEC-4134-964A-D9D8D54162E7"
    }

    Every row that has no (truthy) "updated_at", or whose "updated_at" is at
    or after the stored bookmark, is passed through transform_contact and
    written as a record. The largest "updated_at" seen becomes the new
    bookmark.

    Params:
    STATE - State dictionary
    catalog - catalog entry; only .get("stream_alias") is used here

    Returns the state produced by singer.write_bookmark, after it has been
    emitted with singer.write_state.
    '''
    schema = load_schema("contacts")

    # Pass the alias by keyword: the fourth positional parameter of
    # singer.write_schema is bookmark_properties, so a positional alias
    # would be emitted as the stream's bookmark properties instead.
    singer.write_schema("contacts", schema, ["contact_id"],
                        stream_alias=catalog.get("stream_alias"))

    params = {}
    start = utils.strptime_with_tz(get_start(STATE, "contacts", "updated_at"))

    # Let the logger interpolate the argument instead of concatenating the
    # message by hand; the emitted text is unchanged.
    LOGGER.info("Only syncing contacts updated since %s", utils.strftime(start))
    max_updated_at = start

    for row in gen_request(STATE, get_url("contacts"), params):
        updated_at = None
        if "updated_at" in row:
            updated_at = utils.strptime_with_tz(
                _transform_datetime(  # pylint: disable=protected-access
                    row["updated_at"],
                    UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING))

        if not updated_at or updated_at >= start:
            singer.write_record("contacts", transform_contact(row))

        if updated_at and updated_at > max_updated_at:
            max_updated_at = updated_at

    STATE = singer.write_bookmark(
        STATE, 'contacts', 'updated_at', utils.strftime(max_updated_at))
    singer.write_state(STATE)

    LOGGER.info("Completed Contacts Sync")
    return STATE
def sync_contacts(state: State):
    '''Sync contacts into the "hubspot_contacts" stream.

    Rows yielded by gen_request are compared against the stored
    versionTimestamp bookmark. The vid of every row that has no (truthy)
    timestamp, or whose timestamp is at or after the bookmark, is queued and
    handed to _sync_contact_vids in batches of 100; whatever is left over is
    handed off after the loop.

    Returns the state produced by singer.write_bookmark, after it has been
    emitted with singer.write_state.
    '''
    bookmark_key = 'versionTimestamp'
    output_stream = "hubspot_contacts"
    batch_size = 100

    # NOTE(review): the bookmark is read under "contacts" but written under
    # "hubspot_contacts" below - confirm get_start resolves both to the same
    # entry, otherwise the saved bookmark is never picked up again.
    start = utils.strptime_with_tz(get_start(state, "contacts", bookmark_key))
    logger.info("sync_contacts from %s", start)

    schema = load_schema("contacts")
    singer.write_schema(output_stream, schema, ["vid"], [bookmark_key])
    url = get_url("contacts_all")

    newest_seen = start
    pending_vids = []
    rows = gen_request(
        state,
        'contacts',
        url,
        default_contact_params,
        'contacts',
        'has-more',
        ['vid-offset'],
        ['vidOffset'],
    )
    for row in rows:
        row_time = None
        if bookmark_key in row:
            row_time = utils.strptime_with_tz(
                _transform_datetime(  # pylint: disable=protected-access
                    row[bookmark_key],
                    UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING))

        if row_time:
            if row_time >= start:
                pending_vids.append(row['vid'])
            if row_time >= newest_seen:
                newest_seen = row_time
        else:
            # No usable timestamp: always include the row.
            pending_vids.append(row['vid'])

        if len(pending_vids) == batch_size:
            _sync_contact_vids(pending_vids, schema)
            pending_vids = []

    # Hand off the final, possibly partial (or empty), batch.
    _sync_contact_vids(pending_vids, schema)

    state = singer.write_bookmark(
        state, output_stream, bookmark_key, utils.strftime(newest_seen))
    singer.write_state(state)
    return state