def test_transform_json_handles_list_custom_fields(self):
    expected = [{'custom_fields': [{'field_set_id': '_custom_1', 'id': 'id', 'value': '1'},
                                   {'field_set_id': '_custom_2', 'id': 'id', 'value': '2'},
                                   {'field_set_id': '_custom_2', 'id': 'index', 'value': '0'},
                                   {'field_set_id': '_custom_2', 'id': 'id', 'value': '3'},
                                   {'field_set_id': '_custom_2', 'id': 'index', 'value': '1'}],
                 'id': '1'}]
    actual = transform_json([{"_custom_1": {"id": '1'},
                              "_custom_2": [{"id": '2', "index": '0'},
                                            {"id": '3', "index": '1'}],
                              "id": '1'}], "my_path")
    self.assertEqual(expected, actual)

def sync_endpoint(client,  # pylint: disable=too-many-branches
                  catalog,
                  state,
                  start_date,
                  stream_name,
                  path,
                  endpoint_config,
                  api_version,
                  api_method,
                  static_params,
                  sub_type,
                  bookmark_query_field=None,
                  bookmark_field=None,
                  bookmark_type=None,
                  data_key=None,
                  body=None,
                  id_fields=None,
                  parent=None,
                  parent_id=None):
    # Get the latest bookmark for the stream and set the last_integer/datetime
    last_datetime = None
    last_integer = None
    max_bookmark_value = None
    if bookmark_type == 'integer':
        last_integer = get_bookmark(state, stream_name, sub_type, 0)
        max_bookmark_value = last_integer
    else:
        last_datetime = get_bookmark(state, stream_name, sub_type, start_date)
        max_bookmark_value = last_datetime

    write_schema(catalog, stream_name)

    # Pagination: loop thru all pages of data
    # Pagination reference: https://api.mambu.com/?http#pagination
    # Each page has an offset (starting record) and a limit (batch size, number of records).
    # Increase the "offset" by the "limit" for each batch and continue until the
    # record_count returned is less than the limit (or no data is returned).
    offset = 0  # Starting offset value for each batch API call
    limit = client.page_size  # Batch size; number of records per API call
    total_records = 0  # Initialize total
    record_count = limit  # Initialize; reset for each API call
    while record_count == limit:  # break out of loop when record_count < limit (or no data returned)
        params = {
            'offset': offset,
            'limit': limit,
            **static_params  # adds in endpoint-specific sort and filter params
        }

        if bookmark_query_field:
            if bookmark_type == 'datetime':
                params[bookmark_query_field] = last_datetime
            elif bookmark_type == 'integer':
                params[bookmark_query_field] = last_integer

        LOGGER.info('Stream: {}, Type: {} - Sync start {}'.format(
            stream_name, sub_type,
            'since: {}, '.format(last_datetime) if bookmark_query_field else ''))

        # Squash params to query-string params
        querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in params.items()])

        LOGGER.info('URL for {} ({}, {}): {}/{}?{}'.format(
            stream_name, api_method, api_version, client.base_url, path, querystring))
        if body is not None:
            LOGGER.info('body = {}'.format(body))

        # API request data
        data = client.request(
            method=api_method,
            path=path,
            version=api_version,
            params=querystring,
            endpoint=stream_name,
            json=body)

        # time_extracted: datetime when the data was extracted from the API
        time_extracted = utils.now()
        if not data:  # covers None and empty list
            record_count = 0
            LOGGER.warning('Stream: {} - NO DATA RESULTS'.format(stream_name))
            break  # NO DATA

        # Transform data with transform_json from transform.py.
        # This function converts camelCase to snake_case for fieldname keys.
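        # For example (an assumed shape inferred from the tests in this repo,
        # not a captured API response), a raw record like
        #     {"creationDate": "...", "_custom_1": {"id": "1"}, "id": "1"}
        # should come back roughly as
        #     {"creation_date": "...",
        #      "custom_fields": [{"field_set_id": "_custom_1", "id": "id", "value": "1"}],
        #      "id": "1"}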
        # The data_key may identify an array/list of records below the <root> element.
        transformed_data = []  # initialize the record list
        # If a single record dictionary is returned, wrap it in a list
        if isinstance(data, dict):
            data = [data]
        if data_key is None:
            transformed_data = transform_json(data, stream_name)
        elif data_key in data:
            transformed_data = transform_json(data, data_key)[data_key]

        if not transformed_data:  # covers None and empty list
            record_count = 0
            LOGGER.warning('Stream: {} - NO TRANSFORMED DATA RESULTS'.format(stream_name))
            break  # No data results

        # Process records and get the max_bookmark_value and record_count for the set of records
        max_bookmark_value, record_count = process_records(
            catalog=catalog,
            stream_name=stream_name,
            records=transformed_data,
            time_extracted=time_extracted,
            bookmark_field=bookmark_field,
            bookmark_type=bookmark_type,
            max_bookmark_value=max_bookmark_value,
            last_datetime=last_datetime,
            last_integer=last_integer,
            parent=parent,
            parent_id=parent_id)
        total_records = total_records + record_count

        # Loop thru parent batch records for each child object (if the child should be synced)
        children = endpoint_config.get('children')
        if children:
            for child_stream_name, child_endpoint_config in children.items():
                should_stream, _ = should_sync_stream(
                    get_selected_streams(catalog), None, child_stream_name)
                if should_stream:
                    # For each parent record
                    for record in transformed_data:
                        # Set parent_id: prefer the 'id' field, else the first id_field
                        parent_id_field = 'id' if 'id' in id_fields else id_fields[0]
                        parent_id = record.get(parent_id_field)

                        # sync_endpoint for child
                        LOGGER.info('Syncing: {}, parent_stream: {}, parent_id: {}'.format(
                            child_stream_name, stream_name, parent_id))
                        child_path = child_endpoint_config.get('path').format(str(parent_id))
                        child_total_records = sync_endpoint(
                            client=client,
                            catalog=catalog,
                            state=state,
                            start_date=start_date,
                            stream_name=child_stream_name,
                            path=child_path,
                            endpoint_config=child_endpoint_config,
                            api_version=child_endpoint_config.get('api_version', 'v2'),
                            api_method=child_endpoint_config.get('api_method', 'GET'),
                            static_params=child_endpoint_config.get('params', {}),
                            sub_type=sub_type,
                            bookmark_query_field=child_endpoint_config.get('bookmark_query_field'),
                            bookmark_field=child_endpoint_config.get('bookmark_field'),
                            bookmark_type=child_endpoint_config.get('bookmark_type'),
                            data_key=child_endpoint_config.get('data_key'),
                            body=child_endpoint_config.get('body'),
                            id_fields=child_endpoint_config.get('id_fields'),
                            parent=child_endpoint_config.get('parent'),
                            parent_id=parent_id)
                        LOGGER.info('Synced: {}, parent_id: {}, total_records: {}'.format(
                            child_stream_name, parent_id, child_total_records))

        # Update the state with the max_bookmark_value for the stream
        if bookmark_field:
            write_bookmark(state, stream_name, sub_type, max_bookmark_value)

        # to_rec: to record; ending record for the batch
        to_rec = offset + limit
        if record_count < limit:
            to_rec = total_records
        LOGGER.info('{} - Synced records: {} to {}'.format(stream_name, offset, to_rec))

        # Pagination: increment the offset by the limit (batch size)
        offset = offset + limit
    # End: while record_count == limit

    # Return total_records across all batches
    return total_records

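# A minimal sketch of how sync_endpoint might be invoked for a top-level stream.
# The endpoint_config keys mirror the ones read above ('path', 'api_version',
# 'api_method', 'params', 'bookmark_query_field', 'bookmark_field',
# 'bookmark_type', 'data_key', 'id_fields', 'children'); the 'loans' stream
# name, path, sub_type, and filter values are hypothetical placeholders, not
# this tap's actual stream catalog.
def _example_sync_loans(client, catalog, state, start_date):  # hypothetical helper
    endpoint_config = {
        'path': 'loans',  # hypothetical endpoint path
        'api_version': 'v2',
        'api_method': 'GET',
        'params': {'sortBy': 'id:ASC'},  # hypothetical sort param
        'bookmark_query_field': 'modifiedSince',  # hypothetical query filter
        'bookmark_field': 'last_modified_date',
        'bookmark_type': 'datetime',
        'id_fields': ['id']
    }
    return sync_endpoint(
        client=client,
        catalog=catalog,
        state=state,
        start_date=start_date,
        stream_name='loans',
        path=endpoint_config['path'],
        endpoint_config=endpoint_config,
        api_version=endpoint_config['api_version'],
        api_method=endpoint_config['api_method'],
        static_params=endpoint_config['params'],
        sub_type='self',  # hypothetical sub_type value
        bookmark_query_field=endpoint_config['bookmark_query_field'],
        bookmark_field=endpoint_config['bookmark_field'],
        bookmark_type=endpoint_config['bookmark_type'],
        id_fields=endpoint_config['id_fields'])
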
def test_transform_json_handles_dictionary_custom_fields(self):
    expected = [{'custom_fields': [{'field_set_id': '_custom_1', 'id': 'id', 'value': '1'}],
                 'id': '1'}]
    actual = transform_json([{"_custom_1": {"id": '1'}, "id": '1'}], "my_path")
    self.assertEqual(expected, actual)

def test_transform_no_custom_fields(self):
    expected = [{'custom_fields': [], 'id': '1'}]
    actual = transform_json([{"id": '1'}], "my_path")
    self.assertEqual(expected, actual)
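
# A hedged sketch, not part of the original suite: the sync code's comments
# state that transform_json converts camelCase field names to snake_case, so
# a test along these lines would pin that behavior down. The 'creationDate'
# key and the expected output are assumptions about that conversion, not
# verified fixtures.
def test_transform_json_converts_camel_case_keys(self):
    # assumes camelCase keys become snake_case and non-custom fields pass through
    expected = [{'creation_date': '2019-01-01', 'custom_fields': [], 'id': '1'}]
    actual = transform_json([{"creationDate": '2019-01-01', "id": '1'}], "my_path")
    self.assertEqual(expected, actual)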