def __process_all_records_data_stream(req_state):
    max_bookmark_value = req_state.last_date
    record_count = 0

    records = ilevel.get_all_objects(req_state.stream_name, req_state.client)
    if len(records) == 0:
        return 0

    # Process records
    process_record_count = 0
    max_bookmark_value, process_record_count = process_records(
        result_records=records,
        req_state=req_state,
        deletion_flag=False,
        max_bookmark_value=max_bookmark_value)
    record_count = record_count + process_record_count

    # Data not sorted
    # Update the state with the max_bookmark_value for the stream after ALL records
    if req_state.bookmark_field and process_record_count > 0:
        singer_ops.write_bookmark(req_state.state, req_state.stream_name,
                                  max_bookmark_value)

    return record_count
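
# Illustrative sketch (assumption, not part of this tap): every handler in this module
# relies on process_records() returning a (max_bookmark_value, record_count) tuple.
# The hypothetical stand-in below only documents that return contract, assuming
# dict-like records and ISO-8601 date strings; the real helper is defined elsewhere
# in this project and also handles transformation and Singer record writes.
def _example_process_records_contract(result_records, req_state, deletion_flag,
                                      max_bookmark_value):
    """Hypothetical stand-in showing the (bookmark, count) return shape only."""
    # deletion_flag is accepted solely for signature parity with the calls above.
    count = 0
    for record in result_records:
        # Track the greatest bookmark value seen when the stream has a bookmark field;
        # lexicographic comparison works because the values are ISO-8601 date strings.
        bookmark = record.get(req_state.bookmark_field) if req_state.bookmark_field else None
        if bookmark and bookmark > max_bookmark_value:
            max_bookmark_value = bookmark
        count = count + 1
    return max_bookmark_value, count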

def __process_periodic_data_calcs(req_state, scenario_name='Actual', currency_code='USD'):
    entity_types = ['assets']  # Currently: assets only (not funds)
    period_types = req_state.period_types.strip().replace(' ', '').split(',')
    batch_size = 10000
    start_dttm = datetime.strptime(req_state.last_date, '%Y-%m-%d')
    end_dttm = req_state.end_date
    max_bookmark_value = req_state.last_date

    # Init params_list and results
    i_get_params_list = req_state.client.factory.create(
        'ArrayOfBaseRequestParameters')
    results = []
    req_id = 1
    batch = 1
    update_count = 0

    # Base objects
    data_value_types = req_state.client.factory.create('DataValueTypes')

    # scenario_id for scenario_name
    scenarios = req_state.client.service.GetScenarios()
    scenario = [i for i in scenarios.NamedEntity if i.Name == scenario_name][0]
    scenario_id = scenario.Id

    # current_date
    date_types = req_state.client.factory.create('DateTypes')
    current_date = req_state.client.factory.create('Date')
    current_date.Type = date_types.Current

    # latest_date
    latest_date = req_state.client.factory.create('Date')
    latest_date.Type = date_types.Latest

    # Get all calc data items
    data_item_search_criteria = req_state.client.factory.create(
        'DataItemsSearchCriteria')
    data_item_search_criteria.GetGlobalDataItemsOnly = True  # Global Data Items ONLY
    data_items = req_state.client.service.GetDataItems(data_item_search_criteria)
    calc_data_items = [
        i for i in data_items.DataItemObjectEx if i.FormulaTypeIDsString
    ]  # TESTING (add): and 'Gross Margin' in i.Name
    calc_data_items_len = len(calc_data_items)
    last_calc_data_item = calc_data_items[-1]

    # entity_type loop
    for entity_type in entity_types:  # funds, assets
        LOGGER.info('entity_type = {}'.format(entity_type))  # COMMENT OUT

        # entity_ids for funds_or_assets
        if entity_type == 'funds':
            entities = req_state.client.service.GetFunds()
            entity_objs = entities.Fund
            # entity_objs = [i for i in entity_objs if 'IV, L.P.' in i.ExcelName]  # COMMENT OUT
        else:  # assets
            entities = req_state.client.service.GetAssets()
            entity_objs = entities.Asset
            # entity_objs = [i for i in entity_objs if 'Guild Education' in i.Name]  # TESTING: COMMENT OUT
        entity_objs_len = len(entity_objs)

        # calc_data_items loop
        cdi = 1
        for data_item in calc_data_items:
            data_item_id = data_item.Id
            data_item_name = data_item.Name
            LOGGER.info('data_item_name = {} ({})'.format(
                data_item_name, data_item_id))  # COMMENT OUT

            # data_value_type for data_item
            data_value_type_id = data_item.DataValueType
            data_value_type = data_value_types[data_value_type_id]

            # entity loop
            ent = 1
            for entity in entity_objs:
                entity_dict = ilevel.sobject_to_dict(entity)
                entity_id = entity_dict.get('Id')
                entity_name = entity_dict.get('Name')
                # LOGGER.info('entity = {} ({})'.format(entity_name, entity_id))  # COMMENT OUT
                entity_initial_dttm = datetime.strptime(
                    entity_dict.get('InitialPeriod')[:10], '%Y-%m-%d')
                max_dttm = [start_dttm, entity_initial_dttm]
                start_dttm = max(i for i in max_dttm if i is not None)
                # LOGGER.info('periodic_data_calculated: {}, {}: {} ({})'.format(
                #     data_item_name, entity_type, entity_name, entity_id))  # COMMENT OUT

                entity_path = ilevel.create_entity_path(req_state, [entity_id])

                # period_type loop
                last_period_type = period_types[-1]
                for period_type in period_types:
                    period, period_diff = ilevel.get_periods(
                        req_state, start_dttm, end_dttm, period_type)

                    # offset_period loop (0, -1, -2, ...) look-back
                    pd = 0
                    while pd <= period_diff + 1:
                        # LOGGER.info('{}: periodic_data_calculated: {}, Period Type: {}, Offset: {}'.format(
                        #     req_id, data_item_name, period_type, -pd))  # COMMENT OUT
                        offset_period = copy.copy(period)
                        offset_period.IsOffset = True
                        offset_period.Quantity = int(-1 * pd)

                        i_get_params = req_state.client.factory.create(
                            'AssetAndFundGetRequestParameters')
                        i_get_params.RequestIdentifier = req_id
                        i_get_params.DataValueType = data_value_type
                        i_get_params.EntitiesPath = entity_path
                        i_get_params.DataItemId = data_item_id
                        i_get_params.ScenarioId = scenario_id
                        i_get_params.Period = period
                        i_get_params.Offset = offset_period
                        i_get_params.EndOfPeriod = latest_date
                        i_get_params.ReportedDate = current_date
                        i_get_params.CurrencyCode = currency_code
                        i_get_params_list.BaseRequestParameters.append(i_get_params)
                        # LOGGER.info('i_get_params = {}'.format(i_get_params))  # COMMENT OUT

                        # run iGetBatch
                        end_of_batches = False
                        if (pd == (period_diff + 1) and period_type == last_period_type
                                and ent == entity_objs_len and cdi == calc_data_items_len
                                and entity_type == 'assets'):
                            end_of_batches = True
                            LOGGER.info('xxx END OF BATCHES xxx')

                        if (req_id % batch_size == 0) or end_of_batches:
                            LOGGER.info('xxx BATCH: {} xxx'.format(batch))
                            i_get_count = len(i_get_params_list)
                            i_get_request = req_state.client.factory.create(
                                'DataServiceRequest')
                            i_get_request.IncludeStandardizedDataInfo = True
                            i_get_request.IncludeExcelFormula = True
                            i_get_request.ParametersList = i_get_params_list
                            # LOGGER.info('i_get_request = {}'.format(i_get_request))  # COMMENT OUT

                            # pylint: disable=unused-variable
                            metrics_string = (
                                'periodic_data_calculated, iGetBatch #{}: {} requests'.format(
                                    batch, i_get_count))
                            with metrics.http_request_timer(metrics_string) as timer:
                                data_values = req_state.client.service.iGetBatch(
                                    i_get_request)
                            # LOGGER.info('data_values = {}'.format(data_values))  # COMMENT OUT

                            if isinstance(data_values, str):
                                continue
                            try:
                                periodic_data_records = data_values.DataValue
                            except Exception as err:
                                LOGGER.error('{}'.format(err))
                                LOGGER.error('data_values dict = {}'.format(
                                    ilevel.sobject_to_dict(data_values)))
                                raise err

                            for periodic_data_record in periodic_data_records:
                                if "Error" in periodic_data_record:
                                    continue
                                if "NoDataAvailable" in periodic_data_record:
                                    continue
                                periodic_data_record_dict = ilevel.sobject_to_dict(
                                    periodic_data_record)
                                # LOGGER.info('period_data_record_dict = {}'.format(periodic_data_record_dict))  # COMMENT OUT
                                transformed_record = transform_json(
                                    periodic_data_record_dict)
                                # LOGGER.info('transformed_record = {}'.format(transformed_record))  # COMMENT OUT

                                if 'value' in transformed_record:
                                    value = transformed_record.get('value')
                                    value_string = str(value)
                                    if type(value) in (int, float):
                                        value_numeric = float(value)
                                    else:
                                        value_numeric = None
                                    if value == 'No Data Available':
                                        continue

                                    sd_parameters = transformed_record.get(
                                        'sd_parameters', {})
                                    excel_formula = transformed_record.get('excel_formula')
                                    currency_code = sd_parameters.get('currency_code')
                                    data_item_id = sd_parameters.get('data_item_id')
                                    data_value_type = sd_parameters.get('data_value_type')
                                    detail_id = sd_parameters.get('detail_id')
                                    entity_id = next(
                                        iter(
                                            sd_parameters.get('entities_path', {}).get(
                                                'path', {}).get('int', [])), None)
                                    scenario_id = sd_parameters.get('scenario_id')
                                    period_type = sd_parameters.get('period', {}).get('type')
                                    end_of_period_value = sd_parameters.get(
                                        'end_of_period', {}).get('value')
                                    reported_date_value = sd_parameters.get(
                                        'reported_date', {}).get('value')
                                    exchange_rate_type = sd_parameters.get(
                                        'exchange_rate', {}).get('type')
                                    request_id = sd_parameters.get('request_identifier')
                                    standardized_data_id = sd_parameters.get(
                                        'standardized_data_id')

                                    # Primary key dimensions, create md5 hash key
                                    dimensions = {
                                        'data_item_id': data_item_id,
                                        'entity_id': entity_id,
                                        'scenario_id': scenario_id,
                                        'period_type': period_type,
                                        'end_of_period_value': end_of_period_value,
                                        'currency_code': currency_code,
                                        'exchange_rate_type': exchange_rate_type,
                                        'data_value_type': data_value_type
                                    }
                                    hash_key = str(
                                        hash_data(json.dumps(dimensions, sort_keys=True)))

                                    new_record = {
                                        'hash_key': hash_key,
                                        'excel_formula': excel_formula,
                                        'currency_code': currency_code,
                                        'data_item_id': data_item_id,
                                        'data_value_type': data_value_type,
                                        'detail_id': detail_id,
                                        'entity_id': entity_id,
                                        'scenario_id': scenario_id,
                                        'period_type': period_type,
                                        'end_of_period_value': end_of_period_value,
                                        'reported_date_value': reported_date_value,
                                        'exchange_rate_type': exchange_rate_type,
                                        'request_id': request_id,
                                        'standardized_data_id': standardized_data_id,
                                        'value': value,
                                        'value_string': value_string,
                                        'value_numeric': value_numeric
                                    }
                                    results.append(new_record)
                            # end for rec in period_data_records

                            # Process batch records
                            max_bookmark_value, process_record_count = process_records(
                                result_records=results,
                                req_state=req_state,
                                deletion_flag=False,
                                max_bookmark_value=max_bookmark_value)
                            update_count = update_count + process_record_count

                            # Init new params_list and results
                            i_get_params_list = req_state.client.factory.create(
                                'ArrayOfBaseRequestParameters')
                            results = []
                            batch = batch + 1
                        # end iGetBatch

                        req_id = req_id + 1
                        pd = pd + 1
                    # end offset_period loop
                # end period_type loop
                ent = ent + 1
            # end entity_id loop
            cdi = cdi + 1
        # end calc_data_items loop
    # end entity_type loop

    # Update the state with the max_bookmark_value for the stream after ALL records
    # Always process past year of calculated data (Subtract 365 days from max_bookmark_value)
    max_bookmark_dttm = datetime.strptime(max_bookmark_value[:10],
                                          "%Y-%m-%d") - timedelta(days=365)
    max_bookmark_value = max_bookmark_dttm.strftime("%Y-%m-%d")
    singer_ops.write_bookmark(req_state.state, req_state.stream_name, max_bookmark_value)

    return update_count
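
# Illustrative sketch (assumption): hash_data() above is expected to produce a stable
# digest of the JSON-serialized dimension dict, so that the same dimension combination
# always yields the same hash_key primary key. The real helper lives elsewhere in this
# project; the hypothetical md5 equivalent below is for reference only.
def _example_hash_dimensions(dimensions):
    """Hypothetical md5 digest of sorted-key JSON, mirroring the hash_key pattern above."""
    import hashlib
    # sort_keys makes the digest independent of dict insertion order
    serialized = json.dumps(dimensions, sort_keys=True)
    return hashlib.md5(serialized.encode('utf-8')).hexdigest()

# Usage: the same dimensions in any key order produce the same key.
# _example_hash_dimensions({'entity_id': 1, 'data_item_id': 7})
#     == _example_hash_dimensions({'data_item_id': 7, 'entity_id': 1})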

def __process_standardized_data_stream(req_state):
    max_bookmark_value = req_state.last_date
    update_count = 0
    processed_record_count = 0

    # Split date windows: API call restricts date windows based on 30 day periods.
    date_chunks = ilevel.get_date_chunks(req_state.last_date, req_state.end_date,
                                         MAX_DATE_WINDOW)

    cur_start_date = None
    cur_end_date = None
    cur_date_criteria_length = len(date_chunks)
    cur_date_range_index = 0
    LOGGER.info('Preparing to process %s date chunks', len(date_chunks))
    date_chunk_index = 0
    while cur_date_range_index < cur_date_criteria_length:
        if cur_start_date is None:
            cur_start_date = date_chunks[0]
            cur_end_date = date_chunks[1]
            cur_date_range_index = 2
        else:
            cur_start_date = cur_end_date
            cur_end_date = date_chunks[cur_date_range_index]
            cur_date_range_index = cur_date_range_index + 1
        LOGGER.info('periodic_data_standardized, {} - {}, Date Range: {} of {}'.format(
            cur_start_date, cur_end_date, cur_date_range_index,
            cur_date_criteria_length))

        # Get updated records based on date range
        updated_object_id_sets = ilevel.get_standardized_data_id_chunks(
            cur_start_date, cur_end_date, req_state.client)
        if len(updated_object_id_sets) == 0:
            continue
        LOGGER.info('periodic_data_standardized, {} - {}, Updated Sets: {}'.format(
            cur_start_date, cur_end_date, len(updated_object_id_sets)))

        # Translate standardized ids to objects
        batch = 1
        for id_set in updated_object_id_sets:
            processed_record_count = 0
            temp_max_bookmark_value, processed_record_count = \
                process_iget_batch_for_standardized_id_set(id_set, req_state)
            temp_max_bookmark_value_dttm = datetime.strptime(
                temp_max_bookmark_value, "%Y-%m-%d")
            max_bookmark_value_dttm = datetime.strptime(max_bookmark_value, "%Y-%m-%d")
            if temp_max_bookmark_value_dttm > max_bookmark_value_dttm:
                max_bookmark_value = temp_max_bookmark_value
            LOGGER.info(
                'periodic_data_standardized, {} - {}, Batch #{}, Requests: {}, Results: {}'.format(
                    cur_start_date, cur_end_date, batch, len(id_set),
                    processed_record_count))
            update_count = update_count + processed_record_count
            batch = batch + 1

        # Some reported_date_value (bookmark) are in the future?
        max_bookmark_value_dttm = datetime.strptime(max_bookmark_value, "%Y-%m-%d")
        if max_bookmark_value_dttm > cur_end_date:
            max_bookmark_value = cur_end_date.strftime("%Y-%m-%d")

        date_chunk_index = date_chunk_index + 1

    # Data not sorted
    # Update the state with the max_bookmark_value for the stream after ALL records
    if req_state.bookmark_field and processed_record_count > 0:
        singer_ops.write_bookmark(req_state.state, req_state.stream_name,
                                  max_bookmark_value)

    return update_count
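
# Illustrative sketch (assumption): ilevel.get_date_chunks() is consumed above as a flat
# list of window boundaries -- the first two entries form the first window and every
# later entry becomes the next window's end date. The hypothetical generator below shows
# one way such boundaries could be built in MAX_DATE_WINDOW-day steps; the real helper
# is defined elsewhere in this project and may differ.
def _example_date_chunks(start_date_str, end_dttm, max_days):
    """Hypothetical boundary list: [start, start + max_days, ..., end] as datetimes."""
    boundaries = [datetime.strptime(start_date_str, '%Y-%m-%d')]
    while boundaries[-1] < end_dttm:
        # Step forward max_days at a time, never past the overall end date.
        boundaries.append(min(boundaries[-1] + timedelta(days=max_days), end_dttm))
    return boundaries

# Usage: _example_date_chunks('2020-01-01', datetime(2020, 2, 15), 30)
#     -> [2020-01-01, 2020-01-31, 2020-02-15]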

def __process_incremental_stream(req_state):
    record_count = 0
    date_chunks = ilevel.get_date_chunks(req_state.last_date, req_state.end_date,
                                         MAX_DATE_WINDOW)

    max_bookmark_value_upd = req_state.last_date
    max_bookmark_value_del = req_state.last_date
    cur_start_date = None
    cur_end_date = None
    cur_date_criteria_length = len(date_chunks)
    cur_date_range_index = 0
    update_bookmark = False

    # Loop through date, and id 'chunks' as appropriate, processing each window.
    while cur_date_range_index < cur_date_criteria_length:
        if cur_start_date is None:
            cur_start_date = date_chunks[0]
            cur_end_date = date_chunks[1]
            cur_date_range_index = 2
        else:
            cur_start_date = cur_end_date
            cur_end_date = date_chunks[cur_date_range_index]
            cur_date_range_index = cur_date_range_index + 1
        LOGGER.info('{}: Processing date range {} of {} total ({} - {})'.format(
            req_state.stream_name, cur_date_range_index, cur_date_criteria_length,
            cur_start_date, cur_end_date))

        # Retrieve updated entities for given date range, and send for processing
        updated_object_id_sets = ilevel.get_updated_object_id_sets(
            cur_start_date, cur_end_date, req_state.client, req_state.stream_name)
        if len(updated_object_id_sets) > 0:
            cur_id_set_index = 0
            for id_set in updated_object_id_sets:
                updated_record_count = 0
                LOGGER.info('{}: Processing id set {} of {} total sets'.format(
                    req_state.stream_name, cur_id_set_index + 1,
                    len(updated_object_id_sets)))
                # Process updated object stream id set
                max_bookmark_value_upd, updated_record_count = \
                    __process_updated_object_stream_id_set(
                        object_ids=list(id_set),
                        req_state=req_state,
                        max_bookmark_value=max_bookmark_value_upd)
                record_count = record_count + updated_record_count
                if updated_record_count > 0:
                    update_bookmark = True
                cur_id_set_index = cur_id_set_index + 1

        # Retrieve deleted entities for given date range, and send for processing
        deleted_object_id_sets = ilevel.get_deleted_object_id_sets(
            cur_start_date, cur_end_date, req_state.client, req_state.stream_name)
        if len(deleted_object_id_sets) > 0:
            cur_id_set_index = 0
            for id_set in deleted_object_id_sets:
                deleted_record_count = 0
                LOGGER.info('{}: Processing deleted id set {} of {} total sets'.format(
                    req_state.stream_name, cur_id_set_index + 1,
                    len(deleted_object_id_sets)))
                # Process deleted records
                max_bookmark_value_del, deleted_record_count = \
                    __process_deleted_object_stream_id_set(
                        object_ids=list(id_set),
                        req_state=req_state,
                        max_bookmark_value=max_bookmark_value_del)
                record_count = record_count + deleted_record_count
                if deleted_record_count > 0:
                    update_bookmark = True
                cur_id_set_index = cur_id_set_index + 1

    # Get max_bookmark_value from update (_upd) and delete (_del)
    max_bookmark_value = max(req_state.start_date, req_state.last_date,
                             max_bookmark_value_upd, max_bookmark_value_del)
    max_bookmark_value_dttm = datetime.strptime(max_bookmark_value, "%Y-%m-%d")
    if max_bookmark_value_dttm > cur_end_date:
        max_bookmark_value = cur_end_date.strftime("%Y-%m-%d")

    # Data not sorted
    # Update the state with the max_bookmark_value for the stream after ALL records
    if update_bookmark:
        singer_ops.write_bookmark(req_state.state, req_state.stream_name,
                                  max_bookmark_value)

    return record_count
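
# Illustrative sketch (assumption): these private handlers are presumably selected by the
# tap's top-level sync code according to each stream's replication style. The hypothetical
# dispatcher below only shows that wiring; the real entry point, the full set of stream
# names, and the replication flag are defined elsewhere in this project.
def _example_dispatch_stream(req_state, replication_method):
    """Hypothetical routing of one stream to the handlers defined above."""
    if req_state.stream_name == 'periodic_data_calculated':
        return __process_periodic_data_calcs(req_state)
    if req_state.stream_name == 'periodic_data_standardized':
        return __process_standardized_data_stream(req_state)
    if replication_method == 'INCREMENTAL':
        return __process_incremental_stream(req_state)
    # Full-table style streams fall through to the all-records handler.
    return __process_all_records_data_stream(req_state)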