def to_representation(self, instance):
    """
    Serialize ``instance`` and unwrap its submission payload.

    The parent serializer builds the representation. When it carries a
    'json' entry, that entry is handed to ``remove_metadata_fields`` and
    returned in place of the wrapping representation; otherwise the parent
    representation is returned as it is.
    """
    representation = super(TableauDataSerializer, self).to_representation(
        instance)
    if 'json' not in representation:
        return representation

    submission = representation['json']
    # The helper's return value is ignored, so it is relied upon to strip
    # the metadata fields from the payload in place
    remove_metadata_fields(submission)
    return submission
def get_tableau_column_headers(self):
    """
    Retrieve the column headers that are valid in tableau.

    Each header left after ``remove_metadata_fields`` is emitted exactly
    once, in the order it first appears in ``self.xform_headers``. The data
    type comes from the first entry of ``self.flattened_dict`` whose name
    equals the header or is an underscore-joined suffix of it; unmatched
    headers are typed "int" for '_id' and 'string' otherwise.

    Returns a list of dicts with the keys 'id', 'dataType' and 'alias'.
    """
    tableau_column_headers = []

    def append_to_tableau_column_headers(header, question_type=None):
        tableau_column_headers.append({
            'id': header,
            # Fall back to 'string' when no question type is supplied
            'dataType': question_type or 'string',
            # alias can be updated in the future to question labels
            'alias': header
        })

    # Remove metadata fields from the column headers.
    # Group data can repeat a header, so duplicates are dropped; this is
    # done with dict.fromkeys rather than set() so that the columns keep a
    # stable, first-seen order instead of an arbitrary hash order.
    xform_headers = dict.fromkeys(
        remove_metadata_fields(self.xform_headers))

    # using nested loops to determine what valid data types to set for
    # tableau.
    for header in xform_headers:
        for quest_name, quest_type in self.flattened_dict.items():
            if header == quest_name or header.endswith('_%s' % quest_name):
                append_to_tableau_column_headers(header, quest_type)
                break
        else:
            # Reached only when no question matched the header
            if header == '_id':
                append_to_tableau_column_headers(header, "int")
            else:
                append_to_tableau_column_headers(header)
    return tableau_column_headers
def process_tableau_data(data, xform):
    """
    Streamlines the row header fields
    with the column header fields for the same form.
    Handles flattening repeat data for tableau.

    :param data: iterable of submission rows (dicts). A falsy value
        yields an empty list.
    :param xform: form object; ``get_headers()`` and
        ``get_child_elements()`` are called on it.
    :return: list with one flat dict per row. Headers missing from a row
        are present with the value ``None``.
    """

    def get_ordered_repeat_value(key, item, index):
        """
        Flatten one repeat entry, visiting the questions in the order in
        which ``xform.get_child_elements`` returns them.

        Given the repeat key "children" and the abbreviated xpath
        "children/immunization/polio_1", the generated key for the first
        entry is "children[1]/immunization/polio_1" (with the default
        index tags assumed to be square brackets).
        """
        children = xform.get_child_elements(key, split_select_multiples=False)
        key_depth = len(key.split('/'))
        indexed_key = '{key}{open_tag}{index}{close_tag}'.format(
            key=key,
            open_tag=DEFAULT_INDEX_TAGS[0],
            index=index,
            close_tag=DEFAULT_INDEX_TAGS[1])
        flattened = {}
        for elem in children:
            if question_types_to_exclude(elem.type):
                continue
            child_xpath = elem.get_abbreviated_xpath()
            # Keep the whole path below the repeat. Truncating it to a
            # single segment made nested questions share one key and
            # overwrite each other.
            relative_path = '/'.join(child_xpath.split('/')[key_depth:])
            flattened['/'.join([indexed_key, relative_path])] = item.get(
                child_xpath, DEFAULT_NA_REP)
        return flattened

    result = []
    if not data:
        return result

    # The header set is the same for every row, so build it only once
    tableau_headers = set(remove_metadata_fields(xform.get_headers()))
    # List-valued keys that are not repeats and must be kept untouched
    non_repeat_keys = (ATTACHMENTS, NOTES, GEOLOCATION)

    for row in data:
        # Start from the headers the row lacks, all set to None
        flat_dict = dict.fromkeys(tableau_headers.difference(row), None)
        for key, value in row.items():
            if isinstance(value, list) and key not in non_repeat_keys:
                for index, item in enumerate(value, start=1):
                    # order repeat according to xform order
                    flat_dict.update(
                        get_ordered_repeat_value(key, item, index))
            else:
                flat_dict[key] = value
        result.append(flat_dict)
    return result
def process_tableau_data(data, xform):
    """
    Streamlines the row header fields
    with the column header fields for the same form.
    Handles flattening repeat data for tableau.

    :param data: iterable of submission rows (dicts). A falsy value
        yields an empty list.
    :param xform: form object; ``get_headers()``, ``get_element()`` and
        ``get_child_elements()`` are called on it.
    :return: list with one flat dict per row. Headers missing from a row
        are present with the value ``None``.
    """

    def get_xpath(key, nested_key, index):
        """
        Return ``key[index]/<rest>`` where ``<rest>`` is the part of
        ``nested_key`` that lies below ``key``.
        """
        relative_parts = nested_key.split('/')[len(key.split('/')):]
        return key + f'[{index}]/' + '/'.join(relative_parts)

    def get_updated_data_dict(key, value, data_dict, index=0):
        """
        Generates key, value pairs for select multiple question types.
        Defining the new xpaths from the
        question name(key) and the choice name(value)
        in accordance with how we generate the tableau schema.

        A string value is split on spaces into one entry per choice. A
        list value is treated as a list of dicts whose keys are re-rooted
        under ``key`` using ``index``; if an item has no ``items()`` the
        raw list is stored under ``key`` instead.
        """
        # Compare against None explicitly: an empty dict is a valid target
        # and must not cause the choices to be silently dropped
        if isinstance(value, str) and data_dict is not None:
            for choice in value.split(" "):
                data_dict[f'{key}/{choice}'] = choice
        elif isinstance(value, list):
            try:
                for item in value:
                    for (nested_key, nested_val) in item.items():
                        xpath = get_xpath(key, nested_key, index)
                        data_dict[xpath] = nested_val
            except AttributeError:
                data_dict[key] = value
        return data_dict

    def get_ordered_repeat_value(key, item, index):
        """
        Flatten one repeat entry, visiting the questions in the order in
        which ``xform.get_child_elements`` returns them.

        Given the repeat key "children" and the abbreviated xpath
        "children/immunization/polio_1", the generated key for the first
        entry is "children[1]/immunization/polio_1".
        """
        children = xform.get_child_elements(key, split_select_multiples=False)
        item_list = OrderedDict()
        for elem in children:
            if not question_types_to_exclude(elem.type):
                new_xpath = elem.get_abbreviated_xpath()
                item_list[new_xpath] = item.get(new_xpath, DEFAULT_NA_REP)

        flattened = {}
        for (nested_key, nested_val) in item_list.items():
            qstn_type = xform.get_element(nested_key).type
            xpaths = get_xpath(key, nested_key, index)
            if qstn_type in (MULTIPLE_SELECT_TYPE, REPEAT_SELECT_TYPE):
                flattened = get_updated_data_dict(
                    xpaths, nested_val, flattened, index)
            else:
                flattened[xpaths] = nested_val
        return flattened

    result = []
    if not data:
        return result

    # The header set is the same for every row, so build it only once
    tableau_headers = set(remove_metadata_fields(xform.get_headers()))
    # List-valued keys that are not repeats and must not be flattened
    non_repeat_keys = (ATTACHMENTS, NOTES, GEOLOCATION)

    for row in data:
        # Start from the headers the row lacks, all set to None
        flat_dict = dict.fromkeys(tableau_headers.difference(row), None)
        for (key, value) in row.items():
            if isinstance(value, list) and key not in non_repeat_keys:
                for index, item in enumerate(value, start=1):
                    # order repeat according to xform order
                    flat_dict.update(
                        get_ordered_repeat_value(key, item, index))
                continue

            try:
                # An AttributeError raised here (for instance when
                # get_element() returns None, or a geopoint value is not
                # a string) falls back to storing the raw value
                qstn_type = xform.get_element(key).type
                if qstn_type == MULTIPLE_SELECT_TYPE:
                    flat_dict = get_updated_data_dict(key, value, flat_dict)
                if qstn_type == 'geopoint':
                    parts = value.split(' ')
                    gps_xpaths = \
                        DataDictionary.get_additional_geopoint_xpaths(key)
                    if len(parts) == 4:
                        gps_parts = dict(zip(gps_xpaths, parts))
                    else:
                        # Malformed geopoint: keep the columns, empty
                        gps_parts = {xpath: None for xpath in gps_xpaths}
                    flat_dict.update(gps_parts)
                else:
                    flat_dict[key] = value
            except AttributeError:
                flat_dict[key] = value
        result.append(flat_dict)
    return result