def validate(self):
    """
    Check that this element's name can be used as an xml tag.

    Raises PyXFormError when is_valid_xml_tag rejects self.name;
    otherwise returns None.
    """
    # Guard clause: nothing to report for a well-formed name.
    if is_valid_xml_tag(self.name):
        return
    raise PyXFormError(
        (
            "The name '%s' is an invalid xml tag. Names must begin with a "
            "letter, colon, or underscore, subsequent characters can include "
            "numbers, dashes, and periods."
        ) % self.name
    )
def _replace_prefix(d, prefix):
    """
    Substitute prefix for every '$PREFIX$' placeholder found in the string
    values of d, recursing into nested dicts and lists of dicts.

    d is modified in place and also returned.
    """
    for k, v in list(d.items()):
        if isinstance(v, basestring):
            d[k] = v.replace('$PREFIX$', prefix)
        elif isinstance(v, dict):
            d[k] = _replace_prefix(v, prefix)
        elif isinstance(v, list):
            # A real list (not a lazy map object) so the result is the same
            # on Python 2 and Python 3.
            d[k] = [_replace_prefix(x, prefix) for x in v]
    return d


def workbook_to_json(workbook_dict, form_name=None,
                     default_language=u"default", warnings=None):
    """
    Convert a spreadsheet, given as nested dictionaries, into the nested
    dictionary format specified in the json form spec.

    workbook_dict -- nested dictionaries representing a spreadsheet.
        should be similar to those returned by xls_to_dict.
        The optional "<sheet>_header" entries are validated and then
        removed from this dictionary.
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
        1. In the xform the default language is the language reverted to
           when there is no translation available for some itext element.
           Because of this every itext element must have a default
           language translation.
        2. In the workbook if media/labels/hints that do not have a
           language suffix will be treated as though their suffix is the
           default language. If the default language is used as a suffix
           for media/labels/hints, then the suffixless version will be
           overwritten.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the
    json form spec.

    raises PyXFormError when required sheets, headers, names, types or
    list names are missing or malformed, or when begin/end control
    statements do not match up.
    """
    # ensure required headers are present
    survey_header_sheet = u'%s_header' % constants.SURVEY
    if survey_header_sheet in workbook_dict:
        survey_headers = workbook_dict.get(survey_header_sheet)
        if not survey_headers:
            raise PyXFormError(u"The survey sheet is missing column headers.")
        present_headers = [
            h for h in [u'type', u'name'] if h in survey_headers[0].keys()
        ]
        # Compare by value; identity comparison with an int literal only
        # works by accident of the interpreter's small-int cache.
        if len(present_headers) != 2:
            raise PyXFormError(u"The survey sheet must have on the first row"
                               u" name and type columns.")
        del workbook_dict[survey_header_sheet]

    choices_header_sheet = u'%s_header' % constants.CHOICES
    if choices_header_sheet in workbook_dict:
        choices_headers = workbook_dict.get(choices_header_sheet)
        if not choices_headers:
            raise PyXFormError(u"The choices sheet is missing column headers.")
        choices_header_list = [u'list name', u'list_name', u'name']
        present_headers = [
            h for h in choices_header_list if h in choices_headers[0].keys()
        ]
        # Exactly two of the three candidate headers must be present.
        if len(present_headers) != 2:
            raise PyXFormError(u"The choices sheet must have on the first row"
                               u" list_name and name.")
        del workbook_dict[choices_header_sheet]

    if warnings is None:
        # Set warnings to a list that will be discarded.
        warnings = []

    rowFormatString = '[row : %s]'

    # Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double colons to determine whether to use them
    # or single colons to delimit grouped headers.
    # Single colons are bad because they conflict with the xform namespace
    # syntax (i.e. jr:constraintMsg),
    # so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects
    # (settings, choices, survey_sheet):

    # ########## Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []),
        aliases.settings_header, use_double_colons)
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}

    default_language = settings.get(constants.DEFAULT_LANGUAGE,
                                    default_language)

    # add_none_option is a boolean that when true,
    # indicates a none option should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = aliases.yes_no.get(
            settings[u"add_none_option"], False)

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    sms_keyword = settings.get(constants.SMS_KEYWORD, id_string)
    xml_root = settings.get(constants.XML_ROOT, form_name)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: xml_root,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.SMS_KEYWORD: sms_keyword,
        constants.DEFAULT_LANGUAGE: default_language,
        # By default the version is based on the date and time yyyymmddhh
        # Leaving default version out for now since it might cause
        # problems for formhub.
        # constants.VERSION : datetime.datetime.now().strftime("%Y%m%d%H"),
        constants.CHILDREN: []
    }
    # Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    # ########## Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated,
    # but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(
        constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, aliases.list_header,
        use_double_colons, default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(
        columns_sheet, aliases.list_header,
        use_double_colons, default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(
        choices_sheet, aliases.list_header,
        use_double_colons, default_language)

    # ########## Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, [])
    if cascading_choices and 'choices' in cascading_choices[0]:
        choices_sheet = choices_sheet + cascading_choices[0]['choices']

    combined_lists = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet,
        constants.LIST_NAME)

    choices = combined_lists

    # Make sure all the options have the required properties:
    warnedabout = set()
    for list_name, options in choices.items():
        for option in options:
            if 'name' not in option:
                info = "[list_name : " + list_name + ']'
                raise PyXFormError("On the choices sheet there is "
                                   "a option with no name. " + info)
            if 'label' not in option:
                info = "[list_name : " + list_name + ']'
                warnings.append(
                    "On the choices sheet there is a option with no label. "
                    + info)
            # chrislrobert's fix for a cryptic error message:
            # see: https://code.google.com/p/opendatakit/issues/detail?id=832&start=200 # noqa
            # Iterate over a snapshot of the keys because offending
            # columns are deleted from the option inside the loop.
            for headername in list(option.keys()):
                # Using warnings and removing the bad columns
                # instead of throwing errors because some forms
                # use choices column headers for notes.
                if ' ' in headername:
                    if headername not in warnedabout:
                        warnedabout.add(headername)
                        warnings.append("On the choices sheet there is " +
                                        "a column (\"" +
                                        headername +
                                        "\") with an illegal header. " +
                                        "Headers cannot include spaces.")
                    del option[headername]
                elif headername == '':
                    warnings.append("On the choices sheet there is a value" +
                                    " in a column with no header.")
                    del option[headername]

    # ########## Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError("You must have a sheet named (case-sensitive): " +
                           constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]

    # Process the headers:
    clean_text_values_enabled = aliases.yes_no.get(
        settings.get("clean_text_values", "true()"))
    if clean_text_values_enabled:
        survey_sheet = clean_text_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(
        survey_sheet, aliases.survey_header,
        use_double_colons, default_language)
    survey_sheet = dealias_types(survey_sheet)

    osm_sheet = workbook_dict.get(constants.OSM, [])
    osm_tags = group_dictionaries_by_key(osm_sheet, constants.LIST_NAME)

    # #################################

    # Parse the survey sheet while generating a survey in our json format:

    row_number = 1  # We start at 1 because the column header row is not
    #                 included in the survey sheet (presumably).

    # A stack is used to keep track of begin/end expressions.
    # Each entry is (control type, children list of that control).
    stack = [(None, json_dict.get(constants.CHILDREN))]

    # If a group has a table-list appearance flag
    # this will be set to the name of the list
    table_list = None

    # For efficiency we compile all the regular expressions
    # that will be used to parse types:
    end_control_regex = re.compile(
        r"^(?P<end>end)(\s|_)(?P<type>(" +
        '|'.join(aliases.control.keys()) + r"))$")
    begin_control_regex = re.compile(
        r"^(?P<begin>begin)(\s|_)(?P<type>(" +
        '|'.join(aliases.control.keys()) +
        r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(
        r"^(?P<select_command>(" + '|'.join(aliases.select.keys()) +
        r")) (?P<list_name>\S+)" +
        "( (?P<specify_other>(or specify other|or_other|or other)))?$")
    cascading_regexp = re.compile(
        r"^(?P<cascading_command>(" +
        '|'.join(aliases.cascading.keys()) +
        r")) (?P<cascading_level>\S+)?$")
    osm_regexp = re.compile(
        r"(?P<osm_command>(" + '|'.join(aliases.osm.keys()) +
        r')) (?P<list_name>\S+)')

    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]

        # Disabled should probably be first
        # so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append(
                rowFormatString % row_number +
                " The 'disabled' column header is not part of the current" +
                " spec. We recommend using relevant instead.")
            disabled = row.pop(u"disabled")
            if aliases.yes_no.get(disabled):
                continue

        # skip empty rows
        if len(row) == 0:
            continue

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # if name and label are also missing,
            # then its a comment row, and we skip it with warning
            if not ((constants.NAME in row) or (constants.LABEL in row)):
                warnings.append(rowFormatString % row_number +
                                " Row without name,"
                                " text, or label is being skipped:\n" +
                                str(row))
                continue
            raise PyXFormError(rowFormatString % row_number +
                               " Question with no type.\n" + str(row))

        if question_type == 'calculate':
            calculation = row.get('bind', {}).get('calculate')
            if not calculation:
                raise PyXFormError(rowFormatString % row_number +
                                   " Missing calculation.")

        # Check if the question is actually a setting specified
        # on the survey sheet
        settings_type = aliases.settings_header.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        # Try to parse question as a end control statement
        # (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = aliases.control[parse_dict["type"]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        rowFormatString % row_number +
                        " Unmatched end statement. Previous control type: " +
                        str(prev_control_type) +
                        ", Control type: " + str(control_type))
                stack.pop()
                table_list = None
                continue

        # Make sure the row has a valid name
        if constants.NAME not in row:
            if row['type'] == 'note':
                # autogenerate names for notes without them
                row['name'] = "generated_note_name_" + str(row_number)
            # elif 'group' in row['type'].lower():
            #     # autogenerate names for groups without them
            #     row['name'] = "generated_group_name_" + str(row_number)
            else:
                raise PyXFormError(rowFormatString % row_number +
                                   " Question or group with no name.")
        question_name = unicode(row[constants.NAME])
        if not is_valid_xml_tag(question_name):
            error_message = rowFormatString % row_number
            error_message += " Invalid question name [" + question_name + "]"
            error_message += "Names must begin with a letter, colon,"\
                             + " or underscore."
            error_message += "Subsequent characters can include numbers,"\
                             + " dashes, and periods."
            raise PyXFormError(error_message)

        if constants.LABEL not in row and \
           row.get(constants.MEDIA) is None and \
           question_type not in aliases.label_optional_types:
            # TODO: Should there be a default label?
            #      Not sure if we should throw warnings for groups...
            #      Warnings can be ignored so I'm not too concerned
            #      about false positives.
            warnings.append(rowFormatString % row_number +
                            " Question has no label: " + str(row))

        # Try to parse question as begin control statement
        # (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                # Create a new json dict with children, and the proper type,
                # and add it to parent_children_array in place of a question.
                # parent_children_array will then be set to its children array
                # (so following questions are nested under it)
                # until an end command is encountered.
                control_type = aliases.control[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                # Value comparison: identity of two equal strings is not
                # guaranteed.
                if control_type == constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError(rowFormatString % row_number +
                                           " Repeat loop without list name.")
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError(
                            rowFormatString % row_number +
                            " List name not in columns sheet: " + list_name)
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Generate a new node for the jr:count column so
                # xpath expressions can be used.
                repeat_count_expression = \
                    new_json_dict.get('control', {}).get('jr:count')
                if repeat_count_expression:
                    generated_node_name = new_json_dict['name'] + "_count"
                    parent_children_array.append({
                        "name": generated_node_name,
                        "bind": {
                            "readonly": "true()",
                            "calculate": repeat_count_expression,
                        },
                        "type": "calculate",
                    })
                    new_json_dict['control']['jr:count'] = \
                        "${" + generated_node_name + "}"

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                ctrl_ap = new_json_dict.get(u"control", {}).get(u"appearance")
                if ctrl_ap == constants.TABLE_LIST:
                    # True marks "inside a table list, list name not yet
                    # known"; the first select replaces it with its list name.
                    table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"
                    # Generate a note label element so hints and labels
                    # work as expected in table-lists.
                    # see https://github.com/modilabs/pyxform/issues/62
                    if 'label' in new_json_dict or 'hint' in new_json_dict:
                        generated_label_element = {
                            "type": "note",
                            "name":
                            "generated_table_list_label_" + str(row_number)
                        }
                        if 'label' in new_json_dict:
                            generated_label_element[constants.LABEL] = \
                                new_json_dict[constants.LABEL]
                            del new_json_dict[constants.LABEL]
                        if 'hint' in new_json_dict:
                            generated_label_element['hint'] = \
                                new_json_dict['hint']
                            del new_json_dict['hint']
                        child_list.append(generated_label_element)

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(rowFormatString % row_number +
                                       " Cascading select needs a name.")
                # cascading_json = get_cascading_json(
                # cascading_choices, cascading_prefix, cascading_level)
                if not cascading_choices or \
                   'questions' not in cascading_choices[0]:
                    # %-formatting copes with a missing (None) level, which
                    # the regex allows, instead of failing on concatenation.
                    raise PyXFormError(
                        "Found a cascading_select %s, but could not"
                        " find %s in cascades sheet."
                        % (cascading_level, cascading_level))
                cascading_json = cascading_choices[0]['questions']
                json_dict['choices'] = choices
                include_bindings = 'bind' in row
                for cq in cascading_json:
                    # include bindings
                    if include_bindings:
                        cq['bind'] = row['bind']
                    parent_children_array.append(
                        _replace_prefix(cq, cascading_prefix))
                continue  # so the row isn't put in as is

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = aliases.select[parse_dict["select_command"]]
                if select_type == 'select one external'\
                   and 'choice_filter' not in row:
                    warnings.append(rowFormatString % row_number +
                                    u" select one external is only meant for"
                                    u" filtered selects.")
                    select_type = aliases.select['select_one']
                list_name = parse_dict["list_name"]
                if list_name not in choices\
                   and select_type != 'select one external':
                    if not choices:
                        raise PyXFormError(
                            u"There should be a choices sheet in this xlsform."
                            u" Please ensure that the choices sheet name is "
                            u"all in small caps.")
                    raise PyXFormError(rowFormatString % row_number +
                                       " List name not in choices sheet: " +
                                       list_name)

                # Validate select_multiple choice names by making sure
                # they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added "
                                "to multiple choice selects. See [" +
                                choice[constants.NAME] + "] in [" +
                                list_name + "]")

                specify_other_question = None
                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"
                    # With this code we no longer need to handle or_other
                    # questions in survey builder.
                    # However, it depends on being able to use choice filters
                    # and xpath expressions that return empty sets.
                    # choices[list_name].append(
                    #     {
                    #         'name': 'other',
                    #         'label': {default_language : 'Other'},
                    #         'orOther': 'true',
                    #     })
                    # or_other_xpath = 'isNull(orOther)'
                    # if 'choice_filter' in row:
                    #     row['choice_filter'] += ' or ' + or_other_xpath
                    # else:
                    #     row['choice_filter'] = or_other_xpath
                    # specify_other_question = \
                    #     {
                    #         'type':'text',
                    #         'name': row['name'] + '_specify_other',
                    #         'label':
                    #         'Specify Other for:\n"' + row['label'] + '"',
                    #         'bind' : {'relevant':
                    #                   "selected(../%s, 'other')" % row['name']},
                    #     }

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type

                if row.get('choice_filter'):
                    if select_type == 'select one external':
                        new_json_dict['query'] = list_name
                    else:
                        new_json_dict['itemset'] = list_name
                        json_dict['choices'] = choices
                else:
                    new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                if table_list is not None:
                    if not isinstance(table_list, basestring):
                        # Then this row is the first select in a table list
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE: select_type,
                            # Adding row number for uniqueness
                            constants.NAME:
                            "reserved_name_for_field_list_labels_" +
                            str(row_number),
                            constants.CONTROL: {
                                u"appearance": u"label"
                            },
                            constants.CHOICES: choices[list_name],
                            # Do we care about filtered selects in table lists?
                            # 'itemset' : list_name,
                        }
                        parent_children_array.append(table_list_header)

                    if table_list != list_name:
                        error_message = rowFormatString % row_number
                        error_message += " Badly formatted table list,"\
                                         " list names don't match: " +\
                                         table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = \
                        new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"
                parent_children_array.append(new_json_dict)
                if specify_other_question:
                    parent_children_array.append(specify_other_question)
                continue

        # Try to parse question as osm:
        osm_parse = osm_regexp.search(question_type)
        if osm_parse:
            parse_dict = osm_parse.groupdict()
            new_dict = row.copy()
            new_dict['type'] = constants.OSM

            osm_list_name = parse_dict.get('list_name')
            if osm_list_name is not None:
                tags = osm_tags.get(osm_list_name)
                if tags is None:
                    # Report an unknown list as a form error rather than
                    # failing with a TypeError while iterating None.
                    raise PyXFormError(rowFormatString % row_number +
                                       " List name not in osm sheet: " +
                                       osm_list_name)
                for tag in tags:
                    if osm_tags.get(tag.get('name')):
                        tag['choices'] = osm_tags.get(tag.get('name'))
                new_dict['tags'] = tags

            parent_children_array.append(new_dict)
            continue

        # range question_type
        if question_type == 'range':
            new_dict = process_range_question_type(row)
            parent_children_array.append(new_dict)
            continue

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        parent_children_array.append(row)

    if len(stack) != 1:
        raise PyXFormError("Unmatched begin statement: " +
                           str(stack[-1][0]))

    if settings.get('flat', False):
        # print "Generating flattened instance..."
        add_flat_annotations(stack[0][1])

    meta_children = []

    if aliases.yes_no.get(settings.get("omit_instanceID")):
        if settings.get("public_key"):
            raise PyXFormError(
                "Cannot omit instanceID, it is required for encryption.")
    else:
        # Automatically add an instanceID element:
        meta_children.append({
            "name": "instanceID",
            "bind": {
                "readonly": "true()",
                "calculate": settings.get("instance_id",
                                          "concat('uuid:', uuid())"),
            },
            "type": "calculate",
        })

    if 'instance_name' in settings:
        # Automatically add an instanceName element:
        meta_children.append({
            "name": "instanceName",
            "bind": {
                "calculate": settings['instance_name']
            },
            "type": "calculate",
        })

    # PMA2020 Logging BEGIN
    if 'logging' in settings and \
       aliases.yes_no.get(settings.get('logging')) is True:
        meta_children.append({
            "name": "logging",
            "bind": {
                "calculate": "string('log.txt')"
            },
            "type": "hidden attachment"
        })
    # PMA2020 Logging END

    if len(meta_children) > 0:
        meta_element = {
            "name": "meta",
            "type": "group",
            "control": {
                "bodyless": True
            },
            "children": meta_children
        }
        # The bottom of the stack holds the survey's own children list.
        survey_children_array = stack[0][1]
        survey_children_array.append(meta_element)

    # print_pyobj_to_json(json_dict)
    return json_dict
def validate(self):
    """
    Verify that self.name is acceptable as an xml tag name.

    Raises PyXFormError, with a message describing the naming rules,
    when is_valid_xml_tag returns a false value for self.name.
    """
    name_is_valid = is_valid_xml_tag(self.name)
    if not name_is_valid:
        template = (
            "The name '%s' is an invalid xml tag. Names must begin with a "
            "letter, colon, or underscore, subsequent characters can "
            "include numbers, dashes, and periods."
        )
        raise PyXFormError(template % self.name)
def workbook_to_json( workbook_dict, form_name=None, default_language=u"default", warnings=None): """ workbook_dict -- nested dictionaries representing a spreadsheet. should be similar to those returned by xls_to_dict form_name -- The spreadsheet's filename default_language -- default_language does two things: 1. In the xform the default language is the language reverted to when there is no translation available for some itext element. Because of this every itext element must have a default language translation. 2. In the workbook if media/labels/hints that do not have a language suffix will be treated as though their suffix is the default language. If the default language is used as a suffix for media/labels/hints, then the suffixless version will be overwritten. warnings -- an optional list which warnings will be appended to returns a nested dictionary equivalent to the format specified in the json form spec. """ # ensure required headers are present survey_header_sheet = u'%s_header' % constants.SURVEY if survey_header_sheet in workbook_dict: survey_headers = workbook_dict.get(survey_header_sheet) if not survey_headers: raise PyXFormError(u"The survey sheet is missing column headers.") tmp = [h for h in [constants.TYPE, constants.NAME] if h in survey_headers[0].keys()] if tmp.__len__() is not 2: raise PyXFormError(u"The survey sheet must have on the first row" u" name and type columns.") del workbook_dict[survey_header_sheet] choices_header_sheet = u'%s_header' % constants.CHOICES if choices_header_sheet in workbook_dict: choices_headers = workbook_dict.get(choices_header_sheet) if not choices_headers: raise PyXFormError(u"The choices sheet is missing column headers.") choices_header_list = [u'list name', u'list_name', constants.NAME] tmp = [ h for h in choices_header_list if h in choices_headers[0].keys()] if tmp.__len__() is not 2: raise PyXFormError(u"The choices sheet must have on the first row" u" list_name and name.") del 
workbook_dict[choices_header_sheet] if warnings is None: #Set warnings to a list that will be discarded. warnings = [] rowFormatString = '[row : %s]' #Make sure the passed in vars are unicode if form_name: form_name = unicode(form_name) default_language = unicode(default_language) #We check for double columns to determine whether to use them #or single colons to delimit grouped headers. #Single colons are bad because they conflict with with the xform namespace #syntax (i.e. jr:constraintMsg), #so we only use them if we have to for backwards compatibility. use_double_colons = has_double_colon(workbook_dict) #Break the spreadsheet dict into easier to access objects #(settings, choices, survey_sheet): ########### Settings sheet ########## settings_sheet = dealias_and_group_headers( workbook_dict.get(constants.SETTINGS, []), aliases.settings_header, use_double_colons) settings = settings_sheet[0] if len(settings_sheet) > 0 else {} default_language = settings.get( constants.DEFAULT_LANGUAGE, default_language) #add_none_option is a boolean that when true, #indicates a none option should automatically be added to selects. #It should probably be deprecated but I haven't checked yet. if u"add_none_option" in settings: settings[u"add_none_option"] = aliases.yes_no.get( settings[u"add_none_option"], False) #Here we create our json dict root with default settings: id_string = settings.get(constants.ID_STRING, form_name) form_name= form_name if form_name else id_string title= settings.get(constants.TITLE, id_string) sms_keyword = settings.get(constants.SMS_KEYWORD, id_string) json_dict = { constants.TYPE: constants.SURVEY, constants.NAME: form_name, constants.TITLE: title, constants.ID_STRING: id_string, constants.SMS_KEYWORD: sms_keyword, constants.DEFAULT_LANGUAGE: default_language, #By default the version is based on the date and time yyyymmddhh #Leaving default version out for now since it might cause #problems for formhub. 
#constants.VERSION : datetime.datetime.now().strftime("%Y%m%d%H"), constants.CHILDREN: [] } #Here the default settings are overridden by those in the settings sheet json_dict.update(settings) ########### Choices sheet ########## #Columns and "choices and columns" sheets are deprecated, #but we combine them with the choices sheet for backwards-compatibility. choices_and_columns_sheet = workbook_dict.get( constants.CHOICES_AND_COLUMNS, {}) choices_and_columns_sheet = dealias_and_group_headers( choices_and_columns_sheet, aliases.list_header, use_double_colons, default_language) columns_sheet = workbook_dict.get(constants.COLUMNS, []) columns_sheet = dealias_and_group_headers( columns_sheet, aliases.list_header, use_double_colons, default_language) choices_sheet = workbook_dict.get(constants.CHOICES, []) choices_sheet = dealias_and_group_headers( choices_sheet, aliases.list_header, use_double_colons, default_language) ########### Cascading Select sheet ########### cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, []) if len(cascading_choices): if 'choices' in cascading_choices[0]: choices_sheet = choices_sheet + cascading_choices[0]['choices'] combined_lists = group_dictionaries_by_key( choices_and_columns_sheet + choices_sheet + columns_sheet, constants.LIST_NAME) choices = combined_lists #Make sure all the options have the required properties: warnedabout = set() for list_name, options in choices.items(): for option in options: if constants.NAME not in option: info = "[list_name : " + list_name + ']' raise PyXFormError("On the choices sheet there is " "a option with no name. " + info) if 'label' not in option: info = "[list_name : " + list_name + ']' warnings.append( "On the choices sheet there is a option with no label. 
" + info) # chrislrobert's fix for a cryptic error message: # see: https://code.google.com/p/opendatakit/issues/detail?id=833&start=200 for headername in option.keys(): # Using warnings and removing the bad columns # instead of throwing errors because some forms # use choices column headers for notes. if ' ' in headername: if headername not in warnedabout: warnedabout.add(headername) warnings.append("On the choices sheet there is " + "a column (\"" + headername + "\") with an illegal header. " + "Headers cannot include spaces.") del option[headername] elif headername == '': warnings.append("On the choices sheet there is a value" + " in a column with no header.") del option[headername] ########### Survey sheet ########### if constants.SURVEY not in workbook_dict: raise PyXFormError( "You must have a sheet named (case-sensitive): " + constants.SURVEY) survey_sheet = workbook_dict[constants.SURVEY] #Process the headers: clean_text_values_enabled = aliases.yes_no.get( settings.get("clean_text_values", "true()")) if clean_text_values_enabled: survey_sheet = clean_text_values(survey_sheet) survey_sheet = dealias_and_group_headers( survey_sheet, aliases.survey_header, use_double_colons, default_language) survey_sheet = dealias_types(survey_sheet) ################################## #Parse the survey sheet while generating a survey in our json format: row_number = 1 # We start at 1 because the column header row is not # included in the survey sheet (presumably). 
#A stack is used to keep track of begin/end expressions stack = [(None, json_dict.get(constants.CHILDREN))] #If a group has a table-list appearance flag #this will be set to the name of the list table_list = None #For efficiency we compile all the regular expressions # that will be used to parse types: end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>(" + '|'.join(aliases.control.keys()) + r"))$") begin_control_regex = re.compile(r"^(?P<begin>begin)(\s|_)(?P<type>(" + '|'.join(aliases.control.keys()) + r"))( (over )?(?P<list_name>\S+))?$") select_regexp = re.compile( r"^(?P<select_command>(" + '|'.join(aliases.multiple_choice.keys()) + r")) (?P<list_name>\S+)" + "( (?P<specify_other>(or specify other|or_other|or other)))?$") cascading_regexp = re.compile( r"^(?P<cascading_command>(" + '|'.join(aliases.cascading.keys()) + r")) (?P<cascading_level>\S+)?$") for row in survey_sheet: row_number += 1 prev_control_type, parent_children_array = stack[-1] #Disabled should probably be first #so the attributes below can be disabled. if u"disabled" in row: warnings.append( rowFormatString % row_number + " The 'disabled' column header is not part of the current" + " spec. 
We recommend using relevant instead.") disabled = row.pop(u"disabled") if aliases.yes_no.get(disabled): continue #skip empty rows if len(row) == 0: continue #Get question type question_type = row.get(constants.TYPE) if not question_type: # if name and label are also missing, #then its a comment row, and we skip it with warning if not ((constants.NAME in row) or (constants.LABEL in row)): warnings.append( rowFormatString % row_number + " Row without name," " text, or label is being skipped:\n" + str(row)) continue raise PyXFormError( rowFormatString % row_number + " Question with no type.\n" + str(row)) continue if question_type == constants.CALCULATE_XFORM: calculation = row.get(constants.BIND, {}).get(constants.CALCULATE_XFORM) if not calculation: raise PyXFormError( rowFormatString % row_number + " Missing calculation.") #Check if the question is actually a setting specified #on the survey sheet settings_type = aliases.settings_header.get(question_type) if settings_type: json_dict[settings_type] = unicode(row.get(constants.NAME)) continue #Try to parse question as a end control statement #(i.e. end loop/repeat/group): end_control_parse = end_control_regex.search(question_type) if end_control_parse: parse_dict = end_control_parse.groupdict() if parse_dict.get("end") and constants.TYPE in parse_dict: control_type = aliases.control[parse_dict[constants.TYPE]] if prev_control_type != control_type or len(stack) == 1: raise PyXFormError( rowFormatString % row_number + " Unmatched end statement. 
Previous control type: " + str(prev_control_type) + ", Control type: " + str(control_type)) stack.pop() table_list = None continue #Make sure the row has a valid name if not constants.NAME in row: if row[constants.TYPE] == 'note': #autogenerate names for notes without them row[constants.NAME] = "generated_note_name_" + str(row_number) # elif 'group' in row[constants.TYPE].lower(): # # autogenerate names for groups without them # row['name'] = "generated_group_name_" + str(row_number) else: raise PyXFormError(rowFormatString % row_number + " Question or group with no name.") question_name = unicode(row[constants.NAME]) if not is_valid_xml_tag(question_name): error_message = rowFormatString % row_number error_message += " Invalid question name [" + question_name + "]" error_message += "Names must begin with a letter, colon,"\ + " or underscore." error_message += "Subsequent characters can include numbers,"\ + " dashes, and periods." raise PyXFormError(error_message) if constants.LABEL not in row and \ row.get(constants.MEDIA) is None and \ question_type not in aliases.label_optional_types: #TODO: Should there be a default label? # Not sure if we should throw warnings for groups... # Warnings can be ignored so I'm not too concerned # about false positives. warnings.append( rowFormatString % row_number + " Question has no label: " + str(row)) #Try to parse question as begin control statement #(i.e. begin loop/repeat/group): begin_control_parse = begin_control_regex.search(question_type) if begin_control_parse: parse_dict = begin_control_parse.groupdict() if parse_dict.get("begin") and constants.TYPE in parse_dict: #Create a new json dict with children, and the proper type, #and add it to parent_children_array in place of a question. #parent_children_array will then be set to its children array #(so following questions are nested under it) #until an end command is encountered. 
control_type = aliases.control[parse_dict[constants.TYPE]] new_json_dict = row.copy() new_json_dict[constants.TYPE] = control_type child_list = list() new_json_dict[constants.CHILDREN] = child_list if control_type is constants.LOOP: if not parse_dict.get("list_name"): #TODO: Perhaps warn and make repeat into a group? raise PyXFormError( rowFormatString % row_number + " Repeat loop without list name.") list_name = parse_dict["list_name"] if list_name not in choices: raise PyXFormError( rowFormatString % row_number + " List name not in columns sheet: " + list_name) new_json_dict[constants.COLUMNS] = choices[list_name] #Generate a new node for the jr:count column so #xpath expressions can be used. repeat_count_expression = new_json_dict.get( constants.CONTROL, {}).get('jr:count') if repeat_count_expression: generated_node_name = new_json_dict[constants.NAME] + "_count" parent_children_array.append({ constants.NAME: generated_node_name, constants.BIND: { "readonly": "true()", constants.CALCULATE_XFORM: repeat_count_expression, }, constants.TYPE: constants.CALCULATE_XFORM, }) new_json_dict[constants.CONTROL]['jr:count'] = \ "${" + generated_node_name + "}" #Code to deal with table_list appearance flags # (for groups of selects) ctrl_ap = new_json_dict.get(constants.CONTROL, {}).get(u"appearance") if ctrl_ap == constants.TABLE_LIST: table_list = True new_json_dict[constants.CONTROL][u"appearance"] = u"field-list" #Generate a note label element so hints and labels #work as expected in table-lists. 
#see https://github.com/modilabs/pyxform/issues/62 if 'label' in new_json_dict or 'hint' in new_json_dict: generated_label_element = { constants.TYPE: "note", constants.NAME: "generated_table_list_label_" + str(row_number) } if 'label' in new_json_dict: generated_label_element[constants.LABEL] = \ new_json_dict[constants.LABEL] del new_json_dict[constants.LABEL] if 'hint' in new_json_dict: generated_label_element['hint'] = \ new_json_dict['hint'] del new_json_dict['hint'] child_list.append(generated_label_element) parent_children_array.append(new_json_dict) stack.append((control_type, child_list)) continue # try to parse as a cascading select cascading_parse = cascading_regexp.search(question_type) if cascading_parse: parse_dict = cascading_parse.groupdict() if parse_dict.get("cascading_command"): cascading_level = parse_dict["cascading_level"] cascading_prefix = row.get(constants.NAME) if not cascading_prefix: raise PyXFormError( rowFormatString % row_number + " Cascading select needs a name.") #cascading_json = get_cascading_json( #cascading_choices, cascading_prefix, cascading_level) if len(cascading_choices) <= 0 or\ 'questions' not in cascading_choices[0]: raise PyXFormError( "Found a cascading_select " + cascading_level + ", but could not" " find " + cascading_level + "in cascades sheet.") cascading_json = cascading_choices[0]['questions'] json_dict['choices'] = choices include_bindings = False if constants.BIND in row: include_bindings = True for cq in cascading_json: # include bindings if include_bindings: cq[constants.BIND] = row[constants.BIND] def replace_prefix(d, prefix): for k, v in d.items(): if isinstance(v, basestring): d[k] = v.replace('$PREFIX$', prefix) elif isinstance(v, dict): d[k] = replace_prefix(v, prefix) elif isinstance(v, list): d[k] = map( lambda x: replace_prefix(x, prefix), v) return d parent_children_array.append( replace_prefix(cq, cascading_prefix)) continue # so the row isn't put in as is #Try to parse question as a select: 
select_parse = select_regexp.search(question_type) if select_parse: parse_dict = select_parse.groupdict() if parse_dict.get("select_command"): select_type = aliases.multiple_choice[parse_dict["select_command"]] if select_type == 'select one external'\ and not 'choice_filter' in row: warnings.append(rowFormatString % row_number + u" select one external is only meant for" u" filtered selects.") select_type = aliases.multiple_choice[constants.SELECT_ONE_XLSFORM] list_name = parse_dict["list_name"] if list_name not in choices\ and select_type != 'select one external': if not choices: raise PyXFormError( u"There should be a choices sheet in this xlsform." u" Please ensure that the \"choices\" sheet name" u" is all in lowercase.") raise PyXFormError( rowFormatString % row_number + " List name not in choices sheet: " + list_name) #Validate select_multiple choice names by making sure #they have no spaces (will cause errors in exports). if select_type == constants.SELECT_ALL_THAT_APPLY: for choice in choices[list_name]: if ' ' in choice[constants.NAME]: raise PyXFormError( "Choice names with spaces cannot be added " "to multiple choice selects. See [" + choice[constants.NAME] + "] in [" + list_name + "]") specify_other_question = None if parse_dict.get("specify_other") is not None: select_type += u" or specify other" # #With this code we no longer need to handle or_other # #questions in survey builder. # #However, it depends on being able to use choice filters # #and xpath expressions that return empty sets. 
# choices[list_name].append( # { # 'name': 'other', # 'label': {default_language : 'Other'}, # 'orOther': 'true', # }) # or_other_xpath = 'isNull(orOther)' # if 'choice_filter' in row: # row['choice_filter'] += ' or ' + or_other_xpath # else: # row['choice_filter'] = or_other_xpath # # specify_other_question = \ # { # 'type':'text', # 'name': row['name'] + '_specify_other', # 'label': # 'Specify Other for:\n"' + row['label'] + '"', # 'bind' : {'relevant': # "selected(../%s, 'other')" % row['name']}, # } new_json_dict = row.copy() new_json_dict[constants.TYPE] = select_type if row.get('choice_filter'): if select_type == 'select one external': new_json_dict['query'] = list_name else: new_json_dict[constants.ITEMSET_XFORM] = list_name json_dict[constants.CHOICES] = choices else: new_json_dict[constants.CHOICES] = choices[list_name] #Code to deal with table_list appearance flags #(for groups of selects) if table_list is not None: #Then this row is the first select in a table list if not isinstance(table_list, basestring): table_list = list_name table_list_header = { constants.TYPE: select_type, constants.NAME: "reserved_name_for_field_list_labels_" + str(row_number), # Adding row number for uniqueness constants.CONTROL: {u"appearance": u"label"}, constants.CHOICES: choices[list_name], #Do we care about filtered selects in table lists? #'itemset' : list_name, } parent_children_array.append(table_list_header) if table_list <> list_name: error_message = rowFormatString % row_number error_message += " Badly formatted table list,"\ " list names don't match: " +\ table_list + " vs. " + list_name raise PyXFormError(error_message) control = new_json_dict[constants.CONTROL] = \ new_json_dict.get(constants.CONTROL, {}) control[u"appearance"] = "list-nolabel" parent_children_array.append(new_json_dict) if specify_other_question: parent_children_array.append(specify_other_question) continue #TODO: Consider adding some question_type validation here. 
#Put the row in the json dict as is: parent_children_array.append(row) if len(stack) != 1: raise PyXFormError("Unmatched begin statement: " + str(stack[-1][0])) if settings.get('flat', False): #print "Generating flattened instance..." add_flat_annotations(stack[0][1]) meta_children = [] if aliases.yes_no.get(settings.get("omit_instanceID")): if settings.get(constants.PUBLIC_KEY): raise PyXFormError( "Cannot omit instanceID, it is required for encryption.") else: #Automatically add an instanceID element: meta_children.append({ constants.NAME: "instanceID", constants.BIND: { "readonly": "true()", constants.CALCULATE_XFORM: settings.get( "instance_id", "concat('uuid:', uuid())"), }, constants.TYPE: constants.CALCULATE_XFORM, }) if 'instance_name' in settings: #Automatically add an instanceName element: meta_children.append({ constants.NAME: "instanceName", constants.BIND: { constants.CALCULATE_XFORM: settings['instance_name'] }, constants.TYPE: constants.CALCULATE_XFORM, }) if len(meta_children) > 0: meta_element = \ { constants.NAME: constants.META_XFORM, constants.TYPE: constants.GROUP, constants.CONTROL: { "bodyless": True }, constants.CHILDREN: meta_children } _, survey_children_array = stack[0] survey_children_array.append(meta_element) #print_pyobj_to_json(json_dict) return json_dict
def workbook_to_json(workbook_dict, form_name=None, default_language=u"default", warnings=None):
    """
    Convert a spreadsheet, given as nested dictionaries, into the json form format.

    workbook_dict -- nested dictionaries representing a spreadsheet.
                     should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
        1. In the xform the default language is the language reverted to
           when there is no translation available for some itext element.
           Because of this every itext element must have a default language
           translation.
        2. In the workbook if media/labels/hints that do not have a language
           suffix will be treated as though their suffix is the default
           language. If the default language is used as a suffix for
           media/labels/hints, then the suffixless version will be overwritten.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the
    json form spec.

    raises PyXFormError when the survey sheet is missing, when a row that has
    a name and a label has no type, when a question has no name or a name that
    is not a valid xml tag, when begin/end statements do not match, when a
    loop has no list name, when a referenced list is not among the choices,
    when a select-multiple choice name contains a space, or when the selects
    of a table-list do not all use the same list.
    """
    if warnings is None:
        #Set warnings to a list that will be discarded.
        warnings = []

    #Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    #We check for double colons to determine whether to use them or single colons to delimit grouped headers.
    #Single colons are bad because they conflict with the xform namespace syntax (i.e. jr:constraintMsg),
    #so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    #Break the spreadsheet dict into easier to access objects (settings, choices, survey_sheet):
    ########### Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []),
        settings_header_aliases, use_double_colons)
    #Only the first row of the settings sheet is used.
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}
    default_language = settings.get(constants.DEFAULT_LANGUAGE, default_language)

    #add_none_option is a boolean that when true, indicates a none option should automatically be added to selects.
    #It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = yes_no_aliases.get(
            settings[u"add_none_option"], u"false()") == u"true()"

    #Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.DEFAULT_LANGUAGE: default_language,
        constants.CHILDREN: [],
    }
    #Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    ########### Choices sheet ##########
    #Columns and "choices and columns" sheets are deprecated, but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, list_header_aliases,
        use_double_colons, default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(
        columns_sheet, list_header_aliases,
        use_double_colons, default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(
        choices_sheet, list_header_aliases,
        use_double_colons, default_language)

    combined_lists = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet,
        constants.LIST_NAME)
    choices = combined_lists

    ########### Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, {})

    ########### Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError("You must have a sheet named (case-sensitive): " + constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    #Process the headers:
    survey_sheet = clean_unicode_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(
        survey_sheet, survey_header_aliases,
        use_double_colons, default_language)
    survey_sheet = dealias_types(survey_sheet)

    ##################################
    #Parse the survey sheet while generating a survey in our json format:

    row_number = 1  #We start at 1 because the column header row is not included in the survey sheet (presumably).
    #A stack is used to keep track of begin/end expressions.
    #Each entry is (control type, list that children are appended to).
    stack = [(None, json_dict.get(constants.CHILDREN))]
    #If a group has a table-list appearance flag this will be set to the name of the list
    table_list = None
    #True between a table-list "begin" row and the first select that follows it.
    begin_table_list = False

    #For efficiency we compile all the regular expressions that will be used to parse types:
    end_control_regex = re.compile(
        r"^(?P<end>end)(\s|_)(?P<type>(" + '|'.join(control_aliases.keys()) + r"))$")
    begin_control_regex = re.compile(
        r"^(?P<begin>begin)(\s|_)(?P<type>(" + '|'.join(control_aliases.keys()) + r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(
        r"^(?P<select_command>(" + '|'.join(select_aliases.keys()) + r")) (?P<list_name>\S+)( (?P<specify_other>(or specify other|or_other|or other)))?$")
    cascading_regexp = re.compile(
        r"^(?P<cascading_command>(" + '|'.join(cascading_aliases.keys()) + r")) (?P<cascading_level>\S+)?$")

    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]

        #Disabled should probably be first so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append(
                "The 'disabled' column header is not part of the current spec. We recommend using relevant instead.")
            disabled = row.pop(u"disabled")
            if disabled in yes_no_aliases:
                disabled = yes_no_aliases[disabled]
            if disabled == 'true()':
                continue

        #skip empty rows
        if len(row) == 0:
            continue

        #Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            #A typeless row that lacks a name or a label is treated as a
            #comment row and skipped with a warning; a typeless row that has
            #both is an error.
            if not ((constants.NAME in row) and (constants.LABEL in row)):
                warnings.append(
                    "Row wihtout name, text, or label is being skipped " + str(row_number) + ": " + str(row))
                continue
            raise PyXFormError("Question with no type on row " + str(row_number))

        #Check if the question is actually a setting specified on the survey sheet
        settings_type = settings_header_aliases.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        #Try to parse question as a end control statement (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = control_aliases[parse_dict["type"]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        "Unmatched end statement. Previous control type: " + str(prev_control_type) + ", Control type: " + str(control_type))
                stack.pop()
                #Leaving a group ends any table-list state, including a
                #pending flag from a table-list group that had no selects.
                table_list = None
                begin_table_list = False
                continue

        #Make sure the question has a valid name.
        #The missing-name test must happen before the unicode conversion,
        #otherwise None becomes the (valid looking) name u"None".
        raw_name = row.get(constants.NAME)
        if raw_name is None:
            raise PyXFormError("Question with no name on row " + str(row_number))
        question_name = unicode(raw_name)
        if not question_name:
            raise PyXFormError("Question with no name on row " + str(row_number))
        if not is_valid_xml_tag(question_name):
            error_message = "Invalid question name [" + question_name + "] on row " + str(row_number) + "\n"
            error_message += "Names must begin with a letter, colon, or underscore. Subsequent characters can include numbers, dashes, and periods."
            raise PyXFormError(error_message)

        if constants.LABEL not in row and \
                row.get(constants.MEDIA) is None and \
                question_type not in label_optional_types:
            #TODO: Should there be a default label?
            #      Not sure if we should throw warnings for groups...
            #      Warnings can be ignored so I'm not too concerned about false positives.
            warnings.append("Warning unlabeled question in row " + str(row_number) + ": " + str(row))

        #Try to parse question as begin control statement (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                #Create a new json dict with children, and the proper type, and add it to parent_children_array in place of a question.
                #parent_children_array will then be set to its children array (so following questions are nested under it)
                #until an end command is encountered.
                control_type = control_aliases[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                #Compare by value: identity of equal strings is not guaranteed.
                if control_type == constants.LOOP:
                    if not parse_dict.get("list_name"):
                        #TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError("Repeat without list name " + " Error on row: " + str(row_number))
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError(
                            "List name not in columns sheet: " + list_name + " Error on row: " + str(row_number))
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                #Code to deal with table_list appearance flags (for groups of selects)
                if new_json_dict.get(u"control", {}).get(u"appearance") == constants.TABLE_LIST:
                    begin_table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(
                        "Cascading select needs a name. Error on row: %s" % row_number)
                cascading_json = get_cascading_json(
                    cascading_choices, cascading_prefix, cascading_level)
                for c in cascading_json:
                    parent_children_array.append(c)
                continue  # so the row isn't put in as is

        #Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = select_aliases[parse_dict["select_command"]]
                list_name = parse_dict["list_name"]

                if list_name not in choices:
                    raise PyXFormError(
                        "List name not in choices sheet: " + list_name + " Error on row: " + str(row_number))

                #Validate select_multiple choice names by making sure they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added to multiple choice selects. See [" + choice[constants.NAME] + "] in [" + list_name + "]")

                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type
                new_json_dict[constants.CHOICES] = choices[list_name]

                #Code to deal with table_list appearance flags (for groups of selects)
                if table_list or begin_table_list:
                    if begin_table_list:
                        #If this row is the first select in a table list
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE: select_type,
                            constants.NAME: "reserved_name_for_field_list_labels_" + str(row_number),  #Adding row number for uniqueness
                            constants.CONTROL: {u"appearance": u"label"},
                            constants.CHOICES: choices[list_name],
                        }
                        parent_children_array.append(table_list_header)
                        begin_table_list = False

                    if table_list != list_name:
                        error_message = "Error on row: " + str(row_number) + "\n"
                        error_message += "Badly formatted table list, list names don't match: " + table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"

                parent_children_array.append(new_json_dict)
                continue

        #TODO: Consider adding some question_type validation here.

        #Put the row in the json dict as is:
        parent_children_array.append(row)

    #Anything left above the root entry is a begin statement without an end.
    if len(stack) != 1:
        raise PyXFormError("unmatched begin statement: " + str(stack[-1][0]))
    #print_pyobj_to_json(json_dict)
    return json_dict
def validate(self):
    """
    Check that the name returned by get_name() is a valid xml tag.

    raises Exception (with self._dict as its second argument) when
    is_valid_xml_tag rejects the name; otherwise returns None.
    """
    tag_is_valid = is_valid_xml_tag(self.get_name())
    if tag_is_valid:
        return
    raise Exception("Invalid xml tag.", self._dict)
def validate(self):
    """
    Check that the name returned by get_name() is a valid xml tag.

    raises Exception (name first, explanatory message second) when
    is_valid_xml_tag rejects the name; otherwise returns None.
    """
    if is_valid_xml_tag(self.get_name()):
        # Nothing to report for a usable name.
        return
    explanation = "The name of this survey element is an invalid xml tag. Names must begin with a letter, colon, or underscore, subsequent characters can include numbers, dashes, and periods."
    raise Exception(self.get_name(), explanation)
def workbook_to_json(workbook_dict, form_name=None, default_language=u"default", warnings=None):
    """
    Convert a spreadsheet, given as nested dictionaries, into the json form format.

    workbook_dict -- nested dictionaries representing a spreadsheet.
                     should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
        1. In the xform the default language is the language reverted to
           when there is no translation available for some itext element.
           Because of this every itext element must have a default language
           translation.
        2. In the workbook if media/labels/hints that do not have a language
           suffix will be treated as though their suffix is the default
           language. If the default language is used as a suffix for
           media/labels/hints, then the suffixless version will be overwritten.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the
    json form spec.

    raises PyXFormError when the survey sheet is missing, when a choice has no
    name, when a row that has a name and a label has no type, when a row other
    than a note has no name or has a name that is not a valid xml tag, when
    begin/end statements do not match, when a loop has no list name, when a
    referenced list is not among the choices, when a select-multiple choice
    name contains a space, when the selects of a table-list do not all use the
    same list, or when omit_instanceID is set together with a public_key.
    """
    if warnings is None:
        # Set warnings to a list that will be discarded.
        warnings = []

    # Prefix used by row-specific warnings and errors.
    rowFormatString = "[row : %s]"

    # Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double colons to determine whether to use them or single colons to delimit grouped headers.
    # Single colons are bad because they conflict with the xform namespace syntax (i.e. jr:constraintMsg),
    # so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects (settings, choices, survey_sheet):
    ########### Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []), settings_header_aliases, use_double_colons
    )
    # Only the first row of the settings sheet is used.
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}
    default_language = settings.get(constants.DEFAULT_LANGUAGE, default_language)

    # add_none_option is a boolean that when true, indicates a none option should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = yes_no_aliases.get(settings[u"add_none_option"], False)

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.DEFAULT_LANGUAGE: default_language,
        # By default the version is based on the date and time yyyymmddhh
        # Leaving default version out for now since it might cause problems for formhub.
        # constants.VERSION : datetime.datetime.now().strftime("%Y%m%d%H"),
        constants.CHILDREN: [],
    }
    # Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    ########### Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated, but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, list_header_aliases, use_double_colons, default_language
    )

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(
        columns_sheet, list_header_aliases, use_double_colons, default_language
    )

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(
        choices_sheet, list_header_aliases, use_double_colons, default_language
    )

    combined_lists = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet, constants.LIST_NAME
    )
    choices = combined_lists

    # Make sure all the options have the required properties:
    # a missing name is an error, a missing label only a warning.
    for list_name, options in choices.items():
        for option in options:
            if "name" not in option:
                info = "[list_name : " + list_name + "]"
                raise PyXFormError("On the choices sheet there is a option with no name. " + info)
            if "label" not in option:
                info = "[list_name : " + list_name + "]"
                warnings.append("On the choices sheet there is a option with no label. " + info)

    ########### Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, {})

    ########### Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError("You must have a sheet named (case-sensitive): " + constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    # Process the headers:
    # Text cleaning runs unless the clean_text_values setting maps to a falsy alias.
    clean_text_values_enabled = yes_no_aliases.get(settings.get("clean_text_values", "true()"))
    if clean_text_values_enabled:
        survey_sheet = clean_text_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(
        survey_sheet, survey_header_aliases, use_double_colons, default_language
    )
    survey_sheet = dealias_types(survey_sheet)

    ##################################
    # Parse the survey sheet while generating a survey in our json format:

    row_number = 1  # We start at 1 because the column header row is not included in the survey sheet (presumably).
    # A stack is used to keep track of begin/end expressions.
    # Each entry is (control type, list that children are appended to).
    stack = [(None, json_dict.get(constants.CHILDREN))]
    # Table-list state: None outside a table-list group, True right after the
    # table-list "begin" row, and the list name once the first select was seen.
    table_list = None

    # For efficiency we compile all the regular expressions that will be used to parse types:
    end_control_regex = re.compile(
        r"^(?P<end>end)(\s|_)(?P<type>(" + "|".join(control_aliases.keys()) + r"))$"
    )
    begin_control_regex = re.compile(
        r"^(?P<begin>begin)(\s|_)(?P<type>(" + "|".join(control_aliases.keys()) + r"))( (over )?(?P<list_name>\S+))?$"
    )
    select_regexp = re.compile(
        r"^(?P<select_command>(" + "|".join(select_aliases.keys()) + r")) (?P<list_name>\S+)( (?P<specify_other>(or specify other|or_other|or other)))?$"
    )
    cascading_regexp = re.compile(
        r"^(?P<cascading_command>(" + "|".join(cascading_aliases.keys()) + r")) (?P<cascading_level>\S+)?$"
    )

    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]

        # Disabled should probably be first so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append(
                rowFormatString % row_number
                + " The 'disabled' column header is not part of the current spec."
                + "We recommend using relevant instead."
            )
            disabled = row.pop(u"disabled")
            # Skip the row when the disabled value maps to a truthy alias.
            if yes_no_aliases.get(disabled):
                continue

        # skip empty rows
        if len(row) == 0:
            continue

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # A typeless row that lacks a name or a label is treated as a
            # comment row and skipped with a warning; a typeless row that has
            # both is an error.
            if not ((constants.NAME in row) and (constants.LABEL in row)):
                warnings.append(
                    rowFormatString % row_number + " Row without name, text, or label is being skipped:\n" + str(row)
                )
                continue
            raise PyXFormError(rowFormatString % row_number + " Question with no type.")

        # Check if the question is actually a setting specified on the survey sheet
        settings_type = settings_header_aliases.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        # Try to parse question as a end control statement (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = control_aliases[parse_dict["type"]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        rowFormatString % row_number
                        + " Unmatched end statement. Previous control type: "
                        + str(prev_control_type)
                        + ", Control type: "
                        + str(control_type)
                    )
                stack.pop()
                # Leaving a group ends any table-list state.
                table_list = None
                continue

        # Make sure the row has a valid name
        if constants.NAME not in row:
            # TODO: It could be slick if had nameless groups generate a flat model
            #       with only a body element.
            if row[constants.TYPE] == "note":
                # autogenerate names for notes without them
                row[constants.NAME] = "generated_note_name_" + str(row_number)
            else:
                raise PyXFormError(rowFormatString % row_number + " Question or group with no name.")
        question_name = unicode(row[constants.NAME])
        if not is_valid_xml_tag(question_name):
            error_message = rowFormatString % row_number
            error_message += " Invalid question name [" + question_name + "]"
            error_message += "Names must begin with a letter, colon, or underscore."
            error_message += "Subsequent characters can include numbers, dashes, and periods."
            raise PyXFormError(error_message)

        if (
            constants.LABEL not in row
            and row.get(constants.MEDIA) is None
            and question_type not in label_optional_types
        ):
            # TODO: Should there be a default label?
            #       Not sure if we should throw warnings for groups...
            #       Warnings can be ignored so I'm not too concerned about false positives.
            warnings.append(rowFormatString % row_number + " Question has no label: " + str(row))

        # Try to parse question as begin control statement (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                # Create a new json dict with children, and the proper type, and add it to parent_children_array in place of a question.
                # parent_children_array will then be set to its children array (so following questions are nested under it)
                # until an end command is encountered.
                control_type = control_aliases[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                # Compare by value: identity of equal strings is not guaranteed.
                if control_type == constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError(rowFormatString % row_number + " Repeat loop without list name.")
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError(
                            rowFormatString % row_number + " List name not in columns sheet: " + list_name
                        )
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Code to deal with table_list appearance flags (for groups of selects)
                if new_json_dict.get(u"control", {}).get(u"appearance") == constants.TABLE_LIST:
                    table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"
                    # Generate a note label element so hints and labels
                    # work as expected in table-lists.
                    # see https://github.com/modilabs/pyxform/issues/62
                    if "label" in new_json_dict or "hint" in new_json_dict:
                        generated_label_element = {
                            "type": "note",
                            "name": "generated_table_list_label_" + str(row_number),
                        }
                        # The label and hint move from the group to the generated note.
                        if "label" in new_json_dict:
                            generated_label_element[constants.LABEL] = new_json_dict[constants.LABEL]
                            del new_json_dict[constants.LABEL]
                        if "hint" in new_json_dict:
                            generated_label_element["hint"] = new_json_dict["hint"]
                            del new_json_dict["hint"]
                        child_list.append(generated_label_element)

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(rowFormatString % row_number + " Cascading select needs a name.")
                cascading_json = get_cascading_json(cascading_choices, cascading_prefix, cascading_level)
                for c in cascading_json:
                    parent_children_array.append(c)
                continue  # so the row isn't put in as is

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = select_aliases[parse_dict["select_command"]]
                list_name = parse_dict["list_name"]

                if list_name not in choices:
                    raise PyXFormError(
                        rowFormatString % row_number + " List name not in choices sheet: " + list_name
                    )

                # Validate select_multiple choice names by making sure they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY:
                    for choice in choices[list_name]:
                        if " " in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added to multiple choice selects. See ["
                                + choice[constants.NAME]
                                + "] in ["
                                + list_name
                                + "]"
                            )

                # Hook for a generated "specify other" follow-up question.
                # Nothing assigns it yet, so no follow-up is appended. The idea
                # was to append an 'other' choice (flagged orOther) to the list,
                # extend the row's choice_filter with isNull(orOther), and add a
                # text question <name>_specify_other that is relevant when
                # 'other' is selected. That depends on being able to use choice
                # filters and xpath expressions that return empty sets.
                specify_other_question = None
                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type
                new_json_dict["itemset"] = list_name
                if row.get("choice_filter"):
                    # Filtered selects get every choice list at the survey root.
                    json_dict["choices"] = choices
                else:
                    new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags (for groups of selects)
                if table_list is not None:
                    if not isinstance(table_list, basestring):
                        # Then this row is the first select in a table list
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE: select_type,
                            constants.NAME: "reserved_name_for_field_list_labels_" + str(row_number),  # Adding row number for uniqueness
                            constants.CONTROL: {u"appearance": u"label"},
                            constants.CHOICES: choices[list_name],
                            # Do we care about filtered selects in table lists?
                            #'itemset' : list_name,
                        }
                        parent_children_array.append(table_list_header)

                    if table_list != list_name:
                        error_message = rowFormatString % row_number
                        error_message += (
                            " Badly formatted table list, list names don't match: " + table_list + " vs. " + list_name
                        )
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"

                parent_children_array.append(new_json_dict)
                if specify_other_question:
                    parent_children_array.append(specify_other_question)
                continue

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        parent_children_array.append(row)

    # Anything left above the root entry is a begin statement without an end.
    if len(stack) != 1:
        raise PyXFormError("Unmatched begin statement: " + str(stack[-1][0]))

    # Automatically add an instanceID element, unless the settings ask to
    # omit it (which is refused when a public key is configured).
    if yes_no_aliases.get(settings.get("omit_instanceID")):
        if settings.get("public_key"):
            raise PyXFormError("Cannot omit instanceID, it is required for encryption.")
    else:
        meta_element = {
            "name": "meta",
            "type": "group",
            "control": {"bodyless": True},
            "children": [
                {
                    "name": "instanceID",
                    "bind": {"readonly": "true()", "calculate": "concat('uuid:', uuid())"},
                    "type": "calculate",
                }
            ],
        }
        # stack[0] is the root entry; its list holds the survey's top-level children.
        _, survey_children_array = stack[0]
        survey_children_array.append(meta_element)

    # print_pyobj_to_json(json_dict)
    return json_dict
def workbook_to_json(workbook_dict, form_name=None, default_language=u"default", warnings=None):
    """
    Convert a spreadsheet (as nested dictionaries) into the json form spec.

    workbook_dict -- nested dictionaries representing a spreadsheet.
                     should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
        1. In the xform the default language is the language reverted to
           when there is no translation available for some itext element.
           Because of this every itext element must have a default language
           translation.
        2. In the workbook if media/labels/hints that do not have a language
           suffix will be treated as though their suffix is the default
           language. If the default language is used as a suffix for
           media/labels/hints, then the suffixless version will be
           overwritten.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the
    json form spec.

    raises PyXFormError when: the survey sheet is missing; a row has a name
    or label but no type; a question has no name or an invalid name; an end
    statement does not match the innermost open begin statement; a loop has
    no list name or references an unknown list; a select references an
    unknown list; a select-multiple choice name contains a space; the selects
    in a table-list use different lists; a cascading select has no name; or
    a begin statement is never closed.
    """
    if warnings is None:
        # Set warnings to a list that will be discarded.
        warnings = []

    # Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double colons to determine whether to use them or single
    # colons to delimit grouped headers. Single colons are bad because they
    # conflict with the xform namespace syntax (i.e. jr:constraintMsg),
    # so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects
    # (settings, choices, survey_sheet):

    ########### Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []),
        settings_header_aliases,
        use_double_colons)
    # Only the first row of the settings sheet is used.
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}

    # A default language given in the settings sheet overrides the argument.
    default_language = settings.get(constants.DEFAULT_LANGUAGE, default_language)

    # add_none_option is a boolean that when true, indicates a none option
    # should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = yes_no_aliases.get(
            settings[u"add_none_option"], u"false()") == u"true()"

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.DEFAULT_LANGUAGE: default_language,
        constants.CHILDREN: [],
    }
    # Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    ########### Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated, but we combine
    # them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, list_header_aliases, use_double_colons, default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(
        columns_sheet, list_header_aliases, use_double_colons, default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(
        choices_sheet, list_header_aliases, use_double_colons, default_language)

    # Rows from all three sheets are grouped under their list name.
    choices = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet,
        constants.LIST_NAME)

    ########### Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, {})

    ########### Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError("You must have a sheet named (case-sensitive): " + constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    # Process the headers:
    survey_sheet = clean_unicode_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(
        survey_sheet, survey_header_aliases, use_double_colons, default_language)
    survey_sheet = dealias_types(survey_sheet)
    ##################################

    # Parse the survey sheet while generating a survey in our json format:

    # We start at 1 because the column header row is not included in the
    # survey sheet (presumably).
    row_number = 1

    # A stack is used to keep track of begin/end expressions. Each entry is
    # (control type, children list that rows are currently appended to).
    stack = [(None, json_dict.get(constants.CHILDREN))]
    # If a group has a table-list appearance flag this will be set to the
    # name of the list
    table_list = None
    # True between a table-list "begin" row and the first select after it.
    # NOTE(review): this flag is not cleared when a group ends, so a
    # table-list group containing no selects presumably leaks it to the next
    # select -- confirm whether that is intended.
    begin_table_list = False

    # For efficiency we compile all the regular expressions that will be
    # used to parse types:
    control_alternation = '|'.join(control_aliases.keys())
    end_control_regex = re.compile(
        r"^(?P<end>end)(\s|_)(?P<type>(" + control_alternation + r"))$")
    begin_control_regex = re.compile(
        r"^(?P<begin>begin)(\s|_)(?P<type>(" + control_alternation +
        r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(
        r"^(?P<select_command>(" + '|'.join(select_aliases.keys()) +
        r")) (?P<list_name>\S+)( (?P<specify_other>(or specify other|or_other|or other)))?$")
    cascading_regexp = re.compile(
        r"^(?P<cascading_command>(" + '|'.join(cascading_aliases.keys()) +
        r")) (?P<cascading_level>\S+)?$")

    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]

        # Disabled should probably be first so the attributes below can be
        # disabled.
        if u"disabled" in row:
            warnings.append("The 'disabled' column header is not part of the current spec. We recommend using relevant instead.")
            disabled = row.pop(u"disabled")
            if disabled in yes_no_aliases:
                disabled = yes_no_aliases[disabled]
            if disabled == 'true()':
                continue

        # skip empty rows
        if not row:
            continue

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # If name and label are also missing, then it's a comment row,
            # and we skip it with a warning. A row that has a name or a
            # label but no type is an error.
            if not ((constants.NAME in row) or (constants.LABEL in row)):
                warnings.append("Row without name, text, or label is being skipped " + str(row_number) + ": " + str(row))
                continue
            raise PyXFormError("Question with no type on row " + str(row_number))

        # Check if the question is actually a setting specified on the
        # survey sheet
        settings_type = settings_header_aliases.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        # Try to parse question as a end control statement
        # (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = control_aliases[parse_dict["type"]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError("Unmatched end statement. Previous control type: " + str(prev_control_type) + ", Control type: " + str(control_type))
                stack.pop()
                table_list = None
                continue

        # Make sure the question has a valid name. The raw value is checked
        # before converting to unicode: converting a missing name (None)
        # would yield the non-empty string u"None" and hide the error.
        raw_name = row.get(constants.NAME)
        question_name = u"" if raw_name is None else unicode(raw_name)
        if not question_name:
            raise PyXFormError("Question with no name on row " + str(row_number))
        if not is_valid_xml_tag(question_name):
            error_message = "Invalid question name [" + question_name + "] on row " + str(row_number) + "\n"
            error_message += "Names must begin with a letter, colon, or underscore. Subsequent characters can include numbers, dashes, and periods."
            raise PyXFormError(error_message)

        if constants.LABEL not in row and \
           row.get(constants.MEDIA) is None and \
           question_type not in label_optional_types:
            # TODO: Should there be a default label?
            #       Not sure if we should throw warnings for groups...
            #       Warnings can be ignored so I'm not too concerned about
            #       false positives.
            warnings.append("Warning unlabeled question in row " + str(row_number) + ": " + str(row))

        # Try to parse question as begin control statement
        # (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                # Create a new json dict with children, and the proper type,
                # and add it to parent_children_array in place of a question.
                # parent_children_array will then be set to its children
                # array (so following questions are nested under it)
                # until an end command is encountered.
                control_type = control_aliases[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = []
                new_json_dict[constants.CHILDREN] = child_list
                # Compare by value; identity comparison of strings is not
                # guaranteed to hold for equal strings.
                if control_type == constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError("Repeat without list name " + " Error on row: " + str(row_number))
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError("List name not in columns sheet: " + list_name + " Error on row: " + str(row_number))
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                if new_json_dict.get(u"control", {}).get(u"appearance") == constants.TABLE_LIST:
                    begin_table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError("Cascading select needs a name. Error on row: %s" % row_number)
                cascading_json = get_cascading_json(cascading_choices, cascading_prefix, cascading_level)

                for cascading_element in cascading_json:
                    parent_children_array.append(cascading_element)
                continue  # so the row isn't put in as is

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = select_aliases[parse_dict["select_command"]]
                list_name = parse_dict["list_name"]

                if list_name not in choices:
                    raise PyXFormError("List name not in choices sheet: " + list_name + " Error on row: " + str(row_number))

                # Validate select_multiple choice names by making sure they
                # have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError("Choice names with spaces cannot be added to multiple choice selects. See [" + choice[constants.NAME] + "] in [" + list_name + "]")

                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type
                new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                if table_list or begin_table_list:
                    if begin_table_list:
                        # This row is the first select in a table list, so a
                        # label-only header row is emitted ahead of it.
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE: select_type,
                            # Adding row number for uniqueness
                            constants.NAME: "reserved_name_for_field_list_labels_" + str(row_number),
                            constants.CONTROL: {u"appearance": u"label"},
                            constants.CHOICES: choices[list_name],
                        }
                        parent_children_array.append(table_list_header)
                        begin_table_list = False

                    # Every select in a table list must use the same list.
                    if table_list != list_name:
                        error_message = "Error on row: " + str(row_number) + "\n"
                        error_message += "Badly formatted table list, list names don't match: " + table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"

                parent_children_array.append(new_json_dict)
                continue

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        parent_children_array.append(row)

    # Anything left on the stack besides the root is an unclosed begin.
    if len(stack) != 1:
        raise PyXFormError("unmatched begin statement: " + str(stack[-1][0]))
    # print_pyobj_to_json(json_dict)
    return json_dict