Exemplo n.º 1
0
def workbook_to_json(workbook_dict, form_name=None, default_language=u"default", warnings=None):
    """
    Convert a workbook dictionary into the nested dictionary used by the json form spec.

    workbook_dict -- nested dictionaries representing a spreadsheet. should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
    1. In the xform the default language is the language reverted to when there is no translation available for some itext element.
       Because of this every itext element must have a default language translation.
    2. In the workbook, media/labels/hints that do not have a language suffix will be treated as though their suffix is the default language.
       If the default language is used as a suffix for media/labels/hints, then the suffixless version will be overwritten.
       A default language given on the settings sheet takes precedence over this argument.
    warnings -- an optional list which warnings will be appended to (in place)

    returns a nested dictionary equivalent to the format specified in the json form spec.

    raises PyXFormError when:
    - the workbook has no survey sheet,
    - a choice has no name,
    - a row has a name and a label but no type,
    - a begin/end control statement is unmatched,
    - a question or group has no name (notes get a generated name instead) or an invalid name,
    - a loop has no list name, or a loop/select references an unknown list,
    - a select-multiple list contains a choice name with a space,
    - a table-list group mixes selects over different lists,
    - instanceID is omitted while a public key (encryption) is configured.
    """
    if warnings is None:
        # Set warnings to a list that will be discarded.
        warnings = []

    rowFormatString = "[row : %s]"

    # Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double colons to determine whether to use them or single colons to delimit grouped headers.
    # Single colons are bad because they conflict with the xform namespace syntax (i.e. jr:constraintMsg),
    # so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects (settings, choices, survey_sheet):
    ########### Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []), settings_header_aliases, use_double_colons
    )
    # Only the first row of the settings sheet is used.
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}

    default_language = settings.get(constants.DEFAULT_LANGUAGE, default_language)

    # add_none_option is a boolean that when true, indicates a none option should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = yes_no_aliases.get(settings[u"add_none_option"], False)

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.DEFAULT_LANGUAGE: default_language,
        # By default the version is based on the date and time yyyymmddhh
        # Leaving default version out for now since it might cause problems for formhub.
        # constants.VERSION : datetime.datetime.now().strftime("%Y%m%d%H"),
        constants.CHILDREN: [],
    }
    # Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    ########### Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated, but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, list_header_aliases, use_double_colons, default_language
    )

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(columns_sheet, list_header_aliases, use_double_colons, default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(choices_sheet, list_header_aliases, use_double_colons, default_language)

    combined_lists = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet, constants.LIST_NAME
    )

    choices = combined_lists
    # Make sure all the options have the required properties:
    # a missing name is fatal, a missing label only produces a warning.
    for list_name, options in choices.items():
        for option in options:
            if "name" not in option:
                info = "[list_name : " + list_name + "]"
                raise PyXFormError("On the choices sheet there is a option with no name. " + info)
            if "label" not in option:
                info = "[list_name : " + list_name + "]"
                warnings.append("On the choices sheet there is a option with no label. " + info)
    ########### Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, {})

    ########### Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError("You must have a sheet named (case-sensitive): " + constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    # Process the headers:
    # Text cleaning is on unless the settings sheet explicitly switches it off.
    clean_text_values_enabled = yes_no_aliases.get(settings.get("clean_text_values", "true()"))
    if clean_text_values_enabled:
        survey_sheet = clean_text_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(survey_sheet, survey_header_aliases, use_double_colons, default_language)
    survey_sheet = dealias_types(survey_sheet)
    ##################################

    # Parse the survey sheet while generating a survey in our json format:

    row_number = 1  # We start at 1 because the column header row is not included in the survey sheet (presumably).
    # A stack is used to keep track of begin/end expressions.
    # Each entry is (control type, children list that rows are currently appended to).
    stack = [(None, json_dict.get(constants.CHILDREN))]
    # If a group has a table-list appearance flag this is set to True when the group begins,
    # and then to the name of the list once the first select inside the group has been seen.
    table_list = None
    # For efficiency we compile all the regular expressions that will be used to parse types:
    end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>(" + "|".join(control_aliases.keys()) + r"))$")
    begin_control_regex = re.compile(
        r"^(?P<begin>begin)(\s|_)(?P<type>(" + "|".join(control_aliases.keys()) + r"))( (over )?(?P<list_name>\S+))?$"
    )
    select_regexp = re.compile(
        r"^(?P<select_command>("
        + "|".join(select_aliases.keys())
        + r")) (?P<list_name>\S+)( (?P<specify_other>(or specify other|or_other|or other)))?$"
    )
    cascading_regexp = re.compile(
        r"^(?P<cascading_command>(" + "|".join(cascading_aliases.keys()) + r")) (?P<cascading_level>\S+)?$"
    )
    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]

        # Disabled should probably be first so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append(
                rowFormatString % row_number
                + " The 'disabled' column header is not part of the current spec."
                + " We recommend using relevant instead."
            )
            disabled = row.pop(u"disabled")
            if yes_no_aliases.get(disabled):
                continue

        # skip empty rows
        if not row:
            continue

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # A typeless row is treated as a comment row and skipped with a warning,
            # unless it has both a name and a label, in which case it is an error.
            if not ((constants.NAME in row) and (constants.LABEL in row)):
                warnings.append(
                    rowFormatString % row_number + " Row without name, text, or label is being skipped:\n" + str(row)
                )
                continue
            raise PyXFormError(rowFormatString % row_number + " Question with no type.")

        # Check if the question is actually a setting specified on the survey sheet
        settings_type = settings_header_aliases.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        # Try to parse question as a end control statement (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = control_aliases[parse_dict["type"]]
                # The end statement must close the innermost open control, and
                # the root entry of the stack can never be closed.
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        rowFormatString % row_number
                        + " Unmatched end statement. Previous control type: "
                        + str(prev_control_type)
                        + ", Control type: "
                        + str(control_type)
                    )
                stack.pop()
                table_list = None
                continue

        # Make sure the row has a valid name
        if constants.NAME not in row:
            # TODO: It could be slick if had nameless groups generate a flat model
            #      with only a body element.
            if row["type"] == "note":
                # autogenerate names for notes without them
                row["name"] = "generated_note_name_" + str(row_number)
            else:
                raise PyXFormError(rowFormatString % row_number + " Question or group with no name.")
        question_name = unicode(row[constants.NAME])
        if not is_valid_xml_tag(question_name):
            error_message = rowFormatString % row_number
            error_message += " Invalid question name [" + question_name + "] "
            error_message += "Names must begin with a letter, colon, or underscore. "
            error_message += "Subsequent characters can include numbers, dashes, and periods."
            raise PyXFormError(error_message)

        if (
            constants.LABEL not in row
            and row.get(constants.MEDIA) is None
            and question_type not in label_optional_types
        ):
            # TODO: Should there be a default label?
            #      Not sure if we should throw warnings for groups...
            #      Warnings can be ignored so I'm not too concerned about false positives.
            warnings.append(rowFormatString % row_number + " Question has no label: " + str(row))

        # Try to parse question as begin control statement (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                # Create a new json dict with children, and the proper type, and add it to parent_children_array in place of a question.
                # parent_children_array will then be set to its children array (so following questions are nested under it)
                # until an end command is encountered.
                control_type = control_aliases[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                # Compare by value: identity comparison of strings is not reliable.
                if control_type == constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError(rowFormatString % row_number + " Repeat loop without list name.")
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError(
                            rowFormatString % row_number + " List name not in columns sheet: " + list_name
                        )
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Code to deal with table_list appearance flags (for groups of selects)
                if new_json_dict.get(u"control", {}).get(u"appearance") == constants.TABLE_LIST:
                    table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"
                    # Generate a note label element so hints and labels
                    # work as expected in table-lists.
                    # see https://github.com/modilabs/pyxform/issues/62
                    if "label" in new_json_dict or "hint" in new_json_dict:
                        generated_label_element = {
                            "type": "note",
                            "name": "generated_table_list_label_" + str(row_number),
                        }
                        # The label and hint are moved from the group onto the generated note.
                        if "label" in new_json_dict:
                            generated_label_element[constants.LABEL] = new_json_dict[constants.LABEL]
                            del new_json_dict[constants.LABEL]
                        if "hint" in new_json_dict:
                            generated_label_element["hint"] = new_json_dict["hint"]
                            del new_json_dict["hint"]
                        child_list.append(generated_label_element)

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(rowFormatString % row_number + " Cascading select needs a name.")
                cascading_json = get_cascading_json(cascading_choices, cascading_prefix, cascading_level)

                for c in cascading_json:
                    parent_children_array.append(c)
                continue  # so the row isn't put in as is

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = select_aliases[parse_dict["select_command"]]
                list_name = parse_dict["list_name"]

                if list_name not in choices:
                    raise PyXFormError(rowFormatString % row_number + " List name not in choices sheet: " + list_name)

                # Validate select_multiple choice names by making sure they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY:
                    for choice in choices[list_name]:
                        if " " in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added to multiple choice selects. See ["
                                + choice[constants.NAME]
                                + "] in ["
                                + list_name
                                + "]"
                            )

                # Stays None unless the disabled code below is re-enabled.
                specify_other_question = None
                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"
                #                    #With this code we no longer need to handle or_other questions in survey builder.
                #                    #However, it depends on being able to use choice filters and xpath expressions that return empty sets.
                #                    choices[list_name].append(
                #                        {
                #                            'name': 'other',
                #                            'label': {default_language : 'Other'},
                #                            'orOther': 'true',
                #                        })
                #                    or_other_xpath = 'isNull(orOther)'
                #                    if 'choice_filter' in row:
                #                        row['choice_filter'] += ' or ' + or_other_xpath
                #                    else:
                #                        row['choice_filter'] = or_other_xpath
                #
                #                    specify_other_question = \
                #                        {
                #                          'type':'text',
                #                          'name': row['name'] + '_specify_other',
                #                          'label':'Specify Other for:\n"' + row['label'] + '"',
                #                          'bind' : {'relevant': "selected(../%s, 'other')" % row['name']},
                #                        }

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type
                new_json_dict["itemset"] = list_name
                # With a choice filter all the choice lists are stored once on the survey root;
                # otherwise the select carries its own list of choices.
                if row.get("choice_filter"):
                    json_dict["choices"] = choices
                else:
                    new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags (for groups of selects)
                if table_list is not None:
                    if not isinstance(table_list, basestring):  # Then this row is the first select in a table list
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE: select_type,
                            constants.NAME: "reserved_name_for_field_list_labels_"
                            + str(row_number),  # Adding row number for uniqueness
                            constants.CONTROL: {u"appearance": u"label"},
                            constants.CHOICES: choices[list_name],
                            # Do we care about filtered selects in table lists?
                            #'itemset' : list_name,
                        }
                        parent_children_array.append(table_list_header)

                    # Every select in a table list has to use the list of the first select.
                    if table_list != list_name:
                        error_message = rowFormatString % row_number
                        error_message += (
                            " Badly formatted table list, list names don't match: " + table_list + " vs. " + list_name
                        )
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"

                parent_children_array.append(new_json_dict)
                if specify_other_question:
                    parent_children_array.append(specify_other_question)
                continue

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        parent_children_array.append(row)

    # Anything left on the stack besides the root entry is a begin statement that was never closed.
    if len(stack) != 1:
        raise PyXFormError("Unmatched begin statement: " + str(stack[-1][0]))

    # Automatically add an instanceID element:
    if yes_no_aliases.get(settings.get("omit_instanceID")):
        if settings.get("public_key"):
            raise PyXFormError("Cannot omit instanceID, it is required for encryption.")
    else:
        meta_element = {
            "name": "meta",
            "type": "group",
            "control": {"bodyless": True},
            "children": [
                {
                    "name": "instanceID",
                    "bind": {"readonly": "true()", "calculate": "concat('uuid:', uuid())"},
                    "type": "calculate",
                }
            ],
        }
        # The root entry of the stack holds the survey's top-level children list.
        _, survey_children_array = stack[0]
        survey_children_array.append(meta_element)

    # print_pyobj_to_json(json_dict)

    return json_dict
Exemplo n.º 2
0
def workbook_to_json(workbook_dict,
                     form_name=None,
                     default_language=u"default",
                     warnings=None):
    """
    workbook_dict -- nested dictionaries representing a spreadsheet. should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
    1. In the xform the default language is the language reverted to when there is no translation available for some itext element.
       Because of this every itext element must have a default language translation.
    2. In the workbook if media/labels/hints that do not have a language suffix will be treated as though their suffix is the default language.
       If the default language is used as a suffix for media/labels/hints, then the suffixless version will be overwritten.
    warnings -- an optional list which warnings will be appended to
    
    returns a nested dictionary equivalent to the format specified in the json form spec.
    """
    if warnings is None:
        #Set warnings to a list that will be discarded.
        warnings = []

    #Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    #We check for double columns to determine whether to use them or single colons to delimit grouped headers.
    #Single colons are bad because they conflict with with the xform namespace syntax (i.e. jr:constraintMsg),
    #so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    #Break the spreadsheet dict into easier to access objects (settings, choices, survey_sheet):
    ########### Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []), settings_header_aliases,
        use_double_colons)
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}

    default_language = settings.get(constants.DEFAULT_LANGUAGE,
                                    default_language)

    #add_none_option is a boolean that when true, indicates a none option should automatically be added to selects.
    #It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = yes_no_aliases.get(
            settings[u"add_none_option"], u"false()") == u"true()"

    #Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.DEFAULT_LANGUAGE: default_language,
        constants.CHILDREN: []
    }
    #Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    ########### Choices sheet ##########
    #Columns and "choices and columns" sheets are deprecated, but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(
        constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, list_header_aliases, use_double_colons,
        default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(columns_sheet,
                                              list_header_aliases,
                                              use_double_colons,
                                              default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(choices_sheet,
                                              list_header_aliases,
                                              use_double_colons,
                                              default_language)

    combined_lists = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet,
        constants.LIST_NAME)

    choices = combined_lists

    ########### Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, {})

    ########### Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError("You must have a sheet named (case-sensitive): " +
                           constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    #Process the headers:
    survey_sheet = clean_unicode_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(survey_sheet,
                                             survey_header_aliases,
                                             use_double_colons,
                                             default_language)
    survey_sheet = dealias_types(survey_sheet)
    ##################################

    #Parse the survey sheet while generating a survey in our json format:

    row_number = 1  #We start at 1 because the column header row is not included in the survey sheet (presumably).
    #A stack is used to keep track of begin/end expressions
    stack = [(None, json_dict.get(constants.CHILDREN))]
    #If a group has a table-list appearance flag this will be set to the name of the list
    table_list = None
    begin_table_list = False
    #For efficiency we compile all the regular expressions that will be used to parse types:
    end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>(" +
                                   '|'.join(control_aliases.keys()) + r"))$")
    begin_control_regex = re.compile(r"^(?P<begin>begin)(\s|_)(?P<type>(" +
                                     '|'.join(control_aliases.keys()) +
                                     r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(
        r"^(?P<select_command>(" + '|'.join(select_aliases.keys()) +
        r")) (?P<list_name>\S+)( (?P<specify_other>(or specify other|or_other|or other)))?$"
    )
    cascading_regexp = re.compile(r"^(?P<cascading_command>(" +
                                  '|'.join(cascading_aliases.keys()) +
                                  r")) (?P<cascading_level>\S+)?$")
    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]

        #Disabled should probably be first so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append(
                "The 'disabled' column header is not part of the current spec. We recommend using relevant instead."
            )
            disabled = row.pop(u"disabled")
            if disabled in yes_no_aliases:
                disabled = yes_no_aliases[disabled]
            if disabled == 'true()':
                continue

        #skip empty rows
        if len(row) == 0: continue

        #Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # if name and label are also missing, then its a comment row, and we skip it with warning
            if not ((constants.NAME in row) and (constants.LABEL in row)):
                warnings.append(
                    "Row wihtout name, text, or label is being skipped " +
                    str(row_number) + ": " + str(row))
                continue
            raise PyXFormError("Question with no type on row " +
                               str(row_number))
            continue

        #Check if the question is actually a setting specified on the survey sheet
        settings_type = settings_header_aliases.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        #Try to parse question as a end control statement (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = control_aliases[parse_dict["type"]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        "Unmatched end statement. Previous control type: " +
                        str(prev_control_type) + ", Control type: " +
                        str(control_type))
                stack.pop()
                table_list = None
                continue

        #Make sure the question has a valid name
        question_name = unicode(row.get(constants.NAME))
        if not question_name:
            raise PyXFormError("Question with no name on row " +
                               str(row_number))
        if not is_valid_xml_tag(question_name):
            error_message = "Invalid question name [" + question_name + "] on row " + str(
                row_number) + "\n"
            error_message += "Names must begin with a letter, colon, or underscore. Subsequent characters can include numbers, dashes, and periods."
            raise PyXFormError(error_message)

        if constants.LABEL not in row and \
           row.get(constants.MEDIA) is None and \
           question_type not in label_optional_types:
            #TODO: Should there be a default label?
            #      Not sure if we should throw warnings for groups...
            #      Warnings can be ignored so I'm not too concerned about false positives.
            warnings.append("Warning unlabeled question in row " +
                            str(row_number) + ": " + str(row))

        #Try to parse question as begin control statement (i.e. begin loop/repeat/group:
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                #Create a new json dict with children, and the proper type, and add it to parent_children_array in place of a question.
                #parent_children_array will then be set to its children array (so following questions are nested under it)
                #until an end command is encountered.
                control_type = control_aliases[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                if control_type is constants.LOOP:
                    if not parse_dict.get("list_name"):
                        #TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError("Repeat without list name " +
                                           " Error on row: " + str(row_number))
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError("List name not in columns sheet: " +
                                           list_name + " Error on row: " +
                                           str(row_number))
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                #Code to deal with table_list appearance flags (for groups of selects)
                if new_json_dict.get(
                        u"control",
                    {}).get(u"appearance") == constants.TABLE_LIST:
                    begin_table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(
                        "Cascading select needs a name. Error on row: %s" %
                        row_number)
                cascading_json = get_cascading_json(cascading_choices,
                                                    cascading_prefix,
                                                    cascading_level)

                for c in cascading_json:
                    parent_children_array.append(c)
                continue  # so the row isn't put in as is

        #Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = select_aliases[parse_dict["select_command"]]
                list_name = parse_dict["list_name"]

                if list_name not in choices:
                    raise PyXFormError("List name not in choices sheet: " +
                                       list_name + " Error on row: " +
                                       str(row_number))

                #Validate select_multiple choice names by making sure they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added to multiple choice selects. See ["
                                + choice[constants.NAME] + "] in [" +
                                list_name + "]")

                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type
                new_json_dict[constants.CHOICES] = choices[list_name]

                #Code to deal with table_list appearance flags (for groups of selects)
                if table_list or begin_table_list:
                    if begin_table_list:  #If this row is the first select in a table list
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE:
                            select_type,
                            constants.NAME:
                            "reserved_name_for_field_list_labels_" +
                            str(row_number),  #Adding row number for uniqueness
                            constants.CONTROL: {
                                u"appearance": u"label"
                            },
                            constants.CHOICES:
                            choices[list_name]
                        }
                        parent_children_array.append(table_list_header)
                        begin_table_list = False

                    if table_list <> list_name:
                        error_message = "Error on row: " + str(
                            row_number) + "\n"
                        error_message += "Badly formatted table list, list names don't match: " + table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = new_json_dict.get(
                        u"control", {})
                    control[u"appearance"] = "list-nolabel"

                parent_children_array.append(new_json_dict)
                continue

        #TODO: Consider adding some question_type validation here.

        #Put the row in the json dict as is:
        parent_children_array.append(row)

    if len(stack) != 1:
        raise PyXFormError("unmatched begin statement: " + str(stack[-1][0]))
    #print_pyobj_to_json(json_dict)
    return json_dict
Exemplo n.º 3
0
def workbook_to_json(workbook_dict, form_name=None, default_language=u"default", warnings=None):
    """
    Convert a spreadsheet, given as nested dictionaries, into the nested json-form dictionary.

    workbook_dict -- nested dictionaries representing a spreadsheet. should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
    1. In the xform the default language is the language reverted to when there is no translation available for some itext element.
       Because of this every itext element must have a default language translation.
    2. In the workbook if media/labels/hints that do not have a language suffix will be treated as though their suffix is the default language.
       If the default language is used as a suffix for media/labels/hints, then the suffixless version will be overwritten.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the json form spec.

    raises PyXFormError when:
    - the workbook has no survey sheet,
    - a row has a name and a label but no type,
    - a row has no name, or a name that is not a valid xml tag,
    - begin/end control statements do not match up,
    - a loop has no list name, or a loop/select refers to a list that is not defined,
    - a select-multiple list contains a choice name with a space,
    - the selects inside a table-list group do not all use the same list.
    """
    if warnings is None:
        #Set warnings to a list that will be discarded.
        warnings = []

    #Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    #We check for double columns to determine whether to use them or single colons to delimit grouped headers.
    #Single colons are bad because they conflict with with the xform namespace syntax (i.e. jr:constraintMsg),
    #so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    #Break the spreadsheet dict into easier to access objects (settings, choices, survey_sheet):
    ########### Settings sheet ##########
    settings_sheet = dealias_and_group_headers(workbook_dict.get(constants.SETTINGS, []), settings_header_aliases, use_double_colons)
    #Only the first row of the settings sheet is used.
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}

    #A default language given on the settings sheet takes precedence over the argument.
    default_language = settings.get(constants.DEFAULT_LANGUAGE, default_language)

    #add_none_option is a boolean that when true, indicates a none option should automatically be added to selects.
    #It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = yes_no_aliases.get(settings[u"add_none_option"], u"false()") == u"true()"

    #Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    json_dict = {
       constants.TYPE : constants.SURVEY,
       constants.NAME : form_name,
       constants.TITLE : id_string,
       constants.ID_STRING : id_string,
       constants.DEFAULT_LANGUAGE : default_language,
       constants.CHILDREN : []
    }
    #Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    ########### Choices sheet ##########
    #Columns and "choices and columns" sheets are deprecated, but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(choices_and_columns_sheet, list_header_aliases, use_double_colons, default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(columns_sheet, list_header_aliases, use_double_colons, default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(choices_sheet, list_header_aliases, use_double_colons, default_language)

    #All three sheets are merged and grouped by list name; lookups below are done by list name.
    choices = group_dictionaries_by_key(choices_and_columns_sheet + choices_sheet + columns_sheet, constants.LIST_NAME)

    ########### Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, {})

    ########### Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError("You must have a sheet named (case-sensitive): " + constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    #Process the headers:
    survey_sheet = clean_unicode_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(survey_sheet, survey_header_aliases, use_double_colons, default_language)
    survey_sheet = dealias_types(survey_sheet)
    ##################################

    #Parse the survey sheet while generating a survey in our json format:

    row_number = 1 #We start at 1 because the column header row is not included in the survey sheet (presumably).
    #A stack is used to keep track of begin/end expressions.
    #Each entry is a (control type, children list) pair; the bottom entry is the survey root.
    stack = [(None, json_dict.get(constants.CHILDREN))]
    #If a group has a table-list appearance flag this will be set to the name of the list
    table_list = None
    #True between a table-list group's begin row and the first select that follows it.
    begin_table_list = False
    #For efficiency we compile all the regular expressions that will be used to parse types:
    end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>("
                                   + '|'.join(control_aliases.keys()) + r"))$")
    begin_control_regex = re.compile(r"^(?P<begin>begin)(\s|_)(?P<type>("
                                     + '|'.join(control_aliases.keys())
                                     + r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(r"^(?P<select_command>("
                               + '|'.join(select_aliases.keys())
                               + r")) (?P<list_name>\S+)( (?P<specify_other>(or specify other|or_other|or other)))?$")
    cascading_regexp = re.compile(r"^(?P<cascading_command>("
                               + '|'.join(cascading_aliases.keys())
                               + r")) (?P<cascading_level>\S+)?$")
    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]

        #Disabled should probably be first so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append("The 'disabled' column header is not part of the current spec. We recommend using relevant instead.")
            disabled = row.pop(u"disabled")
            if disabled in yes_no_aliases:
                disabled = yes_no_aliases[disabled]
            if disabled == 'true()':
                continue

        #skip empty rows
        if len(row) == 0:
            continue

        #Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # if name and label are also missing, then its a comment row, and we skip it with warning
            if not ((constants.NAME in row) and (constants.LABEL in row)):
                warnings.append("Row wihtout name, text, or label is being skipped " + str(row_number) + ": " + str(row))
                continue
            raise PyXFormError("Question with no type on row " + str(row_number))

        #Check if the question is actually a setting specified on the survey sheet
        settings_type = settings_header_aliases.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        #Try to parse question as a end control statement (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = control_aliases[parse_dict["type"]]
                #The end statement must close the innermost open control, and the root entry can never be closed.
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError("Unmatched end statement. Previous control type: " + str(prev_control_type) + ", Control type: " + str(control_type))
                stack.pop()
                table_list = None
                continue

        #Make sure the question has a valid name.
        #The raw value is checked before conversion because unicode(None) is the
        #non-empty string u"None", which would let a nameless row pass as "None".
        raw_question_name = row.get(constants.NAME)
        if raw_question_name is None or not unicode(raw_question_name):
            raise PyXFormError("Question with no name on row " + str(row_number))
        question_name = unicode(raw_question_name)
        if not is_valid_xml_tag(question_name):
            error_message = "Invalid question name [" + question_name + "] on row " + str(row_number) + "\n"
            error_message += "Names must begin with a letter, colon, or underscore. Subsequent characters can include numbers, dashes, and periods."
            raise PyXFormError(error_message)

        if constants.LABEL not in row and \
           row.get(constants.MEDIA) is None and \
           question_type not in label_optional_types:
            #TODO: Should there be a default label?
            #      Not sure if we should throw warnings for groups...
            #      Warnings can be ignored so I'm not too concerned about false positives.
            warnings.append("Warning unlabeled question in row " + str(row_number) + ": " + str(row))

        #Try to parse question as begin control statement (i.e. begin loop/repeat/group:
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                #Create a new json dict with children, and the proper type, and add it to parent_children_array in place of a question.
                #parent_children_array will then be set to its children array (so following questions are nested under it)
                #until an end command is encountered.
                control_type = control_aliases[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                #Compare by value: identity ("is") only works if both strings happen to be the same object.
                if control_type == constants.LOOP:
                    if not parse_dict.get("list_name"):
                        #TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError("Repeat without list name " + " Error on row: " + str(row_number))
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError("List name not in columns sheet: " + list_name + " Error on row: " + str(row_number))
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                #Code to deal with table_list appearance flags (for groups of selects)
                if new_json_dict.get(u"control",{}).get(u"appearance") == constants.TABLE_LIST:
                    begin_table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError("Cascading select needs a name. Error on row: %s" % row_number)
                cascading_json = get_cascading_json(cascading_choices, cascading_prefix, cascading_level)

                #The generated questions take the place of this row.
                parent_children_array.extend(cascading_json)
                continue # so the row isn't put in as is

        #Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = select_aliases[parse_dict["select_command"]]
                list_name = parse_dict["list_name"]

                if list_name not in choices:
                    raise PyXFormError("List name not in choices sheet: " + list_name + " Error on row: " + str(row_number))

                #Validate select_multiple choice names by making sure they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError("Choice names with spaces cannot be added to multiple choice selects. See [" + choice[constants.NAME] + "] in [" + list_name + "]")

                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type
                new_json_dict[constants.CHOICES] = choices[list_name]

                #Code to deal with table_list appearance flags (for groups of selects)
                if table_list or begin_table_list:
                    if begin_table_list: #If this row is the first select in a table list
                        table_list = list_name
                        #An extra select with the "label" appearance is inserted ahead of the first real select.
                        table_list_header = {
                            constants.TYPE : select_type,
                            constants.NAME : "reserved_name_for_field_list_labels_" + str(row_number), #Adding row number for uniqueness
                            constants.CONTROL : { u"appearance" : u"label" },
                            constants.CHOICES : choices[list_name]
                        }
                        parent_children_array.append(table_list_header)
                        begin_table_list = False

                    #Every select in a table list has to use the list of the first select.
                    if table_list != list_name:
                        error_message = "Error on row: " + str(row_number) + "\n"
                        error_message += "Badly formatted table list, list names don't match: " + table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"

                parent_children_array.append(new_json_dict)
                continue

        #TODO: Consider adding some question_type validation here.

        #Put the row in the json dict as is:
        parent_children_array.append(row)

    #Anything left above the root entry is a begin statement that was never closed.
    if len(stack) != 1:
        raise PyXFormError("unmatched begin statement: " + str(stack[-1][0]))
    #print_pyobj_to_json(json_dict)
    return json_dict