Exemplo n.º 1
0
 def validate(self):
     """Check that ``self.name`` is usable as an XML tag name.

     Returns None when ``is_valid_xml_tag`` accepts the name; otherwise
     raises PyXFormError with a message describing the naming rules.
     """
     # Guard clause: nothing to report for a valid name.
     if is_valid_xml_tag(self.name):
         return
     raise PyXFormError(
         "The name '%s' is an invalid xml tag. Names must begin with a letter, colon, or underscore, subsequent characters can include numbers, dashes, and periods."
         % self.name
     )
Exemplo n.º 2
0
def workbook_to_json(workbook_dict,
                     form_name=None,
                     default_language=u"default",
                     warnings=None):
    """
    Convert a workbook dictionary into the nested json form dictionary.

    workbook_dict -- nested dictionaries representing a spreadsheet.
                    should be similar to those returned by xls_to_dict.
                    NOTE: the "<survey>_header" and "<choices>_header"
                    entries, when present, are validated and then deleted
                    from this dictionary in place.
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
    1. In the xform the default language is the language reverted to when
       there is no translation available for some itext element. Because
       of this every itext element must have a default language translation.
    2. In the workbook if media/labels/hints that do not have a
       language suffix will be treated as though their suffix is the
       default language.
       If the default language is used as a suffix for media/labels/hints,
       then the suffixless version will be overwritten.
       A default language given on the settings sheet takes precedence
       over this argument.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the
    json form spec.

    raises PyXFormError when required headers or the survey sheet are
    missing, when a row has no type/name or an invalid name, when
    begin/end control statements do not match, when a referenced choice
    list cannot be found, or when instanceID is omitted while a public key
    is set.
    """
    # ensure required headers are present
    survey_header_sheet = u'%s_header' % constants.SURVEY
    if survey_header_sheet in workbook_dict:
        survey_headers = workbook_dict.get(survey_header_sheet)
        if not survey_headers:
            raise PyXFormError(u"The survey sheet is missing column headers.")
        tmp = [h for h in [u'type', u'name'] if h in survey_headers[0].keys()]
        # Compare by value: identity checks against int literals are
        # implementation-dependent.
        if len(tmp) != 2:
            raise PyXFormError(u"The survey sheet must have on the first row"
                               u" name and type columns.")
        del workbook_dict[survey_header_sheet]
    choices_header_sheet = u'%s_header' % constants.CHOICES
    if choices_header_sheet in workbook_dict:
        choices_headers = workbook_dict.get(choices_header_sheet)
        if not choices_headers:
            raise PyXFormError(u"The choices sheet is missing column headers.")
        choices_header_list = [u'list name', u'list_name', u'name']
        tmp = [
            h for h in choices_header_list if h in choices_headers[0].keys()
        ]
        # Exactly two of the three accepted headers must be present
        # (one spelling of the list name column plus the name column).
        if len(tmp) != 2:
            raise PyXFormError(u"The choices sheet must have on the first row"
                               u" list_name and name.")
        del workbook_dict[choices_header_sheet]
    if warnings is None:
        # Set warnings to a list that will be discarded.
        warnings = []

    rowFormatString = '[row : %s]'

    # Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double colons to determine whether to use them
    # or single colons to delimit grouped headers.
    # Single colons are bad because they conflict with the xform namespace
    # syntax (i.e. jr:constraintMsg),
    # so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects
    # (settings, choices, survey_sheet):
    # ########## Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []), aliases.settings_header,
        use_double_colons)
    # Only the first row of the settings sheet is used.
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}

    default_language = settings.get(constants.DEFAULT_LANGUAGE,
                                    default_language)

    # add_none_option is a boolean that when true,
    # indicates a none option should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = aliases.yes_no.get(
            settings[u"add_none_option"], False)

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    sms_keyword = settings.get(constants.SMS_KEYWORD, id_string)
    xml_root = settings.get(constants.XML_ROOT, form_name)
    json_dict = {
        constants.TYPE:
        constants.SURVEY,
        constants.NAME:
        xml_root,
        constants.TITLE:
        id_string,
        constants.ID_STRING:
        id_string,
        constants.SMS_KEYWORD:
        sms_keyword,
        constants.DEFAULT_LANGUAGE:
        default_language,
        # By default the version is based on the date and time yyyymmddhh
        # Leaving default version out for now since it might cause
        # problems for formhub.
        # constants.VERSION : datetime.datetime.now().strftime("%Y%m%d%H"),
        constants.CHILDREN: []
    }
    # Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    # ########## Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated,
    # but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(
        constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, aliases.list_header, use_double_colons,
        default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(columns_sheet,
                                              aliases.list_header,
                                              use_double_colons,
                                              default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(choices_sheet,
                                              aliases.list_header,
                                              use_double_colons,
                                              default_language)
    # ########## Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, [])
    if len(cascading_choices):
        if 'choices' in cascading_choices[0]:
            choices_sheet = choices_sheet + cascading_choices[0]['choices']

    combined_lists = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet,
        constants.LIST_NAME)

    choices = combined_lists
    # Make sure all the options have the required properties:
    warnedabout = set()
    for list_name, options in choices.items():
        for option in options:
            if 'name' not in option:
                info = "[list_name : " + list_name + ']'
                raise PyXFormError("On the choices sheet there is "
                                   "a option with no name. " + info)
            if 'label' not in option:
                info = "[list_name : " + list_name + ']'
                warnings.append(
                    "On the choices sheet there is a option with no label. " +
                    info)
            # chrislrobert's fix for a cryptic error message:
            # see: https://code.google.com/p/opendatakit/issues/detail?id=832&start=200 # noqa
            # Iterate over a snapshot of the keys because bad columns are
            # deleted from the option while looping.
            for headername in list(option.keys()):
                # Using warnings and removing the bad columns
                # instead of throwing errors because some forms
                # use choices column headers for notes.
                if ' ' in headername:
                    if headername not in warnedabout:
                        warnedabout.add(headername)
                        warnings.append("On the choices sheet there is " +
                                        "a column (\"" + headername +
                                        "\") with an illegal header. " +
                                        "Headers cannot include spaces.")
                    del option[headername]
                elif headername == '':
                    warnings.append("On the choices sheet there is a value" +
                                    " in a column with no header.")
                    del option[headername]
    # ########## Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError("You must have a sheet named (case-sensitive): " +
                           constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    # Process the headers:
    clean_text_values_enabled = aliases.yes_no.get(
        settings.get("clean_text_values", "true()"))
    if clean_text_values_enabled:
        survey_sheet = clean_text_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(survey_sheet,
                                             aliases.survey_header,
                                             use_double_colons,
                                             default_language)
    survey_sheet = dealias_types(survey_sheet)

    osm_sheet = workbook_dict.get(constants.OSM, [])
    osm_tags = group_dictionaries_by_key(osm_sheet, constants.LIST_NAME)
    # #################################

    # Parse the survey sheet while generating a survey in our json format:

    row_number = 1  # We start at 1 because the column header row is not
    #                 included in the survey sheet (presumably).
    # A stack is used to keep track of begin/end expressions.
    # Each entry is (control type, children list new rows are appended to).
    stack = [(None, json_dict.get(constants.CHILDREN))]
    # If a group has a table-list appearance flag
    # this will be set to the name of the list
    table_list = None
    # For efficiency we compile all the regular expressions
    # that will be used to parse types:
    end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>(" +
                                   '|'.join(aliases.control.keys()) + r"))$")
    begin_control_regex = re.compile(r"^(?P<begin>begin)(\s|_)(?P<type>(" +
                                     '|'.join(aliases.control.keys()) +
                                     r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(
        r"^(?P<select_command>(" + '|'.join(aliases.select.keys()) +
        r")) (?P<list_name>\S+)" +
        "( (?P<specify_other>(or specify other|or_other|or other)))?$")
    cascading_regexp = re.compile(r"^(?P<cascading_command>(" +
                                  '|'.join(aliases.cascading.keys()) +
                                  r")) (?P<cascading_level>\S+)?$")
    osm_regexp = re.compile(r"(?P<osm_command>(" +
                            '|'.join(aliases.osm.keys()) +
                            r")) (?P<list_name>\S+)")

    def replace_prefix(d, prefix):
        # Recursively substitute the '$PREFIX$' placeholder in every string
        # value of d (in place), descending into nested dicts and lists.
        # A snapshot of the items is used because values are reassigned
        # while looping.
        for k, v in list(d.items()):
            if isinstance(v, basestring):
                d[k] = v.replace('$PREFIX$', prefix)
            elif isinstance(v, dict):
                d[k] = replace_prefix(v, prefix)
            elif isinstance(v, list):
                # Build a real list; a lazy map object must not be stored.
                d[k] = [replace_prefix(x, prefix) for x in v]
        return d

    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]
        # Disabled should probably be first
        # so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append(
                rowFormatString % row_number +
                " The 'disabled' column header is not part of the current" +
                " spec. We recommend using relevant instead.")
            disabled = row.pop(u"disabled")
            if aliases.yes_no.get(disabled):
                continue

        # skip empty rows
        if len(row) == 0:
            continue

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # if name and label are also missing,
            # then its a comment row, and we skip it with warning
            if not ((constants.NAME in row) or (constants.LABEL in row)):
                warnings.append(rowFormatString % row_number +
                                " Row without name,"
                                " text, or label is being skipped:\n" +
                                str(row))
                continue
            raise PyXFormError(rowFormatString % row_number +
                               " Question with no type.\n" + str(row))

        if question_type == 'calculate':
            calculation = row.get('bind', {}).get('calculate')
            if not calculation:
                raise PyXFormError(rowFormatString % row_number +
                                   " Missing calculation.")

        # Check if the question is actually a setting specified
        # on the survey sheet
        settings_type = aliases.settings_header.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        # Try to parse question as a end control statement
        # (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = aliases.control[parse_dict["type"]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        rowFormatString % row_number +
                        " Unmatched end statement. Previous control type: " +
                        str(prev_control_type) + ", Control type: " +
                        str(control_type))
                stack.pop()
                table_list = None
                continue

        # Make sure the row has a valid name
        if constants.NAME not in row:
            if row['type'] == 'note':
                # autogenerate names for notes without them
                row['name'] = "generated_note_name_" + str(row_number)
            # elif 'group' in row['type'].lower():
            #     # autogenerate names for groups without them
            #     row['name'] = "generated_group_name_" + str(row_number)
            else:
                raise PyXFormError(rowFormatString % row_number +
                                   " Question or group with no name.")
        question_name = unicode(row[constants.NAME])
        if not is_valid_xml_tag(question_name):
            error_message = rowFormatString % row_number
            error_message += " Invalid question name [" + question_name + "]"
            error_message += "Names must begin with a letter, colon,"\
                             + " or underscore."
            error_message += "Subsequent characters can include numbers,"\
                             + " dashes, and periods."
            raise PyXFormError(error_message)

        if constants.LABEL not in row and \
           row.get(constants.MEDIA) is None and \
           question_type not in aliases.label_optional_types:
            # TODO: Should there be a default label?
            #      Not sure if we should throw warnings for groups...
            #      Warnings can be ignored so I'm not too concerned
            #      about false positives.
            warnings.append(rowFormatString % row_number +
                            " Question has no label: " + str(row))

        # Try to parse question as begin control statement
        # (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                # Create a new json dict with children, and the proper type,
                # and add it to parent_children_array in place of a question.
                # parent_children_array will then be set to its children array
                # (so following questions are nested under it)
                # until an end command is encountered.
                control_type = aliases.control[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                # Compare by value; identity of equal strings is not
                # guaranteed.
                if control_type == constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError(rowFormatString % row_number +
                                           " Repeat loop without list name.")
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError(
                            rowFormatString % row_number +
                            " List name not in columns sheet: " + list_name)
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Generate a new node for the jr:count column so
                # xpath expressions can be used.
                repeat_count_expression = new_json_dict.get('control',
                                                            {}).get('jr:count')
                if repeat_count_expression:
                    generated_node_name = new_json_dict['name'] + "_count"
                    parent_children_array.append({
                        "name": generated_node_name,
                        "bind": {
                            "readonly": "true()",
                            "calculate": repeat_count_expression,
                        },
                        "type": "calculate",
                    })
                    new_json_dict['control']['jr:count'] = \
                        "${" + generated_node_name + "}"

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                ctrl_ap = new_json_dict.get(u"control", {}).get(u"appearance")
                if ctrl_ap == constants.TABLE_LIST:
                    # True means "inside a table list, list name not yet
                    # known"; the first select row replaces it with the name.
                    table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"
                    # Generate a note label element so hints and labels
                    # work as expected in table-lists.
                    # see https://github.com/modilabs/pyxform/issues/62
                    if 'label' in new_json_dict or 'hint' in new_json_dict:
                        generated_label_element = {
                            "type": "note",
                            "name":
                            "generated_table_list_label_" + str(row_number)
                        }
                        if 'label' in new_json_dict:
                            generated_label_element[constants.LABEL] = \
                                new_json_dict[constants.LABEL]
                            del new_json_dict[constants.LABEL]
                        if 'hint' in new_json_dict:
                            generated_label_element['hint'] = \
                                new_json_dict['hint']
                            del new_json_dict['hint']
                        child_list.append(generated_label_element)

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                # The level group is optional in the regex, so this may be
                # None.
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(rowFormatString % row_number +
                                       " Cascading select needs a name.")
                # cascading_json = get_cascading_json(
                # cascading_choices, cascading_prefix, cascading_level)
                if len(cascading_choices) <= 0 or\
                        'questions' not in cascading_choices[0]:
                    # %-formatting keeps this a PyXFormError even when the
                    # level is None (concatenation would raise TypeError).
                    raise PyXFormError(
                        "Found a cascading_select %s, but could not"
                        " find %s in cascades sheet." %
                        (cascading_level, cascading_level))
                cascading_json = cascading_choices[0]['questions']
                json_dict['choices'] = choices
                include_bindings = False
                if 'bind' in row:
                    include_bindings = True
                for cq in cascading_json:
                    # include bindings
                    if include_bindings:
                        cq['bind'] = row['bind']

                    parent_children_array.append(
                        replace_prefix(cq, cascading_prefix))
                continue  # so the row isn't put in as is

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = aliases.select[parse_dict["select_command"]]
                if select_type == 'select one external'\
                        and 'choice_filter' not in row:
                    warnings.append(rowFormatString % row_number +
                                    u" select one external is only meant for"
                                    u" filtered selects.")
                    select_type = aliases.select['select_one']
                list_name = parse_dict["list_name"]

                if list_name not in choices\
                   and select_type != 'select one external':
                    if not choices:
                        raise PyXFormError(
                            u"There should be a choices sheet in this xlsform."
                            u" Please ensure that the choices sheet name is "
                            u"all in small caps.")
                    raise PyXFormError(rowFormatString % row_number +
                                       " List name not in choices sheet: " +
                                       list_name)

                # Validate select_multiple choice names by making sure
                # they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added "
                                "to multiple choice selects. See [" +
                                choice[constants.NAME] + "] in [" + list_name +
                                "]")

                specify_other_question = None
                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"
                    # With this code we no longer need to handle or_other
                    # questions in survey builder.
                    # However, it depends on being able to use choice filters
                    # and xpath expressions that return empty sets.
                    # choices[list_name].append(
                    # {
                    #     'name': 'other',
                    #     'label': {default_language : 'Other'},
                    #     'orOther': 'true',
                    # })
                    # or_other_xpath = 'isNull(orOther)'
                    # if 'choice_filter' in row:
                    #   row['choice_filter'] += ' or ' + or_other_xpath
                    # else:
                    #   row['choice_filter'] = or_other_xpath

                    # specify_other_question = \
                    # {
                    #       'type':'text',
                    #       'name': row['name'] + '_specify_other',
                    #       'label':
                    #        'Specify Other for:\n"' + row['label'] + '"',
                    #       'bind' : {'relevant':
                    #                "selected(../%s, 'other')" % row['name']},
                    #     }

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type

                if row.get('choice_filter'):
                    if select_type == 'select one external':
                        new_json_dict['query'] = list_name
                    else:
                        new_json_dict['itemset'] = list_name
                        json_dict['choices'] = choices
                else:
                    new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                if table_list is not None:
                    # Then this row is the first select in a table list
                    if not isinstance(table_list, basestring):
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE:
                            select_type,
                            constants.NAME:
                            "reserved_name_for_field_list_labels_" +
                            str(row_number
                                ),  # Adding row number for uniqueness # noqa
                            constants.CONTROL: {
                                u"appearance": u"label"
                            },
                            constants.CHOICES:
                            choices[list_name],
                            # Do we care about filtered selects in table lists?
                            # 'itemset' : list_name,
                        }
                        parent_children_array.append(table_list_header)

                    if table_list != list_name:
                        error_message = rowFormatString % row_number
                        error_message += " Badly formatted table list,"\
                                         " list names don't match: " +\
                                         table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = \
                        new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"
                parent_children_array.append(new_json_dict)
                if specify_other_question:
                    parent_children_array.append(specify_other_question)
                continue

        # Try to parse question as osm:
        osm_parse = osm_regexp.search(question_type)
        if osm_parse:
            parse_dict = osm_parse.groupdict()
            new_dict = row.copy()
            new_dict['type'] = constants.OSM

            if parse_dict.get('list_name') is not None:
                # NOTE(review): if the list name is absent from the osm
                # sheet, tags is None and the loop below fails - confirm
                # whether that should be reported as a PyXFormError.
                tags = osm_tags.get(parse_dict.get('list_name'))
                for tag in tags:
                    if osm_tags.get(tag.get('name')):
                        tag['choices'] = osm_tags.get(tag.get('name'))
                new_dict['tags'] = tags

            parent_children_array.append(new_dict)

            continue

        # range question_type
        if question_type == 'range':
            new_dict = process_range_question_type(row)
            parent_children_array.append(new_dict)
            continue

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        parent_children_array.append(row)

    # Anything left on the stack besides the root is an unclosed begin.
    if len(stack) != 1:
        raise PyXFormError("Unmatched begin statement: " + str(stack[-1][0]))

    if settings.get('flat', False):
        # print "Generating flattened instance..."
        add_flat_annotations(stack[0][1])

    meta_children = []

    if aliases.yes_no.get(settings.get("omit_instanceID")):
        if settings.get("public_key"):
            raise PyXFormError(
                "Cannot omit instanceID, it is required for encryption.")
    else:
        # Automatically add an instanceID element:
        meta_children.append({
            "name": "instanceID",
            "bind": {
                "readonly":
                "true()",
                "calculate":
                settings.get("instance_id", "concat('uuid:', uuid())"),
            },
            "type": "calculate",
        })

    if 'instance_name' in settings:
        # Automatically add an instanceName element:
        meta_children.append({
            "name": "instanceName",
            "bind": {
                "calculate": settings['instance_name']
            },
            "type": "calculate",
        })

    # PMA2020 Logging BEGIN
    if 'logging' in settings and aliases.yes_no.get(settings.get('logging')) \
            is True:
        meta_children.append({
            "name": "logging",
            "bind": {
                "calculate": "string('log.txt')"
            },
            "type": "hidden attachment"
        })
    # PMA2020 Logging END

    if len(meta_children) > 0:
        meta_element = \
            {
                "name": "meta",
                "type": "group",
                "control": {
                    "bodyless": True
                },
                "children": meta_children
            }
        # The meta group is appended to the root survey's children.
        noop, survey_children_array = stack[0]
        survey_children_array.append(meta_element)

    # print_pyobj_to_json(json_dict)
    return json_dict
Exemplo n.º 3
0
 def validate(self):
     """Ensure ``self.name`` passes ``is_valid_xml_tag``.

     Raises PyXFormError describing the naming rules when it does not;
     returns None otherwise.
     """
     # Early exit keeps the failure path unindented.
     if is_valid_xml_tag(self.name):
         return
     raise PyXFormError(
         "The name '%s' is an invalid xml tag. Names must begin with a letter, colon, or underscore, subsequent characters can include numbers, dashes, and periods." % self.name
     )
Exemplo n.º 4
0
def workbook_to_json(
        workbook_dict, form_name=None,
        default_language=u"default", warnings=None):
    """
    Convert a spreadsheet dictionary into the nested json form dictionary.

    workbook_dict -- nested dictionaries representing a spreadsheet.
                    should be similar to those returned by xls_to_dict.
                    NOTE: the "<sheet>_header" entries for the survey and
                    choices sheets are deleted from this dict once validated.
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
    1. In the xform the default language is the language reverted to when
       there is no translation available for some itext element. Because
       of this every itext element must have a default language translation.
    2. In the workbook if media/labels/hints that do not have a
       language suffix will be treated as though their suffix is the
       default language.
       If the default language is used as a suffix for media/labels/hints,
       then the suffixless version will be overwritten.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the
    json form spec.

    raises PyXFormError when required headers/sheets are missing, when a
    row has no type or no name, when a name is not a valid xml tag, when a
    referenced list is missing, or when begin/end statements do not match.
    """
    # ensure required headers are present
    survey_header_sheet = u'%s_header' % constants.SURVEY
    if survey_header_sheet in workbook_dict:
        survey_headers = workbook_dict.get(survey_header_sheet)
        if not survey_headers:
            raise PyXFormError(u"The survey sheet is missing column headers.")
        tmp = [h for h in [constants.TYPE, constants.NAME]
               if h in survey_headers[0].keys()]
        # Compare by value; identity checks on ints are unreliable.
        if len(tmp) != 2:
            raise PyXFormError(u"The survey sheet must have on the first row"
                               u" name and type columns.")
        del workbook_dict[survey_header_sheet]
    choices_header_sheet = u'%s_header' % constants.CHOICES
    if choices_header_sheet in workbook_dict:
        choices_headers = workbook_dict.get(choices_header_sheet)
        if not choices_headers:
            raise PyXFormError(u"The choices sheet is missing column headers.")
        choices_header_list = [u'list name', u'list_name', constants.NAME]
        tmp = [
            h for h in choices_header_list if h in choices_headers[0].keys()]
        if len(tmp) != 2:
            raise PyXFormError(u"The choices sheet must have on the first row"
                               u" list_name and name.")
        del workbook_dict[choices_header_sheet]
    if warnings is None:
        # Set warnings to a list that will be discarded.
        warnings = []

    rowFormatString = '[row : %s]'

    # Make sure the passed in vars are unicode
    if form_name:
        form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double colons to determine whether to use them
    # or single colons to delimit grouped headers.
    # Single colons are bad because they conflict with the xform namespace
    # syntax (i.e. jr:constraintMsg),
    # so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects
    # (settings, choices, survey_sheet):
    ########### Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []),
        aliases.settings_header, use_double_colons)
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}

    default_language = settings.get(
        constants.DEFAULT_LANGUAGE, default_language)

    # add_none_option is a boolean that when true,
    # indicates a none option should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = aliases.yes_no.get(
            settings[u"add_none_option"], False)

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    form_name = form_name if form_name else id_string
    title = settings.get(constants.TITLE, id_string)
    sms_keyword = settings.get(constants.SMS_KEYWORD, id_string)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: title,
        constants.ID_STRING: id_string,
        constants.SMS_KEYWORD: sms_keyword,
        constants.DEFAULT_LANGUAGE: default_language,
        # By default the version is based on the date and time yyyymmddhh
        # Leaving default version out for now since it might cause
        # problems for formhub.
        # constants.VERSION : datetime.datetime.now().strftime("%Y%m%d%H"),
        constants.CHILDREN: []
    }
    # Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    ########### Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated,
    # but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(
        constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, aliases.list_header,
        use_double_colons, default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(
        columns_sheet, aliases.list_header,
        use_double_colons, default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(
        choices_sheet, aliases.list_header, use_double_colons,
        default_language)
    ########### Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, [])
    if len(cascading_choices):
        if 'choices' in cascading_choices[0]:
            choices_sheet = choices_sheet + cascading_choices[0]['choices']

    combined_lists = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet,
        constants.LIST_NAME)

    choices = combined_lists
    # Make sure all the options have the required properties:
    warnedabout = set()
    for list_name, options in choices.items():
        for option in options:
            if constants.NAME not in option:
                info = "[list_name : " + list_name + ']'
                raise PyXFormError("On the choices sheet there is "
                                   "a option with no name. " + info)
            if 'label' not in option:
                info = "[list_name : " + list_name + ']'
                warnings.append(
                    "On the choices sheet there is a option with no label. "
                    + info)
            # chrislrobert's fix for a cryptic error message:
            # see: https://code.google.com/p/opendatakit/issues/detail?id=833&start=200
            # Iterate over a snapshot of the keys because entries are
            # deleted from `option` inside the loop.
            for headername in list(option.keys()):
                # Using warnings and removing the bad columns
                # instead of throwing errors because some forms
                # use choices column headers for notes.
                if ' ' in headername:
                    if headername not in warnedabout:
                        warnedabout.add(headername)
                        warnings.append("On the choices sheet there is " +
                                        "a column (\"" +
                                        headername +
                                        "\") with an illegal header. " +
                                        "Headers cannot include spaces.")
                    del option[headername]
                elif headername == '':
                    warnings.append("On the choices sheet there is a value" +
                                    " in a column with no header.")
                    del option[headername]
    ########### Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError(
            "You must have a sheet named (case-sensitive): "
            + constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    # Process the headers:
    clean_text_values_enabled = aliases.yes_no.get(
        settings.get("clean_text_values", "true()"))
    if clean_text_values_enabled:
        survey_sheet = clean_text_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(
        survey_sheet, aliases.survey_header,
        use_double_colons, default_language)
    survey_sheet = dealias_types(survey_sheet)
    ##################################

    # Parse the survey sheet while generating a survey in our json format:

    # We start at 1 because the column header row is not
    # included in the survey sheet (presumably).
    row_number = 1
    # A stack is used to keep track of begin/end expressions.
    # Each entry is (control_type, children_list_of_that_control).
    stack = [(None, json_dict.get(constants.CHILDREN))]
    # If a group has a table-list appearance flag
    # this will be set to the name of the list
    # (True until the first select inside the group is seen).
    table_list = None
    # For efficiency we compile all the regular expressions
    # that will be used to parse types:
    end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>("
                                   + '|'.join(aliases.control.keys()) + r"))$")
    begin_control_regex = re.compile(r"^(?P<begin>begin)(\s|_)(?P<type>("
                                     + '|'.join(aliases.control.keys())
                                     + r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(
        r"^(?P<select_command>(" + '|'.join(aliases.multiple_choice.keys())
        + r")) (?P<list_name>\S+)"
        + "( (?P<specify_other>(or specify other|or_other|or other)))?$")
    cascading_regexp = re.compile(
        r"^(?P<cascading_command>("
        + '|'.join(aliases.cascading.keys())
        + r")) (?P<cascading_level>\S+)?$")
    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]
        # Disabled should probably be first
        # so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append(
                rowFormatString % row_number +
                " The 'disabled' column header is not part of the current" +
                " spec. We recommend using relevant instead.")
            disabled = row.pop(u"disabled")
            if aliases.yes_no.get(disabled):
                continue

        # skip empty rows
        if len(row) == 0:
            continue

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # if name and label are also missing,
            # then its a comment row, and we skip it with warning
            if not ((constants.NAME in row) or (constants.LABEL in row)):
                warnings.append(
                    rowFormatString % row_number + " Row without name,"
                    " text, or label is being skipped:\n" + str(row))
                continue
            raise PyXFormError(
                rowFormatString % row_number +
                " Question with no type.\n" + str(row))

        if question_type == constants.CALCULATE_XFORM:
            calculation = row.get(constants.BIND, {}).get(
                constants.CALCULATE_XFORM)
            if not calculation:
                raise PyXFormError(
                    rowFormatString % row_number + " Missing calculation.")

        # Check if the question is actually a setting specified
        # on the survey sheet
        settings_type = aliases.settings_header.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        # Try to parse question as a end control statement
        # (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and constants.TYPE in parse_dict:
                control_type = aliases.control[parse_dict[constants.TYPE]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        rowFormatString % row_number +
                        " Unmatched end statement. Previous control type: " +
                        str(prev_control_type) +
                        ", Control type: " + str(control_type))
                stack.pop()
                table_list = None
                continue

        # Make sure the row has a valid name
        if constants.NAME not in row:
            if row[constants.TYPE] == 'note':
                # autogenerate names for notes without them
                row[constants.NAME] = "generated_note_name_" + str(row_number)
            # elif 'group' in row[constants.TYPE].lower():
            #     # autogenerate names for groups without them
            #     row['name'] = "generated_group_name_" + str(row_number)
            else:
                raise PyXFormError(rowFormatString % row_number +
                                   " Question or group with no name.")
        question_name = unicode(row[constants.NAME])
        if not is_valid_xml_tag(question_name):
            error_message = rowFormatString % row_number
            error_message += " Invalid question name [" + question_name + "]"
            error_message += "Names must begin with a letter, colon,"\
                             + " or underscore."
            error_message += "Subsequent characters can include numbers,"\
                             + " dashes, and periods."
            raise PyXFormError(error_message)

        if constants.LABEL not in row and \
           row.get(constants.MEDIA) is None and \
           question_type not in aliases.label_optional_types:
            # TODO: Should there be a default label?
            #       Not sure if we should throw warnings for groups...
            #       Warnings can be ignored so I'm not too concerned
            #       about false positives.
            warnings.append(
                rowFormatString % row_number +
                " Question has no label: " + str(row))

        # Try to parse question as begin control statement
        # (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and constants.TYPE in parse_dict:
                # Create a new json dict with children, and the proper type,
                # and add it to parent_children_array in place of a question.
                # parent_children_array will then be set to its children array
                # (so following questions are nested under it)
                # until an end command is encountered.
                control_type = aliases.control[parse_dict[constants.TYPE]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                # Value comparison: string identity is an implementation
                # detail and must not decide whether this is a loop.
                if control_type == constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError(
                            rowFormatString % row_number +
                            " Repeat loop without list name.")
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError(
                            rowFormatString % row_number +
                            " List name not in columns sheet: " + list_name)
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Generate a new node for the jr:count column so
                # xpath expressions can be used.
                repeat_count_expression = new_json_dict.get(
                    constants.CONTROL, {}).get('jr:count')
                if repeat_count_expression:
                    generated_node_name = \
                        new_json_dict[constants.NAME] + "_count"
                    parent_children_array.append({
                        constants.NAME: generated_node_name,
                        constants.BIND: {
                            "readonly": "true()",
                            constants.CALCULATE_XFORM: repeat_count_expression,
                        },
                        constants.TYPE: constants.CALCULATE_XFORM,
                    })
                    new_json_dict[constants.CONTROL]['jr:count'] = \
                        "${" + generated_node_name + "}"

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                ctrl_ap = new_json_dict.get(
                    constants.CONTROL, {}).get(u"appearance")
                if ctrl_ap == constants.TABLE_LIST:
                    table_list = True
                    new_json_dict[constants.CONTROL][u"appearance"] = \
                        u"field-list"
                    # Generate a note label element so hints and labels
                    # work as expected in table-lists.
                    # see https://github.com/modilabs/pyxform/issues/62
                    if 'label' in new_json_dict or 'hint' in new_json_dict:
                        generated_label_element = {
                            constants.TYPE: "note",
                            constants.NAME:
                            "generated_table_list_label_" + str(row_number)
                        }
                        if 'label' in new_json_dict:
                            generated_label_element[constants.LABEL] = \
                                new_json_dict[constants.LABEL]
                            del new_json_dict[constants.LABEL]
                        if 'hint' in new_json_dict:
                            generated_label_element['hint'] = \
                                new_json_dict['hint']
                            del new_json_dict['hint']
                        child_list.append(generated_label_element)

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(
                        rowFormatString % row_number +
                        " Cascading select needs a name.")
                # cascading_json = get_cascading_json(
                # cascading_choices, cascading_prefix, cascading_level)
                if len(cascading_choices) <= 0 or\
                        'questions' not in cascading_choices[0]:
                    # %-formatting is used because cascading_level is None
                    # when the optional regex group did not match, and
                    # string concatenation would then raise a TypeError.
                    raise PyXFormError(
                        "Found a cascading_select %s, but could not"
                        " find %s in cascades sheet."
                        % (cascading_level, cascading_level))
                cascading_json = cascading_choices[0]['questions']
                json_dict['choices'] = choices
                include_bindings = constants.BIND in row

                def replace_prefix(d, prefix):
                    # Recursively substitute '$PREFIX$' in every string
                    # value of d (in place) and return d.
                    for k, v in d.items():
                        if isinstance(v, basestring):
                            d[k] = v.replace('$PREFIX$', prefix)
                        elif isinstance(v, dict):
                            d[k] = replace_prefix(v, prefix)
                        elif isinstance(v, list):
                            d[k] = [replace_prefix(x, prefix) for x in v]
                    return d

                for cq in cascading_json:
                    # include bindings
                    if include_bindings:
                        cq[constants.BIND] = row[constants.BIND]
                    parent_children_array.append(
                        replace_prefix(cq, cascading_prefix))
                continue  # so the row isn't put in as is

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = \
                    aliases.multiple_choice[parse_dict["select_command"]]
                if select_type == 'select one external'\
                   and 'choice_filter' not in row:
                    warnings.append(
                        rowFormatString % row_number +
                        u" select one external is only meant for"
                        u" filtered selects.")
                    select_type = \
                        aliases.multiple_choice[constants.SELECT_ONE_XLSFORM]
                list_name = parse_dict["list_name"]

                if list_name not in choices\
                   and select_type != 'select one external':
                    if not choices:
                        raise PyXFormError(
                            u"There should be a choices sheet in this xlsform."
                            u" Please ensure that the \"choices\" sheet name"
                            u" is all in lowercase.")
                    raise PyXFormError(
                        rowFormatString % row_number +
                        " List name not in choices sheet: " + list_name)

                # Validate select_multiple choice names by making sure
                # they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added "
                                "to multiple choice selects. See [" +
                                choice[constants.NAME] + "] in [" +
                                list_name + "]")

                # Stays None: the alternative implementation below that
                # would populate it is disabled.
                specify_other_question = None
                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"
#                    #With this code we no longer need to handle or_other
#                    #questions in survey builder.
#                    #However, it depends on being able to use choice filters
#                    #and xpath expressions that return empty sets.
#                    choices[list_name].append(
#                        {
#                            'name': 'other',
#                            'label': {default_language : 'Other'},
#                            'orOther': 'true',
#                        })
#                    or_other_xpath = 'isNull(orOther)'
#                    if 'choice_filter' in row:
#                        row['choice_filter'] += ' or ' + or_other_xpath
#                    else:
#                        row['choice_filter'] = or_other_xpath
#
#                    specify_other_question = \
#                        {
#                          'type':'text',
#                          'name': row['name'] + '_specify_other',
#                          'label':
#                           'Specify Other for:\n"' + row['label'] + '"',
#                          'bind' : {'relevant':
#                                   "selected(../%s, 'other')" % row['name']},
#                        }

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type

                if row.get('choice_filter'):
                    if select_type == 'select one external':
                        new_json_dict['query'] = list_name
                    else:
                        new_json_dict[constants.ITEMSET_XFORM] = list_name
                        json_dict[constants.CHOICES] = choices
                else:
                    new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                if table_list is not None:
                    # Then this row is the first select in a table list
                    if not isinstance(table_list, basestring):
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE: select_type,
                            constants.NAME:
                            "reserved_name_for_field_list_labels_" +
                            str(row_number),  # Adding row number for uniqueness
                            constants.CONTROL: {u"appearance": u"label"},
                            constants.CHOICES: choices[list_name],
                            # Do we care about filtered selects in table lists?
                            # 'itemset' : list_name,
                        }
                        parent_children_array.append(table_list_header)

                    if table_list != list_name:
                        error_message = rowFormatString % row_number
                        error_message += " Badly formatted table list,"\
                                         " list names don't match: " +\
                                         table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[constants.CONTROL] = \
                        new_json_dict.get(constants.CONTROL, {})
                    control[u"appearance"] = "list-nolabel"
                parent_children_array.append(new_json_dict)
                if specify_other_question:
                    parent_children_array.append(specify_other_question)
                continue

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        parent_children_array.append(row)

    # Anything left above the root entry is a begin without a matching end.
    if len(stack) != 1:
        raise PyXFormError("Unmatched begin statement: " + str(stack[-1][0]))

    if settings.get('flat', False):
        # print "Generating flattened instance..."
        add_flat_annotations(stack[0][1])

    meta_children = []

    if aliases.yes_no.get(settings.get("omit_instanceID")):
        if settings.get(constants.PUBLIC_KEY):
            raise PyXFormError(
                "Cannot omit instanceID, it is required for encryption.")
    else:
        # Automatically add an instanceID element:
        meta_children.append({
            constants.NAME: "instanceID",
            constants.BIND: {
                "readonly": "true()",
                constants.CALCULATE_XFORM: settings.get(
                    "instance_id", "concat('uuid:', uuid())"),
            },
            constants.TYPE: constants.CALCULATE_XFORM,
        })

    if 'instance_name' in settings:
        # Automatically add an instanceName element:
        meta_children.append({
            constants.NAME: "instanceName",
            constants.BIND: {
                constants.CALCULATE_XFORM: settings['instance_name']
            },
            constants.TYPE: constants.CALCULATE_XFORM,
        })

    if len(meta_children) > 0:
        meta_element = \
            {
                constants.NAME: constants.META_XFORM,
                constants.TYPE: constants.GROUP,
                constants.CONTROL: {
                    "bodyless": True
                },
                constants.CHILDREN: meta_children
            }
        _, survey_children_array = stack[0]
        survey_children_array.append(meta_element)

    # print_pyobj_to_json(json_dict)
    return json_dict
Exemplo n.º 5
0
def workbook_to_json(workbook_dict,
                     form_name=None,
                     default_language=u"default",
                     warnings=None):
    """
    Convert a spreadsheet-like workbook dict into the nested json form dict.

    workbook_dict -- nested dictionaries representing a spreadsheet. should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
    1. In the xform the default language is the language reverted to when there is no translation available for some itext element.
       Because of this every itext element must have a default language translation.
    2. In the workbook if media/labels/hints that do not have a language suffix will be treated as though their suffix is the default language.
       If the default language is used as a suffix for media/labels/hints, then the suffixless version will be overwritten.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the json form spec.

    raises PyXFormError when the survey sheet is missing, when a question has
    no type / no name / an invalid name, when begin and end statements do not
    match, when a referenced list name is unknown, when a select-multiple
    choice name contains a space, or when a table list mixes list names.
    """
    if warnings is None:
        #Set warnings to a list that will be discarded.
        warnings = []

    #Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    #We check for double colons to determine whether to use them or single colons to delimit grouped headers.
    #Single colons are bad because they conflict with the xform namespace syntax (i.e. jr:constraintMsg),
    #so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    #Break the spreadsheet dict into easier to access objects (settings, choices, survey_sheet):
    ########### Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []), settings_header_aliases,
        use_double_colons)
    #Only the first row of the settings sheet is used.
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}

    #A default language given on the settings sheet wins over the argument.
    default_language = settings.get(constants.DEFAULT_LANGUAGE,
                                    default_language)

    #add_none_option is a boolean that when true, indicates a none option should automatically be added to selects.
    #It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = yes_no_aliases.get(
            settings[u"add_none_option"], u"false()") == u"true()"

    #Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.DEFAULT_LANGUAGE: default_language,
        constants.CHILDREN: []
    }
    #Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    ########### Choices sheet ##########
    #Columns and "choices and columns" sheets are deprecated, but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(
        constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, list_header_aliases, use_double_colons,
        default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(columns_sheet,
                                              list_header_aliases,
                                              use_double_colons,
                                              default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(choices_sheet,
                                              list_header_aliases,
                                              use_double_colons,
                                              default_language)

    #All three sheets are merged and indexed by their list name.
    choices = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet,
        constants.LIST_NAME)

    ########### Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, {})

    ########### Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError("You must have a sheet named (case-sensitive): " +
                           constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    #Process the headers:
    survey_sheet = clean_unicode_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(survey_sheet,
                                             survey_header_aliases,
                                             use_double_colons,
                                             default_language)
    survey_sheet = dealias_types(survey_sheet)
    ##################################

    #Parse the survey sheet while generating a survey in our json format:

    row_number = 1  #We start at 1 because the column header row is not included in the survey sheet (presumably).
    #A stack is used to keep track of begin/end expressions.
    #Each entry is (control type, children list that rows are appended to).
    stack = [(None, json_dict.get(constants.CHILDREN))]
    #If a group has a table-list appearance flag this will be set to the name of the list
    table_list = None
    #True between a table-list "begin" row and the first select that follows it.
    begin_table_list = False
    #For efficiency we compile all the regular expressions that will be used to parse types:
    end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>(" +
                                   '|'.join(control_aliases.keys()) + r"))$")
    begin_control_regex = re.compile(r"^(?P<begin>begin)(\s|_)(?P<type>(" +
                                     '|'.join(control_aliases.keys()) +
                                     r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(
        r"^(?P<select_command>(" + '|'.join(select_aliases.keys()) +
        r")) (?P<list_name>\S+)( (?P<specify_other>(or specify other|or_other|or other)))?$"
    )
    cascading_regexp = re.compile(r"^(?P<cascading_command>(" +
                                  '|'.join(cascading_aliases.keys()) +
                                  r")) (?P<cascading_level>\S+)?$")
    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]

        #Disabled should probably be first so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append(
                "The 'disabled' column header is not part of the current spec. We recommend using relevant instead."
            )
            disabled = row.pop(u"disabled")
            #Values that are not a known yes/no alias are compared as they are.
            if yes_no_aliases.get(disabled, disabled) == 'true()':
                continue

        #skip empty rows
        if not row:
            continue

        #Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # if name and label are also missing, then its a comment row, and we skip it with warning
            if not ((constants.NAME in row) and (constants.LABEL in row)):
                warnings.append(
                    "Row without name, text, or label is being skipped " +
                    str(row_number) + ": " + str(row))
                continue
            raise PyXFormError("Question with no type on row " +
                               str(row_number))

        #Check if the question is actually a setting specified on the survey sheet
        settings_type = settings_header_aliases.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        #Try to parse question as a end control statement (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = control_aliases[parse_dict["type"]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        "Unmatched end statement. Previous control type: " +
                        str(prev_control_type) + ", Control type: " +
                        str(control_type))
                stack.pop()
                #NOTE(review): only table_list is cleared here; begin_table_list
                #stays set if the table-list group contained no select -- confirm
                #whether it should be reset as well.
                table_list = None
                continue

        #Make sure the question has a valid name.
        #The raw value is checked before coercion because unicode(None) is the
        #truthy string u"None", which would hide a missing name.
        question_name = row.get(constants.NAME)
        if question_name is None or not unicode(question_name):
            raise PyXFormError("Question with no name on row " +
                               str(row_number))
        question_name = unicode(question_name)
        if not is_valid_xml_tag(question_name):
            error_message = "Invalid question name [" + question_name + "] on row " + str(
                row_number) + "\n"
            error_message += "Names must begin with a letter, colon, or underscore. Subsequent characters can include numbers, dashes, and periods."
            raise PyXFormError(error_message)

        if constants.LABEL not in row and \
           row.get(constants.MEDIA) is None and \
           question_type not in label_optional_types:
            #TODO: Should there be a default label?
            #      Not sure if we should throw warnings for groups...
            #      Warnings can be ignored so I'm not too concerned about false positives.
            warnings.append("Warning unlabeled question in row " +
                            str(row_number) + ": " + str(row))

        #Try to parse question as begin control statement (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                #Create a new json dict with children, and the proper type, and add it to parent_children_array in place of a question.
                #parent_children_array will then be set to its children array (so following questions are nested under it)
                #until an end command is encountered.
                control_type = control_aliases[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                #Compare by value: identity of two equal strings is not guaranteed.
                if control_type == constants.LOOP:
                    if not parse_dict.get("list_name"):
                        #TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError("Repeat without list name " +
                                           " Error on row: " + str(row_number))
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError("List name not in columns sheet: " +
                                           list_name + " Error on row: " +
                                           str(row_number))
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                #Code to deal with table_list appearance flags (for groups of selects)
                if new_json_dict.get(
                        u"control",
                    {}).get(u"appearance") == constants.TABLE_LIST:
                    begin_table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(
                        "Cascading select needs a name. Error on row: %s" %
                        row_number)
                cascading_json = get_cascading_json(cascading_choices,
                                                    cascading_prefix,
                                                    cascading_level)

                for c in cascading_json:
                    parent_children_array.append(c)
                continue  # so the row isn't put in as is

        #Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = select_aliases[parse_dict["select_command"]]
                list_name = parse_dict["list_name"]

                if list_name not in choices:
                    raise PyXFormError("List name not in choices sheet: " +
                                       list_name + " Error on row: " +
                                       str(row_number))

                #Validate select_multiple choice names by making sure they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added to multiple choice selects. See ["
                                + choice[constants.NAME] + "] in [" +
                                list_name + "]")

                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type
                new_json_dict[constants.CHOICES] = choices[list_name]

                #Code to deal with table_list appearance flags (for groups of selects)
                if table_list or begin_table_list:
                    if begin_table_list:  #If this row is the first select in a table list
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE:
                            select_type,
                            constants.NAME:
                            "reserved_name_for_field_list_labels_" +
                            str(row_number),  #Adding row number for uniqueness
                            constants.CONTROL: {
                                u"appearance": u"label"
                            },
                            constants.CHOICES:
                            choices[list_name]
                        }
                        parent_children_array.append(table_list_header)
                        begin_table_list = False

                    #Every select of a table list must use the same choice list.
                    if table_list != list_name:
                        error_message = "Error on row: " + str(
                            row_number) + "\n"
                        error_message += "Badly formatted table list, list names don't match: " + table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = new_json_dict.get(
                        u"control", {})
                    control[u"appearance"] = "list-nolabel"

                parent_children_array.append(new_json_dict)
                continue

        #TODO: Consider adding some question_type validation here.

        #Put the row in the json dict as is:
        parent_children_array.append(row)

    #Anything left above the root entry is a begin statement that was never closed.
    if len(stack) != 1:
        raise PyXFormError("unmatched begin statement: " + str(stack[-1][0]))
    #print_pyobj_to_json(json_dict)
    return json_dict
Exemplo n.º 6
0
 def validate(self):
     """Raise an Exception when this element's name is not a valid xml tag.

     The exception carries the fixed message and the element's ``_dict``.
     Nothing is returned when the name is valid.
     """
     name_is_ok = is_valid_xml_tag(self.get_name())
     if name_is_ok:
         return
     raise Exception("Invalid xml tag.", self._dict)
Exemplo n.º 7
0
 def validate(self):
     """Raise an Exception when this element's name is not a valid xml tag.

     The exception arguments are the offending name followed by a message
     describing the naming rules. Nothing is returned for a valid name.
     """
     if is_valid_xml_tag(self.get_name()):
         return
     # The name is fetched again for the exception, exactly as before.
     explanation = (
         "The name of this survey element is an invalid xml tag. "
         "Names must begin with a letter, colon, or underscore, "
         "subsequent characters can include numbers, dashes, and periods."
     )
     raise Exception(self.get_name(), explanation)
Exemplo n.º 8
0
def workbook_to_json(workbook_dict, form_name=None, default_language=u"default", warnings=None):
    """
    workbook_dict -- nested dictionaries representing a spreadsheet. should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
    1. In the xform the default language is the language reverted to when there is no translation available for some itext element.
       Because of this every itext element must have a default language translation.
    2. In the workbook if media/labels/hints that do not have a language suffix will be treated as though their suffix is the default language.
       If the default language is used as a suffix for media/labels/hints, then the suffixless version will be overwritten.
    warnings -- an optional list which warnings will be appended to
    
    returns a nested dictionary equivalent to the format specified in the json form spec.
    """
    if warnings is None:
        # Set warnings to a list that will be discarded.
        warnings = []

    rowFormatString = "[row : %s]"

    # Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double columns to determine whether to use them or single colons to delimit grouped headers.
    # Single colons are bad because they conflict with with the xform namespace syntax (i.e. jr:constraintMsg),
    # so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects (settings, choices, survey_sheet):
    ########### Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []), settings_header_aliases, use_double_colons
    )
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}

    default_language = settings.get(constants.DEFAULT_LANGUAGE, default_language)

    # add_none_option is a boolean that when true, indicates a none option should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = yes_no_aliases.get(settings[u"add_none_option"], False)

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.DEFAULT_LANGUAGE: default_language,
        # By default the version is based on the date and time yyyymmddhh
        # Leaving default version out for now since it might cause problems for formhub.
        # constants.VERSION : datetime.datetime.now().strftime("%Y%m%d%H"),
        constants.CHILDREN: [],
    }
    # Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    ########### Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated, but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, list_header_aliases, use_double_colons, default_language
    )

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(columns_sheet, list_header_aliases, use_double_colons, default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(choices_sheet, list_header_aliases, use_double_colons, default_language)

    combined_lists = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet, constants.LIST_NAME
    )

    choices = combined_lists
    # Make sure all the options have the required properties:
    for list_name, options in choices.items():
        for option in options:
            if "name" not in option:
                info = "[list_name : " + list_name + "]"
                raise PyXFormError("On the choices sheet there is a option with no name. " + info)
            if "label" not in option:
                info = "[list_name : " + list_name + "]"
                warnings.append("On the choices sheet there is a option with no label. " + info)
    ########### Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, {})

    ########### Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError("You must have a sheet named (case-sensitive): " + constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    # Process the headers:
    clean_text_values_enabled = yes_no_aliases.get(settings.get("clean_text_values", "true()"))
    if clean_text_values_enabled:
        survey_sheet = clean_text_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(survey_sheet, survey_header_aliases, use_double_colons, default_language)
    survey_sheet = dealias_types(survey_sheet)
    ##################################

    # Parse the survey sheet while generating a survey in our json format:

    row_number = 1  # We start at 1 because the column header row is not included in the survey sheet (presumably).
    # A stack is used to keep track of begin/end expressions
    stack = [(None, json_dict.get(constants.CHILDREN))]
    # If a group has a table-list appearance flag this will be set to the name of the list
    table_list = None
    # For efficiency we compile all the regular expressions that will be used to parse types:
    end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>(" + "|".join(control_aliases.keys()) + r"))$")
    begin_control_regex = re.compile(
        r"^(?P<begin>begin)(\s|_)(?P<type>(" + "|".join(control_aliases.keys()) + r"))( (over )?(?P<list_name>\S+))?$"
    )
    select_regexp = re.compile(
        r"^(?P<select_command>("
        + "|".join(select_aliases.keys())
        + r")) (?P<list_name>\S+)( (?P<specify_other>(or specify other|or_other|or other)))?$"
    )
    cascading_regexp = re.compile(
        r"^(?P<cascading_command>(" + "|".join(cascading_aliases.keys()) + r")) (?P<cascading_level>\S+)?$"
    )
    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]

        # Disabled should probably be first so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append(
                rowFormatString % row_number
                + " The 'disabled' column header is not part of the current spec."
                + "We recommend using relevant instead."
            )
            disabled = row.pop(u"disabled")
            if yes_no_aliases.get(disabled):
                continue

        # skip empty rows
        if len(row) == 0:
            continue

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # if name and label are also missing, then its a comment row, and we skip it with warning
            if not ((constants.NAME in row) and (constants.LABEL in row)):
                warnings.append(
                    rowFormatString % row_number + " Row without name, text, or label is being skipped:\n" + str(row)
                )
                continue
            raise PyXFormError(rowFormatString % row_number + " Question with no type.")
            continue

        # Check if the question is actually a setting specified on the survey sheet
        settings_type = settings_header_aliases.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        # Try to parse question as a end control statement (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = control_aliases[parse_dict["type"]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        rowFormatString % row_number
                        + " Unmatched end statement. Previous control type: "
                        + str(prev_control_type)
                        + ", Control type: "
                        + str(control_type)
                    )
                stack.pop()
                table_list = None
                continue

        # Make sure the row has a valid name
        if not constants.NAME in row:
            # TODO: It could be slick if had nameless groups generate a flat model
            #      with only a body element.
            if row["type"] == "note":
                # autogenerate names for notes without them
                row["name"] = "generated_note_name_" + str(row_number)
            else:
                raise PyXFormError(rowFormatString % row_number + " Question or group with no name.")
        question_name = unicode(row[constants.NAME])
        if not is_valid_xml_tag(question_name):
            error_message = rowFormatString % row_number
            error_message += " Invalid question name [" + question_name + "]"
            error_message += "Names must begin with a letter, colon, or underscore."
            error_message += "Subsequent characters can include numbers, dashes, and periods."
            raise PyXFormError(error_message)

        if (
            constants.LABEL not in row
            and row.get(constants.MEDIA) is None
            and question_type not in label_optional_types
        ):
            # TODO: Should there be a default label?
            #      Not sure if we should throw warnings for groups...
            #      Warnings can be ignored so I'm not too concerned about false positives.
            warnings.append(rowFormatString % row_number + " Question has no label: " + str(row))

        # Try to parse question as begin control statement (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                # Create a new json dict with children, and the proper type, and add it to parent_children_array in place of a question.
                # parent_children_array will then be set to its children array (so following questions are nested under it)
                # until an end command is encountered.
                control_type = control_aliases[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                if control_type is constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError(rowFormatString % row_number + " Repeat loop without list name.")
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError(
                            rowFormatString % row_number + " List name not in columns sheet: " + list_name
                        )
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Code to deal with table_list appearance flags (for groups of selects)
                if new_json_dict.get(u"control", {}).get(u"appearance") == constants.TABLE_LIST:
                    table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"
                    # Generate a note label element so hints and labels
                    # work as expected in table-lists.
                    # see https://github.com/modilabs/pyxform/issues/62
                    if "label" in new_json_dict or "hint" in new_json_dict:
                        generated_label_element = {
                            "type": "note",
                            "name": "generated_table_list_label_" + str(row_number),
                        }
                        if "label" in new_json_dict:
                            generated_label_element[constants.LABEL] = new_json_dict[constants.LABEL]
                            del new_json_dict[constants.LABEL]
                        if "hint" in new_json_dict:
                            generated_label_element["hint"] = new_json_dict["hint"]
                            del new_json_dict["hint"]
                        child_list.append(generated_label_element)

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(rowFormatString % row_number + " Cascading select needs a name.")
                cascading_json = get_cascading_json(cascading_choices, cascading_prefix, cascading_level)

                for c in cascading_json:
                    parent_children_array.append(c)
                continue  # so the row isn't put in as is

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = select_aliases[parse_dict["select_command"]]
                list_name = parse_dict["list_name"]

                if list_name not in choices:
                    raise PyXFormError(rowFormatString % row_number + " List name not in choices sheet: " + list_name)

                # Validate select_multiple choice names by making sure they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY:
                    for choice in choices[list_name]:
                        if " " in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added to multiple choice selects. See ["
                                + choice[constants.NAME]
                                + "] in ["
                                + list_name
                                + "]"
                            )

                specify_other_question = None
                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"
                #                    #With this code we no longer need to handle or_other questions in survey builder.
                #                    #However, it depends on being able to use choice filters and xpath expressions that return empty sets.
                #                    choices[list_name].append(
                #                        {
                #                            'name': 'other',
                #                            'label': {default_language : 'Other'},
                #                            'orOther': 'true',
                #                        })
                #                    or_other_xpath = 'isNull(orOther)'
                #                    if 'choice_filter' in row:
                #                        row['choice_filter'] += ' or ' + or_other_xpath
                #                    else:
                #                        row['choice_filter'] = or_other_xpath
                #
                #                    specify_other_question = \
                #                        {
                #                          'type':'text',
                #                          'name': row['name'] + '_specify_other',
                #                          'label':'Specify Other for:\n"' + row['label'] + '"',
                #                          'bind' : {'relevant': "selected(../%s, 'other')" % row['name']},
                #                        }

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type
                new_json_dict["itemset"] = list_name
                if row.get("choice_filter"):
                    json_dict["choices"] = choices
                else:
                    new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags (for groups of selects)
                if table_list is not None:
                    if not isinstance(table_list, basestring):  # Then this row is the first select in a table list
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE: select_type,
                            constants.NAME: "reserved_name_for_field_list_labels_"
                            + str(row_number),  # Adding row number for uniqueness
                            constants.CONTROL: {u"appearance": u"label"},
                            constants.CHOICES: choices[list_name],
                            # Do we care about filtered selects in table lists?
                            #'itemset' : list_name,
                        }
                        parent_children_array.append(table_list_header)

                    if table_list <> list_name:
                        error_message = rowFormatString % row_number
                        error_message += (
                            " Badly formatted table list, list names don't match: " + table_list + " vs. " + list_name
                        )
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"

                parent_children_array.append(new_json_dict)
                if specify_other_question:
                    parent_children_array.append(specify_other_question)
                continue

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        parent_children_array.append(row)

    if len(stack) != 1:
        raise PyXFormError("Unmatched begin statement: " + str(stack[-1][0]))

    # Automatically add an instanceID element:
    if yes_no_aliases.get(settings.get("omit_instanceID")):
        if settings.get("public_key"):
            raise PyXFormError("Cannot omit instanceID, it is required for encryption.")
    else:
        meta_element = {
            "name": "meta",
            "type": "group",
            "control": {"bodyless": True},
            "children": [
                {
                    "name": "instanceID",
                    "bind": {"readonly": "true()", "calculate": "concat('uuid:', uuid())"},
                    "type": "calculate",
                }
            ],
        }
        noop, survey_children_array = stack[0]
        survey_children_array.append(meta_element)

    # print_pyobj_to_json(json_dict)

    return json_dict
Exemplo n.º 9
0
def workbook_to_json(workbook_dict, form_name=None, default_language=u"default", warnings=None):
    """
    Convert a spreadsheet (as nested dictionaries) into the nested json form dictionary.

    workbook_dict -- nested dictionaries representing a spreadsheet. should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
    1. In the xform the default language is the language reverted to when there is no translation available for some itext element.
       Because of this every itext element must have a default language translation.
    2. In the workbook if media/labels/hints that do not have a language suffix will be treated as though their suffix is the default language.
       If the default language is used as a suffix for media/labels/hints, then the suffixless version will be overwritten.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the json form spec.

    raises PyXFormError when the survey sheet is missing, when a row has a name and label but no type,
    when a question has no name or an invalid name, when begin/end control statements do not match,
    when a referenced list name is not defined, when a select multiple choice name contains a space,
    or when the selects inside a table list use different lists.
    """
    if warnings is None:
        # Set warnings to a list that will be discarded.
        warnings = []

    # Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double colons to determine whether to use them or single colons to delimit grouped headers.
    # Single colons are bad because they conflict with the xform namespace syntax (i.e. jr:constraintMsg),
    # so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects (settings, choices, survey_sheet):
    ########### Settings sheet ##########
    settings_sheet = dealias_and_group_headers(workbook_dict.get(constants.SETTINGS, []), settings_header_aliases, use_double_colons)
    # Only the first row of the settings sheet is used.
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}

    # A default language given in the settings sheet takes precedence over the argument.
    default_language = settings.get(constants.DEFAULT_LANGUAGE, default_language)

    # add_none_option is a boolean that when true, indicates a none option should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = yes_no_aliases.get(settings[u"add_none_option"], u"false()") == u"true()"

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.DEFAULT_LANGUAGE: default_language,
        constants.CHILDREN: [],
    }
    # Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    ########### Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated, but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(choices_and_columns_sheet, list_header_aliases, use_double_colons, default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(columns_sheet, list_header_aliases, use_double_colons, default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(choices_sheet, list_header_aliases, use_double_colons, default_language)

    combined_lists = group_dictionaries_by_key(choices_and_columns_sheet + choices_sheet + columns_sheet, constants.LIST_NAME)

    choices = combined_lists

    ########### Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, {})

    ########### Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError("You must have a sheet named (case-sensitive): " + constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    # Process the headers:
    survey_sheet = clean_unicode_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(survey_sheet, survey_header_aliases, use_double_colons, default_language)
    survey_sheet = dealias_types(survey_sheet)
    ##################################

    # Parse the survey sheet while generating a survey in our json format:

    row_number = 1  # We start at 1 because the column header row is not included in the survey sheet (presumably).
    # A stack is used to keep track of begin/end expressions.
    # Each entry is a (control type, children list) pair; the bottom entry is the survey root.
    stack = [(None, json_dict.get(constants.CHILDREN))]
    # If a group has a table-list appearance flag this will be set to the name of the list
    table_list = None
    # True between a table-list group's begin statement and the first select that follows it.
    begin_table_list = False
    # For efficiency we compile all the regular expressions that will be used to parse types:
    end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>("
                                   + '|'.join(control_aliases.keys()) + r"))$")
    begin_control_regex = re.compile(r"^(?P<begin>begin)(\s|_)(?P<type>("
                                     + '|'.join(control_aliases.keys())
                                     + r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(r"^(?P<select_command>("
                               + '|'.join(select_aliases.keys())
                               + r")) (?P<list_name>\S+)( (?P<specify_other>(or specify other|or_other|or other)))?$")
    cascading_regexp = re.compile(r"^(?P<cascading_command>("
                                  + '|'.join(cascading_aliases.keys())
                                  + r")) (?P<cascading_level>\S+)?$")
    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]

        # Disabled should probably be first so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append("The 'disabled' column header is not part of the current spec. We recommend using relevant instead.")
            disabled = row.pop(u"disabled")
            if disabled in yes_no_aliases:
                disabled = yes_no_aliases[disabled]
            if disabled == 'true()':
                continue

        # Skip empty rows
        if not row:
            continue

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # A typeless row that lacks a name or a label is treated as a comment row and skipped with a warning.
            if not ((constants.NAME in row) and (constants.LABEL in row)):
                warnings.append("Row wihtout name, text, or label is being skipped " + str(row_number) + ": " + str(row))
                continue
            raise PyXFormError("Question with no type on row " + str(row_number))

        # Check if the question is actually a setting specified on the survey sheet
        settings_type = settings_header_aliases.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        # Try to parse question as an end control statement (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = control_aliases[parse_dict["type"]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError("Unmatched end statement. Previous control type: " + str(prev_control_type) + ", Control type: " + str(control_type))
                stack.pop()
                table_list = None
                continue

        # Make sure the question has a valid name.
        # The raw value is checked before converting to unicode: unicode(None) is the
        # truthy string u"None", which would let a nameless row slip past this check.
        raw_name = row.get(constants.NAME)
        if raw_name is None or raw_name == u"":
            raise PyXFormError("Question with no name on row " + str(row_number))
        question_name = unicode(raw_name)
        if not is_valid_xml_tag(question_name):
            error_message = "Invalid question name [" + question_name + "] on row " + str(row_number) + "\n"
            error_message += "Names must begin with a letter, colon, or underscore. Subsequent characters can include numbers, dashes, and periods."
            raise PyXFormError(error_message)

        if constants.LABEL not in row and \
           row.get(constants.MEDIA) is None and \
           question_type not in label_optional_types:
            # TODO: Should there be a default label?
            #       Not sure if we should throw warnings for groups...
            #       Warnings can be ignored so I'm not too concerned about false positives.
            warnings.append("Warning unlabeled question in row " + str(row_number) + ": " + str(row))

        # Try to parse question as begin control statement (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                # Create a new json dict with children, and the proper type, and add it to parent_children_array in place of a question.
                # parent_children_array will then be set to its children array (so following questions are nested under it)
                # until an end command is encountered.
                control_type = control_aliases[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                # Compare by value: identity of two equal strings is not guaranteed.
                if control_type == constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError("Repeat without list name " + " Error on row: " + str(row_number))
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError("List name not in columns sheet: " + list_name + " Error on row: " + str(row_number))
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Code to deal with table_list appearance flags (for groups of selects)
                if new_json_dict.get(u"control", {}).get(u"appearance") == constants.TABLE_LIST:
                    begin_table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # Try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError("Cascading select needs a name. Error on row: %s" % row_number)
                cascading_json = get_cascading_json(cascading_choices, cascading_prefix, cascading_level)

                # The generated questions replace the row, so the row isn't put in as is.
                parent_children_array.extend(cascading_json)
                continue

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = select_aliases[parse_dict["select_command"]]
                list_name = parse_dict["list_name"]

                if list_name not in choices:
                    raise PyXFormError("List name not in choices sheet: " + list_name + " Error on row: " + str(row_number))

                # Validate select_multiple choice names by making sure they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError("Choice names with spaces cannot be added to multiple choice selects. See [" + choice[constants.NAME] + "] in [" + list_name + "]")

                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type
                new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags (for groups of selects)
                if table_list or begin_table_list:
                    if begin_table_list:  # If this row is the first select in a table list
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE: select_type,
                            constants.NAME: "reserved_name_for_field_list_labels_" + str(row_number),  # Adding row number for uniqueness
                            constants.CONTROL: {u"appearance": u"label"},
                            constants.CHOICES: choices[list_name],
                        }
                        parent_children_array.append(table_list_header)
                        begin_table_list = False

                    # Every select in a table list must use the list of the first select.
                    if table_list != list_name:
                        error_message = "Error on row: " + str(row_number) + "\n"
                        error_message += "Badly formatted table list, list names don't match: " + table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"

                parent_children_array.append(new_json_dict)
                continue

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        parent_children_array.append(row)

    # Anything left above the root entry is a begin statement that was never closed.
    if len(stack) != 1:
        raise PyXFormError("unmatched begin statement: " + str(stack[-1][0]))
    # print_pyobj_to_json(json_dict)
    return json_dict