Exemplo n.º 1
0
 def xls_value_to_unicode(value, value_type):
     """
     Convert a raw xls cell value into its unicode string representation.

     value -- the cell value as read from the sheet
     value_type -- the xlrd cell type constant (xlrd.XL_CELL_*) of the cell

     Booleans become u"TRUE"/u"FALSE", whole numbers are rendered without
     a fractional part, dates and times go through the datetime module,
     and anything else is coerced to unicode with non-breaking spaces
     replaced by regular ones.

     Note: `workbook` is not a parameter; the date branch reads
     workbook.datemode from the surrounding scope.
     """
     if value_type == xlrd.XL_CELL_BOOLEAN:
         return u"TRUE" if value else u"FALSE"
     elif value_type == xlrd.XL_CELL_NUMBER:
         # Show whole numbers as ints (e.g. 3 instead of 3.0).
         int_value = int(value)
         if int_value == value:
             return unicode(int_value)
         else:
             return unicode(value)
     elif value_type == xlrd.XL_CELL_DATE:
         # Compare by value, like the branches above; `is` only works when
         # both sides happen to be the very same object.
         datetime_or_time_only = xlrd.xldate_as_tuple(
             value, workbook.datemode)
         if datetime_or_time_only[:3] == (0, 0, 0):
             # Year, month and day are all zero: the cell holds a time only.
             return unicode(datetime.time(*datetime_or_time_only[3:]))
         return unicode(datetime.datetime(*datetime_or_time_only))
     else:
         # ensure unicode and replace nbsp spaces with normal ones
         # to avoid this issue:
         # https://github.com/modilabs/pyxform/issues/83
         return unicode(value).replace(unichr(160), ' ')
Exemplo n.º 2
0
 def xls_value_to_unicode(value, value_type):
     """
     Take a xls formatted value and make a unicode string representation.

     value -- the cell value as read from the sheet
     value_type -- the xlrd cell type constant (xlrd.XL_CELL_*) of the cell

     Returns u"TRUE"/u"FALSE" for boolean cells, an integer rendering for
     whole numbers, a datetime/time rendering for date cells, and the
     unicode form of the value (with non-breaking spaces replaced by
     regular spaces) for every other cell type.

     Note: the date branch reads workbook.datemode; `workbook` is taken
     from the surrounding scope, it is not an argument of this function.
     """
     if value_type == xlrd.XL_CELL_BOOLEAN:
         return u"TRUE" if value else u"FALSE"
     elif value_type == xlrd.XL_CELL_NUMBER:
         # Try to display as an int if possible.
         int_value = int(value)
         if int_value == value:
             return unicode(int_value)
         else:
             return unicode(value)
     elif value_type == xlrd.XL_CELL_DATE:
         # Equality, not identity: consistent with the other branches and
         # independent of whether the two constants are the same object.
         datetime_or_time_only = xlrd.xldate_as_tuple(
             value, workbook.datemode)
         if datetime_or_time_only[:3] == (0, 0, 0):
             # No date part at all, so this must be time only.
             return unicode(datetime.time(*datetime_or_time_only[3:]))
         return unicode(datetime.datetime(*datetime_or_time_only))
     else:
         # ensure unicode and replace nbsp spaces with normal ones
         # to avoid this issue:
         # https://github.com/modilabs/pyxform/issues/83
         return unicode(value).replace(unichr(160), ' ')
 def parse(self, xml_str):
     """
     Parse xml_str and store its DOM, its nested dict form and its
     flattened dict form on this instance, then call _set_attributes().
     """
     # Trim the ends and remove whitespace-only gaps between adjacent tags.
     trimmed = xml_str.strip()
     compact = re.sub(unicode(r">\s+<"), unicode("><"), trimmed)

     self._xml_obj = minidom.parseString(compact)
     self._root_node = self._xml_obj.documentElement
     self._dict = _xml_node_to_dict(self._root_node)

     # Flat keys are "/"-joined paths without their first component.
     flat = self._flat_dict = {}
     for path, value in _flatten_dict(self._dict, []):
         flat_key = u"/".join(path[1:])
         flat[flat_key] = value

     self._set_attributes()
Exemplo n.º 4
0
def csv_to_dict(path_or_file):
    """
    Read a csv "workbook" into an ordered dict keyed by sheet name.

    path_or_file -- a filesystem path (opened here in binary mode) or an
                    already open file object

    The first column of a row carries the sheet name; a non-empty first
    column starts a sheet section and resets the headers. The first row
    with content inside a section is its header row, stored under
    u"<sheet name>_header" via _list_to_dict_list. Every later row with
    content is appended to the sheet's list as an OrderedDict mapping
    header to stripped cell value, skipping empty cells.

    The file object is closed before returning, including when reading
    fails; this also applies to a file object passed in by the caller.
    """
    if isinstance(path_or_file, basestring):
        csv_data = open(path_or_file, 'rb')
    else:
        csv_data = path_or_file

    _dict = OrderedDict()

    def first_column_as_sheet_name(row):
        """Split row into (sheet name or None, content cells or None)."""
        if len(row) == 0:
            return None, None
        elif len(row) == 1:
            return row[0], None
        else:
            s_or_c = row[0]
            content = row[1:]
            if s_or_c == '':
                s_or_c = None
            if not any(content):
                # content is a list of empty strings
                content = None
            return s_or_c, content

    try:
        # NOTE(review): the `encoding` keyword is not accepted by the stdlib
        # csv.reader; presumably `csv` is unicodecsv here -- confirm against
        # the imports of this file.
        reader = csv.reader(csv_data, encoding='utf-8')
        sheet_name = None
        current_headers = None
        for row in reader:
            survey_or_choices, content = first_column_as_sheet_name(row)
            if survey_or_choices is not None:
                sheet_name = survey_or_choices
                if sheet_name not in _dict:
                    _dict[unicode(sheet_name)] = []
                current_headers = None
            if content is not None:
                if current_headers is None:
                    current_headers = content
                    _dict[u"%s_header" % sheet_name] = \
                        _list_to_dict_list(current_headers)
                else:
                    _d = OrderedDict()
                    for key, val in zip(current_headers, content):
                        if val != "":
                            # Values are stripped because csvs often have
                            # spaces following commas.
                            _d[unicode(key)] = unicode(val.strip())
                    _dict[sheet_name].append(_d)
    finally:
        # Release the handle even when reading or parsing raises.
        csv_data.close()
    return _dict
Exemplo n.º 5
0
def csv_to_dict(path_or_file):
    """
    Convert a csv file into an ordered dict of sheets.

    path_or_file -- path of the csv file (it is opened here in binary
                    mode) or an already open file object

    A row whose first cell is non-empty starts the section of the sheet
    named by that cell. Within a section, the first row that has content
    provides the headers (stored under u"<sheet name>_header" through
    _list_to_dict_list); each following row with content becomes an
    OrderedDict of header -> stripped value, empty cells left out, that
    is appended to the list stored under the sheet name.

    The file object (including one supplied by the caller) is always
    closed, whether or not reading succeeds.
    """
    if isinstance(path_or_file, basestring):
        csv_data = open(path_or_file, 'rb')
    else:
        csv_data = path_or_file

    _dict = OrderedDict()

    def first_column_as_sheet_name(row):
        """Return (sheet name or None, remaining cells or None) for row."""
        if len(row) == 0:
            return None, None
        elif len(row) == 1:
            return row[0], None
        else:
            s_or_c = row[0]
            content = row[1:]
            if s_or_c == '':
                s_or_c = None
            if not any(content):
                # every remaining cell is an empty string
                content = None
            return s_or_c, content

    try:
        # NOTE(review): `encoding` is not a keyword of the stdlib
        # csv.reader; presumably `csv` refers to unicodecsv -- confirm
        # against the imports of this file.
        reader = csv.reader(csv_data, encoding='utf-8')
        sheet_name = None
        current_headers = None
        for row in reader:
            survey_or_choices, content = first_column_as_sheet_name(row)
            if survey_or_choices is not None:
                sheet_name = survey_or_choices
                if sheet_name not in _dict:
                    _dict[unicode(sheet_name)] = []
                current_headers = None
            if content is not None:
                if current_headers is None:
                    current_headers = content
                    _dict[u"%s_header" % sheet_name] = \
                        _list_to_dict_list(current_headers)
                else:
                    _d = OrderedDict()
                    for key, val in zip(current_headers, content):
                        if val != "":
                            # Strip values: csvs often carry spaces after
                            # the separating commas.
                            _d[unicode(key)] = unicode(val.strip())
                    _dict[sheet_name].append(_d)
    finally:
        # Do not leak the handle when a row cannot be read or processed.
        csv_data.close()
    return _dict
def _flatten_dict(d, prefix):
    """
    Return a list of XPath, value pairs.
    """
    assert type(d) == dict
    assert type(prefix) == list

    for key, value in d.items():
        new_prefix = prefix + [key]
        if type(value) == dict:
            for pair in _flatten_dict(value, new_prefix):
                yield pair
        elif type(value) == list:
            for i, item in enumerate(value):
                item_prefix = list(new_prefix)  # make a copy
                # note on indexing xpaths: IE5 and later has
                # implemented that [0] should be the first node, but
                # according to the W3C standard it should have been
                # [1]. I'm adding 1 to i to start at 1.
                item_prefix[-1] += u"[%s]" % unicode(i + 1)
                if type(item) == dict:
                    for pair in _flatten_dict(item, item_prefix):
                        yield pair
                else:
                    yield (item_prefix, item)
        else:
            yield (new_prefix, value)
Exemplo n.º 7
0
    def _generate_static_instances(self):
        """
        Generates <instance> elements for static data
        (e.g. choices for select type questions)

        Yields one "instance" node per entry of self.choices, with the
        list name as its id. Each choice becomes an "item" node holding an
        "itextId" child followed by one child per string-valued property
        of the choice other than 'label'.
        """
        for list_name, choice_list in self.choices.items():
            instance_element_list = []
            for idx, choice in enumerate(choice_list):
                # Give the choice element a unique id in case there is
                # itext that references it.
                itext_id = '-'.join(['static_instance', list_name, str(idx)])
                choice_element_list = [node("itextId", itext_id)]

                for choicePropertyName, choicePropertyValue in choice.items():
                    if isinstance(choicePropertyValue, basestring) \
                            and choicePropertyName != 'label':
                        choice_element_list.append(
                            node(choicePropertyName,
                                 unicode(choicePropertyValue)))
                instance_element_list.append(node("item",
                                                  *choice_element_list))
            yield node("instance",
                       node("root", *instance_element_list),
                       id=list_name)
Exemplo n.º 8
0
    def insert_xpaths(self, text):
        """
        Return text, coerced to unicode, with every ${var} reference
        substituted by self._var_repl_function.
        """
        # Non-greedy group: each ${...} reference is matched on its own.
        reference = re.compile(r"\$\{(.*?)\}")
        return reference.sub(self._var_repl_function, unicode(text))
Exemplo n.º 9
0
 def __init__(self, path_or_file):
     """
     Load the workbook at path_or_file and derive the default id, name,
     print name and title from its file name.

     path_or_file -- a filesystem path, or a file object that exposes its
                     path through a `name` attribute
     """
     # Duck-type on `.name` instead of testing for the exact builtin
     # `file` type, so file subclasses and other named file objects are
     # reduced to their path as well. Anything without `.name` is treated
     # as being a path already.
     path = getattr(path_or_file, "name", path_or_file)
     self._dict = parse_file_to_workbook_dict(path)
     self._path = path
     self._id = unicode(get_filename(path))
     self._name = self._print_name = self._title = self._id
Exemplo n.º 10
0
 def xml_instance(self):
     """
     Build the node that represents this element in the XML instance.

     The element's u'instance' entries become the node attributes, each
     value passed through the root survey's insert_xpaths. A truthy
     u"default" entry is added to the node as unicode content.
     """
     survey = self.get_root()
     raw_attributes = dict(self.get(u'instance', {}))
     attributes = dict(
         (key, survey.insert_xpaths(value))
         for key, value in raw_attributes.items()
     )

     default = self.get(u"default")
     if not default:
         return node(self.name, **attributes)
     return node(self.name, unicode(default), **attributes)
Exemplo n.º 11
0
 def __init__(self, path_or_file):
     """
     Load the workbook at path_or_file and use its file name as the
     default id, name, print name and title.

     path_or_file -- a path, or an object whose `name` attribute holds
                     the path
     """
     # Objects without a `name` attribute are taken to be the path itself.
     path = getattr(path_or_file, "name", path_or_file)
     self._dict = parse_file_to_workbook_dict(path)
     self._path = path
     identifier = unicode(get_filename(path))
     self._id = identifier
     for attr in ("_name", "_print_name", "_title"):
         setattr(self, attr, identifier)
Exemplo n.º 12
0
 def xml_instance(self):
     """
     Return this element's node for the XML instance.

     Attributes come from the u'instance' entry, with each value run
     through insert_xpaths of the root survey; when u"default" is truthy
     its unicode form is passed to node() as content.
     """
     survey = self.get_root()

     attributes = {}
     for key, value in dict(self.get(u'instance', {})).items():
         attributes[key] = survey.insert_xpaths(value)

     content = []
     default = self.get(u"default")
     if default:
         content.append(unicode(default))
     return node(self.name, *content, **attributes)
Exemplo n.º 13
0
def parse_file_to_json(path, default_name=None, default_language=u"default",
                       warnings=None, file_object=None):
    """
    Parse the workbook at path and convert it with workbook_to_json.

    path -- location of the workbook; also the source of the form name
            when default_name is not given
    default_name -- form name; defaults to unicode(get_filename(path))
    default_language -- forwarded to workbook_to_json
    warnings -- optional list, forwarded to workbook_to_json; a new list
                is used when omitted
    file_object -- forwarded to parse_file_to_workbook_dict
    """
    warnings = [] if warnings is None else warnings
    workbook_dict = parse_file_to_workbook_dict(path, file_object)
    form_name = default_name
    if form_name is None:
        form_name = unicode(get_filename(path))
    return workbook_to_json(
        workbook_dict, form_name, default_language, warnings)
Exemplo n.º 14
0
def detail(handler, note):
    """
    Render the markdown note `note` and send it as an HTML page.

    handler -- object providing send_response(code) and
               simple_response(headers, body)
    note -- path of the note file, relative to config.note_dir

    Responds with 404 when the note does not exist or when `note` resolves
    to a location outside config.note_dir.
    """
    note_root = os.path.abspath(config.note_dir)
    local_path = os.path.abspath(os.path.join(note_root, note))
    # `note` presumably comes straight from the request, so refuse
    # absolute paths and ".." segments that escape the notes directory.
    # Joining with "" yields the root with exactly one trailing separator.
    inside_root = local_path.startswith(os.path.join(note_root, ""))
    if not inside_root or not os.path.exists(local_path):
        handler.send_response(404)
        return
    with open(local_path) as f:
        md = utils.unicode(f.read())
    content = markdown.markdown(md, ['toc'])
    html = template_engine.render('detail.html', {'content': content})
    headers = {
        'Content-Type': 'text/html;charset=UTF-8',
        # NOTE(review): if html is a unicode string, len() counts
        # characters while Content-Length must be the encoded byte count;
        # confirm what render() returns and how simple_response encodes.
        'Content-Length': len(html),
    }
    handler.simple_response(headers, html)
Exemplo n.º 15
0
def parse_file_to_json(path,
                       default_name=None,
                       default_language=u"default",
                       warnings=None,
                       file_object=None):
    """
    Thin wrapper around workbook_to_json.

    The workbook dict is obtained from parse_file_to_workbook_dict(path,
    file_object). When default_name is None the form name is taken from
    get_filename(path); when warnings is None a new list is passed on.
    """
    collected_warnings = warnings
    if collected_warnings is None:
        collected_warnings = []

    workbook_dict = parse_file_to_workbook_dict(path, file_object)

    name = (unicode(get_filename(path))
            if default_name is None else default_name)
    return workbook_to_json(workbook_dict, name, default_language,
                            collected_warnings)
Exemplo n.º 16
0
    def insert_output_values(self, text):
        """
        Replace the ${variables} in text using
        self._var_repl_output_function.

        Returns a (text, changed) pair. When the XML-escaped text contains
        no '{' the original text is returned unchanged with False;
        otherwise the escaped and substituted text is returned together
        with a boolean telling whether any substitution took place.
        """
        # Escaping used to be switched off completely for labels that use
        # variable replacement: `${name} < 3` caused an error while `< 3`
        # did not. The workaround is to XML-escape the text first and to
        # substitute the variables afterwards.
        text_node = PatchedText()
        text_node.data = text
        xml_text = text_node.toxml()
        escaped = unicode(xml_text)

        # Only hand back the escaped text when there is a reason to
        # replace: `<` is already `&lt;` at this point and would otherwise
        # end up translated once more to `&amp;lt;`.
        if '{' not in escaped:
            return text, False

        output_reference = r"\$\{(.*?)\}"
        result = re.sub(output_reference, self._var_repl_output_function,
                        escaped)
        return result, result != xml_text
Exemplo n.º 17
0
    def xml_instance(self):
        """
        Return the Section instance node with this object's attributes set.

        The entries of self.attribute are applied first, then u"id" (always)
        and u"xmlns" / u"version" (only when truthy).
        """
        instance_node = Section.xml_instance(self)
        set_attribute = instance_node.setAttribute

        # Custom attributes go first so that they cannot overwrite the id
        # and version set below.
        for name, value in self.attribute.items():
            set_attribute(unicode(name), value)

        set_attribute(u"id", self.id_string)

        # The xmlns attribute of the instance node is optional.
        xmlns = self.instance_xmlns
        if xmlns:
            set_attribute(u"xmlns", xmlns)

        version = self.version
        if version:
            set_attribute(u"version", version)

        return instance_node
Exemplo n.º 18
0
    def writerow(self, row):
        """
        Write row to the target stream.

        Truthy cells are converted to unicode and encoded as UTF-8 (with
        'replace' error handling); falsy cells are passed to the csv writer
        untouched. The row is first written to the internal queue, then
        re-encoded with self.encoder and written to self.stream.
        """
        # If this breaks, experiment with the workaround code in the
        # utils.unicode module.
        encoded_row = [
            unicode(col).encode('utf-8', 'replace') if col else col
            for col in row
        ]
        self.writer.writerow(encoded_row)

        # Take the UTF-8 output off the queue and decode it ...
        utf8_output = self.queue.getvalue()
        decoded = utf8_output.decode("utf-8")
        # ... re-encode it into the target encoding and pass it on.
        self.stream.write(self.encoder.encode(decoded))
        # Empty the queue for the next row.
        self.queue.truncate(0)
Exemplo n.º 19
0
 def writerow(self, row):
     """
     Encode row, write it through the csv writer into the queue, and
     forward the queued output to the stream in the target encoding.

     Cells that are falsy are written as they are; all other cells are
     converted to unicode and encoded as UTF-8 with 'replace'.
     """
     # If this breaks, experiment with the workaround code in the
     # utils.unicode module.
     encoded_row = []
     for cell in row:
         encoded_row.append(
             unicode(cell).encode('utf-8', 'replace') if cell else cell)
     self.writer.writerow(encoded_row)

     # The queue now holds the row as UTF-8: decode it, re-encode it with
     # the target encoder and write the result to the stream.
     queued = self.queue.getvalue().decode("utf-8")
     reencoded = self.encoder.encode(queued)
     self.stream.write(reencoded)

     # Reset the queue so the next row starts from an empty buffer.
     self.queue.truncate(0)
Exemplo n.º 20
0
 def next(self):
     """
     Return the next row of self.reader as a list with every cell decoded
     from UTF-8 to unicode.
     """
     decoded = []
     for cell in self.reader.next():
         decoded.append(unicode(cell, "utf-8"))
     return decoded
Exemplo n.º 21
0
 def get_abbreviated_xpath(self):
     """
     Return the names of this element's lineage joined with "/", leaving
     out the first entry of the lineage.

     When the lineage has fewer than two entries, the name of its first
     entry is returned as is.
     """
     lineage = self.get_lineage()
     if len(lineage) < 2:
         return lineage[0].name
     below_first = lineage[1:]
     return u"/".join(unicode(n.name) for n in below_first)
Exemplo n.º 22
0
 def __repr__(self):
     """Return the unicode form of this object as its representation."""
     # NOTE(review): Python 2 expects __repr__ to return a str; a unicode
     # result with non-ASCII characters may fail inside repr() -- confirm.
     as_unicode = unicode(self)
     return as_unicode
Exemplo n.º 23
0
def workbook_to_json(workbook_dict,
                     form_name=None,
                     default_language=u"default",
                     warnings=None):
    """
    workbook_dict -- nested dictionaries representing a spreadsheet.
                    should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
    1. In the xform the default language is the language reverted to when
       there is no translation available for some itext element. Because
       of this every itext element must have a default language translation.
    2. In the workbook if media/labels/hints that do not have a
       language suffix will be treated as though their suffix is the
       default language.
       If the default language is used as a suffix for media/labels/hints,
       then the suffixless version will be overwritten.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the
    json form spec.
    """
    # ensure required headers are present
    if warnings is None:
        warnings = []
    is_valid = False
    for row in workbook_dict.get('survey', []):
        is_valid = 'type' in row
        if is_valid:
            break
    if not is_valid:
        raise PyXFormError(
            u"The survey sheet is either empty or missing important "
            u"column headers.")

    row_format_string = '[row : %s]'

    # Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double columns to determine whether to use them
    # or single colons to delimit grouped headers.
    # Single colons are bad because they conflict with with the xform namespace
    # syntax (i.e. jr:constraintMsg),
    # so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects
    # (settings, choices, survey_sheet):
    # ########## Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []), aliases.settings_header,
        use_double_colons)
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}

    default_language = settings.get(constants.DEFAULT_LANGUAGE,
                                    default_language)

    # add_none_option is a boolean that when true,
    # indicates a none option should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = aliases.yes_no.get(
            settings[u"add_none_option"], False)

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    sms_keyword = settings.get(constants.SMS_KEYWORD, id_string)
    json_dict = {
        constants.TYPE:
        constants.SURVEY,
        constants.NAME:
        form_name,
        constants.TITLE:
        id_string,
        constants.ID_STRING:
        id_string,
        constants.SMS_KEYWORD:
        sms_keyword,
        constants.DEFAULT_LANGUAGE:
        default_language,
        # By default the version is based on the date and time yyyymmddhh
        # Leaving default version out for now since it might cause
        # problems for formhub.
        # constants.VERSION : datetime.datetime.now().strftime("%Y%m%d%H"),
        constants.CHILDREN: []
    }
    # Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    # ########## Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated,
    # but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(
        constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, aliases.list_header, use_double_colons,
        default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(columns_sheet,
                                              aliases.list_header,
                                              use_double_colons,
                                              default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(choices_sheet,
                                              aliases.list_header,
                                              use_double_colons,
                                              default_language)
    # ########## Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, [])
    if len(cascading_choices):
        if 'choices' in cascading_choices[0]:
            choices_sheet = choices_sheet + cascading_choices[0]['choices']

    combined_lists = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet,
        constants.LIST_NAME)

    choices = combined_lists
    # Make sure all the options have the required properties:
    warnedabout = set()
    for list_name, options in choices.items():
        for option in options:
            if 'name' not in option:
                info = "[list_name : " + list_name + ']'
                raise PyXFormError("On the choices sheet there is "
                                   "a option with no name. " + info)
            if 'label' not in option:
                info = "[list_name : " + list_name + ']'
                warnings.append(
                    "On the choices sheet there is a option with no label. " +
                    info)
            # chrislrobert's fix for a cryptic error message:
            # see: https://code.google.com/p/opendatakit/issues/detail?id=832&start=200 # noqa
            option_keys = list(option.keys())
            for headername in option_keys:
                # Using warnings and removing the bad columns
                # instead of throwing errors because some forms
                # use choices column headers for notes.
                if ' ' in headername:
                    if headername not in warnedabout:
                        warnedabout.add(headername)
                        warnings.append("On the choices sheet there is " +
                                        "a column (\"" + headername +
                                        "\") with an illegal header. " +
                                        "Headers cannot include spaces.")
                    del option[headername]
                elif headername == '':
                    warnings.append("On the choices sheet there is a value" +
                                    " in a column with no header.")
                    del option[headername]
    # ########## Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError("You must have a sheet named (case-sensitive): " +
                           constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    # Process the headers:
    clean_text_values_enabled = aliases.yes_no.get(
        settings.get("clean_text_values", "true()"))
    if clean_text_values_enabled:
        survey_sheet = clean_text_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(survey_sheet,
                                             aliases.survey_header,
                                             use_double_colons,
                                             default_language)
    survey_sheet = dealias_types(survey_sheet)

    osm_sheet = workbook_dict.get(constants.OSM, [])
    osm_tags = group_dictionaries_by_key(osm_sheet, constants.LIST_NAME)
    # #################################

    # Parse the survey sheet while generating a survey in our json format:

    row_number = 1  # We start at 1 because the column header row is not
    #                 included in the survey sheet (presumably).
    # A stack is used to keep track of begin/end expressions
    stack = [(None, json_dict.get(constants.CHILDREN))]
    # If a group has a table-list appearance flag
    # this will be set to the name of the list
    table_list = None
    # For efficiency we compile all the regular expressions
    # that will be used to parse types:
    end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>(" +
                                   '|'.join(aliases.control.keys()) + r"))$")
    begin_control_regex = re.compile(r"^(?P<begin>begin)(\s|_)(?P<type>(" +
                                     '|'.join(aliases.control.keys()) +
                                     r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(
        r"^(?P<select_command>(" + '|'.join(aliases.select.keys()) +
        r")) (?P<list_name>\S+)" +
        "( (?P<specify_other>(or specify other|or_other|or other)))?$")
    cascading_regexp = re.compile(r"^(?P<cascading_command>(" +
                                  '|'.join(aliases.cascading.keys()) +
                                  r")) (?P<cascading_level>\S+)?$")
    osm_regexp = re.compile(r"(?P<osm_command>(" +
                            '|'.join(aliases.osm.keys()) +
                            ')) (?P<list_name>\S+)')

    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]
        # Disabled should probably be first
        # so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append(
                row_format_string % row_number +
                " The 'disabled' column header is not part of the current" +
                " spec. We recommend using relevant instead.")
            disabled = row.pop(u"disabled")
            if aliases.yes_no.get(disabled):
                continue

        # skip empty rows
        if len(row) == 0:
            continue

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # if name and label are also missing,
            # then its a comment row, and we skip it with warning
            if not ((constants.NAME in row) or (constants.LABEL in row)):
                warnings.append(
                    row_format_string % row_number +
                    " Row without name, text, or label is being skipped:\n" +
                    str(row))
                continue
            raise PyXFormError(row_format_string % row_number +
                               " Question with no type.\n" + str(row))

        if question_type == 'calculate':
            calculation = row.get('bind', {}).get('calculate')
            if not calculation:
                raise PyXFormError(row_format_string % row_number +
                                   " Missing calculation.")

        # Check if the question is actually a setting specified
        # on the survey sheet
        settings_type = aliases.settings_header.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        # Try to parse question as a end control statement
        # (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = aliases.control[parse_dict["type"]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        row_format_string % row_number +
                        " Unmatched end statement. Previous control type: " +
                        str(prev_control_type) + ", Control type: " +
                        str(control_type))
                stack.pop()
                table_list = None
                continue

        # Make sure the row has a valid name
        if constants.NAME not in row:
            if row['type'] == 'note':
                # autogenerate names for notes without them
                row['name'] = "generated_note_name_" + str(row_number)
            # elif 'group' in row['type'].lower():
            #     # autogenerate names for groups without them
            #     row['name'] = "generated_group_name_" + str(row_number)
            else:
                raise PyXFormError(row_format_string % row_number +
                                   " Question or group with no name.")
        question_name = unicode(row[constants.NAME])
        if not is_valid_xml_tag(question_name):
            error_message = row_format_string % row_number
            error_message += " Invalid question name [" + \
                             question_name.encode('utf-8') + "] "
            error_message += "Names must begin with a letter, colon,"\
                             + " or underscore."
            error_message += "Subsequent characters can include numbers," \
                             + " dashes, and periods."
            raise PyXFormError(error_message)

        if constants.LABEL not in row and \
           row.get(constants.MEDIA) is None and \
           question_type not in aliases.label_optional_types:
            # TODO: Should there be a default label?
            #      Not sure if we should throw warnings for groups...
            #      Warnings can be ignored so I'm not too concerned
            #      about false positives.
            warnings.append(row_format_string % row_number +
                            " Question has no label: " + str(row))

        # Try to parse question as begin control statement
        # (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                # Create a new json dict with children, and the proper type,
                # and add it to parent_children_array in place of a question.
                # parent_children_array will then be set to its children array
                # (so following questions are nested under it)
                # until an end command is encountered.
                control_type = aliases.control[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                if control_type is constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError(row_format_string % row_number +
                                           " Repeat loop without list name.")
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError(
                            row_format_string % row_number +
                            " List name not in columns sheet: " + list_name)
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Generate a new node for the jr:count column so
                # xpath expressions can be used.
                repeat_count_expression = new_json_dict.get('control',
                                                            {}).get('jr:count')
                if repeat_count_expression:
                    generated_node_name = new_json_dict['name'] + "_count"
                    parent_children_array.append({
                        "name": generated_node_name,
                        "bind": {
                            "readonly": "true()",
                            "calculate": repeat_count_expression,
                        },
                        "type": "calculate",
                    })
                    new_json_dict['control']['jr:count'] = \
                        "${" + generated_node_name + "}"

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                ctrl_ap = new_json_dict.get(u"control", {}).get(u"appearance")
                if ctrl_ap == constants.TABLE_LIST:
                    table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"
                    # Generate a note label element so hints and labels
                    # work as expected in table-lists.
                    # see https://github.com/modilabs/pyxform/issues/62
                    if 'label' in new_json_dict or 'hint' in new_json_dict:
                        generated_label_element = {
                            "type": "note",
                            "name":
                            "generated_table_list_label_" + str(row_number)
                        }
                        if 'label' in new_json_dict:
                            generated_label_element[constants.LABEL] = \
                                new_json_dict[constants.LABEL]
                            del new_json_dict[constants.LABEL]
                        if 'hint' in new_json_dict:
                            generated_label_element['hint'] = \
                                new_json_dict['hint']
                            del new_json_dict['hint']
                        child_list.append(generated_label_element)
                if 'intent' in new_json_dict:
                    new_json_dict['control'] = \
                        new_json_dict.get(u"control", {})
                    new_json_dict['control']['intent'] = \
                        new_json_dict['intent']

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(row_format_string % row_number +
                                       " Cascading select needs a name.")
                # cascading_json = get_cascading_json(
                # cascading_choices, cascading_prefix, cascading_level)
                if len(cascading_choices) <= 0 or \
                   'questions' not in cascading_choices[0]:
                    raise PyXFormError("Found a cascading_select " +
                                       cascading_level +
                                       ", but could not find " +
                                       cascading_level + "in cascades sheet.")
                cascading_json = cascading_choices[0]['questions']
                json_dict['choices'] = choices
                include_bindings = False
                if 'bind' in row:
                    include_bindings = True
                for cq in cascading_json:
                    # include bindings
                    if include_bindings:
                        cq['bind'] = row['bind']

                    def replace_prefix(d, prefix):
                        for k, v in d.items():
                            if isinstance(v, basestring):
                                d[k] = v.replace('$PREFIX$', prefix)
                            elif isinstance(v, dict):
                                d[k] = replace_prefix(v, prefix)
                            elif isinstance(v, list):
                                d[k] = map(lambda x: replace_prefix(x, prefix),
                                           v)
                        return d

                    parent_children_array.append(
                        replace_prefix(cq, cascading_prefix))
                continue  # so the row isn't put in as is

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = aliases.select[parse_dict["select_command"]]
                if select_type == 'select one external' \
                        and 'choice_filter' not in row:
                    warnings.append(row_format_string % row_number +
                                    u" select one external is only meant for"
                                    u" filtered selects.")
                    select_type = aliases.select['select_one']
                list_name = parse_dict["list_name"]
                list_file_name, file_extension = os.path.splitext(list_name)

                if list_name not in choices \
                        and select_type != 'select one external' \
                        and file_extension not in ['.csv', '.xml']:
                    if not choices:
                        raise PyXFormError(
                            u"There should be a choices sheet in this xlsform."
                            u" Please ensure that the choices sheet name is "
                            u"all in small caps and has columns 'list name', "
                            u"'name', and 'label' (or aliased column names).")
                    raise PyXFormError(row_format_string % row_number +
                                       " List name not in choices sheet: " +
                                       list_name)

                # Validate select_multiple choice names by making sure
                # they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY \
                        and file_extension not in ['.csv', '.xml']:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added "
                                "to multiple choice selects. See [" +
                                choice[constants.NAME] + "] in [" + list_name +
                                "]")

                specify_other_question = None
                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"
                    # With this code we no longer need to handle or_other
                    # questions in survey builder.
                    # However, it depends on being able to use choice filters
                    # and xpath expressions that return empty sets.
                    # choices[list_name].append(
                    # {
                    #     'name': 'other',
                    #     'label': {default_language : 'Other'},
                    #     'orOther': 'true',
                    # })
                    # or_other_xpath = 'isNull(orOther)'
                    # if 'choice_filter' in row:
                    #   row['choice_filter'] += ' or ' + or_other_xpath
                    # else:
                    #   row['choice_filter'] = or_other_xpath

                    # specify_other_question = \
                    # {
                    #       'type':'text',
                    #       'name': row['name'] + '_specify_other',
                    #       'label':
                    #        'Specify Other for:\n"' + row['label'] + '"',
                    #       'bind' : {'relevant':
                    #                "selected(../%s, 'other')" % row['name']},
                    #     }

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type

                if row.get('choice_filter'):
                    if select_type == 'select one external':
                        new_json_dict['query'] = list_name
                    else:
                        new_json_dict['itemset'] = list_name
                        json_dict['choices'] = choices
                elif file_extension in ['.csv', '.xml']:
                    new_json_dict['itemset'] = list_name
                else:
                    new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                if table_list is not None:
                    # Then this row is the first select in a table list
                    if not isinstance(table_list, basestring):
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE:
                            select_type,
                            constants.NAME:
                            "reserved_name_for_field_list_labels_" +
                            str(row_number),
                            # Adding row number for uniqueness # noqa
                            constants.CONTROL: {
                                u"appearance": u"label"
                            },
                            constants.CHOICES:
                            choices[list_name],
                            # Do we care about filtered selects in table lists?
                            # 'itemset' : list_name,
                        }
                        parent_children_array.append(table_list_header)

                    if table_list != list_name:
                        error_message = row_format_string % row_number
                        error_message += " Badly formatted table list," \
                                         " list names don't match: " + \
                                         table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = \
                        new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"
                parent_children_array.append(new_json_dict)
                if specify_other_question:
                    parent_children_array.append(specify_other_question)
                continue

        # Try to parse question as osm:
        osm_parse = osm_regexp.search(question_type)
        if osm_parse:
            parse_dict = osm_parse.groupdict()
            new_dict = row.copy()
            new_dict['type'] = constants.OSM

            if parse_dict.get('list_name') is not None:
                tags = osm_tags.get(parse_dict.get('list_name'))
                for tag in tags:
                    if osm_tags.get(tag.get('name')):
                        tag['choices'] = osm_tags.get(tag.get('name'))
                new_dict['tags'] = tags

            parent_children_array.append(new_dict)

            continue

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        parent_children_array.append(row)

    if len(stack) != 1:
        raise PyXFormError("Unmatched begin statement: " + str(stack[-1][0]))

    if settings.get('flat', False):
        # print "Generating flattened instance..."
        add_flat_annotations(stack[0][1])

    meta_children = []

    if aliases.yes_no.get(settings.get("omit_instanceID")):
        if settings.get("public_key"):
            raise PyXFormError(
                "Cannot omit instanceID, it is required for encryption.")
    else:
        # Automatically add an instanceID element:
        meta_children.append({
            "name": "instanceID",
            "bind": {
                "readonly":
                "true()",
                "calculate":
                settings.get("instance_id", "concat('uuid:', uuid())"),
            },
            "type": "calculate",
        })

    if 'instance_name' in settings:
        # Automatically add an instanceName element:
        meta_children.append({
            "name": "instanceName",
            "bind": {
                "calculate": settings['instance_name']
            },
            "type": "calculate",
        })

    if len(meta_children) > 0:
        meta_element = \
            {
                "name": "meta",
                "type": "group",
                "control": {
                    "bodyless": True
                },
                "children": meta_children
            }
        noop, survey_children_array = stack[0]
        survey_children_array.append(meta_element)

    # print_pyobj_to_json(json_dict)
    return json_dict
Exemplo n.º 24
0
 def next(self):
     """
     Fetch the next row from the wrapped reader and return it as a list
     of unicode strings, decoding every cell as UTF-8.

     Whatever ``self.reader.next()`` raises (e.g. when it is exhausted)
     propagates to the caller unchanged.
     """
     decoded_cells = []
     for cell in self.reader.next():
         decoded_cells.append(unicode(cell, "utf-8"))
     return decoded_cells
Exemplo n.º 25
0
def workbook_to_json(
        workbook_dict, form_name=None,
        default_language=u"default", warnings=None):
    """
    workbook_dict -- nested dictionaries representing a spreadsheet.
                    should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
    1. In the xform the default language is the language reverted to when
       there is no translation available for some itext element. Because
       of this every itext element must have a default language translation.
    2. In the workbook if media/labels/hints that do not have a
       language suffix will be treated as though their suffix is the
       default language.
       If the default language is used as a suffix for media/labels/hints,
       then the suffixless version will be overwritten.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the
    json form spec.
    """
    # ensure required headers are present
    if warnings is None:
        warnings = []
    is_valid = False
    workbook_dict = {x.lower(): y for x,y in workbook_dict.items()}
    for row in workbook_dict.get(constants.SURVEY, []):
        is_valid = 'type' in [z.lower() for z in row]
        if is_valid:
            break
    if not is_valid:
        raise PyXFormError(
            u"The survey sheet is either empty or missing important "
            u"column headers.")

    row_format_string = '[row : %s]'

    # Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double colons to determine whether to use them
    # or single colons to delimit grouped headers.
    # Single colons are bad because they conflict with the xform namespace
    # syntax (e.g. jr:constraintMsg),
    # so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects
    # (settings, choices, survey_sheet):
    # ########## Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []),
        aliases.settings_header, use_double_colons)
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}
    replace_smart_quotes_in_dict(settings)

    default_language = settings.get(
        constants.DEFAULT_LANGUAGE, default_language)

    # add_none_option is a boolean that when true,
    # indicates a none option should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = aliases.yes_no.get(
            settings[u"add_none_option"], False)

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    sms_keyword = settings.get(constants.SMS_KEYWORD, id_string)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.SMS_KEYWORD: sms_keyword,
        constants.DEFAULT_LANGUAGE: default_language,
        # By default the version is based on the date and time yyyymmddhh
        # Leaving default version out for now since it might cause
        # problems for formhub.
        # constants.VERSION : datetime.datetime.now().strftime("%Y%m%d%H"),
        constants.CHILDREN: []
    }
    # Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    # ########## Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated,
    # but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(
        constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, aliases.list_header,
        use_double_colons, default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(
        columns_sheet, aliases.list_header,
        use_double_colons, default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    for choice_item in choices_sheet:
        replace_smart_quotes_in_dict(choice_item)

    choices_sheet = dealias_and_group_headers(
        choices_sheet, aliases.list_header, use_double_colons,
        default_language)
    # ########## Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, [])
    if len(cascading_choices):
        if 'choices' in cascading_choices[0]:
            choices_sheet = choices_sheet + cascading_choices[0]['choices']

    combined_lists = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet,
        constants.LIST_NAME)

    choices = combined_lists
    # Make sure all the options have the required properties:
    warnedabout = set()
    for list_name, options in choices.items():
        for option in options:
            if 'name' not in option:
                info = "[list_name : " + list_name + ']'
                raise PyXFormError("On the choices sheet there is "
                                   "a option with no name. " + info)
            if 'label' not in option:
                info = "[list_name : " + list_name + ']'
                warnings.append(
                    "On the choices sheet there is a option with no label. " +
                    info)
            # chrislrobert's fix for a cryptic error message:
            # see: https://code.google.com/p/opendatakit/issues/detail?id=832&start=200 # noqa
            option_keys = list(option.keys())
            for headername in option_keys:
                # Using warnings and removing the bad columns
                # instead of throwing errors because some forms
                # use choices column headers for notes.
                if ' ' in headername:
                    if headername not in warnedabout:
                        warnedabout.add(headername)
                        warnings.append("On the choices sheet there is " +
                                        "a column (\"" +
                                        headername +
                                        "\") with an illegal header. " +
                                        "Headers cannot include spaces.")
                    del option[headername]
                elif headername == '':
                    warnings.append("On the choices sheet there is a value" +
                                    " in a column with no header.")
                    del option[headername]
    # ########## Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError(
            "You must have a sheet named (case-sensitive): " +
            constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    # Process the headers:
    clean_text_values_enabled = aliases.yes_no.get(
        settings.get("clean_text_values", "true()"))
    if clean_text_values_enabled:
        survey_sheet = clean_text_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(
        survey_sheet, aliases.survey_header,
        use_double_colons, default_language)
    survey_sheet = dealias_types(survey_sheet)

    osm_sheet = dealias_and_group_headers(workbook_dict.get(constants.OSM, []),
                                              aliases.list_header,
                                              True)
    osm_tags = group_dictionaries_by_key(osm_sheet, constants.LIST_NAME)
    # #################################

    # Parse the survey sheet while generating a survey in our json format:

    row_number = 1  # We start at 1 because the column header row is not
    #                 included in the survey sheet (presumably).
    # A stack is used to keep track of begin/end expressions
    stack = [(None, json_dict.get(constants.CHILDREN))]
    # If a group has a table-list appearance flag
    # this will be set to the name of the list
    table_list = None
    # For efficiency we compile all the regular expressions
    # that will be used to parse types:
    end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>(" +
                                   '|'.join(aliases.control.keys()) + r"))$")
    begin_control_regex = re.compile(r"^(?P<begin>begin)(\s|_)(?P<type>(" +
                                     '|'.join(aliases.control.keys()) +
                                     r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(
        r"^(?P<select_command>(" + '|'.join(aliases.select.keys()) +
        r")) (?P<list_name>\S+)" +
        "( (?P<specify_other>(or specify other|or_other|or other)))?$")
    cascading_regexp = re.compile(
        r"^(?P<cascading_command>(" +
        '|'.join(aliases.cascading.keys()) +
        r")) (?P<cascading_level>\S+)?$")
    osm_regexp = re.compile(
        r"(?P<osm_command>(" + '|'.join(aliases.osm.keys()) +
        ')) (?P<list_name>\S+)')

    # Rows from the survey sheet that should be nested in meta
    survey_meta = []

    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]
        # Disabled should probably be first
        # so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append(
                row_format_string % row_number +
                " The 'disabled' column header is not part of the current" +
                " spec. We recommend using relevant instead.")
            disabled = row.pop(u"disabled")
            if aliases.yes_no.get(disabled):
                continue

        # skip empty rows
        if len(row) == 0:
            continue

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # If name and label are also missing,
            # then it's a comment row, and we skip it with a warning.
            if not ((constants.NAME in row) or (constants.LABEL in row)):
                warnings.append(
                    row_format_string % row_number +
                    " Row without name, text, or label is being skipped:\n" +
                    str(row))
                continue
            raise PyXFormError(
                row_format_string % row_number +
                " Question with no type.\n" + str(row))

        # Pull out questions that will go in meta block
        if question_type == 'audit':
            # Force audit name to always be "audit" to follow XForms spec
            if 'name' in row and row['name'] not in [None, '', 'audit']:
                raise PyXFormError(row_format_string % row_number +
                    " Audits must always be named 'audit.'" +
                    " The name column should be left blank.")

            row['name'] = 'audit'
            survey_meta.append(row)
            continue

        if question_type == 'calculate':
            calculation = row.get('bind', {}).get('calculate')
            if not calculation:
                raise PyXFormError(
                    row_format_string % row_number + " Missing calculation.")

        # Check if the question is actually a setting specified
        # on the survey sheet
        settings_type = aliases.settings_header.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        # Try to parse question as a end control statement
        # (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = aliases.control[parse_dict["type"]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        row_format_string % row_number +
                        " Unmatched end statement. Previous control type: " +
                        str(prev_control_type) +
                        ", Control type: " + str(control_type))
                stack.pop()
                table_list = None
                continue

        # Make sure the row has a valid name
        if constants.NAME not in row:
            if row['type'] == 'note':
                # autogenerate names for notes without them
                row['name'] = "generated_note_name_" + str(row_number)
            # elif 'group' in row['type'].lower():
            #     # autogenerate names for groups without them
            #     row['name'] = "generated_group_name_" + str(row_number)
            else:
                raise PyXFormError(row_format_string % row_number +
                                   " Question or group with no name.")
        question_name = unicode(row[constants.NAME])
        if not is_valid_xml_tag(question_name):
            error_message = row_format_string % row_number
            error_message += " Invalid question name [" + \
                             question_name.encode('utf-8') + "] "
            error_message += "Names must begin with a letter, colon,"\
                             + " or underscore."
            error_message += "Subsequent characters can include numbers," \
                             + " dashes, and periods."
            raise PyXFormError(error_message)

        if constants.LABEL not in row and \
           row.get(constants.MEDIA) is None and \
           question_type not in aliases.label_optional_types:
            # TODO: Should there be a default label?
            #      Not sure if we should throw warnings for groups...
            #      Warnings can be ignored so I'm not too concerned
            #      about false positives.
            warnings.append(
                row_format_string % row_number +
                " Question has no label: " + str(row))

        # Try to parse question as begin control statement
        # (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                # Create a new json dict with children, and the proper type,
                # and add it to parent_children_array in place of a question.
                # parent_children_array will then be set to its children array
                # (so following questions are nested under it)
                # until an end command is encountered.
                control_type = aliases.control[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                if control_type is constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError(
                            row_format_string % row_number +
                            " Repeat loop without list name.")
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError(
                            row_format_string % row_number +
                            " List name not in columns sheet: " + list_name)
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Generate a new node for the jr:count column so
                # xpath expressions can be used.
                repeat_count_expression = new_json_dict.get(
                    'control', {}).get('jr:count')
                if repeat_count_expression:
                    generated_node_name = new_json_dict['name'] + "_count"
                    parent_children_array.append({
                        "name": generated_node_name,
                        "bind": {
                            "readonly": "true()",
                            "calculate": repeat_count_expression,
                        },
                        "type": "calculate",
                    })
                    new_json_dict['control']['jr:count'] = \
                        "${" + generated_node_name + "}"

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                ctrl_ap = new_json_dict.get(u"control", {}).get(u"appearance")
                if ctrl_ap == constants.TABLE_LIST:
                    table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"
                    # Generate a note label element so hints and labels
                    # work as expected in table-lists.
                    # see https://github.com/modilabs/pyxform/issues/62
                    if 'label' in new_json_dict or 'hint' in new_json_dict:
                        generated_label_element = {
                            "type": "note",
                            "name":
                                "generated_table_list_label_" + str(row_number)
                        }
                        if 'label' in new_json_dict:
                            generated_label_element[constants.LABEL] = \
                                new_json_dict[constants.LABEL]
                            del new_json_dict[constants.LABEL]
                        if 'hint' in new_json_dict:
                            generated_label_element['hint'] = \
                                new_json_dict['hint']
                            del new_json_dict['hint']
                        child_list.append(generated_label_element)
                if 'intent' in new_json_dict:
                    new_json_dict['control'] = \
                        new_json_dict.get(u"control", {})
                    new_json_dict['control']['intent'] = \
                        new_json_dict['intent']

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(
                        row_format_string % row_number +
                        " Cascading select needs a name.")
                # cascading_json = get_cascading_json(
                # cascading_choices, cascading_prefix, cascading_level)
                if len(cascading_choices) <= 0 or \
                   'questions' not in cascading_choices[0]:
                    raise PyXFormError(
                        "Found a cascading_select " + cascading_level +
                        ", but could not find " + cascading_level +
                        "in cascades sheet.")
                cascading_json = cascading_choices[0]['questions']
                json_dict['choices'] = choices
                include_bindings = False
                if 'bind' in row:
                    include_bindings = True
                for cq in cascading_json:
                    # include bindings
                    if include_bindings:
                        cq['bind'] = row['bind']

                    def replace_prefix(d, prefix):
                        for k, v in d.items():
                            if isinstance(v, basestring):
                                d[k] = v.replace('$PREFIX$', prefix)
                            elif isinstance(v, dict):
                                d[k] = replace_prefix(v, prefix)
                            elif isinstance(v, list):
                                d[k] = map(
                                    lambda x: replace_prefix(x, prefix), v)
                        return d

                    parent_children_array.append(
                        replace_prefix(cq, cascading_prefix))
                continue  # so the row isn't put in as is

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = aliases.select[parse_dict["select_command"]]
                if select_type == 'select one external' \
                        and 'choice_filter' not in row:
                    warnings.append(
                        row_format_string % row_number +
                        u" select one external is only meant for"
                        u" filtered selects.")
                    select_type = aliases.select['select_one']
                list_name = parse_dict["list_name"]
                list_file_name, file_extension = os.path.splitext(list_name)

                if list_name not in choices \
                        and select_type != 'select one external' \
                        and file_extension not in ['.csv', '.xml']:
                    if not choices:
                        raise PyXFormError(
                            u"There should be a choices sheet in this xlsform."
                            u" Please ensure that the choices sheet name is "
                            u"all in small caps and has columns 'list name', "
                            u"'name', and 'label' (or aliased column names).")
                    raise PyXFormError(
                        row_format_string % row_number +
                        " List name not in choices sheet: " + list_name)

                # Validate select_multiple choice names by making sure
                # they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY \
                        and file_extension not in ['.csv', '.xml']:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added "
                                "to multiple choice selects. See [" +
                                choice[constants.NAME] + "] in [" +
                                list_name + "]")

                specify_other_question = None
                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"
                    # With this code we no longer need to handle or_other
                    # questions in survey builder.
                    # However, it depends on being able to use choice filters
                    # and xpath expressions that return empty sets.
                    # choices[list_name].append(
                    # {
                    #     'name': 'other',
                    #     'label': {default_language : 'Other'},
                    #     'orOther': 'true',
                    # })
                    # or_other_xpath = 'isNull(orOther)'
                    # if 'choice_filter' in row:
                    #   row['choice_filter'] += ' or ' + or_other_xpath
                    # else:
                    #   row['choice_filter'] = or_other_xpath

                    # specify_other_question = \
                    # {
                    #       'type':'text',
                    #       'name': row['name'] + '_specify_other',
                    #       'label':
                    #        'Specify Other for:\n"' + row['label'] + '"',
                    #       'bind' : {'relevant':
                    #                "selected(../%s, 'other')" % row['name']},
                    #     }

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type

                if row.get('choice_filter'):
                    if select_type == 'select one external':
                        new_json_dict['query'] = list_name
                    else:
                        new_json_dict['itemset'] = list_name
                        json_dict['choices'] = choices
                elif file_extension in ['.csv', '.xml']:
                    new_json_dict['itemset'] = list_name
                else:
                    new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                if table_list is not None:
                    # Then this row is the first select in a table list
                    if not isinstance(table_list, basestring):
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE: select_type,
                            constants.NAME:
                                "reserved_name_for_field_list_labels_" +
                                str(row_number),
                            # Adding row number for uniqueness # noqa
                            constants.CONTROL: {u"appearance": u"label"},
                            constants.CHOICES: choices[list_name],
                            # Do we care about filtered selects in table lists?
                            # 'itemset' : list_name,
                        }
                        parent_children_array.append(table_list_header)

                    if table_list != list_name:
                        error_message = row_format_string % row_number
                        error_message += " Badly formatted table list," \
                                         " list names don't match: " + \
                                         table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = \
                        new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"
                parent_children_array.append(new_json_dict)
                if specify_other_question:
                    parent_children_array.append(specify_other_question)
                continue

        # Try to parse question as osm:
        osm_parse = osm_regexp.search(question_type)
        if osm_parse:
            parse_dict = osm_parse.groupdict()
            new_dict = row.copy()
            new_dict['type'] = constants.OSM

            if parse_dict.get('list_name') is not None:
                tags = osm_tags.get(parse_dict.get('list_name'))
                for tag in tags:
                    if osm_tags.get(tag.get('name')):
                        tag['choices'] = osm_tags.get(tag.get('name'))
                new_dict['tags'] = tags

            parent_children_array.append(new_dict)

            continue

        # range question_type
        if question_type == 'range':
            new_dict = process_range_question_type(row)
            parent_children_array.append(new_dict)
            continue

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        parent_children_array.append(row)

    if len(stack) != 1:
        raise PyXFormError("Unmatched begin statement: " + str(stack[-1][0]))

    if settings.get('flat', False):
        # print "Generating flattened instance..."
        add_flat_annotations(stack[0][1])

    meta_children = [] + survey_meta

    if aliases.yes_no.get(settings.get("omit_instanceID")):
        if settings.get("public_key"):
            raise PyXFormError(
                "Cannot omit instanceID, it is required for encryption.")
    else:
        # Automatically add an instanceID element:
        meta_children.append({
            "name": "instanceID",
            "bind": {
                "readonly": "true()",
                "calculate": settings.get(
                    "instance_id", "concat('uuid:', uuid())"),
            },
            "type": "calculate",
        })

    if 'instance_name' in settings:
        # Automatically add an instanceName element:
        meta_children.append({
            "name": "instanceName",
            "bind": {
                "calculate": settings['instance_name']
            },
            "type": "calculate",
        })

    if len(meta_children) > 0:
        meta_element = \
            {
                "name": "meta",
                "type": "group",
                "control": {
                    "bodyless": True
                },
                "children": meta_children
            }
        noop, survey_children_array = stack[0]
        survey_children_array.append(meta_element)

    # print_pyobj_to_json(json_dict)
    return json_dict
Exemplo n.º 26
0
 def get_abbreviated_xpath(self):
     """
     Build a slash-separated path from the names in this element's
     lineage, leaving out the first lineage entry.

     When the lineage holds fewer than two entries, the name of the
     first entry is returned as is (without any unicode conversion).
     """
     ancestry = self.get_lineage()
     if len(ancestry) < 2:
         # Nothing to join: hand back the lone entry's name untouched.
         return ancestry[0].name

     # Skip the first entry (presumably the root element -- confirm
     # against get_lineage) and join the remaining names.
     segments = []
     for node in ancestry[1:]:
         segments.append(unicode(node.name))
     return u"/".join(segments)