示例#1
0
    def test_xl_date_ambigous_with_exception(self):
        """Parsing the fixture workbook must raise PyXFormError.

        The error text is expected to name the sheet, the column and the row
        of the invalid date.
        """
        workbook_path = os.path.join(
            DIR, "bug_example_xls", "xl_date_ambiguous_v1.xlsx"
        )
        expected_message = (
            "The xls file provided has an invalid date on the"
            " survey sheet, under the default column on row number 5"
        )

        with self.assertRaises(PyXFormError) as raised:
            xls_to_dict(workbook_path)

        self.assertEqual(expected_message, str(raised.exception))
示例#2
0
    def test_xl_date_ambigous_with_exception(self):
        """The invalid-date fixture fails to parse with a precise message."""
        fixture_dir = os.path.join(DIR, "bug_example_xls")
        xlsx_path = os.path.join(fixture_dir, "xl_date_ambiguous_v1.xlsx")

        # The parser is expected to refuse the workbook outright.
        with self.assertRaises(PyXFormError) as caught:
            xls_to_dict(xlsx_path)

        self.assertEqual(
            "The xls file provided has an invalid date on the"
            " survey sheet, under the default column on row number 5",
            str(caught.exception),
        )
def _b64_xls_to_dict(base64_encoded_upload):
    """Decode a base64 XLS upload and parse it into a dict of sheets.

    A ``library`` sheet is temporarily renamed to ``survey`` for parsing and
    the parsed rows are stored back under the ``library`` key. When the
    workbook has no ``library`` sheet it is parsed unchanged.

    Raises ``ValueError`` when the workbook contains both a ``survey`` and a
    ``library`` sheet. The result is passed through ``_strip_header_keys``.
    """
    raw_bytes = base64.b64decode(base64_encoded_upload)
    try:
        renamed_workbook = rename_xls_sheet(
            BytesIO(raw_bytes), from_sheet='library', to_sheet='survey'
        )
    except ConflictSheetError:
        raise ValueError('An import cannot have both "survey" and'
                         ' "library" sheets.')
    except NoFromSheetError:
        # No `library` sheet in the upload: parse the bytes as they are.
        return _strip_header_keys(
            xls2json_backends.xls_to_dict(BytesIO(raw_bytes))
        )

    parsed = xls2json_backends.xls_to_dict(renamed_workbook)
    # Undo the temporary rename on the parsed result.
    parsed['library'] = parsed.pop('survey')
    return _strip_header_keys(parsed)
示例#4
0
 def test_default_sheet_name_to_survey(self):
     """Check the parsed content of the ``survey_no_name.xlsx`` fixture.

     The JSON serialisation of the parsed workbook must contain the strings
     ``survey``, ``state`` and ``The State``.
     """
     xls_path = utils.path_to_text_fixture("survey_no_name.xlsx")
     # Serialise once; every check is a substring test on the same JSON text.
     serialized = json.dumps(xls_to_dict(xls_path))
     for expected in ("survey", "state", "The State"):
         # assertIn reports the missing value on failure, unlike assertTrue.
         self.assertIn(expected, serialized)
示例#5
0
    def _load_assets_from_url(self, url, messages, **kwargs):
        """Fetch ``url``, parse the response and create or update assets.

        Recognised keyword arguments (each defaults to ``False``):
        ``destination``, ``destination_kls`` and ``has_necessary_perm``.

        Parsed collections are created through ``create_assets``. Parsed
        assets are created the same way when no ``destination`` is given;
        otherwise the destination's content is overwritten and an entry is
        appended to ``messages['updated']``. Parent links are assigned and
        saved in a final pass after every item has been handled.

        Raises ``exceptions.PermissionDenied`` when the destination is a
        collection and ``has_necessary_perm`` is falsy.
        """
        destination = kwargs.get('destination', False)
        destination_kls = kwargs.get('destination_kls', False)
        has_necessary_perm = kwargs.get('has_necessary_perm', False)

        response = requests.get(url, allow_redirects=True)
        parsed_upload = HttpContentParse(request=response).parse()
        parsed_upload.remove_invalid_assets()
        parsed_upload.remove_empty_collections()

        # Only a collection destination can become the parent of new items.
        if destination_kls == 'collection':
            destination_collection = destination
        else:
            destination_collection = False

        if destination_collection and not has_necessary_perm:
            # redundant check
            raise exceptions.PermissionDenied(
                'user cannot load assets into this collection')

        pending_parent_links = []
        for item in parsed_upload._parsed:
            extra_args = dict(owner=self.user, name=item._name_base)
            item_type = item.get_type()

            if item_type == 'collection':
                item._orm = create_assets(item_type, extra_args)
            elif item_type == 'asset':
                kontent = xls2json_backends.xls_to_dict(item.readable)
                if destination:
                    # The below is copied from `_parse_b64_upload` pretty
                    # much as is
                    # TODO: review and test carefully
                    # NOTE(review): `item._orm` is not assigned on this path
                    # although the parent-link code below reads it - confirm.
                    destination.content = kontent
                    destination.save()
                    messages['updated'].append({
                        'uid': destination.uid,
                        'kind': 'asset',
                        'owner__username': self.user.username,
                    })
                else:
                    extra_args['content'] = _strip_header_keys(kontent)
                    item._orm = create_assets(item_type, extra_args)

            # Remember which parent each object should get; the links are
            # applied only after the loop.
            if item.parent:
                pending_parent_links.append((item._orm, item.parent._orm))
            elif destination_collection:
                pending_parent_links.append((item._orm, destination_collection))

        for child, parent in pending_parent_links:
            child.parent = parent
            child.save()
示例#6
0
 def test_xls_to_dict__extra_sheet_names_are_returned_by_parser(self):
     """Should return all sheet names so that later steps can do spellcheck."""
     workbook_path = os.path.join(example_xls.PATH, "extra_sheet_names.xlsx")
     parsed = xls_to_dict(workbook_path)
     # "stettings" is spelled this way on purpose: it is one of the extra
     # sheet names the parser is expected to hand back.
     for sheet_name in ("survey", "my_sheet", "stettings", "choices"):
         self.assertIn(sheet_name, parsed)
示例#7
0
 def _parse_input(self):
     """Load the input file into ``self._dict`` and run the clean-up passes.

     ``self.filetype`` selects the parser: ``"xls"`` uses ``xls_to_dict`` and
     ``"csv"`` uses ``csv_to_dict``, each applied to ``self._path``.
     """
     source_format = self.filetype
     if source_format == "csv":
         self._dict = csv_to_dict(self._path)
     elif source_format == "xls":
         self._dict = xls_to_dict(self._path)
     # NOTE(review): any other filetype leaves self._dict unassigned by this
     # method - confirm it cannot occur.
     self._sheet_names = self._dict.keys()

     # Post-processing passes, run in this fixed order.
     self._set_choices_and_columns_sheet_name()
     self._strip_unicode_values()
     self._fix_int_values()
     self._group_dictionaries()
示例#8
0
 def test_equivalency(self):
     """Each fixture must parse to the same dict from its .xls and .csv file."""
     fixture_names = (
         'group',
         'loop',
         # 'gps',
         'specify_other',
         'include',
         'text_and_integer',
         'include_json',
         'yes_or_no_question',
     )
     for name in fixture_names:
         xls_file = utils.path_to_text_fixture("%s.xls" % name)
         csv_file = utils.path_to_text_fixture("%s.csv" % name)
         from_xls = xls_to_dict(xls_file)
         from_csv = csv_to_dict(csv_file)
         self.assertEqual(from_csv, from_xls)
 def test_equivalency(self):
     """The XLS and CSV versions of every listed fixture parse identically."""
     # Show complete diffs when a comparison fails.
     self.maxDiff = None

     stems = [
         'group', 'loop',  # 'gps',
         'specify_other', 'include', 'text_and_integer',
         'include_json', 'yes_or_no_question',
     ]
     locate = utils.path_to_text_fixture
     for stem in stems:
         path_to_xls = locate("%s.xls" % stem)
         path_to_csv = locate("%s.csv" % stem)
         parsed_xls = xls_to_dict(path_to_xls)
         parsed_csv = csv_to_dict(path_to_csv)
         self.assertEqual(parsed_csv, parsed_xls)
示例#10
0
    def test_xls_to_dict(self):
        """Converting CSV to XLS must not change the parsed dict.

        Compares
            (CSV) -> dict_representation
        with
            (CSV -> XLS) -> dict_representation
        """
        # Round-trip path: CSV text -> XLS (our helper) -> dict (pyxform)
        converted_workbook = pyxform_utils.convert_csv_to_xls(simple_yn)
        dict_via_xls = xls2json_backends.xls_to_dict(converted_workbook)

        # Direct path: CSV text -> dict (pyxform)
        dict_via_csv = xls2json_backends.csv_to_dict(StringIO(simple_yn))

        self.assertEqual(dict_via_csv, dict_via_xls)
示例#11
0
 def test_equivalency(self):
     """Parsing a fixture as XLS or as CSV must give equal dictionaries."""
     # Never truncate the diff printed for a failing comparison.
     self.maxDiff = None

     def load_both(stem):
         # Resolve both paths first, then parse the XLS before the CSV.
         xls_file = utils.path_to_text_fixture("%s.xls" % stem)
         csv_file = utils.path_to_text_fixture("%s.csv" % stem)
         return xls_to_dict(xls_file), csv_to_dict(csv_file)

     for stem in (
         "group",
         "loop",
         # 'gps',
         "specify_other",
         "include",
         "text_and_integer",
         "include_json",
         "yes_or_no_question",
     ):
         from_xls, from_csv = load_both(stem)
         self.assertEqual(from_csv, from_xls)
示例#12
0
 def test_equivalency(self):
     """Every fixture yields the same result from its XLS and CSV variant."""
     fixture_stems = (
         "group",
         "loop",
         # 'gps',
         "specify_other",
         "include",
         "text_and_integer",
         "include_json",
         "yes_or_no_question",
     )
     for stem in fixture_stems:
         # Both fixture paths are resolved before either file is parsed.
         xls_file, csv_file = [
             utils.path_to_text_fixture(pattern % stem)
             for pattern in ("%s.xls", "%s.csv")
         ]
         parsed_xls = xls_to_dict(xls_file)
         parsed_csv = csv_to_dict(csv_file)
         self.maxDiff = None
         self.assertEqual(parsed_csv, parsed_xls)
示例#13
0
    def _load_assets_from_url(self, url, messages, **kwargs):
        """Download ``url``, parse it and create the assets it describes.

        Keyword arguments read (each defaults to ``False``): ``destination``,
        ``destination_kls`` and ``has_necessary_perm``. ``messages`` is
        accepted but not used by this method.

        Every parsed collection or asset is created with ``create_assets``;
        assets additionally receive their parsed content. Parents are
        assigned and saved once all items have been created.

        Raises ``exceptions.PermissionDenied`` when the destination is a
        collection and ``has_necessary_perm`` is falsy.
        """
        destination = kwargs.get('destination', False)
        destination_kls = kwargs.get('destination_kls', False)
        has_necessary_perm = kwargs.get('has_necessary_perm', False)

        http_response = requests.get(url, allow_redirects=True)
        parsed = HttpContentParse(request=http_response).parse()
        parsed.remove_invalid_assets()
        parsed.remove_empty_collections()

        targets_collection = destination_kls == 'collection'
        destination_collection = destination if targets_collection else False

        if destination_collection and not has_necessary_perm:
            # redundant check
            raise exceptions.PermissionDenied(
                'user cannot load assets into this collection'
            )

        parent_links = []
        for item in parsed._parsed:
            extra_args = {'owner': self.user, 'name': item._name_base}

            kind = item.get_type()
            if kind == 'asset':
                # Assets carry the parsed workbook as their content.
                kontent = xls2json_backends.xls_to_dict(item.readable)
                extra_args['content'] = _strip_header_keys(kontent)
            if kind in ('collection', 'asset'):
                item._orm = create_assets(kind, extra_args)

            # An item's own parent wins over the destination collection.
            if item.parent:
                parent_links.append((item._orm, item.parent._orm))
            elif destination_collection:
                parent_links.append((item._orm, destination_collection))

        for child, parent in parent_links:
            child.parent = parent
            child.save()
示例#14
0
def parse_file_to_workbook_dict(path, file_object=None):
    """
    Given a xls or csv workbook file use xls2json_backends to create
    a python workbook_dict.
    workbook_dicts are organized as follows:
    {sheetname : [{column_header : column_value_in_array_indexed_row}]}

    :param path: Path of the workbook. The extension of its file name
        selects the parser; the comparison ignores case, so ``.XLSX`` is
        handled like ``.xlsx``.
    :param file_object: Optional object passed to the parser in place of
        ``path`` when it is not ``None``.
    :raises PyXFormError: If ``path`` has no file name, has no extension, or
        has an extension other than ``.xls``, ``.xlsx`` or ``.csv``.
    """
    filename = os.path.basename(path)
    if not filename:
        raise PyXFormError("No filename.")
    extension = os.path.splitext(filename)[1]
    if not extension:
        raise PyXFormError("No extension.")

    # The parsers receive the file object when one is supplied, else the path.
    source = file_object if file_object is not None else path

    # Normalise case so upper- or mixed-case extensions are recognised too.
    extension = extension.lower()
    if extension in (".xls", ".xlsx"):
        return xls_to_dict(source)
    if extension == ".csv":
        return csv_to_dict(source)
    raise PyXFormError("File was not recognized")
示例#15
0
    def to_csv_io(self, asset_xls_io, id_string):
        ''' Convert the output of `Asset.to_xls_io()` or
        `Asset.to_versioned_xls_io()` into a CSV appropriate for KC's
        `text_xls_form`.

        The first settings row (or an empty one) loses its `form_id` key and
        gets `id_string` and `form_title` set. Each sheet is written as a
        row holding the sheet name, a header row and the data rows, the
        latter two with an empty leading cell. Sheets whose name ends in
        `_header` are skipped. Returns the in-memory CSV buffer. '''
        workbook = xls_to_dict(asset_xls_io)
        csv_io = cStringIO.StringIO()
        writer = unicodecsv.writer(
            csv_io,
            delimiter=',',
            quotechar='"',
            quoting=unicodecsv.QUOTE_MINIMAL,
        )

        settings_rows = workbook.get('settings', [])
        settings = settings_rows[0] if len(settings_rows) > 0 else {}
        settings.pop('form_id', None)
        settings['id_string'] = id_string
        settings['form_title'] = self.asset.name
        workbook['settings'] = [settings]

        for sheet_name, rows in workbook.items():
            if re.search(r'_header$', sheet_name):
                continue

            writer.writerow([sheet_name])
            header = []
            body = []
            for row in rows:
                # Columns are added in first-seen order, so a row only holds
                # cells for the columns known by the time it is reached.
                for key in row.keys():
                    if key not in header:
                        header.append(key)
                body.append([row.get(column, None) for column in header])

            writer.writerow([None] + header)
            for cells in body:
                writer.writerow([None] + cells)
        return csv_io
示例#16
0
    def to_csv_io(self, asset_xls_io, id_string):
        ''' Turn the workbook produced by `Asset.to_xls_io()` or
        `Asset.to_versioned_xls_io()` into the CSV layout used for KC's
        `text_xls_form`, and return the buffer it was written to.

        `id_string` replaces any `form_id` in the settings and the form
        title is taken from `self.asset.name`. '''
        xls_dict = xls_to_dict(asset_xls_io)
        csv_io = cStringIO.StringIO()
        csv_options = {
            'delimiter': ',',
            'quotechar': '"',
            'quoting': unicodecsv.QUOTE_MINIMAL,
        }
        writer = unicodecsv.writer(csv_io, **csv_options)

        # Work on the first settings row, or start a new one if none exists.
        existing_settings = xls_dict.get('settings', [])
        if len(existing_settings) == 0:
            form_settings = {}
        else:
            form_settings = existing_settings[0]
        form_settings.pop('form_id', None)
        form_settings.update(id_string=id_string, form_title=self.asset.name)
        xls_dict['settings'] = [form_settings]

        for sheet_name, rows in xls_dict.items():
            # Sheets named `*_header` are not written out.
            if re.search(r'_header$', sheet_name):
                continue

            writer.writerow([sheet_name])
            columns = []
            table = []
            for row in rows:
                columns += [key for key in row.keys() if key not in columns]
                table.append([row.get(name, None) for name in columns])

            # Header and data rows start with an empty cell.
            writer.writerow([None] + columns)
            for line in table:
                writer.writerow([None] + line)
        return csv_io
示例#17
0
def _b64_xls_to_dict(base64_encoded_upload):
    """Decode a base64 XLS upload and return its parsed sheets.

    The decoded bytes are parsed with ``xls2json_backends.xls_to_dict`` and
    the result is passed through ``_strip_header_keys``.
    """
    workbook_stream = BytesIO(base64.b64decode(base64_encoded_upload))
    return _strip_header_keys(xls2json_backends.xls_to_dict(workbook_stream))
                list_name, village_code, village_label, village_label, ao,
                region, district))

            self._set_ao(village)
            self._set_region_info(region, village, ao)
            self._set_district_info(district, village, region, ao)

        return data

    def write_location_cascade_csv(self, filename='location.csv'):
        """Write every location cascade to ``filename`` as one CSV file.

        A header row comes first, followed by the rows returned by
        ``ao_cascade()``, ``region_cascade()``, ``district_cascade()`` and
        ``village_cascade()``, in that order.
        """
        column_titles = ('list name', 'name', 'label:English', 'label:Somali',
                         'ao', 'region', 'district')
        cascade_methods = ('ao_cascade', 'region_cascade',
                           'district_cascade', 'village_cascade')
        with codecs.open(filename, 'w') as out_file:
            csv_writer = csv.writer(out_file)
            csv_writer.writerow(column_titles)
            for method_name in cascade_methods:
                # Each cascade is looked up and produced right before it is
                # written.
                csv_writer.writerows(getattr(self, method_name)())


if __name__ == '__main__':
    # Script entry point: parse the workbook and write the location cascade.
    path = 'data.xlsx'
    doc = xls_to_dict(path)
    # Sheets are looked up by name (presumably `doc` maps sheet name to rows).
    vdb = doc['VDB']
    # NOTE(review): `districts` is not used in the lines visible here - confirm
    # whether anything else relies on it before removing.
    districts = doc['District Names']
    regions = doc['Region Name']

    g = GenerateLocationCascade(regions, vdb)
    # No filename is passed, so the method's default output name is used.
    g.write_location_cascade_csv()
示例#19
0
                for j in i['children']:
                    if 'label' in j:
                        j['group'] = g
                        l.append(add_question(j))

            else:
                i['group'] = g
                l.append(add_question(i))
    return l


if __name__ == '__main__':
    XLS_FILE = 'data/codebook_xls.xls'
    # we can convert the xls to a better shaped json with pyxform
    from pyxform.xls2json_backends import xls_to_dict
    workbook_dict = xls_to_dict(XLS_FILE)
    from pyxform.xls2json import workbook_to_json
    workbook_json = workbook_to_json(workbook_dict,
                                     form_name=None,
                                     default_language=u"default",
                                     warnings=None)

    # cast the questions into a list of object, each with type, name, label, group and choices
    l = feed_workbook(workbook_json['children'])

    rows = ""
    for i in l:
        rows = rows + i.to_html()

    import html_utils as h
    html = h.html_head + h.html_body_header + ' '.join(
示例#20
0
def parse_xlsform(fp):
    """Convert the workbook ``fp`` into pretty-printed XML.

    ``fp`` is parsed with ``xls_to_dict``, turned into a survey definition by
    ``xls2json.workbook_to_json`` and built into a survey element whose XML
    is returned indented by two spaces. Warnings gathered during the
    conversion are not returned.
    """
    collected_warnings = []
    workbook = xls_to_dict(fp)
    survey_json = xls2json.workbook_to_json(
        workbook, None, 'default', collected_warnings
    )
    survey = builder.create_survey_element_from_dict(survey_json)
    document = survey.xml()
    return document.toprettyxml(indent='  ')
示例#21
0
def _b64_xls_to_dict(base64_encoded_upload):
    """Parse a base64-encoded XLS upload into a dict of sheets.

    Steps: base64-decode the upload, wrap the bytes in a ``BytesIO``, parse
    it with ``xls2json_backends.xls_to_dict`` and return the result of
    ``_strip_header_keys`` applied to the parsed dict.
    """
    raw_bytes = base64.b64decode(base64_encoded_upload)
    stream = BytesIO(raw_bytes)
    parsed_sheets = xls2json_backends.xls_to_dict(stream)
    cleaned = _strip_header_keys(parsed_sheets)
    return cleaned