Пример #1
0
    def parse(self, file_contents):
        """Parse the contents of an .xlsx workbook into ``(form_code, values)`` tuples.

        The header is read from the sheet returned by ``_get_code_sheet`` and
        the data rows from the sheet returned by ``_get_worksheet``.

        :param file_contents: raw contents of the .xlsx file; must not be None.
        :returns: a list of ``(REGISTRATION_FORM_CODE, values)`` tuples where
            ``values`` is a dict built by zipping the header (minus its first
            entry) with the row's cell values, plus ``"t": "reporter"``.
            ``None`` is returned when the workbook exposes no worksheets.
        :raises CodeSheetMissingException: if the workbook has exactly one
            sheet (and, presumably, when ``_get_code_sheet`` finds no match).
        :raises XlsParserInvalidHeaderFormatException: if ``_is_header_row``
            reports that the header is not valid.
        """
        assert file_contents is not None
        xlsx_file = StringIO.StringIO(file_contents)

        workbook = load_workbook(xlsx_file, use_iterators=True)
        all_sheets = workbook.worksheets
        sheet_count = len(all_sheets)
        if sheet_count == 1:
            # A lone sheet cannot contain both the data and the codes.
            raise CodeSheetMissingException()
        if sheet_count == 0:
            # Kept for backward compatibility: no sheets yields no result.
            return None

        codes_sheet = self._get_code_sheet(all_sheets)
        worksheet = self._get_worksheet(all_sheets)

        # Every iteration overwrites the previous one, so the LAST row of the
        # codes sheet is what gets validated as the header.
        # NOTE(review): the first row may have been intended - confirm.
        header_cells = []
        for code_row in codes_sheet.iter_rows():
            header_cells = [self._get_value(cell.value) for cell in code_row]

        header, header_found = self._is_header_row(header_cells)
        # Validate before touching ``header``: it may not be usable when the
        # header was not found, and slicing it would mask the real error.
        if not header_found:
            raise XlsParserInvalidHeaderFormatException()

        # The first header entry is dropped; only the rest name data columns.
        header = header[1:]
        parsed_data = []
        for row in worksheet.iter_rows(row_offset=1):
            row_values = [self._get_value(cell.value) for cell in row]
            values = dict(zip(header, row_values))
            values["t"] = "reporter"
            parsed_data.append((REGISTRATION_FORM_CODE, values))
        return parsed_data
Пример #2
0
    def parse(self, xls_contents):
        """Parse the contents of an .xls workbook into ``(form_code, values)`` tuples.

        The header is row 0 of the sheet returned by ``_get_code_sheet``; its
        first entry is used as the form code and the remaining entries as
        column names. Data rows come from the sheet returned by
        ``_get_worksheet``, starting at row index 1.

        :param xls_contents: raw contents of the .xls file; must not be None.
        :returns: a list of ``(form_code, OrderedDict)`` tuples, one per data
            row. ``None`` is returned when the workbook exposes no sheets.
        :raises CodeSheetMissingException: if the workbook has exactly one
            sheet (and, presumably, when ``_get_code_sheet`` finds no match).
        :raises XlsParserInvalidHeaderFormatException: if ``_is_header_row``
            reports that the header is not valid.
        """
        assert xls_contents is not None
        workbook = xlrd.open_workbook(file_contents=xls_contents)
        all_sheets = workbook.sheets()
        sheet_count = len(all_sheets)
        if sheet_count == 1:
            # A lone sheet cannot contain both the data and the codes.
            raise CodeSheetMissingException()
        if sheet_count == 0:
            # Kept for backward compatibility: no sheets yields no result.
            return None

        codes_sheet = self._get_code_sheet(all_sheets)
        worksheet = self._get_worksheet(all_sheets)
        header, header_found = self._is_header_row(codes_sheet.row_values(0))
        # Validate before indexing ``header``: it may not be usable when the
        # header was not found, and ``header[0]`` would mask the real error.
        if not header_found:
            raise XlsParserInvalidHeaderFormatException()

        form_code = header[0]
        header = header[1:]
        # Positions reported by get_empty_indexes are removed from the header
        # and, below, from every data row so the columns stay aligned.
        empty_indexes = self.get_empty_indexes(header)
        header = self.remove_empty_indexes(header, empty_indexes)

        parsed_data = []
        for row_num in range(1, worksheet.nrows):
            row = self._clean(worksheet.row_values(row_num))
            row = self.remove_empty_indexes(row, empty_indexes)
            parsed_data.append((form_code, OrderedDict(zip(header, row))))
        return parsed_data
Пример #3
0
 def _get_code_sheet(self, all_sheets):
     """Return the first sheet titled 'codes' whose ``max_column`` exceeds 1.

     :param all_sheets: iterable of sheet objects exposing ``title`` and
         ``max_column``.
     :returns: the first matching sheet, in iteration order.
     :raises CodeSheetMissingException: if no sheet matches.
     """
     matches = []
     for sheet in all_sheets:
         if sheet.title == 'codes' and sheet.max_column > 1:
             matches.append(sheet)
     if not matches:
         raise CodeSheetMissingException()
     return matches[0]
Пример #4
0
 def _get_code_sheet(self, all_sheets):
     """Return the first sheet named 'codes' that has a non-empty ``_cell_values``.

     :param all_sheets: iterable of sheet objects exposing ``name`` and
         ``_cell_values``.
     :returns: the first matching sheet, in iteration order.
     :raises CodeSheetMissingException: if no sheet matches.
     """
     def is_code_sheet(sheet):
         # The name is checked first, so ``_cell_values`` is only read on
         # sheets that are actually called 'codes'.
         return sheet.name == 'codes' and len(sheet._cell_values)

     candidates = list(filter(is_code_sheet, all_sheets))
     if not candidates:
         raise CodeSheetMissingException()
     return candidates[0]