def parse(self, file_contents):
    """Parse the content of an .xlsx data sender workbook.

    The workbook is expected to contain at least two sheets: a data sheet
    (selected by ``self._get_worksheet``) and a codes sheet (selected by
    ``self._get_code_sheet``) that supplies the column codes.

    :param file_contents: in-memory content of the .xlsx file; must not
        be None.
    :returns: a list of ``(REGISTRATION_FORM_CODE, values)`` tuples, one
        per row yielded by ``iter_rows(row_offset=1)`` on the data sheet.
        ``values`` maps each column code (the codes row minus its first
        entry) to the cell value and also carries ``"t": "reporter"``.
    :raises CodeSheetMissingException: if the workbook has fewer than two
        sheets.
    :raises XlsParserInvalidHeaderFormatException: if
        ``self._is_header_row`` does not accept the codes row.
    """
    assert file_contents is not None
    xlsx_file = StringIO.StringIO(file_contents)
    workbook = load_workbook(xlsx_file, use_iterators=True)
    all_sheets = workbook.worksheets
    # Fewer than two sheets means there is no codes sheet to read. This
    # also covers an empty sheet list, which previously left codes_sheet
    # unbound and failed with a NameError further down.
    if len(all_sheets) < 2:
        raise CodeSheetMissingException()
    codes_sheet = self._get_code_sheet(all_sheets)
    worksheet = self._get_worksheet(all_sheets)

    # Every iteration overwrites codes_row, so the LAST row of the codes
    # sheet ends up being used as the header.
    # NOTE(review): the xls parsers read row 0 instead; presumably the
    # codes sheet holds a single row -- confirm.
    codes_row = []
    for cells in codes_sheet.iter_rows():
        codes_row = [self._get_value(cell.value) for cell in cells]

    header, header_found = self._is_header_row(codes_row)
    # Validate before touching ``header`` so an unrecognised header is
    # reported as such rather than failing on the slice below or only
    # after all data rows have been processed.
    if not header_found:
        raise XlsParserInvalidHeaderFormatException()

    # The first header entry is dropped; only the remaining entries are
    # paired with the data cells.
    header = header[1:]
    parsed_data = []
    for row in worksheet.iter_rows(row_offset=1):
        row_values = [self._get_value(cell.value) for cell in row]
        values = dict(zip(header, row_values))
        values["t"] = "reporter"
        parsed_data.append((REGISTRATION_FORM_CODE, values))
    return parsed_data
def parse(self, xls_contents):
    """Parse the content of an .xls workbook into ordered row mappings.

    The workbook is expected to contain at least two sheets: a data sheet
    (selected by ``self._get_worksheet``) and a codes sheet (selected by
    ``self._get_code_sheet``) whose first row supplies the form code
    followed by the column codes.

    :param xls_contents: in-memory content of the .xls file; must not be
        None.
    :returns: a list of ``(form_code, values)`` tuples, one per data-sheet
        row from index 1 onwards. ``form_code`` is the first entry of the
        header; ``values`` is an ``OrderedDict`` pairing the remaining
        header entries with the row's cells, after the positions reported
        by ``self.get_empty_indexes`` have been removed from both.
    :raises CodeSheetMissingException: if the workbook has fewer than two
        sheets.
    :raises XlsParserInvalidHeaderFormatException: if
        ``self._is_header_row`` does not accept the codes row.
    """
    assert xls_contents is not None
    workbook = xlrd.open_workbook(file_contents=xls_contents)
    all_sheets = workbook.sheets()
    # Fewer than two sheets means there is no codes sheet to read. This
    # also covers an empty sheet list, which previously left codes_sheet
    # unbound and failed with a NameError further down.
    if len(all_sheets) < 2:
        raise CodeSheetMissingException()
    codes_sheet = self._get_code_sheet(all_sheets)
    worksheet = self._get_worksheet(all_sheets)

    header, header_found = self._is_header_row(codes_sheet.row_values(0))
    # Validate before indexing ``header`` so an unrecognised header is
    # reported as such rather than failing on header[0] below.
    if not header_found:
        raise XlsParserInvalidHeaderFormatException()

    form_code = header[0]
    header = header[1:]
    # The same index list is applied to the header and to every data row
    # so that columns stay aligned.
    empty_indexes = self.get_empty_indexes(header)
    header = self.remove_empty_indexes(header, empty_indexes)

    parsed_data = []
    for row_num in range(1, worksheet.nrows):
        row = self._clean(worksheet.row_values(row_num))
        row = self.remove_empty_indexes(row, empty_indexes)
        parsed_data.append((form_code, OrderedDict(zip(header, row))))
    return parsed_data
def parse(self, xls_contents):
    """Parse the content of an .xls data sender workbook.

    The data sheet is selected by ``self._get_worksheet`` and the codes
    sheet by ``self._get_code_sheet``; the first row of the codes sheet
    must start with ``'reg'`` and supplies the column codes.

    :param xls_contents: in-memory content of the .xls file; must not be
        None.
    :returns: a list of ``(REGISTRATION_FORM_CODE, values)`` tuples, one
        per data-sheet row from index 1 onwards. ``values`` maps each
        column code (the header minus its first entry) to the cleaned
        cell value and also carries ``"t": "reporter"``.
    :raises Exception: if the first cell of the codes row is not
        ``'reg'``.
    :raises XlsParserInvalidHeaderFormatException: if
        ``self._is_header_row`` does not accept the codes row.
    """
    assert xls_contents is not None
    workbook = xlrd.open_workbook(file_contents=xls_contents)
    all_sheets = workbook.sheets()
    worksheet = self._get_worksheet(all_sheets)
    codes_sheet = self._get_code_sheet(all_sheets)

    row = codes_sheet.row_values(0)
    header, header_found = self._is_header_row(row)
    # The 'reg' marker is checked first, so a workbook of the wrong kind
    # keeps raising the generic exception as before.
    if row[0] != 'reg':
        raise Exception("Invalid datasender excel imported")
    # Validate before slicing ``header`` so an unrecognised header is
    # reported as such rather than failing on the slice below or only
    # after all data rows have been processed.
    if not header_found:
        raise XlsParserInvalidHeaderFormatException()

    # The first header entry is dropped; only the remaining entries are
    # paired with the data cells.
    header = header[1:]
    parsed_data = []
    for row_num in range(1, worksheet.nrows):
        cells = self._clean(worksheet.row_values(row_num))
        values = dict(zip(header, cells))
        values["t"] = "reporter"
        parsed_data.append((REGISTRATION_FORM_CODE, values))
    return parsed_data
def parse(self, xls_contents):
    """Parse the first sheet of an .xls workbook into submissions.

    Rows are offered to ``self._is_header_row`` one by one until it
    reports a header. Each later row that ``self._is_empty`` does not
    reject is cleaned with ``self._clean`` and paired with the header.

    :param xls_contents: in-memory content of the .xls file; must not be
        None.
    :returns: a list of ``(form_code, values)`` tuples. ``form_code`` is
        the lower-cased cell under the first header column; ``values`` is
        a dict of the remaining header/cell pairs.
    :raises XlsParserInvalidHeaderFormatException: if no row of the sheet
        was accepted as a header.
    """
    assert xls_contents is not None
    sheet = xlrd.open_workbook(file_contents=xls_contents).sheets()[0]
    columns = None
    have_header = False
    records = []
    for index in range(sheet.nrows):
        cells = sheet.row_values(index)
        if have_header:
            if not self._is_empty(cells):
                record = dict(zip(columns, self._clean(cells)))
                # The first column holds the form code; it is removed from
                # the mapping and returned separately.
                code = record.pop(columns[0]).lower()
                records.append((code, record))
        else:
            # Still looking for the header; this row is consumed either way.
            columns, have_header = self._is_header_row(cells)
    if have_header:
        return records
    raise XlsParserInvalidHeaderFormatException()