def get_head_line(sheet_data: xlrd.sheet.Sheet):
    """Return the list of cell values found in the first row of a sheet.

    :param sheet_data: worksheet whose first row (row index 0) is read
    :return: values of row 0, from column 0 up to ``sheet_data.ncols``
    """
    first_col = 0
    end_col = sheet_data.ncols
    return sheet_data.row_values(0, first_col, end_col)
def diff_sheet(self, s1: xlrd.sheet.Sheet, s2: xlrd.sheet.Sheet):
    """Compute the column-level and cell-level diff between two sheets.

    Columns are matched by header text (converted with ``str``) read from
    row ``self._header_row`` starting at column ``self._start_col``. A header
    that occurs several times is paired in order of appearance; surplus
    occurrences on either side are reported as removed / added columns.
    Rows from ``self._start_row`` onwards are then compared, for matched
    columns only, through ``self.diff_data``.

    :param s1: sheet 1 (its columns are reported as ``src_col``)
    :param s2: sheet 2 (its columns are reported as ``dest_col``)
    :return: dict with the keys ``added_cols``, ``removed_cols`` and
        ``modified_data``, or ``None`` when no difference was detected
    """

    def _group_columns(headers):
        # Map header text -> positions where it occurs (in order), so that
        # duplicated header names are kept instead of overwriting each other.
        grouped = {}
        for position, name in enumerate(headers):
            grouped.setdefault(name, []).append(position)
        return grouped

    start_col = self._start_col
    start_row = self._start_row

    sheet_diff = {
        'added_cols': [],
        'removed_cols': [],
        'modified_data': {},
    }
    modified = False

    # diff header
    # NOTE: every column position derived from these lists is relative to
    # start_col, because row_values() begins reading at start_colx.
    headers1 = [
        str(v) for v in s1.row_values(self._header_row, start_colx=start_col)
    ]
    headers2 = [
        str(v) for v in s2.row_values(self._header_row, start_colx=start_col)
    ]
    header_cols1 = _group_columns(headers1)
    header_cols2 = _group_columns(headers2)

    removed_cols, kept_cols, added_cols = get_iter_diff(
        header_cols1.keys(), header_cols2.keys())

    # please do not change col name or switch data frequently!
    # NOTE(review): the 'indices' reported for added/removed columns stay
    # relative to start_col, while modified cells below are reported with
    # start_col added -- confirm this asymmetry is intended.
    if removed_cols:
        sheet_diff['removed_cols'] = [{
            'name': h,
            'indices': header_cols1[h]
        } for h in removed_cols]
        modified = True
    if added_cols:
        sheet_diff['added_cols'] = [{
            'name': h,
            'indices': header_cols2[h]
        } for h in added_cols]
        modified = True

    # map cols: s1 position -> s2 position (both relative to start_col)
    cols1_cols2 = {}
    for h in kept_cols:
        cols1, cols2 = header_cols1[h], header_cols2[h]
        n1, n2 = len(cols1), len(cols2)
        if n1 > n2:
            # s1 has more columns with this name: the trailing extras are gone
            sheet_diff['removed_cols'].append({
                'name': h,
                'indices': cols1[n2:]
            })
            modified = True
        elif n1 < n2:
            # s2 has more columns with this name: the trailing extras are new
            sheet_diff['added_cols'].append({
                'name': h,
                'indices': cols2[n1:]
            })
            modified = True
        # zip() stops at the shorter list, pairing the i-th occurrence in s1
        # with the i-th occurrence in s2.
        cols1_cols2.update(zip(cols1, cols2))

    indices1 = sorted(cols1_cols2)

    # Collect the data rows of the matched columns as strings.
    # Positions are relative to start_col, whereas cell_value() expects an
    # absolute column index, hence the "+ start_col" on every lookup.
    d1, d2 = [], []
    if start_row > s1.nrows:
        LOGGER.warn('Sheet %s: start row %d is larger than num rows %d!' %
                    (s1.name, self._start_row, s1.nrows))
    else:
        d1 = [
            [str(s1.cell_value(row, c + start_col)) for c in indices1]
            for row in range(start_row, s1.nrows)
        ]
    if start_row > s2.nrows:
        LOGGER.warn('Sheet %s: start row %d is larger then num rows %d!' %
                    (s2.name, self._start_row, s2.nrows))
    else:
        d2 = [
            [
                str(s2.cell_value(row, cols1_cols2[c] + start_col))
                for c in indices1
            ]
            for row in range(start_row, s2.nrows)
        ]

    # diff data
    data_diff = self.diff_data(d1, d2)
    if data_diff:
        modified = True
        # diff_data works on the compacted row lists, so its column numbers
        # index into indices1; translate them back to sheet columns.
        data_diff['modified_cells'] = [
            {
                **d,
                'src_col': indices1[d['src_col']] + start_col,
                'dest_col': cols1_cols2[indices1[d['dest_col']]] + start_col,
            }
            for d in data_diff['modified_cells']
        ]
        sheet_diff['modified_data'] = data_diff

    # +1 to all indices if using excel
    # (presumably converts 0-based indices to Excel's 1-based numbering;
    # the helper is defined elsewhere -- verify)
    if modified and self._use_excel_indices:
        sheet_diff = ExcelDiffer._convert_idx_of_sheet_diff(sheet_diff)

    return sheet_diff if modified else None