def __LoadSheet(self, sheet: xlrd.sheet.Sheet) -> Tuple[PaymentsData, PaymentsDataErrors]:
    """Load payment rows from *sheet* into payment data / error containers.

    The first row is treated as a header and skipped; rows whose user
    fails ``User.IsValid()`` are ignored.
    """
    payments_data = PaymentsData(self.config)
    payments_data_err = PaymentsDataErrors()

    # Resolve the configured column letters into zero-based indexes
    email_col_idx = self._ColumnToIndex(
        self.config.GetValue(BotConfigTypes.PAYMENT_EMAIL_COL))
    user_col_idx = self._ColumnToIndex(
        self.config.GetValue(BotConfigTypes.PAYMENT_USER_COL))
    expiration_col_idx = self._ColumnToIndex(
        self.config.GetValue(BotConfigTypes.PAYMENT_EXPIRATION_COL))

    # Iterate data rows only (row 0 is the header)
    for row_idx in range(1, sheet.nrows):
        email = str(sheet.cell_value(row_idx, email_col_idx)).strip()
        user = User.FromString(
            self.config,
            str(sheet.cell_value(row_idx, user_col_idx)).strip())
        expiration = sheet.cell_value(row_idx, expiration_col_idx)

        # Only valid users are recorded; row number is 1-based for reporting
        if user.IsValid():
            self.__AddPayment(row_idx + 1, payments_data, payments_data_err,
                              email, user, expiration)

    return payments_data, payments_data_err
def _read_sheet(sheet: xlrd.sheet.Sheet, exercise_name: str,
                candidate_count: int = 5) -> List[Record]:
    """Return all candidate records in given sheet.

    :param sheet: the evaluation sheet to read
    :param exercise_name: name recorded on each built Evaluation
    :param candidate_count: number of candidate slots in the sheet
        (previously hard-coded to 5; default keeps old behavior)
    :raises Exception: if a candidate row has no team name, or a team is
        present but the evaluator name is missing
    """
    records = []
    # Evaluator and team live in fixed cells (row 5, cols 1 and 2); they do
    # not depend on the candidate, so read them once instead of per-iteration.
    evaluator_name = sheet.cell_value(5, 1)
    team_name = _sanitize_team(sheet.cell_value(5, 2))
    for candidate_index in range(candidate_count):
        # Candidates occupy every other row starting at row 5
        row = 5 + candidate_index * 2
        candidate_name = sheet.cell_value(row, 4).strip()
        if not candidate_name:
            continue
        if not team_name:
            raise Exception("Error: No team name")
        if not evaluator_name:
            raise Exception("Error: No evaluator name")
        evaluation = Evaluation(evaluator_name=evaluator_name,
                                exercise_name=exercise_name,
                                learning_ability=_read_attribute(sheet, row, 5),
                                personal=_read_attribute(sheet, row, 10),
                                interpersonal=_read_attribute(sheet, row, 15),
                                leader=_read_attribute(sheet, row, 20),
                                summary=_read_attribute(sheet, row, 25))
        records.append(Record(candidate_name, team_name, evaluation))
    return records
def _parse_oakland_sheet(sheet: xlrd.sheet.Sheet, datemode: int):
    """Parse a single sheet of the Oakland excel file into a dataframe.

    Parameters
    ----------
    sheet
        The Sheet object from the Book of the Oakland container moves.
    datemode
        The Book's datemode value (usually 0 or 1).
        NOTE(review): currently unused — dates are parsed from the
        year/month text cells, not from Excel date serials.

    Returns
    -------
    pandas.DataFrame
        The DataFrame containing all the sheets concatenated together.

    Raises
    ------
    ExcelParsingError
        If the sheet layout is unexpected, or a parsed date falls outside
        the plausible range (before 1990 or in the future).
    """
    # Assume the first rows are just header, and verify that the columns are in order
    # date, full imports, full exports, total full, empty imports, empty exports,
    # total empty, grand total
    keys = _verify_oakland_sheet(sheet)
    nrow = len(sheet.col(0))
    dates = []
    data = {k: [] for k in keys}
    # Sanity bounds for parsed dates; hoisted out of the row loop.
    earliest_ok = pd.Timestamp(1990, 1, 1)
    latest_ok = pd.Timestamp.now()
    for irow in range(3, nrow):
        year = sheet.cell_value(irow, 0)
        month = sheet.cell_value(irow, 1)
        if isinstance(month, str) and month == 'Annual Total':
            # Yearly summary rows are derived data; skip them.
            continue

        this_date = pd.to_datetime('{} {:.0f}'.format(month, year))
        if this_date < earliest_ok or this_date > latest_ok:
            # This may catch some bad date parsing. I haven't had a problem with
            # this, but want to check (in case they change the format unexpectedly).
            # Fixed: the old message said "(pre-1990)" even when the failure was a
            # date in the future.
            raise ExcelParsingError('Unexpected date parsed (pre-1990 or in the future)')
        dates.append(this_date)

        for k, icol in keys.items():
            val = sheet.cell_value(irow, icol)
            # Empty string cells become NaN so numeric columns stay numeric
            if isinstance(val, str) and len(val) == 0:
                data[k].append(np.nan)
            else:
                data[k].append(val)

    dates = pd.DatetimeIndex(dates)
    colname_mapping = {
        'Import Full': 'Full Imports',
        'Export Full': 'Full Exports',
        'Import Empty': 'Empty Imports',
        'Export Empty': 'Empty Exports',
        'Grand Total': 'Total TEUs'
    }
    # Totals are derivable from the other columns, so drop them.
    return pd.DataFrame(data, index=dates).drop(
        columns=['Total Full', 'Total Empty']).rename(columns=colname_mapping)
def _get_sheet_data(self, sheet: xlrd.sheet.Sheet, col: int) -> (str, str, str, str):
    """Read one column's metadata: proto type, declared type, field name, comment.

    The comment (row 4) is flattened to a single line; when non-empty it is
    returned prefixed with ' @'.
    """
    proto_type = sheet.cell_value(0, col)
    define_type = sheet.cell_value(1, col)  # declared type
    name = sheet.cell_value(2, col)         # field name
    # Strip embedded line breaks so the comment fits on one line
    raw_comment = str(sheet.cell_value(4, col))
    comment = raw_comment.replace('\n', '').replace('\r', '')
    if comment:
        comment = f' @{comment}'
    return proto_type, define_type, name, comment
def _parse_target(self, sheet: xlrd.sheet.Sheet, key_col: dict, start_row: int,
                  end_row: int, name: str):
    '''Parse rows [start_row, end_row) of *sheet* into self.result.

    :param sheet: sheet to be parsed
    :param key_col: map key_name to column order ex: {"ip": 1, "username": 2}
    :param start_row: set to 1 for single thread
    :param end_row: set to sheet.nrows for single thread
    :param name: thread name, used as the key into self.thread_state
    :return: Flag: Bool, columns_processes: int or error msg
    '''
    print(
        "thread parse target started. start_row: {}, end_row: {}, name: {}"
        .format(start_row, end_row, name))
    try:
        thread_result = []
        for row_no in range(start_row, end_row):
            row_dict = {}
            for key in key_col:
                row_dict[key] = sheet.cell_value(row_no, key_col[key])
            thread_result.append(row_dict)
        # Use the lock as a context manager so it is released even if the
        # list extension or the print raises; the previous manual
        # acquire()/release() pair could leak the lock on an exception.
        with self.result_lock:
            self.result += thread_result
            print("thread {} result:\n{}\n\n".format(name, thread_result))
        self.thread_state[name] = True
        return True, end_row - start_row
    except Exception as e:
        # Mark the thread done even on failure so waiters don't hang.
        with self.result_lock:
            self.thread_state[name] = True
        return False, e
def _verify_oakland_sheet(sheet: xlrd.sheet.Sheet):
    """Check that a sheet in the Oakland container workbook is laid out as expected.

    Returns a dict mapping data-column headers to their column indices.
    Raises `ExcelParsingError` if any expected header cell does not match.
    """
    keys = dict()
    reasons = []
    # Fixed: the tuple previously stopped at 'H', but the checks below go up
    # to column index 8 ('Grand Total'), so a mismatch in that column raised
    # IndexError instead of ExcelParsingError.
    _cols = ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I')
    # row index, column index, expected contents, whether this column is a column name for the dataframe
    checks = [(2, 0, 'Year', False), (2, 1, 'Month', False),
              (2, 2, 'Import Full', True), (2, 3, 'Export Full', True),
              (2, 4, 'Total Full', True), (2, 5, 'Import Empty', True),
              (2, 6, 'Export Empty', True), (2, 7, 'Total Empty', True),
              (2, 8, 'Grand Total', True)]
    for r, c, val, is_key in checks:
        # Replace any whitespace with a single space (e.g. newlines)
        sheet_val = re.sub(r'\s+', ' ', sheet.cell_value(r, c))
        if sheet_val != val:
            # Report in spreadsheet coordinates (1-based row, lettered column)
            msg = '{}{} != {}'.format(_cols[c], r + 1, val)
            reasons.append(msg)
        elif is_key:
            keys[sheet_val] = c

    if len(reasons) > 0:
        msg = 'Unexpected sheet format ({})'.format(', '.join(reasons))
        raise ExcelParsingError(msg)
    else:
        return keys
def _get_sheet_data(sh: xlrd.sheet.Sheet, datemode: int) -> pd.DataFrame:
    """Process a Microsoft Excel sheet, returning a Pandas DataFrame

    Args:
        sh: the sheet to be processed
        datemode: integer to pass as argument to _get_row_data()

    Returns:
        pd.DataFrame: all data in the given sheet with normalized names and types
    """
    # Maturity date lives in cell B1, either as an Excel serial (float)
    # or as a dd/mm/yyyy string.
    raw_maturity = sh.cell_value(0, 1)
    if isinstance(raw_maturity, float):
        maturity = datetime.datetime(*xlrd.xldate_as_tuple(raw_maturity, datemode))
    else:
        maturity = datetime.datetime.strptime(raw_maturity, "%d/%m/%Y")

    # Sheet name is "<bond> <series>"; normalize the bond part via aliases.
    bond, series = sh.name.rsplit(" ", maxsplit=1)
    bond = BONDS["aliases"][bond.replace("-", "").lower()]  # Fix bonds names

    header = tuple(cell.value for cell in sh.row(1) if cell.value != "")

    # Data starts at the third row; rows with an empty second cell are skipped.
    records = []
    for row in itertools.islice(sh.get_rows(), 2, None):
        if row[1].ctype == 0 or row[1].value == "":
            continue
        records.append(_get_row_data(row, datemode))

    frame = pd.DataFrame.from_records(records, columns=header)
    frame = frame.assign(
        MaturityDate=maturity,
        BondCode=sh.name,
        BondName=bond,
        BondSeries=series,
    )
    return frame
def find_in_sheet(val, sheet: xlrd.sheet.Sheet) -> Tuple[int, int]:
    """Return a tuple containing the (row, col) of first match searching row 0, then row 1, etc."""
    # Row-major scan: row 0 left-to-right, then row 1, and so on.
    for row_idx in range(sheet.nrows):
        for col_idx in range(sheet.ncols):
            cell = sheet.cell_value(row_idx, col_idx)
            if cell == val:
                return row_idx, col_idx
    raise LookupError(f'Value {val} not found in sheet {sheet}')
def get_table_name(sheet_data: xlrd.sheet.Sheet):
    """Return the table name: the first line of the sheet's top-left cell (0, 0).

    :param sheet_data: sheet whose first cell holds the table name
    :return: first line of the cell's text
    """
    first_cell = sheet_data.cell_value(0, 0)
    lines = first_cell.splitlines()
    return lines[0]
def _extract_data(sheet: xlrd.sheet.Sheet) -> DataTable:
    """Helper function that extracts cell values from an xlrd sheet into a plain array"""
    table = []
    for row_idx in range(sheet.nrows):
        # One list per row, cells in column order
        table.append([sheet.cell_value(row_idx, col_idx)
                      for col_idx in range(sheet.ncols)])
    return table
def transform_data(xlrd_sheet: xlrd.sheet.Sheet) -> io.StringIO:
    """Transform the sheet into JSON stored in a string buffer.

    Each data row (rows 1..nrows-1) becomes a dict keyed by the header
    values found in row 0. The JSON list of those dicts is written into a
    StringIO buffer, which is returned.

    Parameters
    ----------
    xlrd_sheet: xlrd.sheet.Sheet
        The sheet to transform; row 0 is the header.

    Returns
    -------
    string_buffer: StringIO
        Buffer containing the JSON-encoded list of row dicts.
    """
    # Header values come from row 0, one per column.
    headers = [xlrd_sheet.cell_value(0, col_idx)
               for col_idx in range(xlrd_sheet.ncols)]

    # Fixed two bugs in the original:
    # 1. cell_value was called as cell_value(col_idx, row_idx) — row and
    #    column transposed (xlrd's signature is cell_value(rowx, colx)).
    # 2. data rows started at index 0, so the header row was emitted again
    #    as a data row; they now start at 1.
    data = [
        {col: xlrd_sheet.cell_value(row_idx, col_idx)
         for col_idx, col in enumerate(headers)}
        for row_idx in range(1, xlrd_sheet.nrows)
    ]
    return io.StringIO(json.dumps(data))
def _parse_sheet(self, sheet: xlrd.sheet.Sheet):
    """Parse *sheet* into a list of row dicts keyed by the headers in self.keys.

    Columns whose header (row 0) is in self.keys are collected; every
    subsequent row yields one dict. Returns [] when no header matches.
    """
    # Map each recognized header to its column index
    index_key_map = {}
    for col in range(sheet.ncols):
        key = sheet.cell_value(0, col)
        if key in self.keys:
            index_key_map[key] = col

    if not index_key_map:
        return []

    # Fixed idiom issues from the original: the `get(...) != None` guard was
    # unreachable (we iterate the map's own keys, so lookup never misses)
    # and `!= None` violates PEP 8 (use `is not None`); the dead branch that
    # assigned None is removed.
    result = []
    for row_no in range(1, sheet.nrows):
        row_dict = {key: sheet.cell_value(row_no, col_idx)
                    for key, col_idx in index_key_map.items()}
        if row_dict:
            result.append(row_dict)
    return result
def _get_next(self, sheet: xlrd.sheet.Sheet, col: int, max: int = -1) -> int:
    """Return the index of the next non-skipped column after *col*.

    Columns are tested via self._is_skip_col on their row-0 (proto type)
    cell. *max* bounds the search (default -1 means sheet.ncols); when no
    usable column remains before the bound, the bound itself is returned.
    """
    # note: `max` shadows the builtin, but the name is part of the public
    # signature (callers may pass it by keyword), so it is kept.
    limit = sheet.ncols if max == -1 else max
    cur = col + 1
    # Advance past columns flagged as skippable
    while cur < limit and self._is_skip_col(sheet.cell_value(0, cur)):
        cur += 1
    return limit if cur >= limit else cur
def _parse_sheet(self, sheet: xlrd.sheet.Sheet):
    # Parse *sheet* by fanning row ranges out to self._parse_target, either
    # inline (no_threads == 1) or across self.no_threads worker threads.
    # Build the header -> column-index map from row 0.
    index_key_map = {}
    for col in range(sheet.ncols):
        key = sheet.cell_value(0, col)
        if key in self.keys:
            index_key_map[key] = col
    # Single-threaded path: parse all data rows (1..nrows) directly.
    if self.no_threads == 1:
        self._parse_target(sheet=sheet, key_col=index_key_map, start_row=1,
                           end_row=sheet.nrows, name="main")
        return
    # Split the (nrows - 1) data rows into no_threads equal chunks of size
    # `step`; `add_rows` is the remainder handled by one extra thread below.
    add_rows = (sheet.nrows - 1) % self.no_threads
    no_rows = sheet.nrows - 1 - add_rows
    step = int(no_rows / self.no_threads)
    # NOTE(review): if the sheet has fewer data rows than no_threads,
    # `step` is 0 and range() below raises ValueError — confirm callers
    # guarantee nrows - 1 >= no_threads.
    for i in range(1, no_rows + 1, step):
        end_row = i + step
        # Each worker parses rows [i, i + step); thread name is its start row.
        t = Thread(target=self._parse_target,
                   kwargs={
                       "sheet": sheet,
                       "key_col": index_key_map,
                       "start_row": i,
                       "end_row": end_row,
                       "name": str(i)
                   },
                   daemon=True)
        t.start()
        self.threads[str(i)] = t
    # Leftover rows (the remainder) get one dedicated thread.
    if add_rows:
        print("---------------------add_rows-----------------")
        t_add = Thread(target=self._parse_target,
                       kwargs={
                           "sheet": sheet,
                           "key_col": index_key_map,
                           "start_row": no_rows + 1,
                           "end_row": sheet.nrows,
                           "name": "add_rows"
                       },
                       daemon=True)
        t_add.start()
        self.threads[str(sheet.nrows + 1)] = t_add
def get_data_from_worksheet(worksheet: xlrd.sheet.Sheet):
    """Read the full data matrix from a worksheet.

    The first row must be exactly ['с', 'по', 'название', 'описание'];
    otherwise the user supplied the wrong kind of spreadsheet and an empty
    list is returned. On success, the data rows (without the header) are
    returned as a list of lists.
    """
    matrix = [
        [worksheet.cell_value(row_idx, col_idx)
         for col_idx in range(worksheet.ncols)]
        for row_idx in range(worksheet.nrows)
    ]
    # If the header row doesn't match, this isn't the expected table.
    if matrix[0] != ['с', 'по', 'название', 'описание']:
        return []
    return matrix[1:]
def write_cs(self, sheet: xlrd.sheet.Sheet, sheetname):
    """Generate a C# class file named <sheetname>.cs from the sheet's first row.

    Each cell in row 0 is expected to be '<type>.<fieldname>'; the type tag
    is mapped to a C# collection type where applicable and one public field
    is emitted per column. Output goes to self.output_path, optionally
    wrapped in self.namespace.
    """
    output_filename = os.path.normpath('{0}/{1}.cs'.format(self.output_path, sheetname))
    with open(output_filename, 'w', encoding='utf-8') as targetf:
        targetf.write('using System.Collections;\n')
        targetf.write('using System.Collections.Generic;\n\n')
        # Open the namespace block only when a namespace is configured.
        if len(self.namespace) > 0:
            targetf.write('namespace {0}{1}\n\n'.format(self.namespace, "{"))
        targetf.write(' public class {0}{1} \n'.format(sheetname, "{"))
        for r in range(0, sheet.nrows):
            # write class name
            for c in range(0, sheet.ncols):
                # Cell format: '<type>.<fieldname>' — split on the first dot.
                data = sheet.cell_value(rowx=r, colx=c)
                parts = data.partition('.')
                data_type = parts[0]
                data_real = parts[2]
                # Map spreadsheet type tags to C# generic collection types.
                if data_type == Excel2Class.TYPE_IARRAY:
                    data_type = 'List<int>'
                if data_type == Excel2Class.TYPE_FARRAY:
                    data_type = 'List<float>'
                if data_type == Excel2Class.TYPE_DARRAY:
                    data_type = 'List<double>'
                if data_type == Excel2Class.TYPE_SARRAY:
                    data_type = 'List<string>'
                elif data_type == Excel2Class.TYPE_IDIC:
                    data_type = 'Dictionary<int,int>'
                elif data_type == Excel2Class.TYPE_FDIC:
                    data_type = 'Dictionary<int,float>'
                elif data_type == Excel2Class.TYPE_DDIC:
                    data_type = 'Dictionary<int,double>'
                elif data_type == Excel2Class.TYPE_SDIC:
                    data_type = 'Dictionary<int,string>'
                # Unmapped tags are written through as-is.
                targetf.write(' public {0} {1};\n'.format(data_type, data_real))
            # Only the first row holds declarations; stop after it.
            break
        if len(self.namespace) > 0:
            targetf.write(' }')
        targetf.write('\n}')
def write_cs(self, sheet: xlrd.sheet.Sheet, sheetname: str):
    """Generate a C# class file named <sheetname>.cs from the sheet's first row.

    Supports two cell formats: the old '<type>.<fieldname>' and the new
    '<fieldname>:<type>'. The type tag is mapped to a C# primitive or
    generic collection type and one public field is emitted per column.
    Output goes to self.output_path, optionally wrapped in self.namespace.
    """
    output_filename = os.path.normpath('{0}/{1}.cs'.format(self.output_path, sheetname))
    with open(output_filename, 'w', encoding='utf-8') as targetf:
        targetf.write('using System.Collections;\n')
        targetf.write('using System.Collections.Generic;\n\n')
        # Open the namespace block only when a namespace is configured.
        if len(self.namespace) > 0:
            targetf.write('namespace {0}{1}\n\n'.format(self.namespace, "{"))
        targetf.write(' public class {0}{1} \n'.format(sheetname, "{"))
        for r in range(0, sheet.nrows):
            for c in range(0, sheet.ncols):
                # Fixed: the original called `data.strip()` and discarded the
                # result (strings are immutable), so padded cells kept their
                # whitespace. Assign the stripped value back.
                data = sheet.cell_value(rowx=r, colx=c)
                data = data.strip()
                data_type = ''  # field type
                data_real = ''  # field name
                if '.' in data:
                    parts = data.partition('.')  # old style: int.id
                    data_type = parts[0]
                    data_real = parts[2]
                elif ':' in data:
                    parts = data.partition(':')  # new style: id:int
                    data_type = parts[2]
                    data_real = parts[0]
                # Map spreadsheet type tags to C# types.
                if data_type == Excel2Class.TYPE_INT32:
                    data_type = 'int'
                if data_type == Excel2Class.TYPE_INT64:
                    data_type = 'System.Int64'
                if data_type == Excel2Class.TYPE_FLOAT:
                    data_type = 'float'
                if data_type == Excel2Class.TYPE_DOUBLE:
                    data_type = 'double'
                if data_type == Excel2Class.TYPE_Bool:
                    data_type = 'bool'
                if data_type == Excel2Class.TYPE_STRING:
                    data_type = 'string'
                if data_type == Excel2Class.TYPE_IARRAY or data_type == 'arr':
                    data_type = 'List<int>'
                if data_type == Excel2Class.TYPE_FARRAY or data_type == 'farr':
                    data_type = 'List<float>'
                if data_type == Excel2Class.TYPE_DARRAY or data_type == 'darr':
                    data_type = 'List<double>'
                if data_type == Excel2Class.TYPE_SARRAY or data_type == 'sarr':
                    data_type = 'List<string>'
                elif data_type == Excel2Class.TYPE_IDIC:
                    data_type = 'Dictionary<int,int>'
                elif data_type == Excel2Class.TYPE_FDIC:
                    data_type = 'Dictionary<int,float>'
                elif data_type == Excel2Class.TYPE_DDIC:
                    data_type = 'Dictionary<int,double>'
                elif data_type == Excel2Class.TYPE_SDIC:
                    data_type = 'Dictionary<int,string>'
                targetf.write(' public {0} {1};\n'.format(data_type, data_real))
            break  # only scan the first row
        if len(self.namespace) > 0:
            targetf.write(' }')
        targetf.write('\n}')
    print('output game info:', output_filename)
def diff_sheet(self, s1: xlrd.sheet.Sheet, s2: xlrd.sheet.Sheet):
    """
    get sheet diff

    Compares headers (row self._header_row, from column self._start_col)
    by name, pairs up surviving columns, then diffs the data rows from
    self._start_row onward via self.diff_data.

    :param s1: sheet 1
    :param s2: sheet 2
    :return: sheet diff of s1 and s2, or None when nothing changed
    """
    sheet_diff = {
        'added_cols': [],
        'removed_cols': [],
        'modified_data': {},
    }
    modified = False
    # diff header
    headers1 = [
        str(v) for v in s1.row_values(self._header_row, start_colx=self._start_col)
    ]
    headers2 = [
        str(v) for v in s2.row_values(self._header_row, start_colx=self._start_col)
    ]
    # may contain header with same name: map name -> list of column indices
    header_cols1, header_cols2 = dict(), dict()
    l1, l2 = len(headers1), len(headers2)
    for i in range(l1):
        h1 = headers1[i]
        if h1 not in header_cols1.keys():
            header_cols1[h1] = list()
        header_cols1[h1].append(i)
    for i in range(l2):
        h2 = headers2[i]
        if h2 not in header_cols2.keys():
            header_cols2[h2] = list()
        header_cols2[h2].append(i)
    # Partition header names into removed (only s1), kept (both), added (only s2).
    removed_cols, kept_cols, added_cols = get_iter_diff(
        header_cols1.keys(), header_cols2.keys())
    # please do not change col name or switch data frequently!
    if len(removed_cols) > 0:
        sheet_diff['removed_cols'] = [{
            'name': h,
            'indices': header_cols1[h]
        } for h in removed_cols]
        modified = True
    if len(added_cols) > 0:
        sheet_diff['added_cols'] = [{
            'name': h,
            'indices': header_cols2[h]
        } for h in added_cols]
        modified = True
    # For names present in both sheets but with different duplicate counts,
    # the surplus occurrences count as removed/added; keep only the matched
    # prefix on each side. (cols1[l2 - l1:] is the last l1 - l2 entries.)
    for h in kept_cols:
        cols1, cols2 = header_cols1[h], header_cols2[h]
        l1, l2 = len(cols1), len(cols2)
        if l1 > l2:
            sheet_diff['removed_cols'].append({
                'name': h,
                'indices': cols1[l2 - l1:]
            })
            header_cols1[h] = cols1[:l2]
            modified = True
        elif l1 < l2:
            sheet_diff['added_cols'].append({
                'name': h,
                'indices': cols2[l1 - l2:]
            })
            header_cols2[h] = cols2[:l1]
            modified = True
    # map cols: pair each surviving s1 column index with its s2 counterpart
    # (popped from the tail of each per-name index list).
    cols1_header = dict()
    cols1_cols2 = dict()
    for header in header_cols1:
        if header in kept_cols:
            col1_indices = header_cols1[header]
            col2_indices = header_cols2[header]
            while len(col1_indices) > 0 and len(col2_indices) > 0:
                col_idx1 = col1_indices.pop()
                col_idx2 = col2_indices.pop()
                cols1_header[col_idx1] = header
                cols1_cols2[col_idx1] = col_idx2
    indices1 = list(cols1_header.keys())
    indices1.sort()
    # Collect data rows for the matched columns, as strings.
    d1, d2 = [], []
    if self._start_row > s1.nrows:
        LOGGER.warn('Sheet %s: start row %d is larger than num rows %d!' %
                    (s1.name, self._start_row, s1.nrows))
    else:
        for i in range(self._start_row, s1.nrows):
            d1.append([str(s1.cell_value(i, c)) for c in indices1])
    if self._start_row > s2.nrows:
        LOGGER.warn('Sheet %s: start row %d is larger then num rows %d!' %
                    (s2.name, self._start_row, s2.nrows))
    else:
        for i in range(self._start_row, s2.nrows):
            d2.append(
                [str(s2.cell_value(i, cols1_cols2[c])) for c in indices1])
    # diff data
    data_diff = self.diff_data(d1, d2)
    if data_diff:
        modified = True
        # Translate diff_data's positional column numbers back into the
        # sheets' absolute column indices.
        data_diff['modified_cells'] = [
            dict(
                d, **{
                    'src_col': indices1[d['src_col']] + self._start_col,
                    'dest_col': cols1_cols2[indices1[d['dest_col']]] + self._start_col,
                }) for d in data_diff['modified_cells']
        ]
        sheet_diff['modified_data'] = data_diff
    # +1 to all indices if using excel
    if modified and self._use_excel_indices:
        sheet_diff = ExcelDiffer._convert_idx_of_sheet_diff(sheet_diff)
    return sheet_diff if modified else None
def _get_cell_value(sheet: xlrd.sheet.Sheet, row: int, column: int) -> Any:
    """Return the value of cell (row, column), mapping empty strings to None."""
    value = sheet.cell_value(row, column)
    if isinstance(value, str) and value == "":
        return None
    return value