def __LoadSheet(
            self, sheet: xlrd.sheet.Sheet
    ) -> Tuple[PaymentsData, PaymentsDataErrors]:
        """Collect the payments (and the related errors) found in a worksheet.

        The first row is treated as a header and is never read. Every other
        row supplies an email, a user and an expiration value, taken from the
        columns named in the bot configuration. Rows whose user is not valid
        are ignored; the remaining ones are handed to ``__AddPayment``.

        Args:
            sheet: Worksheet to read.

        Returns:
            The payments container and the errors container that were passed
            to ``__AddPayment`` for every accepted row.
        """
        payments_data = PaymentsData(self.config)
        payments_data_err = PaymentsDataErrors()

        # Translate the configured columns into indexes
        email_col_idx = self._ColumnToIndex(
            self.config.GetValue(BotConfigTypes.PAYMENT_EMAIL_COL))
        user_col_idx = self._ColumnToIndex(
            self.config.GetValue(BotConfigTypes.PAYMENT_USER_COL))
        expiration_col_idx = self._ColumnToIndex(
            self.config.GetValue(BotConfigTypes.PAYMENT_EXPIRATION_COL))

        # Starting at 1 leaves the header row out
        for row_idx in range(1, sheet.nrows):
            email = str(sheet.cell_value(row_idx, email_col_idx)).strip()
            user = User.FromString(
                self.config,
                str(sheet.cell_value(row_idx, user_col_idx)).strip())
            expiration = sheet.cell_value(row_idx, expiration_col_idx)

            # Invalid users contribute nothing
            if not user.IsValid():
                continue

            # The payment is registered with the row index plus one
            self.__AddPayment(row_idx + 1, payments_data, payments_data_err,
                              email, user, expiration)

        return payments_data, payments_data_err
Exemplo n.º 2
0
def _read_sheet(sheet: xlrd.sheet.Sheet, exercise_name: str) -> List[Record]:
    """
    Collect one record per filled-in candidate slot of the given sheet.

    The sheet holds five candidate slots, two rows apart, starting at row
    index 5. The evaluator name (cell 5, 1) and the team name (cell 5, 2) are
    always read from row index 5; the candidate name comes from column 4 of
    the slot's own row. Slots without a candidate name are skipped.

    Raises Exception when a candidate is present but the team name or the
    evaluator name is empty.
    """
    candidate_count = 5
    first_candidate_row = 5
    rows_per_candidate = 2
    # Evaluation keyword -> column passed to _read_attribute, in call order
    attribute_columns = (
        ("learning_ability", 5),
        ("personal", 10),
        ("interpersonal", 15),
        ("leader", 20),
        ("summary", 25),
    )

    records = []
    for slot in range(candidate_count):
        row = first_candidate_row + slot * rows_per_candidate
        evaluator_name = sheet.cell_value(5, 1)
        candidate_name = sheet.cell_value(row, 4).strip()
        team_name = _sanitize_team(sheet.cell_value(5, 2))

        if not candidate_name:
            continue
        # From here on a candidate name is known to be present
        if not team_name:
            raise Exception("Error: No team name")
        if not evaluator_name:
            raise Exception("Error: No evaluator name")

        attributes = {
            keyword: _read_attribute(sheet, row, column)
            for keyword, column in attribute_columns
        }
        evaluation = Evaluation(evaluator_name=evaluator_name,
                                exercise_name=exercise_name,
                                **attributes)
        records.append(Record(candidate_name, team_name, evaluation))
    return records
Exemplo n.º 3
0
def _parse_oakland_sheet(sheet: xlrd.sheet.Sheet, datemode: int):
    """Convert one sheet of the Oakland excel file into a date-indexed dataframe.

    Parameters
    ----------
    sheet
        The Sheet object from the Book of the Oakland container moves.

    datemode
        The Book's datemode value (usually 0 or 1). It is accepted but not
        used by this function.

    Returns
    -------
    pandas.DataFrame
        One row per month found in the sheet, indexed by date. The
        'Total Full' and 'Total Empty' columns are dropped and the remaining
        columns are renamed ('Import Full' -> 'Full Imports', ...,
        'Grand Total' -> 'Total TEUs').

    Raises
    ------
    ExcelParsingError
        If a parsed date is earlier than 1990-01-01 or later than the current
        time.
    """
    # The layout check also yields the column index of every data column
    keys = _verify_oakland_sheet(sheet)

    row_count = len(sheet.col(0))
    earliest_allowed = pd.Timestamp(1990, 1, 1)
    timestamps = []
    columns = {name: [] for name in keys}

    # Data starts at row index 3; column 0 holds the year, column 1 the month
    for irow in range(3, row_count):
        year = sheet.cell_value(irow, 0)
        month = sheet.cell_value(irow, 1)
        if isinstance(month, str) and month == 'Annual Total':
            # Yearly summary rows are not monthly data
            continue

        this_date = pd.to_datetime('{} {:.0f}'.format(month, year))
        too_early = this_date < earliest_allowed
        if too_early or this_date > pd.Timestamp.now():
            # Guards against bad date parsing, e.g. after an unexpected
            # change in the workbook's format.
            raise ExcelParsingError('Unexpected date parsed (pre-1990)')

        timestamps.append(this_date)
        for name, icol in keys.items():
            cell = sheet.cell_value(irow, icol)
            is_blank = isinstance(cell, str) and len(cell) == 0
            # Blank text cells become NaN so the column stays numeric
            columns[name].append(np.nan if is_blank else cell)

    frame = pd.DataFrame(columns, index=pd.DatetimeIndex(timestamps))
    frame = frame.drop(columns=['Total Full', 'Total Empty'])
    return frame.rename(columns={
        'Import Full': 'Full Imports',
        'Export Full': 'Full Exports',
        'Import Empty': 'Empty Imports',
        'Export Empty': 'Empty Exports',
        'Grand Total': 'Total TEUs'
    })
Exemplo n.º 4
0
    def _get_sheet_data(self, sheet: xlrd.sheet.Sheet,
                        col: int) -> (str, str, str, str):
        """Read the description of one column from the sheet's header rows.

        Rows 0, 1 and 2 of the column give the proto type, the defined type
        and the field name. Row 4 gives the comment, which is converted to
        text, stripped of line feeds and carriage returns, and prefixed with
        ' @' when it is not empty.

        :return: (proto_type, define_type, name, comment)
        """
        proto_type, define_type, name = (
            sheet.cell_value(header_row, col) for header_row in (0, 1, 2))

        # Drop every '\n' and '\r' so the comment fits on one line
        line_breaks = str.maketrans('', '', '\n\r')
        comment = str(sheet.cell_value(4, col)).translate(line_breaks)
        if comment:
            comment = ' @' + comment

        return proto_type, define_type, name, comment
Exemplo n.º 5
0
    def _parse_target(self, sheet: xlrd.sheet.Sheet, key_col: dict,
                      start_row: int, end_row: int, name: str):
        '''
        Read rows [start_row, end_row) of the sheet into dicts and append
        them to self.result, then mark this worker as finished in
        self.thread_state.

        The lock is always taken through a ``with`` block, so it is released
        even when an error happens while it is held; the error handler can
        therefore take it again without blocking forever.

        :param sheet: sheet to be parsed
        :param key_col: map key_name to column order ex: {"ip": 1, "username": 2}
        :param start_row: set to 1 for single thread
        :param end_row: set to sheet.nrows for single thread
        :param name: key under which completion is recorded in self.thread_state
        :return: (True, number of rows processed) on success,
                 (False, the exception that was raised) on failure
        '''
        print(
            "thread parse target started. start_row: {}, end_row: {}, name: {}"
            .format(start_row, end_row, name))
        try:
            # One dict per row, holding only the requested columns
            thread_result = [
                {key: sheet.cell_value(row_no, col)
                 for key, col in key_col.items()}
                for row_no in range(start_row, end_row)
            ]

            with self.result_lock:
                self.result += thread_result
                print("thread {} result:\n{}\n\n".format(name, thread_result))
            self.thread_state[name] = True
            return True, end_row - start_row
        except Exception as e:
            # The worker is reported as finished even though it failed
            with self.result_lock:
                self.thread_state[name] = True
            return False, e
Exemplo n.º 6
0
def _verify_oakland_sheet(sheet: xlrd.sheet.Sheet):
    """Check that a sheet in the Oakland container workbook is laid out as expected.

    Row index 2 must contain, in columns 0-8: 'Year', 'Month', 'Import Full',
    'Export Full', 'Total Full', 'Import Empty', 'Export Empty', 'Total Empty'
    and 'Grand Total'. Runs of whitespace in a cell (e.g. newlines) are
    treated as a single space before comparing.

    Returns a dict mapping each data column name (everything except 'Year'
    and 'Month') to its column index.

    Raises `ExcelParsingError` if not; the message lists every mismatching
    cell as '<cell> != <expected>'.
    """
    keys = dict()
    reasons = []
    # One letter per checked column index (0-8), so that a mismatch in the
    # last column can be reported as well.
    _cols = ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I')
    # row index, column index, expected contents, whether this column is a column name for the dataframe
    checks = [(2, 0, 'Year', False), (2, 1, 'Month', False),
              (2, 2, 'Import Full', True), (2, 3, 'Export Full', True),
              (2, 4, 'Total Full', True), (2, 5, 'Import Empty', True),
              (2, 6, 'Export Empty', True), (2, 7, 'Total Empty', True),
              (2, 8, 'Grand Total', True)]
    for r, c, val, is_key in checks:
        raw = sheet.cell_value(r, c)
        # A non-text cell (a number, for instance) can never match, but it
        # must be reported as a mismatch rather than crash re.sub.
        text = raw if isinstance(raw, str) else str(raw)
        # Replace any whitespace with a single space (e.g. newlines)
        sheet_val = re.sub(r'\s+', ' ', text)
        if sheet_val != val:
            reasons.append('{}{} != {}'.format(_cols[c], r + 1, val))
        elif is_key:
            keys[sheet_val] = c

    if reasons:
        msg = 'Unexpected sheet format ({})'.format(', '.join(reasons))
        raise ExcelParsingError(msg)
    return keys
Exemplo n.º 7
0
def _get_sheet_data(sh: xlrd.sheet.Sheet, datemode: int) -> pd.DataFrame:
    """Turn a Microsoft Excel sheet into a Pandas DataFrame

    The maturity date is read from cell (0, 1): a float is decoded as an
    Excel date, anything else is parsed as text in ``%d/%m/%Y`` format. The
    sheet name is split at its last space into a bond alias and a series; the
    alias (hyphens removed, lower-cased) is looked up in ``BONDS["aliases"]``.
    The non-empty cells of row 1 are the column names, and every row from
    index 2 on whose second cell is neither of cell type 0 nor an empty
    string becomes one record.

    Args:
        sh: the sheet to be processed
        datemode: integer used to decode Excel dates; also passed to
            _get_row_data()

    Returns:
        pd.DataFrame: the sheet's records plus the constant columns
            MaturityDate, BondCode, BondName and BondSeries
    """
    raw_maturity = sh.cell_value(0, 1)
    if not isinstance(raw_maturity, float):
        maturity = datetime.datetime.strptime(raw_maturity, "%d/%m/%Y")
    else:
        date_parts = xlrd.xldate_as_tuple(raw_maturity, datemode)
        maturity = datetime.datetime(*date_parts)

    bond_alias, series = sh.name.rsplit(" ", maxsplit=1)
    # Aliases are stored without hyphens and in lower case
    bond_name = BONDS["aliases"][bond_alias.replace("-", "").lower()]

    header = tuple(cell.value for cell in sh.row(1) if cell.value != "")

    # Rows 0 and 1 hold the maturity date and the header
    body_rows = itertools.islice(sh.get_rows(), 2, None)

    def _records():
        for row in body_rows:
            second_cell = row[1]
            if second_cell.ctype != 0 and second_cell.value != "":
                yield _get_row_data(row, datemode)

    frame = pd.DataFrame.from_records(_records(), columns=header)
    return frame.assign(
        MaturityDate=maturity,
        BondCode=sh.name,
        BondName=bond_name,
        BondSeries=series,
    )
Exemplo n.º 8
0
def find_in_sheet(val, sheet: xlrd.sheet.Sheet) -> Tuple[int, int]:
    """Locate the first cell equal to ``val``, scanning row by row, left to right.

    Returns the (row, col) position of that cell and raises LookupError when
    no cell of the sheet matches.
    """
    positions = ((row, col)
                 for row in range(sheet.nrows)
                 for col in range(sheet.ncols))
    for position in positions:
        if sheet.cell_value(*position) == val:
            return position
    raise LookupError(f'Value {val} not found in sheet {sheet}')
Exemplo n.º 9
0
def get_table_name(sheet_data: xlrd.sheet.Sheet):
    """
    Read the table name straight from the sheet: it is the first line of the
    text stored in the cell at the first row, first column.
    :param sheet_data: sheet whose top-left cell holds the table name
    :return: the first line of that cell's text
    """
    top_left_lines = sheet_data.cell_value(0, 0).splitlines()
    return top_left_lines[0]
Exemplo n.º 10
0
    def _extract_data(sheet: xlrd.sheet.Sheet) -> DataTable:
        """
        Copy every cell value of an xlrd sheet into a plain list of rows,
        each row being the list of its cell values from left to right.
        """

        cols = sheet.ncols
        rows = sheet.nrows

        table = []
        for row_idx in range(rows):
            row_values = []
            for col_idx in range(cols):
                row_values.append(sheet.cell_value(row_idx, col_idx))
            table.append(row_values)
        return table
Exemplo n.º 11
0
def transform_data(xlrd_sheet: xlrd.sheet.Sheet) -> io.StringIO:
    """
    Function to transform the sheet into json and store the json
    in a string buffer. The first row of the sheet supplies the headers;
    every following row is written as one object mapping each header to the
    value of the cell in that column. Returns the string buffer.

    Parameters
    ----------
    xlrd_sheet: xlrd.sheet.Sheet
        Sheet to convert. Its ``nrows``/``ncols`` attributes bound the cells
        that are read.

    Returns
    -------
    string_buffer: StringIO
        Buffer holding the json text of the list of row objects. A sheet
        without data rows (or without any rows at all) yields ``[]``.

    Notes
    -----
    When two columns share the same header, the right-most one wins, because
    each row is stored as a dict keyed by header.
    """
    row_count = xlrd_sheet.nrows
    col_count = xlrd_sheet.ncols

    # An empty sheet has no header row to read
    headers = []
    if row_count:
        headers = [xlrd_sheet.cell_value(0, col_idx)
                   for col_idx in range(col_count)]

    # cell_value takes the row first, then the column; row 0 is the header
    data = [
        {header: xlrd_sheet.cell_value(row_idx, col_idx)
         for col_idx, header in enumerate(headers)}
        for row_idx in range(1, row_count)
    ]

    return io.StringIO(json.dumps(data))
Exemplo n.º 12
0
    def _parse_sheet(self, sheet: xlrd.sheet.Sheet):
        """Read the columns whose header (row 0) is listed in self.keys.

        Returns one dict per data row (rows 1 and below), mapping each
        recognised header to the cell value in its column. When no header of
        the sheet is in self.keys the result is an empty list. If a header
        appears more than once, the right-most column is used.
        """
        wanted_columns = {}
        for col_no in range(sheet.ncols):
            header = sheet.cell_value(0, col_no)
            if header in self.keys:
                wanted_columns[header] = col_no

        if not wanted_columns:
            return []

        return [
            {header: sheet.cell_value(row_no, col_no)
             for header, col_no in wanted_columns.items()}
            for row_no in range(1, sheet.nrows)
        ]
Exemplo n.º 13
0
    def _get_next(self,
                  sheet: xlrd.sheet.Sheet,
                  col: int,
                  max: int = -1) -> int:
        """Return the index of the next column after ``col`` that is not skipped.

        Columns are examined from ``col + 1`` on; a column is skipped when
        ``self._is_skip_col`` accepts the value of its cell in row 0. The
        search stops at ``max`` (``sheet.ncols`` when ``max`` is -1), and that
        limit is returned when no column qualifies.
        """
        limit = sheet.ncols if max == -1 else max

        candidate = col + 1
        while candidate < limit and self._is_skip_col(
                sheet.cell_value(0, candidate)):
            candidate += 1

        return candidate if candidate < limit else limit
Exemplo n.º 14
0
    def _parse_sheet(self, sheet: xlrd.sheet.Sheet):
        """Parse the data rows of the sheet, using self.no_threads workers.

        The columns to read are those whose header (row 0) is in self.keys.
        With a single thread, ``_parse_target`` is called directly for rows
        1..nrows. Otherwise the data rows are split into equal chunks, one
        daemon thread per chunk; the rows left over after the equal split are
        handled by one extra thread. Started threads are stored in
        self.threads (keyed by the chunk's first row as text, and by
        ``str(sheet.nrows + 1)`` for the extra thread); they are not joined
        here.

        A sheet with fewer data rows than threads gets no equal-sized chunks:
        all of its rows go to the extra thread, and a sheet without data rows
        starts no thread at all.
        """
        index_key_map = {}
        for col in range(sheet.ncols):
            key = sheet.cell_value(0, col)
            if key in self.keys:
                index_key_map[key] = col

        if self.no_threads == 1:
            self._parse_target(sheet=sheet,
                               key_col=index_key_map,
                               start_row=1,
                               end_row=sheet.nrows,
                               name="main")
            return

        # Row 0 is the header; never count a negative number of data rows
        data_rows = max(sheet.nrows - 1, 0)
        add_rows = data_rows % self.no_threads
        no_rows = data_rows - add_rows
        step = no_rows // self.no_threads

        # step is 0 when there are fewer data rows than threads; range()
        # rejects a zero step, so the equal-sized chunks are skipped then.
        if step > 0:
            for i in range(1, no_rows + 1, step):
                end_row = i + step
                t = Thread(target=self._parse_target,
                           kwargs={
                               "sheet": sheet,
                               "key_col": index_key_map,
                               "start_row": i,
                               "end_row": end_row,
                               "name": str(i)
                           },
                           daemon=True)
                t.start()
                self.threads[str(i)] = t

        if add_rows:
            print("---------------------add_rows-----------------")
            t_add = Thread(target=self._parse_target,
                           kwargs={
                               "sheet": sheet,
                               "key_col": index_key_map,
                               "start_row": no_rows + 1,
                               "end_row": sheet.nrows,
                               "name": "add_rows"
                           },
                           daemon=True)
            t_add.start()
            self.threads[str(sheet.nrows + 1)] = t_add
Exemplo n.º 15
0
def get_data_from_worksheet(worksheet: xlrd.sheet.Sheet):
    """Return the data rows of the worksheet as a matrix (list of rows).

    Every cell of the sheet is read; the first row must be exactly the
    expected header, otherwise an empty list is returned. A sheet without
    any rows is treated the same way. The header row itself is not part of
    the result.
    """
    data = [
        [worksheet.cell_value(row_idx, col_idx)
         for col_idx in range(worksheet.ncols)]
        for row_idx in range(worksheet.nrows)
    ]
    # If the sheet does not start with these elements, the user is trying to
    # use a different table (an empty sheet has no header row at all).
    if not data or data[0] != ['с', 'по', 'название', 'описание']:
        return []
    return data[1:]
Exemplo n.º 16
0
    def write_cs(self, sheet: xlrd.sheet.Sheet, sheetname):
        """
        Write ``<output_path>/<sheetname>.cs``: a C# class with one public
        field per cell of the sheet's first row.

        Each cell is split at its first '.' into a type tag (before the dot)
        and a field name (after it). Array and dictionary tags known to
        Excel2Class are replaced by the matching C# generic type; any other
        tag is written unchanged. The class is wrapped in a namespace block
        when self.namespace is not empty.
        """
        output_filename = os.path.normpath('{0}/{1}.cs'.format(self.output_path, sheetname))

        with open(output_filename, 'w', encoding='utf-8') as targetf:
            targetf.write('using System.Collections;\n')
            targetf.write('using System.Collections.Generic;\n\n')
            if len(self.namespace) > 0:
                targetf.write('namespace {0}{1}\n\n'.format(self.namespace, "{"))
            targetf.write('    public class {0}{1} \n'.format(sheetname, "{"))

            # Only the first row describes the fields
            if sheet.nrows > 0:
                # Each of these is tested in turn, one after the other
                sequential_rules = (
                    (Excel2Class.TYPE_IARRAY, 'List<int>'),
                    (Excel2Class.TYPE_FARRAY, 'List<float>'),
                    (Excel2Class.TYPE_DARRAY, 'List<double>'),
                )
                # Of these, only the first match is applied
                exclusive_rules = (
                    (Excel2Class.TYPE_SARRAY, 'List<string>'),
                    (Excel2Class.TYPE_IDIC, 'Dictionary<int,int>'),
                    (Excel2Class.TYPE_FDIC, 'Dictionary<int,float>'),
                    (Excel2Class.TYPE_DDIC, 'Dictionary<int,double>'),
                    (Excel2Class.TYPE_SDIC, 'Dictionary<int,string>'),
                )
                for column in range(sheet.ncols):
                    cell_text = sheet.cell_value(rowx=0, colx=column)
                    data_type, _, data_real = cell_text.partition('.')

                    for tag, cs_type in sequential_rules:
                        if data_type == tag:
                            data_type = cs_type
                    for tag, cs_type in exclusive_rules:
                        if data_type == tag:
                            data_type = cs_type
                            break

                    targetf.write('        public {0} {1};\n'.format(data_type, data_real))

            if len(self.namespace) > 0:
                targetf.write('    }')
            targetf.write('\n}')
Exemplo n.º 17
0
    def write_cs(self, sheet: xlrd.sheet.Sheet, sheetname: str):
        """
        Write ``<output_path>/<sheetname>.cs``: a C# class with one public
        field per cell of the sheet's first row.

        A cell describes a field either as ``type.name`` (old style, e.g.
        ``int.id``) or as ``name:type`` (new style, e.g. ``id:int``); a '.'
        takes precedence when both separators are present. Surrounding
        whitespace of the cell is ignored. Type tags known to Excel2Class
        (plus the literal tags 'arr', 'farr', 'darr' and 'sarr') are replaced
        by the matching C# type; any other tag is written unchanged. A cell
        without a separator produces an empty type and name.

        The class is wrapped in a namespace block when self.namespace is not
        empty, and the generated path is printed at the end.
        """
        output_filename = os.path.normpath('{0}/{1}.cs'.format(self.output_path, sheetname))

        with open(output_filename, 'w', encoding='utf-8') as targetf:
            targetf.write('using System.Collections;\n')
            targetf.write('using System.Collections.Generic;\n\n')
            if len(self.namespace) > 0:
                targetf.write('namespace {0}{1}\n\n'.format(self.namespace, "{"))
            targetf.write('    public class {0}{1} \n'.format(sheetname, "{"))

            # only scan the first row
            if sheet.nrows > 0:
                # Each of these is tested in turn, one after the other
                sequential_rules = (
                    ((Excel2Class.TYPE_INT32,), 'int'),
                    ((Excel2Class.TYPE_INT64,), 'System.Int64'),
                    ((Excel2Class.TYPE_FLOAT,), 'float'),
                    ((Excel2Class.TYPE_DOUBLE,), 'double'),
                    ((Excel2Class.TYPE_Bool,), 'bool'),
                    ((Excel2Class.TYPE_STRING,), 'string'),
                    ((Excel2Class.TYPE_IARRAY, 'arr'), 'List<int>'),
                    ((Excel2Class.TYPE_FARRAY, 'farr'), 'List<float>'),
                    ((Excel2Class.TYPE_DARRAY, 'darr'), 'List<double>'),
                )
                # Of these, only the first match is applied
                exclusive_rules = (
                    ((Excel2Class.TYPE_SARRAY, 'sarr'), 'List<string>'),
                    ((Excel2Class.TYPE_IDIC,), 'Dictionary<int,int>'),
                    ((Excel2Class.TYPE_FDIC,), 'Dictionary<int,float>'),
                    ((Excel2Class.TYPE_DDIC,), 'Dictionary<int,double>'),
                    ((Excel2Class.TYPE_SDIC,), 'Dictionary<int,string>'),
                )
                for c in range(0, sheet.ncols):
                    # str.strip() returns a new string, so keep its result;
                    # otherwise padded cells never match a type tag.
                    data = sheet.cell_value(rowx=0, colx=c).strip()
                    data_type = ''  # field type
                    data_real = ''  # field name
                    if '.' in data:
                        # old style: int.id
                        data_type, _, data_real = data.partition('.')
                    elif ':' in data:
                        # new style: id:int
                        data_real, _, data_type = data.partition(':')

                    for tags, cs_type in sequential_rules:
                        if data_type in tags:
                            data_type = cs_type
                    for tags, cs_type in exclusive_rules:
                        if data_type in tags:
                            data_type = cs_type
                            break

                    targetf.write('        public {0} {1};\n'.format(data_type, data_real))

            if len(self.namespace) > 0:
                targetf.write('    }')
            targetf.write('\n}')
            print('output game info:', output_filename)
Exemplo n.º 18
0
 def diff_sheet(self, s1: xlrd.sheet.Sheet, s2: xlrd.sheet.Sheet):
     """
     get sheet diff

     Columns of the two sheets are matched by header text (the header row is
     self._header_row, read from column self._start_col on). Headers found in
     only one sheet are reported as removed/added columns; when a header
     occurs more often in one sheet than in the other, its surplus trailing
     occurrences are reported the same way. The data of the matched columns,
     from row self._start_row on, is then compared with self.diff_data.

     Header positions are counted from self._start_col, so the offset is
     added back whenever a cell is read from a sheet.

     :param s1: sheet 1
     :param s2: sheet 2
     :return: sheet diff of s1 and s2 (a dict with the keys 'added_cols',
              'removed_cols' and 'modified_data'), or None when nothing
              differs
     """
     sheet_diff = {
         'added_cols': [],
         'removed_cols': [],
         'modified_data': {},
     }
     modified = False
     # diff header: index i in these lists is column self._start_col + i
     headers1 = [
         str(v) for v in s1.row_values(self._header_row,
                                       start_colx=self._start_col)
     ]
     headers2 = [
         str(v) for v in s2.row_values(self._header_row,
                                       start_colx=self._start_col)
     ]
     # may contain header with same name, so map header -> list of positions
     header_cols1, header_cols2 = dict(), dict()
     l1, l2 = len(headers1), len(headers2)
     for i in range(l1):
         h1 = headers1[i]
         if h1 not in header_cols1.keys():
             header_cols1[h1] = list()
         header_cols1[h1].append(i)
     for i in range(l2):
         h2 = headers2[i]
         if h2 not in header_cols2.keys():
             header_cols2[h2] = list()
         header_cols2[h2].append(i)
     # presumably (only in s1, in both, only in s2) -- verify against
     # get_iter_diff
     removed_cols, kept_cols, added_cols = get_iter_diff(
         header_cols1.keys(), header_cols2.keys())
     # please do not change col name or switch data frequently!
     # NOTE(review): the 'indices' reported below are positions relative to
     # self._start_col, unlike the absolute columns of 'modified_cells'.
     if len(removed_cols) > 0:
         sheet_diff['removed_cols'] = [{
             'name': h,
             'indices': header_cols1[h]
         } for h in removed_cols]
         modified = True
     if len(added_cols) > 0:
         sheet_diff['added_cols'] = [{
             'name': h,
             'indices': header_cols2[h]
         } for h in added_cols]
         modified = True
     # a shared header that occurs a different number of times: the surplus
     # trailing occurrences count as removed (s1) or added (s2)
     for h in kept_cols:
         cols1, cols2 = header_cols1[h], header_cols2[h]
         l1, l2 = len(cols1), len(cols2)
         if l1 > l2:
             sheet_diff['removed_cols'].append({
                 'name': h,
                 'indices': cols1[l2 - l1:]
             })
             header_cols1[h] = cols1[:l2]
             modified = True
         elif l1 < l2:
             sheet_diff['added_cols'].append({
                 'name': h,
                 'indices': cols2[l1 - l2:]
             })
             header_cols2[h] = cols2[:l1]
             modified = True
     # map cols: pair the occurrences of every shared header, last with last
     cols1_header = dict()
     cols1_cols2 = dict()
     for header in header_cols1:
         if header in kept_cols:
             col1_indices = header_cols1[header]
             col2_indices = header_cols2[header]
             while len(col1_indices) > 0 and len(col2_indices) > 0:
                 col_idx1 = col1_indices.pop()
                 col_idx2 = col2_indices.pop()
                 cols1_header[col_idx1] = header
                 cols1_cols2[col_idx1] = col_idx2
     indices1 = list(cols1_header.keys())
     indices1.sort()
     # collect the matched columns of both sheets, in the column order of s1;
     # the stored positions are relative, hence "+ self._start_col"
     d1, d2 = [], []
     if self._start_row > s1.nrows:
         LOGGER.warn('Sheet %s: start row %d is larger than num rows %d!' %
                     (s1.name, self._start_row, s1.nrows))
     else:
         for i in range(self._start_row, s1.nrows):
             d1.append([
                 str(s1.cell_value(i, c + self._start_col))
                 for c in indices1
             ])
     if self._start_row > s2.nrows:
         LOGGER.warn('Sheet %s: start row %d is larger then num rows %d!' %
                     (s2.name, self._start_row, s2.nrows))
     else:
         for i in range(self._start_row, s2.nrows):
             d2.append([
                 str(s2.cell_value(i, cols1_cols2[c] + self._start_col))
                 for c in indices1
             ])
     # diff data
     data_diff = self.diff_data(d1, d2)
     if data_diff:
         modified = True
         # translate positions inside d1/d2 back to sheet columns
         data_diff['modified_cells'] = [
             dict(
                 d, **{
                     'src_col':
                     indices1[d['src_col']] + self._start_col,
                     'dest_col':
                     cols1_cols2[indices1[d['dest_col']]] + self._start_col,
                 }) for d in data_diff['modified_cells']
         ]
         sheet_diff['modified_data'] = data_diff
     # +1 to all indices if using excel
     if modified and self._use_excel_indices:
         sheet_diff = ExcelDiffer._convert_idx_of_sheet_diff(sheet_diff)
     return sheet_diff if modified else None
Exemplo n.º 19
0
def _get_cell_value(sheet: xlrd.sheet.Sheet, row: int, column: int) -> Any:
    """Return the value of the cell at (row, column), with an empty string
    reported as None. Every other value is returned unchanged."""
    value = sheet.cell_value(row, column)
    is_empty_text = isinstance(value, str) and not value
    return None if is_empty_text else value