def get_cols(self, lines):
    """Build the header Column objects from the leading ``lines`` of an RDB table.

    RDB-specific layout of the processed lines:

      Line 0:  column names
      Line 1:  column type definitions
      Line 2+: data rows

    Sets ``self.names`` from the first header row, then records ``raw_type``
    and ``type`` on every column in ``self.cols``.

    :param lines: list of table lines
    :returns: None
    :raises ValueError: if fewer than two header rows are available, if the
        name and type rows differ in length, or if any type definition does
        not match ``[num](N|S)``.
    """
    # process_lines() yields lazily; only the first two split rows are pulled.
    split_rows = self.splitter(self.process_lines(lines))
    header_vals_list = []
    for _, row in zip(range(2), split_rows):
        header_vals_list.append(row)

    if len(header_vals_list) != 2:
        raise ValueError('RDB header requires 2 lines')

    self.names, raw_types = header_vals_list

    if len(self.names) != len(raw_types):
        raise ValueError(
            'RDB header mismatch between number of column names and column types'
        )

    # Each definition is an optional width followed by N or S (either case).
    if not all(re.match(r'\d*(N|S)$', x, re.IGNORECASE) for x in raw_types):
        raise ValueError(
            'RDB types definitions do not all match [num](N|S): %s' % raw_types
        )

    self._set_cols_from_names()
    for col, raw_type in zip(self.cols, raw_types):
        col.raw_type = raw_type
        col.type = self.get_col_type(col)
def get_cols(self, lines):
    """Populate the header columns for an RDB-format table.

    The processed ``lines`` are expected to be laid out as:

      Line 0:  RDB column names
      Line 1:  RDB column definitions
      Line 2+: RDB data rows

    :param lines: list of table lines
    :returns: None
    :raises ValueError: when the two header rows are missing, inconsistent in
        length, or contain a definition not of the form ``[num](N|S)``.
    """
    type_pattern = r'\d*(N|S)$'

    header_lines = self.process_lines(lines)  # generator of cleaned lines
    # zip() against range(2) stops after at most two split header rows.
    header_pairs = zip(range(2), self.splitter(header_lines))
    header_vals_list = [values for _index, values in header_pairs]

    n_header = len(header_vals_list)
    if n_header != 2:
        raise ValueError('RDB header requires 2 lines')

    self.names, raw_types = header_vals_list
    if len(self.names) != len(raw_types):
        raise ValueError('RDB header mismatch between number of column names and column types')

    for x in raw_types:
        if not re.match(type_pattern, x, re.IGNORECASE):
            raise ValueError('RDB types definitions do not all match [num](N|S): %s' % raw_types)

    self._set_cols_from_names()

    # Attach the raw definition first; get_col_type() receives the column
    # with raw_type already set.
    for col, raw_type in zip(self.cols, raw_types):
        col.raw_type = raw_type
        col.type = self.get_col_type(col)
def _guess(table, read_kwargs):
    """Try to read the table using various sets of keyword args.

    The first candidate is a copy of the args supplied in the read() call;
    the remaining candidates come from ``_get_guess_kwargs_list()``.  Each
    key/val pair given explicitly in ``read_kwargs`` is merged into the
    candidate; if the candidate already defines that key with a different
    value, the candidate is skipped entirely.

    A candidate is accepted only if it yields more than one column and no
    column name is empty, looks like a number, or starts/ends with a
    delimiter or quote character.  If every candidate fails, the table is
    read once more with the original ``read_kwargs`` and no column-name
    requirements.

    :param table: table input passed through to ``reader.read()``
    :param read_kwargs: dict of keyword args supplied by the caller
    :returns: whatever ``reader.read(table)`` returns for the first
        acceptable candidate
    :raises core.InconsistentTableError: if no candidate, nor the final
        fallback, can read the table
    """
    # Characters that must not begin or end a guessed column name.
    bads = [" ", ",", "|", "\t", "'", '"']

    for guess_kwargs in [read_kwargs.copy()] + _get_guess_kwargs_list():
        # Decide whether the guess conflicts with the user's args BEFORE
        # merging.  (A ``continue`` placed inside the merge loop would only
        # advance that inner loop and the conflicting guess would still run.)
        conflicts = any(key in guess_kwargs and val != guess_kwargs[key]
                        for key, val in read_kwargs.items())
        if conflicts:
            continue

        # Equivalent to guess_kwargs.update(read_kwargs) now that no key
        # conflicts: only the missing keys are added.
        for key, val in read_kwargs.items():
            if key not in guess_kwargs:
                guess_kwargs[key] = val

        try:
            reader = get_reader(**guess_kwargs)
            dat = reader.read(table)

            # When guessing impose additional requirements on column names
            # and number of cols.
            if (len(reader.cols) <= 1 or
                    any(_is_number(col.name) or
                        len(col.name) == 0 or
                        col.name[0] in bads or
                        col.name[-1] in bads for col in reader.cols)):
                raise ValueError
            return dat
        except (core.InconsistentTableError, ValueError, TypeError):
            # This guess did not work out; move on to the next one.
            pass

    # Failed all guesses, try the original read_kwargs without column
    # requirements.
    try:
        reader = get_reader(**read_kwargs)
        return reader.read(table)
    except (core.InconsistentTableError, ValueError):
        raise core.InconsistentTableError('Unable to read table with guess=True.')
def _guess(table, read_kwargs):
    """Try to read the table using various sets of keyword args.

    The first candidate is a copy of the args supplied in the read() call;
    the remaining candidates come from ``_get_guess_kwargs_list()``.  Each
    key/val pair given explicitly in ``read_kwargs`` is merged into the
    candidate; if the candidate already defines that key with a different
    value, the candidate is skipped entirely.

    A candidate is accepted only if it yields more than one column and no
    column name is empty, looks like a number, or starts/ends with a
    delimiter or quote character.  If every candidate fails, the table is
    read once more with the original ``read_kwargs`` and no column-name
    requirements.  If that fails too, the raised error lists every set of
    keyword args that was tried.

    :param table: table input passed through to ``reader.read()``
    :param read_kwargs: dict of keyword args supplied by the caller
    :returns: whatever ``reader.read(table)`` returns for the first
        acceptable candidate
    :raises core.InconsistentTableError: if no candidate, nor the final
        fallback, can read the table
    """
    # Keep a trace of all failed guesses kwarg
    failed_kwargs = []

    # Characters that must not begin or end a guessed column name.
    bads = [" ", ",", "|", "\t", "'", '"']

    # First try guessing
    for guess_kwargs in [read_kwargs.copy()] + _get_guess_kwargs_list():
        guess_kwargs_ok = True  # guess_kwargs are consistent with user_kwargs?
        for key, val in read_kwargs.items():
            # Do guess_kwargs.update(read_kwargs) except that if guess_args
            # has a conflicting key/val pair then skip this guess entirely.
            if key not in guess_kwargs:
                guess_kwargs[key] = val
            elif val != guess_kwargs[key]:
                guess_kwargs_ok = False
                break

        if not guess_kwargs_ok:
            # User-supplied kwarg is inconsistent with the guess-supplied
            # kwarg, e.g. user supplies delimiter="|" but the guess wants to
            # try delimiter=" ", so skip the guess entirely.
            continue

        try:
            reader = get_reader(**guess_kwargs)
            dat = reader.read(table)

            # When guessing impose additional requirements on column names
            # and number of cols.
            if (len(reader.cols) <= 1 or
                    any(_is_number(col.name) or
                        len(col.name) == 0 or
                        col.name[0] in bads or
                        col.name[-1] in bads for col in reader.cols)):
                raise ValueError
            return dat
        except (core.InconsistentTableError, ValueError, TypeError):
            failed_kwargs.append(guess_kwargs)

    # Failed all guesses, try the original read_kwargs without column
    # requirements.
    try:
        reader = get_reader(**read_kwargs)
        return reader.read(table)
    except (core.InconsistentTableError, ValueError):
        failed_kwargs.append(read_kwargs)

    # Build a report with one line per attempted set of keyword args.
    lines = ['\nERROR: Unable to guess table format with the guesses listed below:']
    for kwargs in failed_kwargs:
        reader_cls = kwargs.get('Reader', basic.Basic)
        # Use the class name directly; fall back to repr() so that building
        # the report can never mask the real failure with an AttributeError.
        reader_name = getattr(reader_cls, '__name__', repr(reader_cls))
        keys_vals = ['Reader:' + reader_name]
        keys_vals.extend('%s: %s' % (key, repr(kwargs[key]))
                         for key in sorted(kwargs)
                         if key not in ('Reader', 'Outputter'))
        lines.append(' '.join(keys_vals))
    lines.append('ERROR: Unable to guess table format with the guesses listed above.')
    lines.append('Check the table and try with guess=False '
                 'and appropriate arguments to read()')
    raise core.InconsistentTableError('\n'.join(lines))