def make_table(
    header=None,
    rows=None,
    row_order=None,
    digits=4,
    space=4,
    title="",
    max_width=1e100,
    row_ids=None,
    legend="",
    missing_data="",
    column_templates=None,
    dtype=None,
    data_frame=None,
    format="simple",
):
    """Build a table from in-memory data.

    Every argument is forwarded, by keyword and unmodified, to ``_Table``.

    Parameters
    ----------
    header
        column headings
    rows
        a 2D dict, list or tuple. If a dict, it must have column headings
        as top level keys, and common row labels as keys in each column.
    row_order
        the order in which rows will be pulled from the 2D dict
    digits
        floating point resolution
    space
        number of spaces between columns or a string
    title
        as implied
    max_width
        maximum column width for printing
    row_ids
        if True, the 0'th column is used as row identifiers and keys
        for slicing.
    legend
        table legend
    missing_data
        passed through to ``_Table`` unchanged
    column_templates
        dict of column headings or a function that will handle the
        formatting.
    dtype
        optional numpy array typecode.
    data_frame
        a pandas DataFrame, supersedes header/rows
    format
        output format when using str(Table)

    Returns
    -------
    The ``_Table`` instance constructed from the arguments.
    """
    table_options = dict(
        header=header,
        rows=rows,
        digits=digits,
        row_order=row_order,
        title=title,
        dtype=dtype,
        column_templates=column_templates,
        space=space,
        missing_data=missing_data,
        max_width=max_width,
        row_ids=row_ids,
        legend=legend,
        data_frame=data_frame,
        format=format,
    )
    return _Table(**table_options)
def load_table(
    filename,
    sep=None,
    reader=None,
    digits=4,
    space=4,
    title="",
    missing_data="",
    max_width=1e100,
    row_ids=None,
    legend="",
    column_templates=None,
    dtype=None,
    static_column_types=False,
    limit=None,
    format="simple",
    **kwargs,
):
    """Load a table from a file.

    Parameters
    ----------
    filename
        path to file containing a tabular data
    sep
        the delimiting character between columns. The keyword
        ``delimiter`` is accepted as an alias; ``sep`` wins if both are
        given.
    reader
        a parser for reading filename. This approach assumes the first
        row returned by the reader will be the header row.
    static_column_types
        if True, and reader is None, identifies columns with a
        numeric/bool data types from the first non-header row.
        This assumes all subsequent entries in that column are of the same
        type. Default is False.
    digits
        floating point resolution
    space
        number of spaces between columns or a string
    title
        as implied. If empty, the title read from the file (if any) is used.
    missing_data
        character assigned if a row has no entry for a column
    max_width
        maximum column width for printing
    row_ids
        if True, the 0'th column is used as row identifiers and keys
        for slicing.
    legend
        table legend. Replaced by the value returned from
        ``load_delimited`` when that loader is used.
    column_templates
        dict of column headings or a function that will handle the
        formatting.
    dtype
        optional numpy array typecode.
    limit
        exits after this many lines. Only applied for non pickled data
        file types.
    format
        output format when using str(Table)
    **kwargs
        forwarded to ``load_delimited``; ``with_title`` is also read when
        a reader is auto-generated.

    Raises
    ------
    ValueError
        if ``static_column_types`` is requested for a file that is neither
        csv nor tsv and no separator was supplied.
    """
    # Always remove the alias from kwargs: leaving it there when ``sep`` is
    # also given would pass ``delimiter`` twice to ``load_delimited``.
    delimiter = kwargs.pop("delimiter", None)
    sep = sep or delimiter
    file_format, _ = get_format_suffixes(filename)

    if not (reader or static_column_types):
        if file_format == "pickle":
            # NOTE(security): unpickling executes arbitrary code; only load
            # pickled tables from trusted sources.
            f = open_(filename, mode="rb")
            try:
                loaded_table = pickle.load(f)
            finally:
                f.close()
            return _Table(**loaded_table)
        elif file_format == "csv":
            sep = sep or ","
        elif file_format == "tsv":
            sep = sep or "\t"

        header, rows, loaded_title, legend = load_delimited(
            filename, delimiter=sep, limit=limit, **kwargs
        )
        title = title or loaded_title
    else:
        f = open_(filename, newline=None)
        # try/finally so the handle is released even when the separator
        # check or the reader raises
        try:
            if not reader:
                if file_format == "csv":
                    sep = sep or ","
                elif file_format == "tsv":
                    sep = sep or "\t"
                elif not sep:
                    raise ValueError(
                        "static_column_types option requires a value " "for sep"
                    )

                reader = autogen_reader(
                    f, sep, limit=limit, with_title=kwargs.get("with_title", False)
                )

            rows = list(reader(f))
        finally:
            f.close()
        # the first parsed row is taken to be the header
        header = rows.pop(0)

    return make_table(
        header=header,
        rows=rows,
        digits=digits,
        title=title,
        dtype=dtype,
        column_templates=column_templates,
        space=space,
        missing_data=missing_data,
        max_width=max_width,
        row_ids=row_ids,
        legend=legend,
        format=format,
    )
def LoadTable(
    filename=None,
    sep=None,
    reader=None,
    header=None,
    rows=None,
    row_order=None,
    digits=4,
    space=4,
    title="",
    missing_data="",
    max_width=1e100,
    row_ids=None,
    legend="",
    column_templates=None,
    dtype=None,
    static_column_types=False,
    limit=None,
    data_frame=None,
    format="simple",
    **kwargs,
):
    """Create a table from a file or from in-memory data.

    .. deprecated:: 2019.8.30a
        ``LoadTable`` will be removed in ``cogent3`` 2020.1.1. It's replaced by
        ``load_table`` and ``make_table``.

    When ``filename`` is given, ``header`` and ``rows`` are read from the
    file (a pickled table is returned directly); otherwise the supplied
    ``header``/``rows``/``data_frame`` are handed to ``_Table`` as they are.

    Parameters
    ----------
    filename
        path to the file to read, or None to use in-memory data
    sep
        the delimiting character between columns. The keyword
        ``delimiter`` is accepted as an alias; ``sep`` wins if both are
        given.
    reader
        a parser for reading filename; its first returned row is used as
        the header row.
    static_column_types
        if True and no reader is given, a reader is generated with
        ``autogen_reader``; requires a separator unless the file is
        csv or tsv.
    limit
        forwarded to ``load_delimited`` / ``autogen_reader``
    header, rows, row_order, digits, space, title, missing_data, max_width,
    row_ids, legend, column_templates, dtype, data_frame, format
        forwarded to ``_Table``
    **kwargs
        forwarded to ``load_delimited``; ``with_title`` is also read when
        a reader is auto-generated.

    Raises
    ------
    ValueError
        if ``static_column_types`` is requested for a file that is neither
        csv nor tsv and no separator was supplied.
    """
    # Always remove the alias from kwargs: leaving it there when ``sep`` is
    # also given would pass ``delimiter`` twice to ``load_delimited``.
    delimiter = kwargs.pop("delimiter", None)
    sep = sep or delimiter

    # bound up front so the name exists on every path
    file_format = None
    if filename is not None:
        file_format, _ = get_format_suffixes(filename)

    custom_parse = bool(reader or static_column_types)

    if filename is not None and not custom_parse:
        if file_format == "pickle":
            # NOTE(security): unpickling executes arbitrary code; only load
            # pickled tables from trusted sources.
            f = open_(filename, mode="rb")
            try:
                loaded_table = pickle.load(f)
            finally:
                f.close()
            return _Table(**loaded_table)
        elif file_format == "csv":
            sep = sep or ","
        elif file_format == "tsv":
            sep = sep or "\t"

        header, rows, loaded_title, legend = load_delimited(
            filename, delimiter=sep, limit=limit, **kwargs
        )
        title = title or loaded_title
    elif filename and custom_parse:
        f = open_(filename, newline=None)
        # try/finally so the handle is released even when the separator
        # check or the reader raises
        try:
            if not reader:
                if file_format == "csv":
                    sep = sep or ","
                elif file_format == "tsv":
                    sep = sep or "\t"
                elif not sep:
                    raise ValueError(
                        "static_column_types option requires a value " "for sep"
                    )

                reader = autogen_reader(
                    f, sep, limit=limit, with_title=kwargs.get("with_title", False)
                )

            rows = list(reader(f))
        finally:
            f.close()
        # the first parsed row is taken to be the header
        header = rows.pop(0)

    return _Table(
        header=header,
        rows=rows,
        digits=digits,
        row_order=row_order,
        title=title,
        dtype=dtype,
        column_templates=column_templates,
        space=space,
        missing_data=missing_data,
        max_width=max_width,
        row_ids=row_ids,
        legend=legend,
        data_frame=data_frame,
        format=format,
    )
def load_table(
    filename,
    sep=None,
    reader=None,
    digits=4,
    space=4,
    title="",
    missing_data="",
    max_width=1e100,
    index_name=None,
    legend="",
    column_templates=None,
    static_column_types=False,
    limit=None,
    format="simple",
    skip_inconsistent=False,
    **kwargs,
):
    """Load a table from a file.

    Parameters
    ----------
    filename
        path to file containing a tabular data
    sep
        the delimiting character between columns
    reader
        a parser for reading filename. This approach assumes the first
        row returned by the reader will be the header row.
    static_column_types
        if True, and reader is None, identifies columns with a
        numeric/bool data types from the first non-header row.
        This assumes all subsequent entries in that column are of the same
        type. Default is False.
    digits
        floating point resolution
    space
        number of spaces between columns or a string
    title
        as implied. If empty, the title read from the file (if any) is used.
    missing_data
        character assigned if a row has no entry for a column
    max_width
        maximum column width for printing
    index_name
        column name with values to be used as row identifiers and keys
        for slicing. All column values must be unique.
    legend
        table legend
    column_templates
        dict of column headings or a function that will handle the
        formatting.
    limit
        exits after this many lines. Only applied for non pickled data
        file types.
    format
        output format when using str(Table)
    skip_inconsistent
        skips rows that have different length to header row

    Raises
    ------
    TypeError
        if ``filename`` is neither a ``str`` nor a ``pathlib`` path.
    ValueError
        if rows differ in length from the header and ``skip_inconsistent``
        is False.
    """
    import pathlib

    if not isinstance(filename, (str, pathlib.PurePath)):
        raise TypeError(
            "filename must be string or Path, perhaps you want make_table()"
        )

    if "index" in kwargs:
        deprecated("argument", "index", "index_name", "2021.11")
        index_name = kwargs.pop("index", index_name)

    sep = sep or kwargs.pop("delimiter", None)
    file_format, _ = get_format_suffixes(filename)

    if file_format == "json":
        return load_from_json(filename, (_Table,))
    elif file_format in ("pickle", "pkl"):
        # NOTE(security): unpickling executes arbitrary code; only load
        # pickled tables from trusted sources.
        # Context manager (as in the reader branch below) so the handle is
        # released even if unpickling fails.
        with open_(filename, mode="rb") as f:
            loaded_table = pickle.load(f)

        r = _Table()
        r.__setstate__(loaded_table)
        return r

    if reader:
        with open_(filename, newline=None) as f:
            data = list(reader(f))
        # first parsed row is the header; transpose rows into columns
        header = data[0]
        data = {column[0]: column[1:] for column in zip(*data)}
    else:
        if file_format == "csv":
            sep = sep or ","
        elif file_format == "tsv":
            sep = sep or "\t"

        header, rows, loaded_title, legend = load_delimited(
            filename, sep=sep, limit=limit, **kwargs
        )
        if skip_inconsistent:
            num_fields = len(header)
            rows = [r for r in rows if len(r) == num_fields]
        else:
            lengths = set(map(len, [header] + rows))
            if len(lengths) != 1:
                msg = f"inconsistent number of fields {lengths}"
                raise ValueError(msg)

        title = title or loaded_title
        # zip(header, *rows) yields one tuple per column: (name, values...)
        data = {column[0]: column[1:] for column in zip(header, *rows)}

    data = {
        key: cast_str_to_array(value, static_type=static_column_types)
        for key, value in data.items()
    }

    return make_table(
        header=header,
        data=data,
        digits=digits,
        title=title,
        column_templates=column_templates,
        space=space,
        missing_data=missing_data,
        max_width=max_width,
        index_name=index_name,
        legend=legend,
        format=format,
    )
def make_table(
    header=None,
    data=None,
    row_order=None,
    digits=4,
    space=4,
    title="",
    max_width=1e100,
    index_name=None,
    legend="",
    missing_data="",
    column_templates=None,
    data_frame=None,
    format="simple",
    **kwargs,
):
    """Build a table from in-memory data.

    Parameters
    ----------
    header
        column headings
    data
        a 2D dict, list or tuple. If a dict, it must have column headings
        as top level keys, and common row labels as keys in each column.
        The legacy keyword ``rows`` is accepted as an alias and, when
        present, takes precedence.
    row_order
        the order in which rows will be pulled from the 2D dict
    digits
        floating point resolution
    space
        number of spaces between columns or a string
    title
        as implied
    max_width
        maximum column width for printing
    index_name
        column name with values to be used as row identifiers and keys
        for slicing. All column values must be unique.
    legend
        table legend
    missing_data
        replace missing data with this
    column_templates
        dict of column headings or a function that will handle the
        formatting.
    data_frame
        a pandas DataFrame, supersedes header/rows
    format
        output format when using str(Table)

    Raises
    ------
    TypeError
        if ``header`` or the data is a ``str`` (a file path should go to
        ``load_table()``), or if ``data_frame`` is not a pandas DataFrame.
    """
    # Merge the legacy ``rows`` alias before validating, so a path passed as
    # rows="..." is rejected just like one passed as data="...".
    data = kwargs.get("rows", data)

    if isinstance(header, str) or isinstance(data, str):
        raise TypeError("str type invalid, if its a path use load_table()")

    if "index" in kwargs:
        deprecated("argument", "index", "index_name", "2021.11")
        index_name = kwargs.pop("index", index_name)

    if data_frame is not None:
        from pandas import DataFrame

        if not isinstance(data_frame, DataFrame):
            raise TypeError(f"expecting a DataFrame, got{type(data_frame)}")

        # one numpy array per DataFrame column, keyed by column label
        data = {c: data_frame[c].to_numpy() for c in data_frame}

    return _Table(
        header=header,
        data=data,
        digits=digits,
        row_order=row_order,
        title=title,
        column_templates=column_templates,
        space=space,
        missing_data=missing_data,
        max_width=max_width,
        index_name=index_name,
        legend=legend,
        data_frame=data_frame,
        format=format,
    )
def load_table(
    filename,
    sep=None,
    reader=None,
    digits=4,
    space=4,
    title="",
    missing_data="",
    max_width=1e100,
    index=None,
    legend="",
    column_templates=None,
    dtype=None,
    static_column_types=False,
    limit=None,
    format="simple",
    skip_inconsistent=False,
    **kwargs,
):
    """Load a table from a file.

    Parameters
    ----------
    filename
        path to file containing a tabular data
    sep
        the delimiting character between columns. The keyword
        ``delimiter`` is accepted as an alias; ``sep`` wins if both are
        given.
    reader
        a parser for reading filename. This approach assumes the first
        row returned by the reader will be the header row.
    static_column_types
        if True, and reader is None, identifies columns with a
        numeric/bool data types from the first non-header row.
        This assumes all subsequent entries in that column are of the same
        type. Default is False.
    digits
        floating point resolution
    space
        number of spaces between columns or a string
    title
        as implied. If empty, the title read from the file (if any) is used.
    missing_data
        character assigned if a row has no entry for a column
    max_width
        maximum column width for printing
    index
        if True, the 0'th column is used as row identifiers and keys
        for slicing.
    legend
        table legend
    column_templates
        dict of column headings or a function that will handle the
        formatting.
    dtype
        optional numpy array typecode.
    limit
        exits after this many lines. Only applied for non pickled data
        file types.
    format
        output format when using str(Table)
    skip_inconsistent
        skips rows that have different length to header row

    Raises
    ------
    ValueError
        if rows differ in length from the header and ``skip_inconsistent``
        is False.
    """
    # Always remove the alias from kwargs: leaving it there when ``sep`` is
    # also given would pass ``delimiter`` twice to ``load_delimited``.
    delimiter = kwargs.pop("delimiter", None)
    sep = sep or delimiter
    file_format, _ = get_format_suffixes(filename)

    if file_format in ("pickle", "pkl"):
        # NOTE(security): unpickling executes arbitrary code; only load
        # pickled tables from trusted sources.
        f = open_(filename, mode="rb")
        try:
            loaded_table = pickle.load(f)
        finally:
            f.close()

        r = _Table()
        r.__setstate__(loaded_table)
        return r

    if not reader:
        if file_format == "csv":
            sep = sep or ","
        elif file_format == "tsv":
            sep = sep or "\t"

        header, rows, loaded_title, legend = load_delimited(
            filename, delimiter=sep, limit=limit, **kwargs
        )
        if skip_inconsistent:
            num_fields = len(header)
            rows = [r for r in rows if len(r) == num_fields]
        else:
            lengths = set(map(len, [header] + rows))
            if len(lengths) != 1:
                msg = f"inconsistent number of fields {lengths}"
                raise ValueError(msg)

        title = title or loaded_title
        # zip(header, *rows) yields one tuple per column: (name, values...)
        data = {column[0]: column[1:] for column in zip(header, *rows)}
    else:
        f = open_(filename, newline=None)
        # try/finally so the handle is released even if the reader raises
        try:
            data = list(reader(f))
        finally:
            f.close()
        # first parsed row is the header; transpose rows into columns
        header = data[0]
        data = {column[0]: column[1:] for column in zip(*data)}

    data = {
        key: cast_str_to_array(value, static_type=static_column_types)
        for key, value in data.items()
    }

    return make_table(
        header=header,
        data=data,
        digits=digits,
        title=title,
        dtype=dtype,
        column_templates=column_templates,
        space=space,
        missing_data=missing_data,
        max_width=max_width,
        index=index,
        legend=legend,
        format=format,
    )
def make_table(
    header=None,
    data=None,
    row_order=None,
    digits=4,
    space=4,
    title="",
    max_width=1e100,
    index=None,
    legend="",
    missing_data="",
    column_templates=None,
    dtype=None,
    data_frame=None,
    format="simple",
    **kwargs,
):
    """Build a table from in-memory data.

    Parameters
    ----------
    header
        column headings
    data
        a 2D dict, list or tuple. If a dict, it must have column headings
        as top level keys, and common row labels as keys in each column.
        A ``rows`` keyword, when supplied, replaces this value.
    row_order
        the order in which rows will be pulled from the 2D dict
    digits
        floating point resolution
    space
        number of spaces between columns or a string
    title
        as implied
    max_width
        maximum column width for printing
    index
        if True, the 0'th column is used as row identifiers and keys
        for slicing.
    legend
        table legend
    missing_data
        passed through to ``_Table`` unchanged
    column_templates
        dict of column headings or a function that will handle the
        formatting.
    dtype
        optional numpy array typecode.
    data_frame
        a pandas DataFrame, supersedes header/rows
    format
        output format when using str(Table)

    Raises
    ------
    TypeError
        if ``data_frame`` is given but is not a pandas DataFrame.
    """
    # legacy spelling of the data argument
    if "rows" in kwargs:
        data = kwargs["rows"]

    if data_frame is not None:
        from pandas import DataFrame

        if not isinstance(data_frame, DataFrame):
            raise TypeError(f"expecting a DataFrame, got{type(data_frame)}")

        # one numpy array per DataFrame column, keyed by column label
        data = {name: data_frame[name].to_numpy() for name in data_frame}

    table_options = dict(
        header=header,
        data=data,
        digits=digits,
        row_order=row_order,
        title=title,
        dtype=dtype,
        column_templates=column_templates,
        space=space,
        missing_data=missing_data,
        max_width=max_width,
        index=index,
        legend=legend,
        data_frame=data_frame,
        format=format,
    )
    return _Table(**table_options)