def LoadTree(
    filename=None,
    treestring=None,
    tip_names=None,
    format=None,
    underscore_unmunge=False,
):
    """Deprecated entry point for obtaining a tree from a file or from data.

    .. deprecated:: 2019.8.30a
        ``LoadTree`` will be removed in ``cogent3`` 2020.1.1. It's replaced by
        ``load_tree`` and ``make_tree``.

    Notes
    -----
    A truthy ``filename`` is handed to ``load_tree``; otherwise ``treestring``
    and ``tip_names`` are handed to ``make_tree``. A deprecation notice naming
    the matching replacement is issued in both cases.
    """
    from cogent3.util.warning import deprecated

    # decide once which replacement applies, then announce it
    from_file = bool(filename)
    replacement = "load_tree" if from_file else "make_tree"
    deprecated("function", "LoadTree", replacement, "2020.1.1", 1)

    if from_file:
        return load_tree(
            filename, format=format, underscore_unmunge=underscore_unmunge
        )

    return make_tree(
        treestring=treestring,
        tip_names=tip_names,
        format=format,
        underscore_unmunge=underscore_unmunge,
    )
def load_delimited(
    filename,
    header=True,
    sep=",",
    delimiter=None,
    with_title=False,
    with_legend=False,
    limit=None,
):
    """
    basic processing of tabular data

    Parameters
    ----------
    filename: Path
        path to delimited file (can begin with ~)
    header: bool
        whether the first line of the file (after the title, if present)
        is a header
    sep: str
        the character separating columns
    delimiter: str
        deprecated, use ``sep``. When truthy it overrides ``sep``.
    with_title: bool
        whether the first line of the file is a title
    with_legend: bool
        whether the last line of the file is a legend
    limit: int
        maximum number of lines to read from the file. The title and header
        lines are not counted. Negative values are treated as 0.

    Returns
    -------
    header, rows, title, legend

    Notes
    -----
    All row values remain as strings.

    When ``limit`` stops reading before the end of the file and
    ``with_legend`` is True, the legend is taken from the last line that
    was actually read.
    """
    from itertools import islice

    if delimiter:
        sep = delimiter
        deprecated("argument", "delimiter", "sep", "2022.1")

    if limit is not None:
        # don't count the header line against the limit; clamping keeps
        # islice from rejecting a negative stop value
        limit = max(limit, 0) + (1 if header else 0)

    with open_(filename) as f:
        reader = csv.reader(f, dialect="excel", delimiter=sep)
        title = "".join(next(reader)) if with_title else ""
        # islice(reader, None) consumes everything, islice(reader, 0) nothing
        rows = list(islice(reader, limit))

    header = rows.pop(0) if header else None
    legend = "".join(rows.pop(-1)) if with_legend else ""
    return header, rows, title, legend
def LoadTable(
    filename=None,
    sep=None,
    reader=None,
    header=None,
    rows=None,
    row_order=None,
    digits=4,
    space=4,
    title="",
    missing_data="",
    max_width=1e100,
    row_ids=None,
    legend="",
    column_templates=None,
    dtype=None,
    static_column_types=False,
    limit=None,
    data_frame=None,
    format="simple",
    **kwargs,
):
    """Deprecated entry point for obtaining a table from a file or from data.

    .. deprecated:: 2019.8.30a
        ``LoadTable`` will be removed in ``cogent3`` 2020.1.1. It's replaced by
        ``load_table`` and ``make_table``.

    Notes
    -----
    Every argument, including the extra keyword arguments, is forwarded by
    name. A truthy ``filename`` selects ``load_table``; otherwise the
    file-reading arguments are discarded and ``make_table`` is called.
    """
    # This has to be the first statement: at this point the local namespace
    # holds the call arguments and nothing else.
    args = dict(locals())

    from cogent3.util.warning import deprecated

    # merge the extra keyword arguments into the flat mapping
    args.update(args.pop("kwargs", {}))

    if not filename:
        deprecated("function", "LoadTable", "make_table", "2020.1.1", 1)
        # these only apply when reading from a file
        for file_only in ("filename", "sep", "reader", "static_column_types", "limit"):
            args.pop(file_only)
        return make_table(**args)

    deprecated("function", "LoadTable", "load_table", "2020.1.1", 1)
    return load_table(**args)
def LoadSeqs(
    filename=None,
    format=None,
    data=None,
    moltype=None,
    name=None,
    aligned=True,
    label_to_name=None,
    parser_kw=None,
    constructor_kw=None,
    array_align=True,
    **kw,
):
    """Deprecated entry point for obtaining sequences from a file or from data.

    .. deprecated:: 2019.8.30a
        ``LoadSeqs`` will be removed in ``cogent3`` 2020.1.1. It's replaced by
        ``load_unaligned_seqs``, ``load_aligned_seqs``,
        ``make_unaligned_seqs`` and ``make_aligned_seqs``.

    Notes
    -----
    The replacement is chosen from ``filename`` (load vs make) and
    ``aligned`` (aligned vs unaligned). Arguments that are dropped for the
    selected replacement are listed in the corresponding branch; everything
    else, including the extra keyword arguments, is forwarded by name.
    """
    # Work on a private copy taken before any other local name is bound.
    # Deleting keys from the mapping returned by locals() itself is fragile
    # because that mapping may belong to the frame.
    kwargs = dict(locals())
    # Expand the extra keyword arguments so they reach the replacement as
    # real keywords rather than as a single argument literally named "kw".
    extras = kwargs.pop("kw")
    kwargs.update(extras)

    from cogent3.util.warning import deprecated

    def _forwarded(*dropped):
        """Return the call arguments minus the names in ``dropped``."""
        return {k: v for k, v in kwargs.items() if k not in dropped}

    if filename and aligned:
        deprecated("function", "LoadSeqs", "load_aligned_seqs", "2020.1.1", 1)
        return load_aligned_seqs(**_forwarded("aligned", "data"))

    if filename:
        deprecated("function", "LoadSeqs", "load_unaligned_seqs", "2020.1.1", 1)
        return load_unaligned_seqs(**_forwarded("aligned", "data", "array_align"))

    if aligned:
        deprecated("function", "LoadSeqs", "make_aligned_seqs", "2020.1.1", 1)
        return make_aligned_seqs(
            **_forwarded("filename", "format", "aligned", "parser_kw")
        )

    deprecated("function", "LoadSeqs", "make_unaligned_seqs", "2020.1.1", 1)
    return make_unaligned_seqs(
        **_forwarded("filename", "format", "aligned", "array_align", "parser_kw")
    )
def clustal_from_alignment(aln, interleave_len=None, wrap=None):
    """Return the alignment formatted as a Clustal string.

    Parameters
    ----------
    aln
        can be an Alignment object or a dict
    interleave_len
        deprecated, use ``wrap``. Only used when ``wrap`` is not given.
    wrap
        sequence line width. Only available if sequences are aligned.
        If None, each sequence is written on a single line.

    Returns
    -------
    Returns a string in Clustal format, or "" if ``aln`` is empty

    Raises
    ------
    ValueError
        if the sequences are not all the same length, or if ``wrap`` is
        not positive
    """
    if interleave_len is not None:
        from cogent3.util.warning import deprecated

        deprecated("argument", "interleave_len", "wrap", "2021.6")
        # wrap defaults to None, so that is the value signalling "not given";
        # an explicit wrap takes precedence over the deprecated argument
        if wrap is None:
            wrap = interleave_len

    if not aln:
        return ""

    # get seq output order
    try:
        order = aln.RowOrder
    except AttributeError:
        # no explicit order available, fall back to sorted names
        order = sorted(aln.keys())

    seqs = SequenceCollection(aln)
    if seqs.is_ragged():
        raise ValueError(
            "Sequences in alignment are not all the same length. "
            "Cannot generate Clustal format."
        )

    aln_len = seqs.seq_len

    # every label is padded to the longest name plus four spaces
    max_spaces = max(len(label) for label in seqs.names) + 4

    # Get ordered seqs
    ordered_seqs = [seqs.named_seqs[label] for label in order]

    def _row(label, seq):
        """Format one output line: label, padding, sequence text."""
        return "%s%s%s" % (label, " " * (max_spaces - len(label)), seq)

    clustal_list = ["CLUSTAL\n"]
    if wrap is None:
        clustal_list.extend(_row(x, y) for x, y in zip(order, ordered_seqs))
        clustal_list.append("")
    else:
        if wrap <= 0:
            # a non-positive width can never advance through the alignment
            raise ValueError(f"wrap must be a positive integer, got {wrap!r}")

        # one block of rows per window of width wrap, blank line after each
        for start in range(0, aln_len, wrap):
            clustal_list.extend(
                _row(x, y[start : start + wrap])
                for x, y in zip(order, ordered_seqs)
            )
            clustal_list.append("")

    return "\n".join(clustal_list)
def load_table(
    filename,
    sep=None,
    reader=None,
    digits=4,
    space=4,
    title="",
    missing_data="",
    max_width=1e100,
    index_name=None,
    legend="",
    column_templates=None,
    static_column_types=False,
    limit=None,
    format="simple",
    skip_inconsistent=False,
    **kwargs,
):
    """Load a table from a json, pickle or delimited text file.

    Parameters
    ----------
    filename
        path to file containing a tabular data
    sep
        the delimiting character between columns
    reader
        a parser for reading filename. This approach assumes the first
        row returned by the reader will be the header row.
    static_column_types
        if True, and reader is None, identifies columns with a
        numeric/bool data types from the first non-header row.
        This assumes all subsequent entries in that column are of the same
        type. Default is False.
    digits
        floating point resolution
    space
        number of spaces between columns or a string
    title
        as implied. If empty, a title read from the file is used.
    missing_data
        character assigned if a row has no entry for a column
    max_width
        maximum column width for printing
    index_name
        column name with values to be used as row identifiers and keys
        for slicing. All column values must be unique.
    legend
        table legend. If empty, a legend read from the file is used.
    column_templates
        dict of column headings
        or a function that will handle the formatting.
    limit
        exits after this many lines. Only applied for non pickled data
        file types.
    format
        output format when using str(Table)
    skip_inconsistent
        skips rows that have different length to header row
    **kwargs
        ``index`` (deprecated, use ``index_name``) and ``delimiter`` are
        handled here; when no reader is given the remaining values are
        passed to ``load_delimited``

    Raises
    ------
    TypeError
        if filename is not a string or Path
    ValueError
        if rows differ in their number of fields and ``skip_inconsistent``
        is False
    """
    import pathlib

    if not isinstance(filename, (str, pathlib.PurePath)):
        raise TypeError(
            "filename must be string or Path, perhaps you want make_table()"
        )

    if "index" in kwargs:
        deprecated("argument", "index", "index_name", "2021.11")
        index_name = kwargs.pop("index", index_name)

    sep = sep or kwargs.pop("delimiter", None)
    file_format, _ = get_format_suffixes(filename)

    if file_format == "json":
        return load_from_json(filename, (_Table,))

    if file_format in ("pickle", "pkl"):
        # NOTE: unpickling can execute arbitrary code, only load pickled
        # tables that come from a trusted source
        with open_(filename, mode="rb") as f:
            loaded_table = pickle.load(f)

        r = _Table()
        r.__setstate__(loaded_table)
        return r

    if reader:
        with open_(filename, newline=None) as f:
            data = list(reader(f))

        header = data[0]
        # transpose the rows into {column name: column values}
        data = {column[0]: column[1:] for column in zip(*data)}
    else:
        if file_format == "csv":
            sep = sep or ","
        elif file_format == "tsv":
            sep = sep or "\t"

        header, rows, loaded_title, loaded_legend = load_delimited(
            filename, sep=sep, limit=limit, **kwargs
        )
        if skip_inconsistent:
            num_fields = len(header)
            rows = [r for r in rows if len(r) == num_fields]
        else:
            lengths = set(map(len, [header] + rows))
            if len(lengths) != 1:
                msg = f"inconsistent number of fields {lengths}"
                raise ValueError(msg)

        # values supplied by the caller win over those read from the file
        title = title or loaded_title
        legend = legend or loaded_legend
        data = {column[0]: column[1:] for column in zip(header, *rows)}

    data = {
        key: cast_str_to_array(value, static_type=static_column_types)
        for key, value in data.items()
    }

    return make_table(
        header=header,
        data=data,
        digits=digits,
        title=title,
        column_templates=column_templates,
        space=space,
        missing_data=missing_data,
        max_width=max_width,
        index_name=index_name,
        legend=legend,
        format=format,
    )
def make_table(
    header=None,
    data=None,
    row_order=None,
    digits=4,
    space=4,
    title="",
    max_width=1e100,
    index_name=None,
    legend="",
    missing_data="",
    column_templates=None,
    data_frame=None,
    format="simple",
    **kwargs,
):
    """Make a table from in-memory data.

    Parameters
    ----------
    header
        column headings
    data
        a 2D dict, list or tuple. If a dict, it must have column
        headings as top level keys, and common row labels as keys in each
        column.
    row_order
        the order in which rows will be pulled from the twoDdict
    digits
        floating point resolution
    space
        number of spaces between columns or a string
    title
        as implied
    max_width
        maximum column width for printing
    index_name
        column name with values to be used as row identifiers and keys
        for slicing. All column values must be unique.
    legend
        table legend
    missing_data
        replace missing data with this
    column_templates
        dict of column headings
        or a function that will handle the formatting.
    data_frame
        a pandas DataFrame, supersedes header/rows
    format
        output format when using str(Table)
    **kwargs
        ``index`` (deprecated, use ``index_name``) and ``rows`` (used in
        place of ``data`` when present) are recognised

    Raises
    ------
    TypeError
        if header, data or rows is a string, or data_frame is not a
        pandas DataFrame
    """
    # rows is an alias of data, so it gets the same guard against a path
    # being passed by mistake
    if any(isinstance(a, str) for a in (header, data, kwargs.get("rows"))):
        raise TypeError("str type invalid, if its a path use load_table()")

    if "index" in kwargs:
        deprecated("argument", "index", "index_name", "2021.11")
        index_name = kwargs.pop("index", index_name)

    data = kwargs.get("rows", data)

    if data_frame is not None:
        from pandas import DataFrame

        if not isinstance(data_frame, DataFrame):
            raise TypeError(f"expecting a DataFrame, got {type(data_frame)}")

        data = {c: data_frame[c].to_numpy() for c in data_frame}

    return _Table(
        header=header,
        data=data,
        digits=digits,
        row_order=row_order,
        title=title,
        column_templates=column_templates,
        space=space,
        missing_data=missing_data,
        max_width=max_width,
        index_name=index_name,
        legend=legend,
        data_frame=data_frame,
        format=format,
    )