def __init__(
        self, name, sequence, qualities, primer=None, name2='',
        original_length=None, match=None, match_info=None, clipped=None,
        insert_overlap=False, merged=False, corrected=0, alphabet=None):
    """Create a colorspace read, splitting off the primer base if needed.

    In colorspace, the first character is the last nucleotide of the
    primer and the second character encodes the transition from the primer
    base to the first real base of the read.

    Args:
        name, sequence, qualities, name2, original_length, match,
        match_info, clipped, insert_overlap, merged, corrected, alphabet:
            Forwarded unchanged to the parent constructor (``sequence``
            without its leading primer character when ``primer`` is None).
        primer: The primer base. When None, the first character of
            ``sequence`` is used as the primer and removed from the
            sequence.

    Raises:
        FormatError: If qualities are given and their length differs from
            the length of the primer-less sequence, or if the primer is
            not one of A, C, G, T.
    """
    if primer is not None:
        self.primer = primer
    else:
        # Slicing (instead of indexing) also works on an empty sequence;
        # the resulting empty primer is rejected further down.
        self.primer, sequence = sequence[0:1], sequence[1:]

    if qualities is not None:
        n_colors, n_quals = len(sequence), len(qualities)
        if n_colors != n_quals:
            message = (
                "In read named {0!r}: length of colorspace quality "
                "sequence ({1}) and length of read ({2}) do not match (primer "
                "is: {3!r})")
            raise FormatError(message.format(
                truncate_string(name), n_quals, n_colors, self.primer))

    super().__init__(
        name, sequence, qualities, name2, original_length, match,
        match_info, clipped, insert_overlap, merged, corrected,
        alphabet=alphabet)

    # TODO: use 'alphabet' here
    if self.primer not in ('A', 'C', 'G', 'T'):
        raise FormatError(
            "Primer base is {0!r} in read {1!r}, but it should be one of "
            "A, C, G, T.".format(self.primer, truncate_string(name)))
def __repr__(self):
    """Return a debugging representation of this colorspace read.

    The name, sequence and (when present) qualities are passed through
    ``truncate_string``; the primer is shown as is.
    """
    if self.qualities is None:
        quality_part = ''
    else:
        quality_part = ', qualities={0!r}'.format(
            truncate_string(self.qualities))
    template = (
        '<ColorspaceSequence(name={0!r}, primer={1!r}, sequence={2!r}{3})>')
    return template.format(
        truncate_string(self.name),
        self.primer,
        truncate_string(self.sequence),
        quality_part)
def __repr__(self):
    """Describe the read as ``<ColorspaceSequence(name=..., ...)>``.

    Qualities are only included when they are not None. Name, sequence
    and qualities go through ``truncate_string`` before being formatted.
    """
    qualities_repr = (
        ', qualities={0!r}'.format(truncate_string(self.qualities))
        if self.qualities is not None else '')
    fields = (
        truncate_string(self.name),
        self.primer,
        truncate_string(self.sequence),
        qualities_repr,
    )
    return (
        '<ColorspaceSequence(name={0!r}, primer={1!r}, sequence={2!r}{3})>'
    ).format(*fields)
def __iter__(self):
    """Yield the FASTA records of the file, one entry at a time.

    Blank lines and lines starting with '#' are skipped. A line starting
    with '>' opens a new record; all following lines are joined with
    ``self._delimiter`` to form its sequence. Records are built with
    ``self.sequence_class`` and have no qualities (None).

    Raises:
        FormatError: If sequence data appears before the first '>' line.
    """
    header = None
    chunks = []
    for lineno, raw in enumerate(self._file, start=1):
        # strip() also removes DOS line breaks
        text = raw.strip()
        if not text:
            continue
        lead = text[0]
        if lead == '#':
            continue
        if lead == '>':
            # A new header closes the record collected so far, if any.
            if header is not None:
                yield self.sequence_class(
                    header, self._delimiter.join(chunks), None,
                    alphabet=self.alphabet)
            header, chunks = text[1:], []
            continue
        if header is None:
            raise FormatError(
                "At line {0}: Expected '>' at beginning of FASTA record, "
                "but got {1!r}.".format(lineno, truncate_string(text)))
        chunks.append(text)

    # Emit the last record, which has no following header to close it.
    if header is not None:
        yield self.sequence_class(
            header, self._delimiter.join(chunks), None,
            alphabet=self.alphabet)
def __iter__(self):
    """Iterate over the file and produce one record per FASTA entry.

    Each record is created by ``self.sequence_class`` from the header text
    (without the leading '>'), the sequence lines joined by
    ``self._delimiter``, and None as qualities. Empty lines and lines
    beginning with '#' are ignored.

    Raises:
        FormatError: If a non-header line is seen before any '>' line.
    """
    def make_record(title, parts):
        """Build a record from a header and its collected sequence lines."""
        return self.sequence_class(
            title, self._delimiter.join(parts), None,
            alphabet=self.alphabet)

    title = None
    parts = []
    for index, line in enumerate(self._file):
        # strip() also removes DOS line breaks
        line = line.strip()
        if not line:
            continue
        first = line[0]
        if first == '>':
            if title is not None:
                yield make_record(title, parts)
            title = line[1:]
            parts = []
        elif first == '#':
            continue
        elif title is None:
            raise FormatError(
                "At line {0}: Expected '>' at beginning of FASTA record, "
                "but got {1!r}.".format(index + 1, truncate_string(line)))
        else:
            parts.append(line)

    if title is not None:
        yield make_record(title, parts)
def __init__(self, name, sequence, qualities, primer=None, name2='',
             original_length=None, match=None, match_info=None,
             clipped=None, insert_overlap=False, merged=False, corrected=0,
             alphabet=None):
    """Initialize a read given in colorspace.

    In colorspace, the first character is the last nucleotide of the
    primer and the second character encodes the transition from the primer
    base to the first real base of the read. If no ``primer`` is passed,
    it is taken from (and stripped off) the start of ``sequence``.

    All remaining arguments are handed to the parent constructor.

    Raises:
        FormatError: If the quality string (when given) and the sequence
            without primer differ in length, or if the primer base is not
            A, C, G or T.
    """
    if primer is None:
        primer, sequence = sequence[0:1], sequence[1:]
    self.primer = primer

    lengths_differ = (
        qualities is not None and len(sequence) != len(qualities))
    if lengths_differ:
        short_name = truncate_string(name)
        raise FormatError(
            "In read named {0!r}: length of colorspace quality "
            "sequence ({1}) and length of read ({2}) do not match (primer "
            "is: {3!r})".format(
                short_name, len(qualities), len(sequence), self.primer))

    super().__init__(
        name, sequence, qualities, name2, original_length, match,
        match_info, clipped, insert_overlap, merged, corrected,
        alphabet=alphabet)

    # TODO: use 'alphabet' here
    if self.primer in ('A', 'C', 'G', 'T'):
        return
    raise FormatError(
        "Primer base is {0!r} in read {1!r}, but it should be one of "
        "A, C, G, T.".format(self.primer, truncate_string(name)))
def __call__(self, *args, colwidths=None, extra_width=None,
             justification=None, extra_justification=None, indent=None,
             extra_indent=None, header=False, underline='-', pct=None,
             default=None, **kwargs):
    """Print a row.

    Args:
        args: Fields in the row. With no fields, only ``self.newline()``
            is called.
        colwidths, justification, indent: Row-specific colwidths,
            justification, indent. Each falls back to the instance
            attribute of the same name when not given (or empty).
        extra_width, extra_justification, extra_indent: colwidth/
            justification/indent to use for extra fields, i.e. fields
            beyond the end of the respective setting. When not given,
            the last element of the setting is repeated.
        header: Whether this is a header row. Header columns are widened
            to fit their text and the row is followed by an underline.
        underline: Whether to use an underline after the header row.
            Either a bool or a character: True means '-', False
            suppresses the underline.
        pct: Whether floating point values should be formatted as
            percentages. Defaults to ``self.pct``.
        default: Value printed in place of None fields; falls back to
            ``self.default``.
        kwargs: Additional keyword arguments passed to ``self._print``.
    """
    ncols = len(args)
    if ncols == 0:
        self.newline()
        return

    if pct is None:
        pct = self.pct

    def adjust(arr, extra=None):
        """Fit a per-column setting to exactly ``ncols`` entries.

        Longer settings are truncated; shorter ones are padded with
        ``extra`` or, if that is falsy, with their own last element.
        """
        # Normalize to a tuple so that list settings can be padded too.
        arr = tuple(arr)
        missing = ncols - len(arr)
        if missing <= 0:
            return arr[:ncols]
        return arr + ((extra or arr[-1],) * missing)

    colwidths = adjust(colwidths or self.colwidths, extra_width)
    justification = adjust(
        justification or self.justification, extra_justification)
    indent = adjust(indent or self.indent, extra_indent)

    # adjust colwidths if this is a header
    if header:
        colwidths = tuple(
            max(w, len(str(a))) for w, a in zip(colwidths, args))

    # The same separator is used for the row and for its underline so
    # that the underline segments line up with the columns.
    separator = ' '

    fmt_str = []
    fmt_args = []
    for i, (value, width, just, ind) in enumerate(
            zip(args, colwidths, justification, indent)):
        if value is None:
            value = default or self.default
        if isinstance(value, str):
            typ = 's'
            if len(value) > width:
                value = truncate_string(value, width)
        elif isinstance(value, float):
            # One decimal place with thousands separator, as a percentage
            # or as a plain number.
            typ = ',.1' + ('%' if pct else 'f')
        else:
            typ = ',d'
        # The indent is part of the column, so it is subtracted from the
        # width of the formatted field.
        fmt_str.append(
            ind + '{' + str(i) + ':' + just + str(width - len(ind)) +
            typ + '}')
        fmt_args.append(value)

    self._print(separator.join(fmt_str).format(*fmt_args), **kwargs)

    if header:
        # A bool selects the default underline character or no underline
        # at all; multiplying a bool by the width would yield an int,
        # which str.join() rejects.
        if isinstance(underline, bool):
            underline = '-' if underline else None
        if underline is not None:
            sepline = separator.join(
                (underline * width) for width in colwidths)
            self._print(sepline, **kwargs)
def __call__(
        self, *args, colwidths=None, extra_width=None, justification=None,
        extra_justification=None, indent=None, extra_indent=None,
        header=False, underline='-', pct=None, default=None, **kwargs):
    """Print a row.

    Args:
        args: Fields in the row. With no fields, only ``self.newline()``
            is called.
        colwidths, justification, indent: Row-specific colwidths,
            justification, indent. Each falls back to the instance
            attribute of the same name when not given (or empty).
        extra_width, extra_justification, extra_indent: colwidth/
            justification/indent to use for extra fields, i.e. fields
            beyond the end of the respective setting. When not given,
            the last element of the setting is repeated.
        header: Whether this is a header row. Header columns are widened
            to fit their text and the row is followed by an underline.
        underline: Whether to use an underline after the header row.
            Either a bool or a character: True means '-', False
            suppresses the underline.
        pct: Whether floating point values should be formatted as
            percentages. Defaults to ``self.pct``.
        default: Value printed in place of None fields; falls back to
            ``self.default``.
        kwargs: Additional keyword arguments passed to ``self._print``.
    """
    ncols = len(args)
    if ncols == 0:
        self.newline()
        return

    if pct is None:
        pct = self.pct

    def adjust(arr, extra=None):
        """Fit a per-column setting to exactly ``ncols`` entries.

        Longer settings are truncated; shorter ones are padded with
        ``extra`` or, if that is falsy, with their own last element.
        """
        # Normalize to a tuple so that list settings can be padded too.
        arr = tuple(arr)
        alen = len(arr)
        if alen >= ncols:
            return arr[:ncols]
        return arr + ((extra or arr[-1],) * (ncols - alen))

    colwidths = adjust(colwidths or self.colwidths, extra_width)
    justification = adjust(
        justification or self.justification, extra_justification)
    indent = adjust(indent or self.indent, extra_indent)

    # adjust colwidths if this is a header
    if header:
        colwidths = tuple(
            max(w, len(str(a))) for w, a in zip(colwidths, args))

    # Rows and their underline share one separator so that the underline
    # segments stay aligned with the columns.
    separator = ' '

    fmt_str = []
    fmt_args = []
    for i, (value, width, just, ind) in enumerate(
            zip(args, colwidths, justification, indent)):
        if value is None:
            value = default or self.default
        if isinstance(value, str):
            typ = 's'
            if len(value) > width:
                value = truncate_string(value, width)
        elif isinstance(value, float):
            # One decimal place with thousands separator, as a percentage
            # or as a plain number.
            typ = ',.1' + ('%' if pct else 'f')
        else:
            typ = ',d'
        # The indent counts towards the column width, so the formatted
        # field itself is narrower by the indent's length.
        fmt_str.append(
            ind + '{' + str(i) + ':' + just + str(width - len(ind)) +
            typ + '}')
        fmt_args.append(value)

    self._print(separator.join(fmt_str).format(*fmt_args), **kwargs)

    if not header:
        return

    # A bool selects the default underline character or no underline at
    # all; multiplying a bool by the width would yield an int, which
    # str.join() rejects.
    if isinstance(underline, bool):
        underline = '-' if underline else None
    if underline is not None:
        sepline = separator.join(
            (underline * width) for width in colwidths)
        self._print(sepline, **kwargs)