def parse_fasta(lines):
    """Parse FASTA records from an iterable of lines.

    No assumptions are made about the line-lengths; trailing whitespace
    (including newlines) is stripped from every line before processing.

    Args:
        lines: Iterable of text lines making up zero or more FASTA records.

    Yields:
        A tuple ``((name, meta), sequence)`` per record, where ``name`` is
        the first whitespace-delimited word of the header (without the
        leading ">"), ``meta`` is the remainder of the header or ``None``
        if there is none, and ``sequence`` is the concatenation of all
        lines following the header.

    Raises:
        FASTAError: If a record does not start with a ">" header, if the
            header contains no name, or if the record has no sequence.
    """
    stripped = (line.rstrip() for line in lines)
    # NOTE(review): split_before is defined elsewhere; it is used here as if
    # it yields one list of lines per record, each starting at a ">" line
    # (apart from any lines preceding the first header).
    for record in split_before(stripped, lambda v: v.startswith(">")):
        header = record[0]
        if (not header.startswith(">")) or (len(header) == 1):
            raise FASTAError("Unnamed FASTA record")

        # Validate the joined sequence rather than the number of lines, so
        # that a header followed only by blank lines is rejected as well,
        # instead of being returned as a record with an empty sequence.
        sequence = "".join(record[1:])
        if not sequence:
            raise FASTAError(
                "FASTA record does not contain sequence: " + header[1:]
            )

        # Split out any meta information; the header is right-stripped and
        # longer than ">", so there is always at least one field.
        fields = header[1:].split(None, 1)
        meta = fields[1] if len(fields) > 1 else None

        yield (fields[0], meta), sequence
def _do_split(lst, key):
    """Return the result of ``utils.split_before(lst, key)`` as a list.

    Conversion to a list allows the implementation to be lazy, while
    making comparisons for asserts easier.
    """
    chunks = utils.split_before(lst, key)
    return list(chunks)