def main(argv):
    """Write gzipped, paired FASTQ files of synthetic reads.

    Command-line arguments are parsed with parse_args(); if no options are
    returned, 1 is returned immediately. Otherwise a Specimen -> Sample ->
    Damage -> Library chain is built from the options and the first
    positional argument, and every lane of the library is written out in
    batches of `options.lanes_per_file` reads. The second positional
    argument is used as the prefix of the output filenames.
    """
    options, args = parse_args(argv)
    if not options:
        return 1

    banner = ("Generating %i lane(s) of synthetic reads ...\nDISCLAIMER: For "
              "demonstration of PALEOMIX usage only; not for serious usage!")
    print(banner % (options.lanes_num,))

    # Each stage wraps the previous one; only the library is used below.
    specimen = Specimen(options, args[0])
    sample = Sample(options, specimen)
    damage = Damage(options, sample)
    library = Library(options, damage)

    for (lane_idx, lane) in enumerate(library.lanes, start=1):
        batches = fragment(options.lanes_per_file, lane.sequences)
        for (batch_idx, batch) in enumerate(batches, start=1):
            # The escaped '%%s' leaves a placeholder for the mate number.
            pattern = "%s%s_L%i_R%%s_%02i.fastq.gz" \
                % (args[1], library.barcode, lane_idx, batch_idx)
            print(" Writing %s" % (pattern % "{Pair}",))

            with gzip.open(pattern % 1, "w") as mate_1, \
                    gzip.open(pattern % 2, "w") as mate_2:
                for (name, seq_1, seq_2) in batch:
                    record_1 = "@%s%s/1\n%s\n" % (library.barcode, name, seq_1)
                    mate_1.write(record_1)
                    quals_1 = "+\n%s\n" % ("I" * len(seq_1),)
                    mate_1.write(quals_1)

                    record_2 = "@%s%s/2\n%s\n" % (library.barcode, name, seq_2)
                    mate_2.write(record_2)
                    quals_2 = "+\n%s\n" % ("H" * len(seq_2),)
                    mate_2.write(quals_2)
def _run(self, _config, temp):
    """Write the input alignment, minus excluded groups, as wrapped text.

    The output starts with a header line holding the number of sequences
    and the length of one of them. Each sequence (in sorted name order)
    follows as a blank line, its name, and the upper-cased sequence split
    into rows of 60 characters, each row split into space-separated
    triplets. The file is written below `temp`, at the re-rooted location
    of the node's output file.
    """
    msa = read_msa(self._input_file)
    for excluded_group in self._excluded:
        msa.pop(excluded_group)

    num_sequences = len(msa)
    num_columns = len(msa.itervalues().next())
    rows = [" %i %i" % (num_sequences, num_columns)]

    for (name, seq) in sorted(msa.iteritems()):
        rows.extend(("", name))
        rows.extend(" ".join(fragment(3, block))
                    for block in fragment(60, seq.upper()))

    destination = fileutils.reroot_path(temp, self._output_file)
    with open(destination, "w") as output:
        output.write("\n".join(rows))
def flush_fasta(sequence):
    """Print every complete FASTA line of `sequence`; return the leftover.

    The sequence is split into pieces of _FASTA_COLUMNS characters. Pieces
    of full width are printed as they are produced; the first piece that is
    shorter than _FASTA_COLUMNS is returned unprinted. If there is no such
    piece, an empty string is returned.
    """
    leftover = ""
    for chunk in utilities.fragment(_FASTA_COLUMNS, sequence):
        if len(chunk) >= _FASTA_COLUMNS:
            print(chunk)
            continue

        # A short piece ends the output; hand it back to the caller.
        leftover = chunk
        break

    return leftover
def _run(self, _config, temp):
    """Write one FASTA file per sequence, holding that sequence per taxon.

    Every input FASTA file in `self._infiles` (taxon name -> filename) is
    read into a mapping of record name -> sequence. For each entry of
    `self._sequences` (sequence name -> mapping of taxon name -> label), a
    file named '<sequence name>.fasta' is written in `temp`, containing one
    record per taxon (in sorted taxon order) with the sequence wrapped at
    60 characters per line.
    """
    fastas = {}
    for (taxon_name, in_filename) in self._infiles.iteritems():
        current_fastas = {}
        # The record name must not reuse the taxon loop variable: doing so
        # would key `fastas` by the last record name read from the file
        # instead of by the taxon name.
        for ((record_name, _meta), sequence) in read_fasta(in_filename):
            current_fastas[record_name] = sequence
        fastas[taxon_name] = current_fastas

    fastas = sorted(fastas.items())

    for (sequence_name, taxa_map) in sorted(self._sequences.iteritems()):
        lines = []
        for (taxon_name, sequences) in fastas:
            fastaseq = "\n".join(fragment(60, sequences[sequence_name]))
            current_name = taxa_map[taxon_name]
            lines.append(">%s %s\n%s\n" % (taxon_name, current_name, fastaseq))

        out_filename = os.path.join(temp, sequence_name + ".fasta")
        with open(out_filename, "w") as fasta:
            fasta.write("".join(lines))
def test_fragment__multiple_fragments_partial():
    """Input longer than the size yields full chunks plus a short tail."""
    text = "abcdefgh"

    str_chunks = list(utils.fragment(3, text))
    assert_equal(str_chunks, ["abc", "def", "gh"])

    list_chunks = list(utils.fragment(3, list(text)))
    assert_equal(list_chunks, [list("abc"), list("def"), list("gh")])
def test_fragment__single_fragment():
    """Input of exactly the requested size yields one chunk."""
    from_str = list(utils.fragment(3, "abc"))
    assert_equal(from_str, ["abc"])

    from_list = list(utils.fragment(3, ["a", "b", "c"]))
    assert_equal(from_list, [["a", "b", "c"]])
def test_fragment__partial_fragment():
    """Input shorter than the requested size yields one short chunk."""
    from_str = list(utils.fragment(3, "ab"))
    assert_equal(from_str, ["ab"])

    from_list = list(utils.fragment(3, ["a", "b"]))
    assert_equal(from_list, [["a", "b"]])
def test_fragment__empty():
    """Empty input yields no chunks at all."""
    from_str = list(utils.fragment(5, ""))
    assert_equal(from_str, [])

    from_list = list(utils.fragment(5, []))
    assert_equal(from_list, [])
def _format_int(self, value):
    """Return `value` as text with a comma between each group of 3 digits.

    The text form of `value` is grouped from the right, e.g. 1234567 is
    rendered as '1,234,567'. A leading minus sign is kept out of the
    grouping, so that -123 is rendered as '-123' and not '-,123'.
    """
    text = str(value)
    sign = ""
    if text.startswith("-"):
        sign, text = "-", text[1:]

    # Length of the left-most group; a full group when the length divides
    # evenly, so that no empty leading group is produced.
    head = len(text) % 3 or 3
    groups = [text[:head]]
    groups.extend(text[idx:idx + 3] for idx in range(head, len(text), 3))

    return sign + ",".join(groups)
def test_fragment__multiple_fragments_partial():
    """Eight items in chunks of three give two full chunks and a short one."""
    cases = (
        ("abcdefgh", ["abc", "def", "gh"]),
        (list("abcdefgh"), [list("abc"), list("def"), list("gh")]),
    )

    for (items, expected) in cases:
        assert_equal(list(utils.fragment(3, items)), expected)
def __repr__(self):
    """Return the record as FASTA text, wrapped at 60 chars per line.

    The header line holds the name, followed by a space and the meta
    information when the latter is set.
    """
    header = self.name
    meta = self.meta
    if meta:
        header = "%s %s" % (header, meta)

    wrapped = "\n".join(fragment(60, self.sequence))
    return ">%s\n%s\n" % (header, wrapped)
def test_fragment__iterable():
    """Exhaust fragment() applied to an xrange object.

    NOTE(review): the body asserts nothing; the expected outcome is
    presumably enforced outside this function -- confirm.
    """
    values = xrange(6)
    chunks = utils.fragment(3, values)
    list(chunks)
def wrap_fasta(name, sequence):
    """Return a FASTA record for `name`, wrapping `sequence` at 60 chars."""
    wrapped = "\n".join(fragment(60, sequence))
    return ">%s\n%s\n" % (name, wrapped)
def test_fragment__set():
    """Exhaust fragment() applied to a set.

    NOTE(review): the body asserts nothing; the expected outcome is
    presumably enforced outside this function -- confirm.
    """
    values = set(range(6))
    chunks = utils.fragment(3, values)
    list(chunks)