Exemplo n.º 1
0
def test_append():
    """Check that writing to one path twice in append mode keeps both writes.

    Runs once for a plain-text file and once for a ``.gz`` file. The payload
    is written twice through ``open_output``; the file is then read back with
    ``xopen`` and must consist of exactly one line holding the payload twice.
    """
    for ext in ["", ".gz"]:  # BZ2 does NOT support append
        filename = 'truncated.fastq' + ext
        text = "AB"
        # What reading the file back in text mode should yield.
        reference = text + text
        mode = 'a'
        if ext != "":
            mode = 'ab'
            # On Py3, need to send BYTES, not unicode
            text = get_compressor(filename).compress(text.encode())
        print("Trying ext=%s" % ext)
        with temporary_path(filename) as path:
            try:
                os.unlink(path)
            except OSError:
                # Best-effort removal so the first append starts from scratch.
                pass
            with open_output(path, mode) as f:
                f.write(text)
            print(path)
            with open_output(path, mode) as f:
                f.write(text)
            with xopen(path, 'r') as f:
                lines = list(f)
            # Compare the whole list instead of asserting inside a loop over
            # the file: a loop body never runs (and so never fails) when
            # nothing can be read back.
            assert lines == [reference]
Exemplo n.º 2
0
def test_append():
    """Append the same payload to a file twice and verify the combined content.

    The check is run for an uncompressed file and for a ``.gz`` file. In the
    compressed case the payload is compressed up front with the compressor
    returned by ``get_compressor`` and written in binary append mode.
    """
    for ext in ["", ".gz"]:  # BZ2 does NOT support append
        filename = 'truncated.fastq' + ext
        # Text expected when the file is read back after both writes.
        expected = "ABAB"
        if ext != "":
            mode = 'ab'
            # On Py3, need to send BYTES, not unicode
            payload = get_compressor(filename).compress("AB".encode())
        else:
            mode = 'a'
            payload = "AB"
        print("Trying ext=%s" % ext)
        with temporary_path(filename) as path:
            try:
                os.unlink(path)
            except OSError:
                # Best-effort removal of a leftover file; failure is ignored.
                pass
            with open_output(path, mode) as f:
                f.write(payload)
            print(path)
            with open_output(path, mode) as f:
                f.write(payload)
            with xopen(path, 'r') as f:
                lines_read = list(f)
            # Asserting per line inside a loop would succeed trivially for an
            # empty file, so the complete list of lines is compared instead.
            assert lines_read == [expected]
Exemplo n.º 3
0
def generate_fasta(outfile, summary, union=False, perinput=False):
    """Write the matches listed in ``summary['detect']['matches']`` as FASTA.

    Args:
        outfile: Output path. With `union` it is the file that is written;
            with `perinput` its name, minus a trailing '.fasta' or '.fa', is
            the prefix of the per-input files.
        summary: Summary dict. Reads ``summary['input']['input_names']``,
            ``summary['record_counts'][0]`` and
            ``summary['detect']['matches']``.
        union: Whether to write the records of all inputs to `outfile`.
        perinput: Whether to write one '<prefix>.<i>.fasta' file per input.
    """
    names = summary['input']['input_names'] or repeat(None)
    n_reads = summary['record_counts'][0]
    fasta_format = FastaFormat()
    if union:
        union_records = []
    if perinput:
        name_prefix = outfile
        for extension in ('.fasta', '.fa'):
            if outfile.endswith(extension):
                name_prefix = outfile[:-len(extension)]
                break

    def match_entries(idx, match):
        """Return the list of formatted FASTA entries for one match."""
        annotations = [
            "kmer_freq={}".format(match['kmer_freq']),
            "kmer_freq_type={}".format(match["kmer_freq_type"])
        ]
        if match['abundance']:
            annotations.append("abundance={}".format(match['abundance']))
            annotations.append(
                "abundance_frac={}".format(match['abundance'] / n_reads))
        if match['contaminant_to_known_match_frac']:
            annotations.append("contaminant_to_known_match_frac={}".format(
                match["contaminant_to_known_match_frac"]))

        if not match['is_known']:
            # Not flagged as known: the match index is the entry name and the
            # longest k-mer is the sequence.
            header = "{} {}".format(idx, ";".join(annotations))
            return [fasta_format.format_entry(header, match['longest_kmer'])]

        known_names = match['known_names']
        name = known_names[0]
        if len(known_names) > 1:
            annotations.append(
                "other_names={}".format('|'.join(known_names[1:])))
        description = ";".join(annotations)
        known_seqs = match['known_seqs']
        if len(known_seqs) > 1:
            # Every sequence of the match gets the same '<name>.<idx>' header.
            header = "{}.{} {}".format(name, idx, description)
            return [
                fasta_format.format_entry(header, seq) for seq in known_seqs]
        header = "{} {}".format(name, description)
        return [fasta_format.format_entry(header, known_seqs[0])]

    per_input = zip(names, summary['detect']['matches'])
    for i, (_, matches) in enumerate(per_input):
        records = []
        for idx, match in enumerate(matches, 1):
            records.extend(match_entries(idx, match))
        if union:
            union_records.extend(records)
        if perinput:
            input_path = "{}.{}.fasta".format(name_prefix, i)
            with open_output(input_path, 'wt') as out:
                out.write("".join(records))

    if union:
        with open_output(outfile, 'wt') as union_out:
            union_out.write("".join(union_records))
Exemplo n.º 4
0
 def test_write_sequence_object(self):
     """Format two ``Sequence`` objects as FASTA and check the written file.

     The records are written through ``open_output``; the file is then read
     back with the builtin ``open`` and compared to the expected text.
     """
     fasta = FastaFormat()
     entries = (("name", "CCATA"), ("name2", "HELLO"))
     with open_output(self.path, "w") as writer:
         for read_name, bases in entries:
             writer.write(fasta.format(Sequence(read_name, bases)))
     with open(self.path) as reader:
         written = reader.read()
     assert written == '>name\nCCATA\n>name2\nHELLO\n'
Exemplo n.º 5
0
 def test_autodetect_fastq_format(self):
     """Write ``simple_fastq`` using the format ``get_format`` returns for a
     '.fastq' path and check that ``openseq`` reads the same records back.
     """
     fastq_path = os.path.join(self._tmpdir, 'tmp.fastq')
     formatter = get_format(fastq_path)
     with open_output(fastq_path, "w") as out:
         for record in simple_fastq:
             out.write(formatter.format(record))
     records_read = list(openseq(fastq_path))
     assert records_read == simple_fastq
Exemplo n.º 6
0
 def test_twoheaders(self):
     """Check FASTQ output for records that carry a second header.

     Each ``Sequence`` is built with ``name2`` equal to its name, and the
     expected file repeats the name on the '+' line.
     """
     fastq = FastqFormat()
     entries = (
         ("name", "CCATA", "!#!#!"),
         ("name2", "HELLO", "&&&!&"),
     )
     with open_output(self.path, "w") as writer:
         for read_name, bases, quals in entries:
             record = Sequence(read_name, bases, quals, name2=read_name)
             writer.write(fastq.format(record))
     with open(self.path) as reader:
         written = reader.read()
     assert written == '@name\nCCATA\n+name\n!#!#!\n@name2\nHELLO\n+name2\n&&&!&\n'
Exemplo n.º 7
0
 def test_write_sequence_object(self):
     """Write two FASTA-formatted ``Sequence`` objects and verify the file.

     The content of ``self.path`` must be the two records, one after the
     other, each as a '>' header line followed by its sequence line.
     """
     expected = '>name\nCCATA\n>name2\nHELLO\n'
     formatter = FastaFormat()
     with open_output(self.path, "w") as out:
         first = Sequence("name", "CCATA")
         out.write(formatter.format(first))
         second = Sequence("name2", "HELLO")
         out.write(formatter.format(second))
     with open(self.path) as handle:
         content = handle.read()
     assert content == expected
Exemplo n.º 8
0
 def test(self):
     """Write two entries via ``FastqFormat.format_entry`` and check the
     resulting file, which is expected to have a bare '+' separator line.
     """
     fastq = FastqFormat()
     entries = (
         ("name", "CCATA", "!#!#!"),
         ("name2", "HELLO", "&&&!&&"),
     )
     with open_output(self.path, "w") as writer:
         for entry in entries:
             writer.write(fastq.format_entry(*entry))
     with open(self.path) as reader:
         written = reader.read()
     assert written == '@name\nCCATA\n+\n!#!#!\n@name2\nHELLO\n+\n&&&!&&\n'
Exemplo n.º 9
0
 def test_autodetect_fastq_format(self):
     """Round-trip ``simple_fastq`` through a '.fastq' file.

     The format object comes from ``get_format`` given only the path; the
     records read back by ``openseq`` must equal the ones written.
     """
     target = os.path.join(self._tmpdir, 'tmp.fastq')
     fmt_for_path = get_format(target)
     with open_output(target, "w") as sink:
         for seq in simple_fastq:
             formatted = fmt_for_path.format(seq)
             sink.write(formatted)
     round_tripped = list(openseq(target))
     assert round_tripped == simple_fastq
Exemplo n.º 10
0
    def get_writer(self, file_desc, compressed=False):
        """Return the writer registered for a file descriptor, opening it on
        first use.

        Args:
            file_desc: File descriptor. A ``(path, mode)`` tuple when
                `compressed` is true, otherwise only a path.
            compressed: Whether data has already been compressed.

        Returns:
            The writer stored in ``self.writers`` under the path.
        """
        path, mode = file_desc if compressed else (file_desc, None)

        # An existing writer is reused regardless of the requested mode.
        if path in self.writers:
            return self.writers[path]

        real_path = (
            add_suffix_to_path(path, self.suffix) if self.suffix else path)
        # TODO: test whether O_NONBLOCK allows non-blocking write to NFS
        if compressed:
            writer = open_output(real_path, mode)
        else:
            writer = xopen(real_path, "w")
        # Writers are keyed by the original path, not the suffixed one.
        self.writers[path] = writer
        return writer
Exemplo n.º 11
0
 def get_writer(self, file_desc, compressed=False):
     """Look up the writer for a file descriptor, creating it if needed.

     Args:
         file_desc: File descriptor. If `compressed==True`, this is a tuple
             (path, mode), otherwise it's only a path.
         compressed: Whether data has already been compressed.

     Returns:
         The writer kept in ``self.writers`` for the descriptor's path.
     """
     if compressed:
         path, mode = file_desc
     else:
         path = file_desc

     writers = self.writers
     if path in writers:
         # Already opened earlier; hand back the same writer.
         return writers[path]

     real_path = path
     if self.suffix:
         real_path = add_suffix_to_path(path, self.suffix)
     # TODO: test whether O_NONBLOCK allows non-blocking write to NFS
     if compressed:
         writers[path] = open_output(real_path, mode)
     else:
         writers[path] = xopen(real_path, "w")
     return writers[path]
Exemplo n.º 12
0
 def test_write_qualities_to_fasta(self):
     """Request a format with ``qualities=True`` for a '.fasta' path.

     ``get_format`` must still return a ``FastaFormat``; writing
     ``simple_fastq`` with it and reading the file back with ``openseq``
     must give ``simple_fasta``.
     """
     fasta_path = os.path.join(self._tmpdir, 'tmp.fasta')
     formatter = get_format(fasta_path, qualities=True)
     assert isinstance(formatter, FastaFormat)
     with open_output(fasta_path, "w") as out:
         for record in simple_fastq:
             out.write(formatter.format(record))
     records_read = list(openseq(fasta_path))
     assert records_read == simple_fasta
Exemplo n.º 13
0
 def generate_text_report(self, fmt, summary, outfile, **kwargs):
     """Write a report in the requested format.

     Args:
         fmt: Report format. 'txt' and 'fasta' are handled here; any other
             value is passed to the parent's ``generate_from_template``.
         summary: The summary handed to the report generator.
         outfile: The output file.
         kwargs: Additional arguments forwarded to the generator used.
     """
     if fmt == 'txt':
         with open_output(outfile, context_wrapper=True) as out:
             generate_reports(out, summary, **kwargs)
         return
     if fmt == 'fasta':
         generate_fasta(outfile, summary, **kwargs)
         return
     # Neither 'txt' nor 'fasta': defer to the parent class.
     super().generate_from_template(fmt, summary, outfile, **kwargs)
Exemplo n.º 14
0
 def test_write_qualities_to_fasta(self):
     """Write FASTQ records to a '.fasta' path although qualities were
     requested, and check that the file reads back as ``simple_fasta``.
     """
     target = os.path.join(self._tmpdir, 'tmp.fasta')
     chosen_format = get_format(target, qualities=True)
     # The '.fasta' path is expected to win over ``qualities=True``.
     assert isinstance(chosen_format, FastaFormat)
     with open_output(target, "w") as sink:
         for seq in simple_fastq:
             formatted = chosen_format.format(seq)
             sink.write(formatted)
     round_tripped = list(openseq(target))
     assert round_tripped == simple_fasta
Exemplo n.º 15
0
 def test(self):
     """Check the file produced by two ``FastqFormat.format_entry`` calls.

     Each entry is expected as four lines: '@' header, sequence, a bare
     '+' line and the qualities.
     """
     expected = '@name\nCCATA\n+\n!#!#!\n@name2\nHELLO\n+\n&&&!&&\n'
     formatter = FastqFormat()
     with open_output(self.path, "w") as out:
         first = formatter.format_entry("name", "CCATA", "!#!#!")
         out.write(first)
         second = formatter.format_entry("name2", "HELLO", "&&&!&&")
         out.write(second)
     with open(self.path) as handle:
         content = handle.read()
     assert content == expected
Exemplo n.º 16
0
 def close(self):
     """Close all outputs.

     Every path in ``self.force_create`` that has no writer and is not
     ``STDOUT`` is opened for writing and closed again right away. After
     that, each writer other than ``sys.stdout``/``sys.stderr`` is closed.
     """
     for path in self.force_create:
         if path in self.writers or path == STDOUT:
             continue
         with open_output(path, "w"):
             pass
     for writer in self.writers.values():
         if writer in (sys.stdout, sys.stderr):
             # The standard streams are left open.
             continue
         writer.close()
Exemplo n.º 17
0
 def test_linelength(self):
     """Check FASTA output written with ``line_length=3``.

     Per the expected text, sequences longer than three characters are
     split over several lines.
     """
     fasta = FastaFormat(line_length=3)
     entries = (("r1", "ACG"), ("r2", "CCAT"), ("r3", "TACCAG"))
     with open_output(self.path, "w") as writer:
         for read_name, bases in entries:
             writer.write(fasta.format_entry(read_name, bases))
     with open(self.path) as reader:
         written = reader.read()
         print(written)
         assert written == '>r1\nACG\n>r2\nCCA\nT\n>r3\nTAC\nCAG\n'
Exemplo n.º 18
0
 def test_linelength(self):
     """Write three entries with a ``FastaFormat`` limited to three
     characters per line and compare the file to the expected text.
     """
     expected = '>r1\nACG\n>r2\nCCA\nT\n>r3\nTAC\nCAG\n'
     formatter = FastaFormat(line_length=3)
     with open_output(self.path, "w") as out:
         out.write(formatter.format_entry("r1", "ACG"))
         out.write(formatter.format_entry("r2", "CCAT"))
         out.write(formatter.format_entry("r3", "TACCAG"))
     with open(self.path) as handle:
         content = handle.read()
         # Printed so the actual content is available alongside a failure.
         print(content)
         assert content == expected
Exemplo n.º 19
0
 def close(self):
     """Close all outputs.

     Paths listed in ``self.force_create`` are opened for writing and
     immediately closed when they have no writer and are not ``STDOUT``.
     All writers except ``sys.stdout`` and ``sys.stderr`` are then closed.
     """
     for path in self.force_create:
         needs_creation = path not in self.writers and path != STDOUT
         if needs_creation:
             with open_output(path, "w"):
                 pass
     for writer in self.writers.values():
         is_std_stream = writer in (sys.stdout, sys.stderr)
         if not is_std_stream:
             writer.close()
Exemplo n.º 20
0
 def test_twoheaders(self):
     """Write two ``Sequence`` objects that have ``name2`` set and check
     that the FASTQ output repeats the name after the '+'.
     """
     expected = '@name\nCCATA\n+name\n!#!#!\n@name2\nHELLO\n+name2\n&&&!&\n'
     formatter = FastqFormat()
     with open_output(self.path, "w") as out:
         first = Sequence("name", "CCATA", "!#!#!", name2="name")
         out.write(formatter.format(first))
         second = Sequence("name2", "HELLO", "&&&!&", name2="name2")
         out.write(formatter.format(second))
     with open(self.path) as handle:
         content = handle.read()
     assert content == expected
Exemplo n.º 21
0
 def serialize(self, obj, fmt, mode, outfile, **kwargs):
     """Serialize a summary dict to a file.

     The module named by `fmt` is imported and its ``dump`` function is
     called with the object and the opened output stream.

     Args:
         obj: The summary dict.
         fmt: The serialization format (e.g. json, yaml); used as the name
             of the module to import.
         mode: The file mode (b=binary, t=text); appended to 'w'.
         outfile: The output file.
         kwargs: Additional arguments to pass to the `dump` method.
     """
     serializer = importlib.import_module(fmt)
     write_mode = 'w' + mode
     with open_output(outfile, write_mode) as handle:
         serializer.dump(obj, handle, **kwargs)
Exemplo n.º 22
0
 def serialize(self, obj, fmt, mode, outfile, **kwargs):
     """Dump a summary dict to a file with a serialization module.

     Args:
         obj: The summary dict.
         fmt: Name of the module providing ``dump`` (e.g. json, yaml).
         mode: The file mode (b=binary, t=text).
         outfile: The output file.
         kwargs: Additional arguments to pass to the `dump` method.
     """
     dump = importlib.import_module(fmt).dump
     # The file is always opened for writing; `mode` only picks text/binary.
     with open_output(outfile, 'w' + mode) as out:
         dump(obj, out, **kwargs)