Exemplo n.º 1
0
 def search_one(self, query_seqid, pctid, threads=None):
     """Search one stored sequence and yield hits at exactly ``pctid``.

     This is a generator: nothing is written or searched until the
     first item is requested.

     The query sequence is looked up in ``self.seqs``, written to
     ``temp_query.fasta`` in the current working directory, and searched
     with ``PctidAligner(self.fasta_fp)``; hits are written to
     ``temp_query_hits.txt``. Files left over from a previous call are
     kept under ``temp_prev_*`` names.

     Args:
         query_seqid: Key of the query sequence in ``self.seqs``.
         pctid: Percent identity; passed to the aligner as ``min_pctid``
             and, formatted to one decimal place, used to filter hits.
         threads: Forwarded unchanged to the aligner's ``search``.

     Yields:
         ``AssemblyPair(query, subject, pident, qseqid, sseqid)`` for
         each parsed hit whose ``"pident"`` equals the formatted
         ``pctid``.
     """
     pctid_str = "{:.1f}".format(pctid)
     print("Searching", query_seqid, "at", pctid_str, "pct identity")
     query_seq = self.seqs[query_seqid]

     # Preserve the previous run's files. os.replace overwrites an
     # existing backup on every platform, whereas os.rename raises on
     # Windows when the destination already exists.
     query_fp = "temp_query.fasta"
     if os.path.exists(query_fp):
         os.replace(query_fp, "temp_prev_query.fasta")
     query_hits_fp = "temp_query_hits.txt"
     if os.path.exists(query_hits_fp):
         os.replace(query_hits_fp, "temp_prev_query_hits.txt")

     with open(query_fp, "w") as f:
         write_fasta(f, [(query_seqid, query_seq)])

     aligner = PctidAligner(self.fasta_fp)
     aligner.search(query_fp,
                    query_hits_fp,
                    min_pctid=pctid,
                    threads=threads,
                    max_hits=10000)

     with open(query_hits_fp) as f:
         for hit in aligner.parse(f):
             # NOTE(review): this compares against a string, so it assumes
             # parse() yields "pident" as text with one decimal place --
             # confirm against PctidAligner.parse.
             if hit["pident"] != pctid_str:
                 continue
             query = self.assemblies[hit["qseqid"]]
             subject = self.assemblies[hit["sseqid"]]
             # Use a separate name so the ``pctid`` argument is not rebound.
             hit_pctid = hit["pident"]
             yield AssemblyPair(query, subject, hit_pctid, hit["qseqid"],
                                hit["sseqid"])
Exemplo n.º 2
0
    def search_reference_seqs(self, query_seqs):
        """Run ``vsearch --usearch_global`` for the query sequences.

        The queries are written to a temporary FASTA file and searched
        against ``self.reference_udb_fp``. vsearch is told to write its
        hits (``--userout``) to a second temporary file, which is returned.

        Args:
            query_seqs: Sequences to search, in whatever form
                ``write_fasta`` accepts.

        Returns:
            The open ``tempfile.NamedTemporaryFile`` object whose path was
            given to vsearch as the output file. The file is removed when
            this object is closed, so the caller must keep it alive while
            the results are needed.

        Raises:
            subprocess.CalledProcessError: If vsearch exits with a
                non-zero status.
        """
        # NOTE(review): opened write-only, so callers presumably re-open the
        # results through ``.name`` rather than reading this handle -- confirm.
        reference_hits_file = tempfile.NamedTemporaryFile(suffix=".txt",
                                                          mode="wt")

        # 97.0 --> 0.97
        vsearch_min_id = "{:.2f}".format(self.min_pct_id / 100)

        # The query file is only needed while vsearch runs; the context
        # manager removes it as soon as the search finishes or fails.
        with tempfile.NamedTemporaryFile(suffix=".fasta",
                                         mode="wt") as query_file:
            write_fasta(query_file, query_seqs)
            # vsearch reads the file by name, so buffered data must be on
            # disk before the subprocess starts.
            query_file.flush()

            vsearch_args = [
                "vsearch",
                "--usearch_global",
                query_file.name,
                "--db",
                self.reference_udb_fp,
                "--userout",
                reference_hits_file.name,
                "--iddef",
                "2",
                "--id",
                vsearch_min_id,
                "--maxaccepts",
                # subprocess rejects non-string arguments; convert like
                # num_threads below so an int max_hits works.
                str(self.max_hits),
                "--userfields",
                "query+target+id2+alnlen+mism+gaps+qilo+qihi+tilo+tihi+qs+ts+qrow+trow",
            ]
            if self.num_threads:
                vsearch_args.extend(["--threads", str(self.num_threads)])

            subprocess.check_call(vsearch_args)

        reference_hits_file.seek(0)
        return reference_hits_file
Exemplo n.º 3
0
    def find_in_seqs(self, seqs):
        """Yield ``(query_id, PrimerMatch)`` pairs found by alignment.

        Generator. Produces nothing when ``seqs.all_matched()`` is true.
        Otherwise the already-matched records are written to a subject
        FASTA file, the unmatched records are searched against it with
        ``VsearchAligner``, and each hit is extended with ``HitExtender``
        and converted to a ``PrimerMatch`` labelled ``"Alignment"``.
        """
        if seqs.all_matched():
            return

        # Working file paths, all derived from this object's suffix.
        subject_fp, query_fp, result_fp = (
            self._make_fp(template.format(self.suffix))
            for template in ("subject_{0}.fa", "query_{0}.fa", "query_{0}.txt"))

        # Search the unmatched records against the matched ones.
        with open(subject_fp, "w") as subject_file:
            write_fasta(subject_file, seqs.get_matched_offset0())
        aligner = VsearchAligner(subject_fp)
        search_kwargs = dict(
            min_id=round(self.min_pct_id / 100, 2),
            top_hits_only=None)
        if self.cores > 0:
            search_kwargs["threads"] = self.cores
        hits = aligner.search(
            seqs.get_unmatched_recs(),
            input_fp=query_fp,
            output_fp=result_fp,
            **search_kwargs)

        # Extend each hit and express it in query coordinates.
        extender = HitExtender(
            seqs.get_unmatched_recs(), seqs.get_matched_offset0())
        for hit in hits:
            alignment = extender.extend_hit(hit)
            matched = seqs.matches[alignment.subject_id]
            region = AlignedRegion.from_subject(
                alignment, matched.start, matched.end)
            start_idx, end_idx = region.in_query()
            offset = region.query_offset()
            yield alignment.query_id, PrimerMatch(
                start_idx, end_idx, offset, "Alignment")
Exemplo n.º 4
0
    def search(self, seqs, input_fp=None, output_fp=None, **kwargs):
        """Write ``seqs`` to a FASTA file, run the search, and yield hits.

        This is a generator: no file is written and no search is run until
        the first hit is requested.

        Args:
            seqs: Sequences to search, in whatever form ``write_fasta``
                accepts.
            input_fp: Path for the query FASTA file. When ``None`` a
                temporary file is used.
            output_fp: Path for the search output. When ``None`` a
                temporary file is used.
            **kwargs: Forwarded unchanged to ``self._call``.

        Yields:
            Each item produced by ``self._parse`` over the output file.
        """
        # Temporary files created here; closed (and so removed) in the
        # ``finally`` below instead of waiting for garbage collection.
        temp_files = []
        try:
            if input_fp is None:
                infile = tempfile.NamedTemporaryFile(
                    mode="w+t", encoding="utf-8")
                temp_files.append(infile)
                write_fasta(infile, seqs)
                # The search reads the file by name, so the buffered
                # records must be on disk first.
                infile.flush()
                input_fp = infile.name
            else:
                with open(input_fp, "w") as f:
                    write_fasta(f, seqs)

            if output_fp is None:
                outfile = tempfile.NamedTemporaryFile()
                temp_files.append(outfile)
                output_fp = outfile.name

            self._call(input_fp, self.ref_seqs_fp, output_fp, **kwargs)

            with open(output_fp) as f:
                yield from self._parse(f)
        finally:
            for temp_file in temp_files:
                temp_file.close()
Exemplo n.º 5
0
 def search_seq(self, query_seqid, query_seq, min_pctid=90.0, threads=None):
     """Search a sequence and yield hits from other accessions.

     This is a generator: nothing is written or searched until the
     first item is requested.

     The query is written to ``temp_query.fasta`` in the current working
     directory and searched with ``PctidAligner(self.fasta_fp)``; hits
     are written to ``temp_query_hits.txt``. Files left over from a
     previous call are kept under ``temp_prev_*`` names.

     Args:
         query_seqid: Identifier written as the FASTA header of the query.
         query_seq: The query sequence itself.
         min_pctid: Forwarded to the aligner as ``min_pctid``.
         threads: Forwarded unchanged to the aligner's ``search``.

     Yields:
         ``AssemblyPair(query, subject, pident, qseqid, sseqid)`` for
         each parsed hit whose query and subject assemblies (looked up in
         ``self.assemblies``) have different ``accession`` values.
     """
     # Preserve the previous run's files. os.replace overwrites an
     # existing backup on every platform, whereas os.rename raises on
     # Windows when the destination already exists.
     query_fp = "temp_query.fasta"
     if os.path.exists(query_fp):
         os.replace(query_fp, "temp_prev_query.fasta")
     query_hits_fp = "temp_query_hits.txt"
     if os.path.exists(query_hits_fp):
         os.replace(query_hits_fp, "temp_prev_query_hits.txt")

     with open(query_fp, "w") as f:
         write_fasta(f, [(query_seqid, query_seq)])

     aligner = PctidAligner(self.fasta_fp)
     aligner.search(query_fp,
                    query_hits_fp,
                    min_pctid=min_pctid,
                    threads=threads,
                    max_hits=10000)

     with open(query_hits_fp) as f:
         for hit in aligner.parse(f):
             query = self.assemblies[hit["qseqid"]]
             subject = self.assemblies[hit["sseqid"]]
             pctid = hit["pident"]
             # Skip hits between sequences of the same accession.
             if query.accession != subject.accession:
                 yield AssemblyPair(query, subject, pctid, hit["qseqid"],
                                    hit["sseqid"])
Exemplo n.º 6
0
 def save(self):
     """Write the sequences and the seqid-to-accession table to disk.

     ``self.seqs`` is written as FASTA to ``self.fasta_fp``; then one
     tab-separated ``seqid<TAB>accession`` line per entry of
     ``self.assemblies`` is written to ``self.accession_fp``.
     """
     with open(self.fasta_fp, "w") as fasta_file:
         write_fasta(fasta_file, self.seqs.items())

     with open(self.accession_fp, "w") as accession_file:
         accession_file.writelines(
             "{0}\t{1}\n".format(seqid, assembly.accession)
             for seqid, assembly in self.assemblies.items())
0
 def test_write_fasta(self):
     """write_fasta emits a ``>id`` line followed by a sequence line per record."""
     seqs = [("a", "CCGGT"), ("b", "TTTTTTTTT")]
     # The context manager closes (and so removes) the temporary file even
     # when the assertion fails.
     with tempfile.NamedTemporaryFile(mode="w+t", encoding="utf-8") as f:
         write_fasta(f, seqs)
         # Rewind so the read below sees what was just written.
         f.seek(0)
         self.assertEqual(f.read(), ">a\nCCGGT\n>b\nTTTTTTTTT\n")