def check_seq_tax_labels(self, seq_name, orig_ranks, ranks, lws):
    """Record a mislabel if `ranks` disagrees with `orig_ranks`.

    The two lineages are compared level by level, up to the length of the
    shorter one. The first level where `ranks` holds a non-empty rank that
    differs from `orig_ranks` is treated as the mislabeled level.

    Args:
        seq_name: sequence name; stored after EpacConfig.strip_ref_prefix().
        orig_ranks: original lineage, indexable by rank level.
        ranks: lineage to check against the original, indexable by level.
        lws: per-level values indexed like `ranks` (presumably likelihood
            weights -- confirm against the caller); the entry at the
            mislabeled level is stored as 'conf'.

    Returns:
        The mislabel record (a dict, also appended to self.mislabels), or
        None when no differing level was found.
    """
    # zip() stops at the shorter lineage, so only levels present in both
    # sequences are compared; next() yields the first mismatch or -1.
    first_mismatch = next(
        (level
         for level, (orig_rank, new_rank) in enumerate(zip(orig_ranks, ranks))
         if new_rank != Taxonomy.EMPTY_RANK and new_rank != orig_rank),
        -1)

    if first_mismatch < 0:
        return None

    real_lvl = self.guess_rank_level(orig_ranks, first_mismatch)
    record = {
        'name': EpacConfig.strip_ref_prefix(seq_name),
        'orig_level': first_mismatch,
        'real_level': real_lvl,
        'level_name': self.rank_level_name(real_lvl)[1],
        'inv_level': -1 * real_lvl,  # just for sorting
        'orig_ranks': orig_ranks,
        'ranks': ranks,
        'lws': lws,
        'conf': lws[first_mismatch],
    }
    self.mislabels.append(record)
    return record
示例#2
0
 def check_seq_ids(self):
     """Check that sequence IDs in the taxonomy also exist in the alignment.

     Every key of self.taxonomy.seq_ranks_map is passed through
     EpacConfig.strip_ref_prefix() and looked up with
     self.alignment.has_seq(). IDs that are not found are collected in
     self.mis_ids.

     If any IDs are missing and self.verbose is set, an error message
     listing them is handed to self.cfg.exit_user_error(). When
     self.verbose is not set, nothing is reported and the caller has to
     inspect the returned list.

     Returns:
         self.mis_ids, the list of unprefixed IDs missing from the
         alignment (empty when everything matches).
     """
     self.mis_ids = []
     # Iterate the mapping itself instead of calling iterkeys(): it yields
     # the same keys on Python 2 and also works on Python 3, where
     # dict.iterkeys() no longer exists (assumes seq_ranks_map is a dict).
     for sid in self.taxonomy.seq_ranks_map:
         unprefixed_sid = EpacConfig.strip_ref_prefix(sid)
         if not self.alignment.has_seq(unprefixed_sid):
             self.mis_ids.append(unprefixed_sid)

     if self.mis_ids and self.verbose:
         errmsg = "ERROR: Following %d sequence(s) are missing in your alignment file:\n%s\n\n" % (len(self.mis_ids), "\n".join(self.mis_ids))
         errmsg += "Please make sure sequence IDs in taxonomic annotation file and in alignment are identical!\n"
         self.cfg.exit_user_error(errmsg)

     return self.mis_ids
示例#3
0
    def check_seq_ids(self):
        """Check that sequence IDs in the taxonomy also exist in the alignment.

        Every key of self.taxonomy.seq_ranks_map is passed through
        EpacConfig.strip_ref_prefix() and looked up with
        self.alignment.has_seq(). IDs that are not found are collected in
        self.mis_ids.

        If any IDs are missing and self.verbose is set, an error message
        listing them is handed to self.cfg.exit_user_error(). When
        self.verbose is not set, nothing is reported and the caller has to
        inspect the returned list.

        Returns:
            self.mis_ids, the list of unprefixed IDs missing from the
            alignment (empty when everything matches).
        """
        self.mis_ids = []
        # Iterate the mapping itself instead of calling iterkeys(): it
        # yields the same keys on Python 2 and also works on Python 3, where
        # dict.iterkeys() no longer exists (assumes seq_ranks_map is a dict).
        for sid in self.taxonomy.seq_ranks_map:
            unprefixed_sid = EpacConfig.strip_ref_prefix(sid)
            if not self.alignment.has_seq(unprefixed_sid):
                self.mis_ids.append(unprefixed_sid)

        if self.mis_ids and self.verbose:
            errmsg = "ERROR: Following %d sequence(s) are missing in your alignment file:\n%s\n\n" % (
                len(self.mis_ids), "\n".join(self.mis_ids))
            errmsg += "Please make sure sequence IDs in taxonomic annotation file and in alignment are identical!\n"
            self.cfg.exit_user_error(errmsg)

        return self.mis_ids
示例#4
0
 def mis_rec_to_string(self, mis_rec):
     """Format a mislabel record as a single tab-separated line.

     Columns, in order: sequence ID, level name, original rank, new rank
     and the 'lws' value at the mislabeled level ("NA", "NA" and the first
     'lws' value when 'orig_level' is negative), both full lineages as
     produced by Taxonomy.lineage_str(), all 'lws' values joined by ';',
     and 'rank_conf' when that key is present.

     The sequence ID and both lineages are first mapped through
     self.refjson.get_uncorr_seqid() / get_uncorr_ranks().

     Args:
         mis_rec: dict with keys 'name', 'orig_level', 'level_name',
             'orig_ranks', 'ranks', 'lws' and optionally 'rank_conf'.

     Returns:
         The formatted line, without a trailing newline.
     """
     level = mis_rec['orig_level']
     raw_seq_id = self.refjson.get_uncorr_seqid(mis_rec['name'])
     seq_id = EpacConfig.strip_ref_prefix(raw_seq_id)
     orig_lineage = self.refjson.get_uncorr_ranks(mis_rec['orig_ranks'])
     new_lineage = self.refjson.get_uncorr_ranks(mis_rec['ranks'])

     # A negative level means there is no concrete rank pair to show.
     if level >= 0:
         summary = (mis_rec['level_name'], orig_lineage[level],
                    new_lineage[level], mis_rec['lws'][level])
     else:
         summary = (mis_rec['level_name'], "NA", "NA", mis_rec['lws'][0])

     pieces = [
         seq_id, "\t",
         "%s\t%s\t%s\t%.3f\t" % summary,
         Taxonomy.lineage_str(orig_lineage), "\t",
         Taxonomy.lineage_str(new_lineage), "\t",
         ";".join("%.3f" % weight for weight in mis_rec['lws']),
     ]
     if 'rank_conf' in mis_rec:
         pieces.append("\t%.3f" % mis_rec['rank_conf'])
     return "".join(pieces)
示例#5
0
    def mis_rec_to_string(self, mis_rec):
        """Format a mislabel record as a single tab-separated line.

        Columns, in order: sequence ID, level name, original rank, new rank
        and the 'lws' value at the mislabeled level ("NA", "NA" and the
        first 'lws' value when 'orig_level' is negative), both full lineages
        as produced by Taxonomy.lineage_str(), all 'lws' values joined by
        ';', and 'rank_conf' when that key is present.

        The sequence ID and both lineages are first mapped through
        self.refjson.get_uncorr_seqid() / get_uncorr_ranks().

        Args:
            mis_rec: dict with keys 'name', 'orig_level', 'level_name',
                'orig_ranks', 'ranks', 'lws' and optionally 'rank_conf'.

        Returns:
            The formatted line, without a trailing newline.
        """
        level = mis_rec['orig_level']
        raw_seq_id = self.refjson.get_uncorr_seqid(mis_rec['name'])
        seq_id = EpacConfig.strip_ref_prefix(raw_seq_id)
        orig_lineage = self.refjson.get_uncorr_ranks(mis_rec['orig_ranks'])
        new_lineage = self.refjson.get_uncorr_ranks(mis_rec['ranks'])

        # A negative level means there is no concrete rank pair to show.
        if level >= 0:
            summary = (mis_rec['level_name'], orig_lineage[level],
                       new_lineage[level], mis_rec['lws'][level])
        else:
            summary = (mis_rec['level_name'], "NA", "NA", mis_rec['lws'][0])

        pieces = [
            seq_id, "\t",
            "%s\t%s\t%s\t%.3f\t" % summary,
            Taxonomy.lineage_str(orig_lineage), "\t",
            Taxonomy.lineage_str(new_lineage), "\t",
            ";".join("%.3f" % weight for weight in mis_rec['lws']),
        ]
        if 'rank_conf' in mis_rec:
            pieces.append("\t%.3f" % mis_rec['rank_conf'])
        return "".join(pieces)