示例#1
0
文件: filters.py 项目: ajm/seance
    def accept(self, seq) :
        """Decide whether `seq` carries this filter's primer.

        The decision is delegated to IUPAC.close_enough(), called with the
        primer, the read's sequence and the error allowance stored in
        self.err. When the read is accepted and self.clip is truthy, the
        read is modified in place through seq.remove_mid(self.len).

        Returns the result of IUPAC.close_enough() unchanged.
        """
        # NOTE(review): the whole read is compared against the primer, not
        # just its first self.len bases (an unused local holding that prefix
        # was removed here) - confirm that close_enough() is meant to
        # receive the full sequence.
        ret = IUPAC.close_enough(self.primer, seq.sequence, self.err)

        if ret and self.clip :
            # primer part of sequence may be longer or
            # shorter, but it does not really matter
            # as terminal gaps are not included in our
            # definition of identity
            seq.remove_mid(self.len)

        return ret
示例#2
0
文件: tools.py 项目: ajm/seance
    def extract(self, sff, outdir, primer, primer_errors, barcode, barcode_errors, max_homopolymer) :
        """Select reads from an SFF file and write them as a PyroNoise flow file.

        Every record of the SFF file named by sff.get_filename() is examined.
        A record is kept when
          * its trimmed flow length (see _clean_flow_length, capped at 450)
            is at least 360, and
          * the first len(barcode) quality-clipped bases pass
            IUPAC.close_enough() against `barcode` with `barcode_errors`, and
          * the following len(primer) bases pass IUPAC.close_enough()
            against `primer` with `primer_errors`.

        Kept records are written to "flows.dat" inside `outdir`: a header
        line with the number of reads and the longest raw flow count, then
        one line per read holding its id, its trimmed flow length and all of
        its flow values divided by 100 with two decimals.

        Returns (number_kept, path_of_flows_file), or (0, None) without
        creating a file when no record was kept. Exits the process with
        status 1 if BioPython cannot be imported.
        """
        try :
            from Bio import SeqIO
        except ImportError :
            # write() instead of the Python-2-only "print >>" statement;
            # the emitted text is identical
            sys.stderr.write("BioPython not installed (only required for working with SFF files)\n")
            sys.exit(1)

        min_flow_length = 360   # shorter trimmed reads are discarded
        max_flow_length = 450   # longer trimmed reads are capped to this

        barcode_len = len(barcode)
        primer_len = len(primer)

        raw_seq_total = 0
        kept = []   # (read id, trimmed flow length, raw flow values)

        for record in SeqIO.parse(sff.get_filename(), "sff") :
            raw_seq_total += 1

            annotations = record.annotations
            good_bases = record.seq[annotations["clip_qual_left"] : annotations["clip_qual_right"]]
            barcode_seq = good_bases[:barcode_len]
            primer_seq = good_bases[barcode_len : barcode_len + primer_len]

            flow_values = annotations["flow_values"]
            flow_length = min(self._clean_flow_length(flow_values, max_homopolymer), max_flow_length)

            if flow_length >= min_flow_length and \
                    IUPAC.close_enough(barcode, barcode_seq, barcode_errors) and \
                    IUPAC.close_enough(primer, primer_seq, primer_errors) :
                kept.append((record.id, flow_length, flow_values))

        if not kept :
            self.log.info("kept 0/%d sequences" % raw_seq_total)
            return 0, None

        # output pyronoise input file
        # see http://userweb.eng.gla.ac.uk/christopher.quince/Software/PyroNoise.html
        flows_path = join(outdir, "flows.dat")

        # "with" guarantees the file is closed even if a write fails
        with open(flows_path, 'w') as out :
            out.write("%d %d\n" % (len(kept), max(len(values) for _, _, values in kept)))

            for name, flow_length, values in kept :
                fields = [ name, str(flow_length) ]
                fields.extend("%.2f" % (float(value) / 100.0) for value in values)
                out.write(" ".join(fields) + "\n")

        self.log.info("kept %d/%d sequences" % (len(kept), raw_seq_total))
        return len(kept), flows_path

    @staticmethod
    def _clean_flow_length(flow_values, max_homopolymer) :
        """Return the number of leading flows that belong to clean groups of four.

        Flow values are divided by 100 and walked in consecutive groups of
        four. Within a group a value above 0.5 counts as signal, and a signal
        below 0.7 also counts as noise. The walk stops at the first group
        that has any noise or no signal at all; the result is four times the
        number of groups accepted before that point. A trailing group with
        fewer than four values is ignored.
        """
        clean_groups = 0

        # len - 3 keeps only complete groups, so a flow count that is not a
        # multiple of four cannot run off the end of the sequence
        for start in range(0, len(flow_values) - 3, 4) :
            signal = 0
            noise = 0

            for raw in flow_values[start : start + 4] :
                value = float(raw) / 100.0

                # NOTE(review): this only abandons the rest of the current
                # group; the read is truncated here only if no earlier flow
                # in the group registered as signal - confirm this is the
                # intended handling of over-long homopolymers.
                if int(value + 0.5) > max_homopolymer :
                    break

                if value > 0.5 :
                    signal += 1
                    if value < 0.7 :
                        noise += 1

            if noise > 0 or signal == 0 :
                break

            clean_groups += 1

        return clean_groups * 4