Пример #1
0
    def parseFasta(self, fasta_f, fasta_type):
        print BtLog.status_d['1'] % ('FASTA', fasta_f)
        self.assembly_f = abspath(fasta_f)
        if (fasta_type):
            # Set up CovLibObj for coverage in assembly header
            self.covLibs[fasta_type] = CovLibObj(fasta_type, fasta_type,
                                                 fasta_f)

        for name, seq in BtIO.readFasta(fasta_f):
            blObj = BlObj(name, seq)
            if not blObj.name in self.dict_of_blobs:
                self.seqs += 1
                self.length += blObj.length
                self.n_count += blObj.n_count

                if (fasta_type):
                    cov = BtIO.parseCovFromHeader(fasta_type, blObj.name)
                    self.covLibs[fasta_type].cov_sum += cov
                    blObj.addCov(fasta_type, cov)

                self.order_of_blobs.append(blObj.name)
                self.dict_of_blobs[blObj.name] = blObj
            else:
                BtLog.error('5', blObj.name)

        if self.seqs == 0 or self.length == 0:
            BtLog.error('1')
Пример #2
0
    def parseFasta(self, fasta_f, fasta_type):
        print BtLog.status_d['1'] % ('FASTA', fasta_f)
        self.assembly_f = abspath(fasta_f)
        if (fasta_type):
            # Set up CovLibObj for coverage in assembly header
            self.covLibs[fasta_type] = CovLibObj(fasta_type, fasta_type, fasta_f)

        for name, seq in BtIO.readFasta(fasta_f):
            blObj = BlObj(name, seq)
            if not blObj.name in self.dict_of_blobs:
                self.seqs += 1
                self.length += blObj.length
                self.n_count += blObj.n_count
                
                if (fasta_type):
                    cov = BtIO.parseCovFromHeader(fasta_type, blObj.name)
                    self.covLibs[fasta_type].cov_sum += cov
                    blObj.addCov(fasta_type, cov)

                self.order_of_blobs.append(blObj.name)
                self.dict_of_blobs[blObj.name] = blObj
            else:
                BtLog.error('5', blObj.name)
        
        if self.seqs == 0 or self.length == 0:
            BtLog.error('1')
Пример #3
0
def main():
    args = docopt(__doc__)
    fasta_f = args['--infile']
    list_f = args['--list']
    invert = args['--invert']
    prefix = args['--out']

    output = []
    out_f = BtIO.getOutFile(fasta_f, prefix, "filtered.fna")

    print BtLog.status_d['1'] % ("list", list_f)
    items = BtIO.parseSet(list_f)
    items_count = len(items)
    print BtLog.status_d['22'] % fasta_f
    items_parsed = []
    sequences = 0
    for header, sequence in BtIO.readFasta(fasta_f):
        sequences += 1
        if header in items:
            if not (invert):
                items_parsed.append(header)
                output.append(">%s\n%s\n" % (header, sequence))
        else:
            if (invert):
                items_parsed.append(header)
                output.append(">%s\n%s\n" % (header, sequence))
        BtLog.progress(len(output), 10, items_count, no_limit=True)
    BtLog.progress(items_count, 10, items_count)

    items_parsed_count = len(items_parsed)
    print BtLog.status_d['23'] % ('{:.2%}'.format(items_parsed_count/sequences), "{:,}".format(items_count), "{:,}".format(items_parsed_count), "{:,}".format(sequences))

    items_parsed_count_unique = len(set(items_parsed))
    if not items_parsed_count == items_parsed_count_unique:
        print BtLog.warn_d['8'] % "\n\t\t\t".join(list(set([x for x in items_parsed if items_parsed.count(x) > 1])))

    with open(out_f, "w") as fh:
        print BtLog.status_d['24'] % out_f
        fh.write("".join(output))
Пример #4
0
def main():
    args = docopt(__doc__)
    fasta_f = args['--infile']
    list_f = args['--list']
    invert = args['--invert']
    prefix = args['--out']

    output = []
    out_f = BtIO.getOutFile(fasta_f, prefix, "filtered.fna")

    print(BtLog.status_d['1'] % ("list", list_f))
    items = BtIO.parseSet(list_f)
    items_count = len(items)
    print(BtLog.status_d['22'] % fasta_f)
    items_parsed = []
    
    with tqdm(total=items_count, desc="[%] ", ncols=200, unit_scale=True) as pbar:
        for header, sequence in BtIO.readFasta(fasta_f):
            if header in items:
                if not (invert):
                    items_parsed.append(header)
                    output.append(">%s\n%s\n" % (header, sequence))
            else:
                if (invert):
                    items_parsed.append(header)
                    output.append(">%s\n%s\n" % (header, sequence))
        pbar.update()

    items_parsed_count = len(items_parsed)

    items_parsed_count_unique = len(set(items_parsed))
    if not items_parsed_count == items_parsed_count_unique:
        print(BtLog.warn_d['8'] % "\n\t\t\t".join(list(set([x for x in items_parsed if items_parsed.count(x) > 1]))))

    with open(out_f, "w") as fh:
        print(BtLog.status_d['24'] % out_f)
        fh.write("".join(output))