Пример #1
0
    def move_out_chimeric(self):
        """Write a copy of every input FASTA file without the chimeric reads.

        The chimeric ids are obtained once from ``self.get_chimeric_ids()``.
        For each entry of ``self.input_file_names`` the input path is built
        from ``self.indir`` and the output path is that input path with
        ``self.nonchimeric_suffix`` appended.  Every record whose id is not
        among the chimeric ids is stored in the output with
        ``store_frequencies=False``.

        Returns:
            None.  The result is the set of files written through
            ``fa.FastaOutput``.
        """
        chimeric_ids = self.get_chimeric_ids()
        for idx_key in self.input_file_names:
            fasta_file_path = os.path.join(self.indir, self.input_file_names[idx_key])
            non_chimeric_file = fasta_file_path + self.nonchimeric_suffix

            # Open the input before creating the output object, so a failure
            # to open the input happens before any output is created.
            fasta = fa.SequenceSource(fasta_file_path, lazy_init=False)
            try:
                non_chimeric_fasta = fa.FastaOutput(non_chimeric_file)
                try:
                    while fasta.next():
                        if fasta.id not in chimeric_ids:
                            non_chimeric_fasta.store(fasta, store_frequencies=False)
                finally:
                    # Close the output even when reading or storing fails.
                    non_chimeric_fasta.close()
            finally:
                # Release the input handle for every file, not only on success.
                # NOTE(review): relies on fa.SequenceSource providing close(),
                # as fastalib does -- confirm against the bundled fa module.
                fasta.close()
Пример #2
0
    def get_chimeric_ids(self):
        """Collect the sequence ids listed in the selected chimera files.

        Candidate names come from ``self.get_chimera_file_names(self.outdir)``.
        A file is read only when its name ends with ``self.chimeric_suffix``
        and also with the suffix that ``self.get_chimeras_suffix`` returns for
        it, given the value returned by ``self.check_chimeric_stats()``.

        Returns:
            set: the ids of all selected files; empty when no file qualifies.
        """
        ids = set()
        chimera_file_names = self.get_chimera_file_names(self.outdir)
        file_ratio = self.check_chimeric_stats()

        for file_name in chimera_file_names:
            if not file_name.endswith(self.chimeric_suffix):
                continue

            both_or_denovo = self.get_chimeras_suffix(file_ratio, file_name)
            # TODO: run once for each file_base = ".".join(file_name.split(".")[0:3]) (for txt and db)
            if not file_name.endswith(both_or_denovo):
                continue

            file_name_path = os.path.join(self.outdir, file_name)
            self.utils.print_both("Get ids from %s" % file_name_path)
            read_fasta = fa.ReadFasta(file_name_path)
            try:
                ids.update(read_fasta.ids)
            finally:
                # Close the reader so that handles do not pile up across files.
                read_fasta.close()
        return ids
Пример #3
0
    start_dir = args.start_dir
    # start_dir = sys.argv[1]
    if (is_verbatim):
        print "Start from %s" % start_dir
        print "Getting file names"

    fa_files = seq_len.get_files(start_dir, args.ext)
    if (is_verbatim):
        print "Found %s fa files" % (len(fa_files))

    for file_name in fa_files:
        if (is_verbatim):
            print file_name

        try:
            f_input = fa.ReadFasta(file_name)
            if (args.short_s):
                seq_len.print_short_seq(f_input, file_name, args.min_len)
            if (args.histogram):
                seq_len.get_seq_len_distrib(f_input)

        except RuntimeError:
            if (is_verbatim):
                print sys.exc_info()[0]
        except:
            print "Unexpected error:", sys.exc_info()[0]
            raise
            next

    if (is_verbatim):
        print "Current directory:"