def main():
    """Split the backbone FASTA and distribute reads to per-backbone files.

    Workflow:
      1. Read the options from ``setup_options()`` and create
         ``options.output_dir`` when it does not exist yet.
      2. ``split_backbone(options)`` writes one FASTA file per backbone
         record and returns a ``backbone name -> id`` dictionary.
      3. ``build_reads_to_backbone_dict(options)`` supplies, for a read
         name, the backbone names that read belongs to.
      4. Each record of ``options.reads_filename`` is appended to
         ``<output_dir>/<id>.reads.fasta`` once per associated backbone.

    The output file is opened and closed for every write, so only one
    output handle is open at any time.
    """
    options, _args = setup_options()
    if not os.path.exists(options.output_dir):
        os.makedirs(options.output_dir)

    # Split backbone fasta file into chunks.
    backbone_to_id = split_backbone(options)

    # Find the reads that correspond to a given backbone.
    # NOTE(review): every read name is looked up with [] below, so this is
    # assumed to tolerate unknown names (e.g. a defaultdict) -- confirm.
    reads_to_backbone = build_reads_to_backbone_dict(options)

    # Split the reads based on their corresponding backbone file.
    # NOTE(review): files are opened in append mode, so running again into
    # an existing output directory adds to the files of the earlier run --
    # confirm that this is intended.
    reads = ParseFasta(options.reads_filename)
    record = reads.getRecord()
    while record is not None:
        name = record[0]
        backbones = reads_to_backbone[name]
        if backbones:
            entry = '>' + name + '\n' + record[1] + '\n'
            for backbone in backbones:
                out_path = (options.output_dir + '/' +
                            str(backbone_to_id[backbone]) + '.reads.fasta')
                # The context manager closes the handle even if the write
                # fails part-way.
                with open(out_path, 'a') as out:
                    out.write(entry)
        record = reads.getRecord()
def split_backbone(options):
    """Split backbone fasta file into chunks.

    Every record of ``options.backbone_filename`` is written to its own
    file ``<output_dir>/<prefix>-<n>.fasta``, where ``n`` counts the
    records from 0 in the order they are read. The name of each record is
    also printed to stdout.

    Returns dictionary of backbone -> id, the id being ``<prefix>-<n>``.
    """
    backbone_to_id = {}
    parser = ParseFasta(options.backbone_filename)

    # Write all backbone files to their own fasta file.
    index = 0
    record = parser.getRecord()
    while record is not None:
        name = record[0]
        # A parenthesised single argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print(name)

        chunk_id = options.prefix + '-' + str(index)
        chunk_path = options.output_dir + '/' + chunk_id + '.fasta'
        with open(chunk_path, 'w') as chunk:
            # The sequence is written as returned by the parser; no
            # trailing newline is appended here.
            chunk.write('>' + name + '\n' + record[1])

        backbone_to_id[name] = chunk_id
        index += 1
        record = parser.getRecord()

    return backbone_to_id
def main():
    """Split the backbone FASTA and distribute reads to per-backbone files.

    Workflow:
      1. Read the options from ``setup_options()`` and create
         ``options.output_dir`` when it does not exist yet.
      2. ``split_backbone(options)`` writes one FASTA file per backbone
         record and returns a ``backbone name -> id`` dictionary.
      3. ``build_reads_to_backbone_dict(options)`` supplies, for a read
         name, the backbone names that read belongs to.
      4. Each record of ``options.reads_filename`` is appended to
         ``<output_dir>/<id>.reads.fasta`` once per associated backbone.

    The output file is opened and closed for every write, so only one
    output handle is open at any time.
    """
    options, _args = setup_options()
    if not os.path.exists(options.output_dir):
        os.makedirs(options.output_dir)

    # Split backbone fasta file into chunks.
    backbone_to_id = split_backbone(options)

    # Find the reads that correspond to a given backbone.
    # NOTE(review): every read name is looked up with [] below, so this is
    # assumed to tolerate unknown names (e.g. a defaultdict) -- confirm.
    reads_to_backbone = build_reads_to_backbone_dict(options)

    # Split the reads based on their corresponding backbone file.
    # NOTE(review): files are opened in append mode, so running again into
    # an existing output directory adds to the files of the earlier run --
    # confirm that this is intended.
    reads = ParseFasta(options.reads_filename)
    record = reads.getRecord()
    while record is not None:
        name = record[0]
        backbones = reads_to_backbone[name]
        if backbones:
            entry = '>' + name + '\n' + record[1] + '\n'
            for backbone in backbones:
                out_path = (options.output_dir + '/' +
                            str(backbone_to_id[backbone]) + '.reads.fasta')
                # The context manager closes the handle even if the write
                # fails part-way.
                with open(out_path, 'a') as out:
                    out.write(entry)
        record = reads.getRecord()
def split_backbone(options):
    """Split backbone fasta file into chunks.

    Every record of ``options.backbone_filename`` is written to its own
    file ``<output_dir>/<prefix>-<n>.fasta``, where ``n`` counts the
    records from 0 in the order they are read. The name of each record is
    also printed to stdout.

    Returns dictionary of backbone -> id, the id being ``<prefix>-<n>``.
    """
    backbone_to_id = {}
    parser = ParseFasta(options.backbone_filename)

    # Write all backbone files to their own fasta file.
    index = 0
    record = parser.getRecord()
    while record is not None:
        name = record[0]
        # A parenthesised single argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print(name)

        chunk_id = options.prefix + '-' + str(index)
        chunk_path = options.output_dir + '/' + chunk_id + '.fasta'
        with open(chunk_path, 'w') as chunk:
            # The sequence is written as returned by the parser; no
            # trailing newline is appended here.
            chunk.write('>' + name + '\n' + record[1])

        backbone_to_id[name] = chunk_id
        index += 1
        record = parser.getRecord()

    return backbone_to_id
def build_readseqs_dict(options):
    """Load every record of the reads FASTA file into a dictionary.

    Records are pulled from ``ParseFasta(options.reads_filename)`` until
    ``getRecord()`` returns ``None``. Element 0 of each record becomes the
    key and element 1 the value; a repeated key keeps the value of the
    last record read.

    Returns the resulting dictionary.
    """
    sequences_by_name = {}
    # The reads file is typically 'ctg_pb.fasta' in the pipeline.
    parser = ParseFasta(options.reads_filename)
    while True:
        record = parser.getRecord()
        if record is None:
            break
        sequences_by_name[record[0]] = record[1]
    return sequences_by_name
def main():
    """Split the backbone FASTA and distribute reads to per-backbone files.

    Workflow:
      1. Read the options from ``setup_options()`` and create
         ``options.output_dir`` when it does not exist yet.
      2. ``split_backbone(options)`` writes the backbone chunks and returns
         ``backbone_to_id[seqname] = fileprefix-number``.
      3. ``build_reads_to_backbone_dict(options)`` supplies, for a read or
         contig name, the backbone names associated with it.
      4. Each record of ``options.reads_filename`` is appended to
         ``<output_dir>/<id>.reads.fasta`` once per associated backbone.

    Output files are opened and closed as they are needed. This costs an
    open/close per write but keeps only one output handle open at a time.
    """
    options, _args = setup_options()
    if not os.path.exists(options.output_dir):
        os.makedirs(options.output_dir)

    # Split backbone fasta file into chunks.
    backbone_to_id = split_backbone(options)

    # Find the reads that correspond to a given backbone.
    # NOTE(review): every read name is looked up with [] below, so this is
    # assumed to tolerate unknown names (e.g. a defaultdict) -- confirm.
    reads_to_backbone = build_reads_to_backbone_dict(options)

    reads = ParseFasta(options.reads_filename)
    record = reads.getRecord()
    while record is not None:
        name = record[0]
        backbones = reads_to_backbone[name]
        if backbones:
            entry = '>' + name + '\n' + record[1] + '\n'
            for backbone in backbones:
                fname = (options.output_dir + '/' +
                         str(backbone_to_id[backbone]) + '.reads.fasta')
                # Mode 'a' creates the file when it is missing and appends
                # otherwise, so no separate existence check is needed.
                # NOTE(review): a file left over from an earlier run is
                # appended to as well -- confirm that this is intended.
                with open(fname, 'a') as out:
                    out.write(entry)
        record = reads.getRecord()
def main():
    """Split the backbone FASTA and distribute reads to per-backbone files.

    Workflow:
      1. Read the options from ``setup_options()`` and create
         ``options.output_dir`` when it does not exist yet.
      2. ``split_backbone(options)`` writes one FASTA file per backbone
         record and returns a ``backbone name -> id`` dictionary.
      3. ``build_reads_to_backbone_dict(options)`` supplies, for a read
         name, the backbone names that read belongs to.
      4. Each record of ``options.reads_filename`` is written to
         ``<output_dir>/<id>.reads.fasta`` once per associated backbone.

    One output handle per backbone id is opened in ``'w'`` mode on first
    use (truncating any existing file) and kept open until all reads have
    been processed. All handles are closed on the way out, including when
    an error interrupts the run.
    """
    options, _args = setup_options()
    if not os.path.exists(options.output_dir):
        os.makedirs(options.output_dir)

    # Split backbone fasta file into chunks.
    backbone_to_id = split_backbone(options)

    # Find the reads that correspond to a given backbone.
    # NOTE(review): every read name is looked up with [] below, so this is
    # assumed to tolerate unknown names (e.g. a defaultdict) -- confirm.
    reads_to_backbone = build_reads_to_backbone_dict(options)

    # Split the reads based on their corresponding backbone file.
    # NOTE(review): the number of simultaneously open files grows with the
    # number of backbone ids and may reach the OS open-file limit.
    file_pointers_dict = {}
    reads = ParseFasta(options.reads_filename)
    try:
        record = reads.getRecord()
        while record is not None:
            name = record[0]
            # Reads without any associated backbone are skipped silently.
            backbones = reads_to_backbone[name]
            if backbones:
                entry = ">" + name + "\n" + record[1] + "\n"
                for backbone in backbones:
                    chunk_id = backbone_to_id[backbone]
                    out = file_pointers_dict.get(chunk_id)
                    if out is None:
                        out = open(options.output_dir + "/" +
                                   str(chunk_id) + ".reads.fasta", "w")
                        file_pointers_dict[chunk_id] = out
                    out.write(entry)
            record = reads.getRecord()
    finally:
        # Flush and release every cached handle.
        for out in file_pointers_dict.values():
            out.close()