def write_clean_abundance_file(self):
    """
    Writes the abundance file from the new names file and new unique file.
    These files have already had their ids checked against the deleted file.
    """
    for lane_key in self.lane_keys:
        original_abundance_file = os.path.join(self.trim_dir, lane_key + ".abund.fa")
        new_abundance_file = os.path.join(self.trim_dir, lane_key + ".newabund.fa")
        new_names_file = os.path.join(self.trim_dir, lane_key + ".names")
        new_unique_file = os.path.join(self.trim_dir, lane_key + ".unique.fa")
        names = {}
        uniques = {}
        deleted_id_list = self.deleted_ids[lane_key]
        if len(deleted_id_list) == 0:
            continue

        # names file: one "id<TAB>comma,separated,ids" line per unique read
        newnames_fh = open(new_names_file, "r")
        for line in newnames_fh.readlines():
            lst = line.strip().split()
            names[lst[0]] = lst[1].split(',')
        newnames_fh.close()
        #print(names)

        fasta = fa.SequenceSource(new_unique_file)
        while fasta.next():
            uniques[fasta.seq] = fasta.id
        #print(uniques)

        # mysort returns a list of (read_id, count, seq) tuples, highest count first
        sorted_uniques = mysort(uniques, names)
        for item in sorted_uniques:
            read_id = item[0]
            count = item[1]
            seq = item[2]
            sfastaRead = read_id + ";size=" + str(count)
            abundfa = sfasta(sfastaRead, seq)
            abundfa.write(new_abundance_file, 'a')

        # rename to newuniques => uniques
        os.rename(original_abundance_file,
                  os.path.join(self.trim_dir, lane_key + ".abund_dirty.fa"))
        os.rename(new_abundance_file, original_abundance_file)
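
# `sfasta` is imported from elsewhere in the pipeline. A minimal sketch of what
# it presumably looks like, assuming it is a simple id+sequence record whose
# write() appends one FASTA record to a file; the ";size=N" suffix written onto
# the id above is the usearch abundance annotation. The name is hypothetical
# (to avoid shadowing the real class) and this is an illustration, not the
# pipeline's actual implementation.
class _sfasta_sketch(object):
    def __init__(self, id, seq):
        self.id = id    # FASTA header text without the leading '>'
        self.seq = seq  # sequence string

    def write(self, filename, mode='a'):
        # append one ">id\nseq\n" record; the caller controls the mode
        fh = open(filename, mode)
        fh.write(">" + self.id + "\n" + self.seq + "\n")
        fh.close()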
def write_clean_abundance_file(self):
    """
    Writes the abundance file from the new names file and new unique file.
    These files have already had their ids checked against the deleted file.
    """
    for lane_key in self.lane_keys:
        original_abundance_file = os.path.join(self.outputdir, lane_key + ".abund.fa")
        new_abundance_file = os.path.join(self.outputdir, lane_key + ".newabund.fa")
        new_names_file = os.path.join(self.outputdir, lane_key + ".names")
        new_unique_file = os.path.join(self.outputdir, lane_key + ".unique.fa")
        names = {}
        uniques = {}
        deleted_id_list = self.deleted_ids[lane_key]
        if len(deleted_id_list) == 0:
            continue

        newnames_fh = open(new_names_file, "r")
        for line in newnames_fh.readlines():
            lst = line.strip().split()
            names[lst[0]] = lst[1].split(',')
        newnames_fh.close()
        #print names

        fasta = SequenceSource(new_unique_file)
        while fasta.next():
            uniques[fasta.seq] = fasta.id
        #print uniques

        sorted_uniques = mysort(uniques, names)
        for item in sorted_uniques:
            read_id = item[0]
            count = item[1]
            seq = item[2]
            sfastaRead = read_id + ";size=" + str(count)
            abundfa = sfasta(sfastaRead, seq)
            abundfa.write(new_abundance_file, 'a')

        # rename to newuniques => uniques
        os.rename(original_abundance_file,
                  os.path.join(self.outputdir, lane_key + ".abund_dirty.fa"))
        os.rename(new_abundance_file, original_abundance_file)
def write_clean_abundance_file(self):
    """
    Writes the abundance file from the new names file and new unique file.
    These files have already had their ids checked against the deleted file.
    """
    for lane_key in self.lane_keys:
        original_abundance_file = self.outputdir + "/" + lane_key + ".abund.fa"
        new_abundance_file = self.outputdir + "/" + lane_key + ".newabund.fa"
        new_names_file = self.outputdir + "/" + lane_key + ".names"
        new_unique_file = self.outputdir + "/" + lane_key + ".unique.fa"
        deleted_file = self.outputdir + "/" + lane_key + ".deleted.txt"
        names = {}
        uniques = {}
        # skip this lane_key unless something was actually deleted
        if not (os.path.exists(deleted_file) and os.path.getsize(deleted_file) > 0):
            continue

        newnames_fh = open(new_names_file, "r")
        for line in newnames_fh.readlines():
            lst = line.strip().split()
            names[lst[0]] = lst[1].split(',')
        newnames_fh.close()
        #print names

        fasta = SequenceSource(new_unique_file)
        while fasta.next():
            uniques[fasta.seq] = fasta.id
        #print uniques

        sorted_uniques = mysort(uniques, names)
        for item in sorted_uniques:
            read_id = item[0]
            count = item[1]
            seq = item[2]
            sfastaRead = read_id + ";size=" + str(count)
            abundfa = sfasta(sfastaRead, seq)
            abundfa.write(new_abundance_file, 'a')

        # rename to newuniques => uniques
        os.rename(original_abundance_file,
                  self.outputdir + "/" + lane_key + ".abund_dirty.fa")
        os.rename(new_abundance_file, original_abundance_file)
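
# `mysort` is imported from elsewhere in the pipeline. Per the comment in
# write_data_files below, it returns a list of (read_id, count, seq) tuples
# sorted from highest to lowest abundance, where `uniques` maps seq -> read_id
# and `names` maps a representative read_id -> the list of read ids collapsed
# into it. A sketch under those assumptions, with a hypothetical name; not the
# pipeline's actual code:
def _mysort_sketch(uniques, names):
    items = []
    for seq, read_id in uniques.items():
        # abundance = number of reads behind this unique sequence
        count = len(names.get(read_id, [read_id]))
        items.append((read_id, count, seq))
    # most abundant first
    items.sort(key=lambda item: item[1], reverse=True)
    return items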
def write_data_files(self, lane_keys):
    ###################################################################
    #
    # 10 - print out files of unique trimmed sequences and deleted ids
    # for each amplification:
    #   fasta file            - is this needed?
    #   names file            (just like mothur)
    #   uniques fasta file    (just like mothur)
    #   abundance fasta file  - formatted for usearch (-uc): sorted most abundant first
    #   deleted read_ids file
    # Also there should be one file each for rawseq, trimseq, primers and runkeys,
    # not divided by amplification, for rapidly putting in db.
    # These are printed in out directory: './out'+rundate
    #
    ###################################################################
    #rawseqFileName = self.outdir + '/rawseq_file.txt'
    #trimseqFileName = self.outdir + '/trimseq_file.txt'
    #f_rawseq = open(rawseqFileName, "w")
    #f_trimseq = open(trimseqFileName,"w")
    success_code = ''
    for lane_key in self.run.run_keys:
        self.fa[lane_key].close()
        base_file_name = os.path.join(self.outdir, lane_key)
        uniquesFileName = base_file_name + ".unique.fa"
        abundFileName = base_file_name + ".abund.fa"
        namesFileName = base_file_name + ".names"
        delFileName = base_file_name + ".deleted.txt"
        f_names = open(namesFileName, "w")

        # if we order the uniques by the number of reads behind each one
        # (the size of its names entry) then we have the abundance file.
        # Write abund.fa file
        # mysort returns a list of tuples: (read_id, count, seq) sorted highest to lowest freq
        try:
            sorted_uniques = mysort(self.uniques[lane_key], self.names[lane_key])
            for item in sorted_uniques:
                read_id = item[0]
                count = item[1]
                seq = item[2]
                sfastaRead = read_id + ";size=" + str(count)
                abundfa = sfasta(sfastaRead, seq)
                abundfa.write(abundFileName, 'a')
        except Exception:
            print "**********fail abund **************"
            success_code = ('FAIL', 'abund', lane_key)

        # Write uniques.fa file
        try:
            for seq in self.uniques[lane_key]:
                read_id = self.uniques[lane_key][seq]
                uniquefa = sfasta(read_id, seq)
                uniquefa.write(uniquesFileName, 'a')
            logger.debug("\nwrote uniques file " + uniquesFileName)
        except Exception:
            success_code = ('FAIL', 'unique', lane_key)

        # Write names file
        try:
            for id in self.names[lane_key]:
                others = ','.join(self.names[lane_key][id])
                f_names.write(id + "\t" + others + "\n")
            f_names.close()
            logger.debug("wrote names file " + namesFileName)
        except Exception:
            success_code = ('FAIL', 'names', lane_key)

        # Write deleted.txt file
        if lane_key in self.deleted_ids and self.deleted_ids[lane_key]:
            f_del = open(delFileName, "w")
            reason_counts = {}
            for id in self.deleted_ids[lane_key]:
                reason = self.deleted_ids[lane_key][id]
                f_del.write(id + "\t" + reason + "\n")
                current_count = reason_counts.get(reason, 0)
                reason_counts[reason] = current_count + 1
            # now write out some stats
            f_del.write("\nTotal Passed Reads in this lane/key: " + str(len(self.names[lane_key])) + "\n")
            if len(self.names[lane_key]) > 0:
                for key, value in reason_counts.items():
                    f_del.write("  " + key + ": " + str(value) + "  "
                                + str(float(value * 100.0) / float(len(self.names[lane_key])))
                                + "% of total \n")
            f_del.close()
            logger.debug("wrote deleted file: " + delFileName)

    # print out read ids that failed the key test: one file only
    if 'nokey' in self.deleted_ids and self.deleted_ids['nokey']:
        nokeyFileName = os.path.join(self.outdir, 'nokey.deleted.txt')
        f_del = open(nokeyFileName, "w")
        for id in self.deleted_ids['nokey']:
            f_del.write(id + "\tnokey\n")
        f_del.close()

    if True:
        print
        print 'Output Directory:', './' + self.outdir
        print self.number_of_raw_sequences, "raw sequences read"
        pct = '%.1f' % ((float(self.number_of_good_sequences) / self.number_of_raw_sequences) * 100)
        print self.number_of_good_sequences, "sequences passed", pct + '%'
        print "Unique Counts:"
    count_uniques = 0
    good_lane_keys = []
    for lane_key in self.run.run_keys:
        count = len(self.uniques[lane_key])
        if count > 0:
            good_lane_keys.append(lane_key)
        count_uniques = count_uniques + count
        print "   ", lane_key, self.dna_regions[lane_key], count
    print "   Total Uniques:", count_uniques

    #####
    #
    # Write to stats file for this run
    #
    self.stats_fp.write("Run: " + self.rundate + "\n")
    self.stats_fp.write("Unique Counts:\n")
    #stats_fp.write("Run: "+self.rundate)
    count_uniques = 0
    for lane_key in self.run.run_keys:
        count = len(self.uniques[lane_key])
        count_uniques = count_uniques + count
        self.stats_fp.write("   " + str(count) + "\t" + lane_key + "\n")
    self.stats_fp.write("Total Uniques: " + str(count_uniques) + "\n")
    self.stats_fp.write("\nDeleted Counts (before chimera check):\n")
    self.stats_fp.write("   deleted_count_for_nokey:\t" + str(self.deleted_count_for_nokey) + "\n")
    self.stats_fp.write("   deleted_count_for_proximal:\t" + str(self.deleted_count_for_proximal) + "\n")
    self.stats_fp.write("   deleted_count_for_distal:\t" + str(self.deleted_count_for_distal) + "\n")
    self.stats_fp.write("   deleted_count_for_n:\t" + str(self.deleted_count_for_n) + "\n")
    self.stats_fp.write("   deleted_count_for_quality:\t" + str(self.deleted_count_for_quality) + "\n")
    self.stats_fp.write("   deleted_count_for_no_insert:\t" + str(self.deleted_count_for_no_insert) + "\n")
    self.stats_fp.write("   deleted_count_for_minimum_length:\t" + str(self.deleted_count_for_minimum_length) + "\n")
    self.stats_fp.write("   deleted_count_for_unknown_lane_runkey:\t" + str(self.deleted_count_for_unknown_lane_runkey) + "\n")
    self.stats_fp.write("Total Deleted: " + str(self.number_of_raw_sequences - self.number_of_good_sequences) + "\n")
    self.stats_fp.close()

    # success_code is initialized at the top so that a FAIL recorded above
    # is not clobbered here
    if not success_code:
        success_code = 'SUCCESS ' + str(good_lane_keys)
    return success_code
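
# For reference, the per-lane_key files written above have these shapes
# (illustrative read ids; the formats follow directly from the write calls
# above and the mothur/usearch conventions named in the header comment):
#
#   lane_key.names        read_0001<TAB>read_0001,read_0042,read_0107
#   lane_key.unique.fa    >read_0001          followed by the sequence line
#   lane_key.abund.fa     >read_0001;size=3   followed by the sequence line
#   lane_key.deleted.txt  read_0042<TAB>quality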
def write_data_files(self, lane_keys):
    ###################################################################
    #
    # 10 - print out files of unique trimmed sequences and deleted ids
    # for each amplification:
    #   fasta file            - is this needed?
    #   names file            (just like mothur)
    #   uniques fasta file    (just like mothur)
    #   abundance fasta file  - formatted for usearch (-uc): sorted most abundant first
    #   deleted read_ids file
    # Also there should be one file each for rawseq, trimseq, primers and runkeys,
    # not divided by amplification, for rapidly putting in db.
    # These are printed in out directory: './out'+rundate
    #
    ###################################################################
    #rawseqFileName = self.outdir + '/rawseq_file.txt'
    #trimseqFileName = self.outdir + '/trimseq_file.txt'
    #f_rawseq = open(rawseqFileName, "w")
    #f_trimseq = open(trimseqFileName,"w")
    success_code = ''
    for lane_key in self.run.run_keys:
        self.fa[lane_key].close()
        uniquesFileName = self.outdir + '/' + lane_key + ".unique.fa"
        abundFileName = self.outdir + '/' + lane_key + ".abund.fa"
        namesFileName = self.outdir + '/' + lane_key + ".names"
        delFileName = self.outdir + '/' + lane_key + ".deleted.txt"
        f_names = open(namesFileName, "w")

        # if we order the uniques by the number of reads behind each one
        # (the size of its names entry) then we have the abundance file.
        # Write abund.fa file
        # mysort returns a list of tuples: (read_id, count, seq) sorted highest to lowest freq
        try:
            sorted_uniques = mysort(self.uniques[lane_key], self.names[lane_key])
            for item in sorted_uniques:
                read_id = item[0]
                count = item[1]
                seq = item[2]
                sfastaRead = read_id + ";size=" + str(count)
                abundfa = sfasta(sfastaRead, seq)
                abundfa.write(abundFileName, 'a')
        except Exception:
            success_code = ('FAIL', 'abund', lane_key)

        # Write uniques.fa file
        try:
            for seq in self.uniques[lane_key]:
                read_id = self.uniques[lane_key][seq]
                uniquefa = sfasta(read_id, seq)
                uniquefa.write(uniquesFileName, 'a')
            if self.VERBOSE:
                print "\nwrote uniques file", uniquesFileName
        except Exception:
            success_code = ('FAIL', 'unique', lane_key)

        # Write names file
        try:
            for id in self.names[lane_key]:
                others = ','.join(self.names[lane_key][id])
                f_names.write(id + "\t" + others + "\n")
            f_names.close()
            if self.VERBOSE:
                print "wrote names file", namesFileName
        except Exception:
            success_code = ('FAIL', 'names', lane_key)

        # Write deleted.txt file
        if lane_key in self.deleted_ids and self.deleted_ids[lane_key]:
            f_del = open(delFileName, "w")
            for id in self.deleted_ids[lane_key]:
                reason = self.deleted_ids[lane_key][id]
                f_del.write(id + "\t" + reason + "\n")
            f_del.close()
            if self.VERBOSE:
                print "wrote deleted file", delFileName

    # print out read ids that failed the key test: one file only
    if 'nokey' in self.deleted_ids and self.deleted_ids['nokey']:
        delfileName = self.outdir + '/nokey.deleted.txt'
        f_del = open(delfileName, "w")
        for id in self.deleted_ids['nokey']:
            f_del.write(id + "\tnokey\n")
        f_del.close()

    # good_lane_keys feeds the success code below, so compute it outside
    # the QUIET guard (otherwise it would be undefined when QUIET is set)
    good_lane_keys = []
    for lane_key in self.run.run_keys:
        if count_keys(self.uniques[lane_key]) > 0:
            good_lane_keys.append(lane_key)

    if not self.QUIET:
        print
        print 'Output Directory:', './' + self.outdir
        print self.number_of_raw_sequences, "raw sequences read"
        pct = '%.1f' % ((float(self.number_of_good_sequences) / self.number_of_raw_sequences) * 100)
        print self.number_of_good_sequences, "sequences passed", pct + '%'
        print "Unique Counts:"
        count_uniques = 0
        for lane_key in self.run.run_keys:
            count = count_keys(self.uniques[lane_key])
            count_uniques = count_uniques + count
            print "   ", lane_key, self.dna_regions[lane_key], count
        print "   Total Uniques:", count_uniques

    #####
    #
    # Write to stats file for this run
    #
    self.stats_fp.write("Run: " + self.rundate + "\n")
    self.stats_fp.write("Unique Counts:\n")
    #stats_fp.write("Run: "+self.rundate)
    count_uniques = 0
    for lane_key in self.run.run_keys:
        count = count_keys(self.uniques[lane_key])
        count_uniques = count_uniques + count
        self.stats_fp.write("   " + str(count) + "\t" + lane_key + "\n")
    self.stats_fp.write("Total Uniques: " + str(count_uniques) + "\n")
    self.stats_fp.write("\nDeleted Counts (before chimera check):\n")
    for reason in self.deleted_count:
        self.stats_fp.write("   " + str(self.deleted_count[reason]) + "\t" + reason + "\n")
    self.stats_fp.write("Total Deleted: " + str(self.number_of_raw_sequences - self.number_of_good_sequences) + "\n")
    self.stats_fp.close()

    # success_code is initialized at the top so that a FAIL recorded above
    # is not clobbered here
    if not success_code:
        success_code = ('SUCCESS', '', good_lane_keys)
    return success_code
def write_data_files(self, idx_keys):
    ###################################################################
    #
    # 10 - print out files of unique trimmed sequences and deleted ids
    # for each amplification:
    #   fasta file            - is this needed?
    #   names file            (just like mothur)
    #   uniques fasta file    (just like mothur)
    #   abundance fasta file  - formatted for usearch (-uc): sorted most abundant first
    #   deleted read_ids file
    # Also there should be one file each for rawseq, trimseq, primers and runkeys,
    # not divided by amplification, for rapidly putting in db.
    # These are printed in out directory: './out'+rundate
    #
    ###################################################################
    #rawseqFileName = self.outdir + '/rawseq_file.txt'
    #trimseqFileName = self.outdir + '/trimseq_file.txt'
    #f_rawseq = open(rawseqFileName, "w")
    #f_trimseq = open(trimseqFileName,"w")
    if self.runobj.platform == 'illumina':
        return
    success_code = ''
    for idx_key in self.runobj.run_keys:
        self.fa[idx_key].close()
        base_file_name = os.path.join(self.trimming_dir, idx_key)
        uniquesFileName = base_file_name + ".unique.fa"
        abundFileName = base_file_name + ".abund.fa"
        namesFileName = base_file_name + ".names"
        delFileName = base_file_name + ".deleted.txt"

        # clean out old files if they exist (the writes below append)
        remove_file(uniquesFileName)
        remove_file(abundFileName)
        remove_file(namesFileName)
        remove_file(delFileName)

        f_names = open(namesFileName, "w")

        # if we order the uniques by the number of reads behind each one
        # (the size of its names entry) then we have the abundance file.
        # Write abund.fa file
        # mysort returns a list of tuples: (read_id, count, seq) sorted highest to lowest freq
        try:
            sorted_uniques = mysort(self.uniques[idx_key], self.names[idx_key])
            for item in sorted_uniques:
                read_id = item[0]
                count = item[1]
                seq = item[2]
                sfastaRead = read_id + ";size=" + str(count)
                abundfa = sfasta(sfastaRead, seq)
                abundfa.write(abundFileName, 'a')
        except Exception:
            print("**********fail abund **************")
            success_code = ('FAIL', 'abund', idx_key)

        # Write uniques.fa file
        #print('UNIQUES',self.uniques)
        try:
            for seq in self.uniques[idx_key]:
                read_id = self.uniques[idx_key][seq]
                print(uniquesFileName, read_id, seq)
                uniquefa = sfasta(read_id, seq)
                uniquefa.write(uniquesFileName, 'a')
            logger.debug("\nwrote uniques file " + uniquesFileName)
        except Exception:
            success_code = ('FAIL', 'unique', idx_key)

        # Write names file
        try:
            for id in self.names[idx_key]:
                others = ','.join(self.names[idx_key][id])
                f_names.write(id + "\t" + others + "\n")
            f_names.close()
            logger.debug("wrote names file " + namesFileName)
        except Exception:
            success_code = ('FAIL', 'names', idx_key)

        # Write deleted.txt file
        if idx_key in self.deleted_ids and self.deleted_ids[idx_key]:
            f_del = open(delFileName, "w")
            reason_counts = {}
            for id in self.deleted_ids[idx_key]:
                reason = self.deleted_ids[idx_key][id]
                f_del.write(id + "\t" + reason + "\n")
                current_count = reason_counts.get(reason, 0)
                reason_counts[reason] = current_count + 1
            # now write out some stats
            # f_del.write("\nTotal Passed Reads in this lane/key: " + str(len(self.names[idx_key])) + "\n")
            # if len(self.names[idx_key]) > 0:
            #     for key, value in reason_counts.items():
            #         f_del.write("  " + key + ": " + str(value) + "  " + str(float(value*100.0)/float(len(self.names[idx_key]))) + "% of total \n")
            f_del.close()
            logger.debug("wrote deleted file: " + delFileName)

    # print out read ids that failed the key test: one file only
    if 'nokey' in self.deleted_ids and self.deleted_ids['nokey']:
        nokeyFileName = os.path.join(self.trimming_dir, 'nokey.deleted.txt')
        f_del = open(nokeyFileName, "w")
        for id in self.deleted_ids['nokey']:
            f_del.write(id + "\tnokey\n")
        f_del.close()

    if True:
        print()
        print('Output Directory:', './' + self.outdir)
        print(self.number_of_raw_sequences, "raw sequences read")
        pct = '%.1f' % ((float(self.number_of_good_sequences) / self.number_of_raw_sequences) * 100)
        print(self.number_of_good_sequences, "sequences passed", pct + '%')
        print("Unique Counts:")
    count_uniques = 0
    good_idx_keys = []
    for idx_key in self.runobj.run_keys:
        count = len(self.uniques[idx_key])
        if count > 0:
            good_idx_keys.append(idx_key)
        count_uniques = count_uniques + count
        print("   ", idx_key, self.dna_regions[idx_key], count)
    print("   Total Uniques:", count_uniques)

    #####
    #
    # Write to stats file for this run
    #
    self.stats_fp.write("Run_code: " + self.run + "\n")
    self.stats_fp.write("========================================================\n")
    self.stats_fp.write("Deleted Counts (before chimera check if performed):\n")
    self.stats_fp.write("   deleted_count_for_nokey:......................" + str(self.deleted_count_for_nokey) + "\n")
    self.stats_fp.write("   deleted_count_for_proximal:..................." + str(self.deleted_count_for_proximal) + "\n")
    self.stats_fp.write("   deleted_count_for_distal:....................." + str(self.deleted_count_for_distal) + "\n")
    self.stats_fp.write("   deleted_count_for_N:.........................." + str(self.deleted_count_for_n) + "\n")
    self.stats_fp.write("   deleted_count_for_quality:...................." + str(self.deleted_count_for_quality) + "\n")
    self.stats_fp.write("   deleted_count_for_no_insert:.................." + str(self.deleted_count_for_no_insert) + "\n")
    self.stats_fp.write("   deleted_count_for_minimum_length(" + str(self.runobj.minimumLength) + "bp):......." + str(self.deleted_count_for_minimum_length) + "\n")
    self.stats_fp.write("   deleted_count_for_maximum_length(" + str(self.runobj.maximumLength) + "bp):........." + str(self.deleted_count_for_maximum_length) + "\n")
    self.stats_fp.write("   deleted_count_for_unknown_lane_runkey:........" + str(self.deleted_count_for_unknown_lane_runkey) + "\n")
    self.stats_fp.write("Total Raw Sequences:...." + str(self.number_of_raw_sequences) + "\n")
    self.stats_fp.write("Total Good Sequences:..." + str(self.number_of_good_sequences) + "\n")
    self.stats_fp.write("Total Deleted:.........." + str(self.number_of_raw_sequences - self.number_of_good_sequences) + "\n")
    self.stats_fp.write("Total Uniques:.........." + str(count_uniques) + "\n")

    self.stats_fp.write("\nUnique Counts per Dataset:\n")
    #stats_fp.write("Run: "+self.run)
    count_uniques = 0
    for idx_key in self.runobj.run_keys:
        count = len(self.uniques[idx_key])
        count_uniques = count_uniques + count
        self.stats_fp.write("   " + str(count) + "\t" + idx_key + "\t" + self.runobj.samples[idx_key].dataset + "\n")
    self.stats_fp.write("\n========================================================\n")
    self.stats_fp.close()

    # success_code is initialized at the top so that a FAIL recorded above
    # is not clobbered here
    if not success_code:
        success_code = 'SUCCESS ' + str(good_idx_keys)
    return success_code