def test_prune(self):
    """Check that NGS.prune returns 998 entries for the bundled fixtures."""
    folder = "NGS"

    # Load the BLAST table as a list of whitespace-stripped rows. The
    # context manager closes the file even if reading fails part-way.
    with open("NGS/blast_data.csv", "r") as f:
        blast_data = [line.strip() for line in f]

    # NOTE(review): this is the SeqIO.parse iterator itself, whereas
    # filter_reads hands NGS.prune one record at a time -- confirm that
    # prune accepts both.
    seq_record = SeqIO.parse("NGS/seq_record.fastq", "fastq")
    ion_id = "3856"
    # NOTE(review): passed as a string here, while filter_reads passes the
    # int 40 -- confirm which type NGS.prune expects.
    min_aln_length = "40"

    result = NGS.prune(folder, blast_data, seq_record, ion_id, min_aln_length)

    # It should drop one seq_record from the blast_data
    self.assertEqual(len(result), 998)
def filter_reads(ion_chunk, blast_chunk, folder):
    r'''
    \* *Internal function* \*

    Walk the FASTQ reads in ``ion_chunk`` and hand every read whose id
    matches the first remaining row of ``blast_chunk`` to ``NGS.prune``.

    ``ion_chunk``   -- path (or handle) given to ``SeqIO.parse(..., "fastq")``.
    ``blast_chunk`` -- path of a text file with one comma-separated BLAST row
    per line; the first field of each row is compared with the read id.
    ``folder``      -- forwarded unchanged to ``NGS.prune``.

    Returns ``None``. The list returned by ``NGS.prune`` replaces the working
    copy of the BLAST rows (presumably the rows that are still unprocessed --
    confirm against ``NGS.prune``).

    Raises ``ValueError`` if a read id or the leading BLAST field is not an
    integer literal, because both are converted with ``int()``.

    NOTE(review): only the head row is ever compared, which appears to assume
    that reads and BLAST rows are both ordered by ascending numeric id. A read
    id greater than the head id never advances the table -- confirm the inputs
    guarantee this ordering.
    '''
    from Bio import SeqIO
    from pyphylogenomics import NGS

    # Accepting alignment lengths higher than 40 bp longer than our primer
    # lengths
    min_aln_length = 40

    # Read the whole BLAST table up front, one stripped row per entry. The
    # context manager closes the file even if reading fails.
    with open(blast_chunk, "r") as blast_file:
        blast_data = [line.strip() for line in blast_file]

    # iterate over ion torrent reads
    for seq_record in SeqIO.parse(ion_chunk, "fastq"):
        # Nothing left to match against: stop reading the FASTQ chunk.
        if not blast_data:
            break

        # avoid processing seq_records that are not in blast file
        first_id_in_blast_data = blast_data[0].split(",")[0]

        # The numeric comparison is implied by the string equality below, but
        # it is kept so that non-numeric ids still fail loudly with ValueError.
        if int(seq_record.id) >= int(first_id_in_blast_data):
            if str(seq_record.id) == first_id_in_blast_data:
                blast_data = NGS.prune(folder, blast_data, seq_record,
                                       first_id_in_blast_data, min_aln_length)