示例#1
0
    def test_prune(self):
        """Check that ``NGS.prune`` returns 998 entries for the fixtures."""
        folder = "NGS"

        # Fixture rows, one per line, with surrounding whitespace removed.
        # The context manager closes the file even if reading fails.
        with open("NGS/blast_data.csv", "r") as f:
            blast_data = [line.strip() for line in f]

        # NOTE(review): this passes the whole SeqIO.parse iterator rather
        # than a single record -- confirm that is what NGS.prune expects.
        seq_record = SeqIO.parse("NGS/seq_record.fastq", "fastq")

        ion_id = "3856"
        # NOTE(review): given as a string here; confirm NGS.prune accepts
        # a string minimum alignment length.
        min_aln_length = "40"

        result = NGS.prune(folder, blast_data, seq_record, ion_id,
                            min_aln_length)
        # It should drop one seq_record from the blast_data
        self.assertEqual(len(result), 998)
示例#2
0
def filter_reads(ion_chunk, blast_chunk, folder):
    """Prune BLAST rows against the reads of one Ion Torrent chunk.

    *Internal function.*

    Loads the rows of ``blast_chunk`` and walks the FASTQ records of
    ``ion_chunk``.  Whenever a read id equals the id stored in the first
    comma-separated field of the first remaining BLAST row, the read is
    handed to ``NGS.prune`` and the value that call returns becomes the
    new list of remaining rows.

    Scanning stops as soon as no BLAST rows remain, or when a read is not
    behind the first remaining BLAST row but its id does not match it.

    Ids are compared with ``int()``, so both the read ids and the first
    field of every BLAST row must be integer-like; otherwise ``int()``
    raises ``ValueError``.
    NOTE(review): the early exits look like they rely on reads and BLAST
    rows both being sorted by ascending id -- confirm against the caller.

    Args:
        ion_chunk: FASTQ input handed to ``SeqIO.parse``.
        blast_chunk: path of a text file holding one comma-separated
            BLAST row per line.
        folder: passed through unchanged to ``NGS.prune``.

    Returns:
        None.
    """
    from Bio import SeqIO
    from pyphylogenomics import NGS

    # Minimum alignment length handed to NGS.prune.  Per the original
    # note: alignments must be longer than 40 bp, i.e. longer than our
    # primer lengths.
    min_aln_length = 40

    # The context manager closes the file even if reading fails.
    with open(blast_chunk, "r") as blast_file:
        blast_data = [line.strip() for line in blast_file]

    # iterate over ion torrent reads
    for seq_record in SeqIO.parse(ion_chunk, "fastq"):
        if not blast_data:
            # No BLAST rows left, so no later read can trigger a prune;
            # stop instead of parsing the rest of the FASTQ file.
            break

        first_id_in_blast_data = blast_data[0].split(",")[0]

        if int(seq_record.id) < int(first_id_in_blast_data):
            # avoid processing seq_records that are not in blast file
            continue

        if str(seq_record.id) != first_id_in_blast_data:
            # The read is not behind the first BLAST row, yet the ids do
            # not match: stop scanning.
            break

        blast_data = NGS.prune(folder, blast_data, seq_record,
                               first_id_in_blast_data, min_aln_length)