def test_writeFASTA(self):
    """Check that FASTA data survives a write/read round trip.

    Reads 'sample1.fasta', splits the data into sequence ids and
    sequences, writes them to 'rewritten_sample1.fasta', and asserts
    that reading the rewritten file gives back the originally read data.
    The rewritten file is removed afterwards, whether or not the
    assertion passes.
    """
    read_fasta_data = bioio.readFASTA(['sample1.fasta'])['sample1']
    # Split once and reuse the result for both the ids and the sequences.
    split_fasta_data = bioio.splitFASTA(read_fasta_data)
    read_fasta_data_seqids = split_fasta_data['output_seq_ids']
    read_fasta_data_seqs = split_fasta_data['output_seqs']
    try:
        bioio.writeFASTA('rewritten_sample1.fasta',
                         read_fasta_data_seqids, read_fasta_data_seqs)
        self.assertEqual(
            bioio.readFASTA(['rewritten_sample1.fasta'])['rewritten_sample1'],
            read_fasta_data)
    finally:
        # Clean up even when the write, re-read, or assertion fails, so a
        # failing run does not leave a stale file behind for the next one.
        if os.path.exists('rewritten_sample1.fasta'):
            os.remove('rewritten_sample1.fasta')
Exemplo n.º 2
0
 def test_writeFASTA(self):
     """Check that FASTA data survives a write/read round trip.

     Reads 'sample1.fasta', splits the data into sequence ids and
     sequences, writes them to 'rewritten_sample1.fasta', and asserts
     that reading the rewritten file gives back the originally read data.
     The rewritten file is removed afterwards, whether or not the
     assertion passes.
     """
     read_fasta_data = bioio.readFASTA(['sample1.fasta'])['sample1']
     # Split once and reuse the result for both the ids and the sequences.
     split_fasta_data = bioio.splitFASTA(read_fasta_data)
     read_fasta_data_seqids = split_fasta_data['output_seq_ids']
     read_fasta_data_seqs = split_fasta_data['output_seqs']
     try:
         bioio.writeFASTA('rewritten_sample1.fasta', read_fasta_data_seqids,
                          read_fasta_data_seqs)
         self.assertEqual(
             bioio.readFASTA(['rewritten_sample1.fasta'])['rewritten_sample1'],
             read_fasta_data)
     finally:
         # Clean up even when the write, re-read, or assertion fails, so a
         # failing run does not leave a stale file behind for the next one.
         if os.path.exists('rewritten_sample1.fasta'):
             os.remove('rewritten_sample1.fasta')
@author: thatbudakguy
"""

import sys
import os
sys.path.append(os.path.join(sys.path[0],'lib'))
sys.path.append(os.path.join(sys.path[0],'test'))
import bioio
import biomath

# The input CSV is the last command-line argument.
input_csv_path = sys.argv[-1]

# read the file, then look its data up under the path with the last four
# characters removed (assumes the argument ends in a 4-character suffix
# such as '.csv')
read_csv = bioio.readCSV([input_csv_path])
input_csv_name = input_csv_path[:-4]
input_csv_data = read_csv[input_csv_name]

# find longest sequences and get their corresponding ids
output_csv_data = biomath.findLongestSeq(input_csv_data)
# split once and reuse the result for both the ids and the sequences,
# rather than running splitCSV twice on the same data
output_csv_splitdata = bioio.splitCSV(output_csv_data)
output_seq_ids = output_csv_splitdata['output_seq_ids']
output_seqs = output_csv_splitdata['output_seqs']
output_seq_ids_txt = bioio.addGreaterThans(output_seq_ids)

# define names of the resulting files (all derived from the input name)
output_csv_name = input_csv_name + "_trimmed.csv"
output_txt_name = input_csv_name + "_names_only.txt"
output_fasta_name = input_csv_name + ".fasta"

# write the resulting data to files
bioio.writeCSV(output_csv_name, output_csv_data)
bioio.writeTXT(output_txt_name, output_seq_ids_txt)
bioio.writeFASTA(output_fasta_name, output_seq_ids, output_seqs)
Exemplo n.º 4
0
"""
@author: thatbudakguy
"""

import sys
import os
sys.path.append(os.path.join(sys.path[0],'lib'))
sys.path.append(os.path.join(sys.path[0],'test'))
import bioio
import biomath

# Inputs: the second-to-last command-line argument is the FASTA file,
# the last one is the TXT file.
fasta_path, txt_path = sys.argv[-2], sys.argv[-1]

# read the FASTA file; its data is looked up under the path with the last
# six characters removed (assumes a '.fasta' suffix)
read_fasta = bioio.readFASTA([fasta_path])
input_fasta_name = fasta_path[:-6]
input_fasta_data = read_fasta[input_fasta_name]

# separate the FASTA data into sequence ids and sequences
input_fasta_splitdata = bioio.splitFASTA(input_fasta_data)
input_fasta_seq_ids = input_fasta_splitdata['output_seq_ids']
input_fasta_seqs = input_fasta_splitdata['output_seqs']

# read the TXT file; its data is looked up under the path with the last
# four characters removed (assumes a '.txt' suffix)
read_txt = bioio.readTXT([txt_path])
input_txt_name = txt_path[:-4]
input_txt_data = read_txt[input_txt_name]

# find seqs from database using seqids list
output_fasta_data = biomath.reduceNames(
    input_txt_data,
    input_fasta_seq_ids,
    input_fasta_seqs,
)
output_seq_ids = output_fasta_data['output_seq_ids']
output_seqs = output_fasta_data['output_seqs']

# write the seqs to a file named after the input FASTA
output_fasta_name = "{}_concatenated.fasta".format(input_fasta_name)
bioio.writeFASTA(output_fasta_name, output_seq_ids, output_seqs)