Exemplo n.º 1
0
 def test_readTXT(self):
     """Check readTXT on the example files and on two rejected file lists.

     The example files must parse to ``expected_txt_input``; each of the
     two file lists below must make readTXT raise ``SystemExit``.
     """
     parsed = bioio.readTXT(example_txt_files)
     self.assertDictEqual(parsed, expected_txt_input)

     # One list contains a name with a directory component, the other a
     # name ending in .csv; both are expected to abort the read.
     rejected_inputs = (
         ['sample1.txt', 'path/sample2.txt'],
         ['sample1.csv', 'sample2.txt'],
     )
     for file_list in rejected_inputs:
         with self.assertRaises(SystemExit):
             bioio.readTXT(file_list)
Exemplo n.º 2
0
 def test_writeTXT(self):
     """Round-trip sample1.txt through writeTXT and readTXT.

     The data read from ``sample1.txt`` is written to a scratch file, read
     back, and compared with the original data. The scratch file is
     removed afterwards whether or not the comparison succeeds.
     """
     rewritten_path = 'rewritten_sample1.txt'
     read_txt_data = bioio.readTXT(['sample1.txt'])['sample1']
     try:
         bioio.writeTXT(rewritten_path, read_txt_data)
         self.assertEqual(
             bioio.readTXT([rewritten_path])['rewritten_sample1'],
             read_txt_data)
     finally:
         # Clean up even when the write or the assertion fails, so a
         # failing run does not leave the scratch file behind. The
         # existence check keeps a failed write from being masked by a
         # second error raised from os.remove.
         if os.path.exists(rewritten_path):
             os.remove(rewritten_path)
# -*- coding: utf-8 -*-
"""
@author: thatbudakguy
"""

import sys
import os
sys.path.append(os.path.join(sys.path[0],'lib'))
sys.path.append(os.path.join(sys.path[0],'test'))
import bioio

# Read the file named by the last command-line argument. Its data is looked
# up in readTXT's result under the filename minus its 4-character extension,
# then passed through addGreaterThans (the original comment: "reset '>'").
input_txt_path = sys.argv[-1]
read_txt = bioio.readTXT([input_txt_path])
input_txt_name = input_txt_path[:-4]
input_txt_data = bioio.addGreaterThans(read_txt[input_txt_name])
output_txt_data = []

# Break each line in two at its first dash; the dash starts the second part.
for line in input_txt_data:
    head, dash, tail = line.partition('-')
    if dash:
        output_txt_data.append(head + '\n' + dash + tail)
    else:
        # No dash in this line: slicing at str.find()'s -1 result would
        # split off the last character instead, so keep the line as it is.
        output_txt_data.append(line)

# Write the reformatted lines next to the input as "<name>_clean.txt".
output_txt_name = input_txt_name + "_clean.txt"
bioio.writeTXT(output_txt_name, output_txt_data)
Exemplo n.º 4
0
# -*- coding: utf-8 -*-
"""
@author: thatbudakguy
"""

import sys
import os
sys.path.append(os.path.join(sys.path[0], 'lib'))
sys.path.append(os.path.join(sys.path[0], 'test'))
import bioio

# Read the file named by the last command-line argument. Its data is looked
# up in readTXT's result under the filename minus its 4-character extension,
# then passed through addGreaterThans (the original comment: "reset '>'").
input_txt_path = sys.argv[-1]
read_txt = bioio.readTXT([input_txt_path])
input_txt_name = input_txt_path[:-4]
input_txt_data = bioio.addGreaterThans(read_txt[input_txt_name])
output_txt_data = []

# Break each line in two at its first dash; the dash starts the second part.
for line in input_txt_data:
    before_dash, dash, after_dash = line.partition('-')
    if not dash:
        # No dash in this line: slicing at str.find()'s -1 result would
        # split off the last character instead, so keep the line as it is.
        output_txt_data.append(line)
        continue
    output_txt_data.append(before_dash + '\n' + dash + after_dash)

# Write the reformatted lines next to the input as "<name>_clean.txt".
output_txt_name = input_txt_name + "_clean.txt"
bioio.writeTXT(output_txt_name, output_txt_data)
Exemplo n.º 5
0
import sys
import os
sys.path.append(os.path.join(sys.path[0],'lib'))
sys.path.append(os.path.join(sys.path[0],'test'))
import bioio
import biomath

# Read the input files. From the argv indexing below: argv[1] is the FASTA
# file and argv[2:] are the TXT seqid lists.
read_fasta = bioio.readFASTA(sys.argv[1])
# The FASTA data is looked up under the filename with its last 6 characters
# removed (presumably the ".fasta" extension -- confirm against readFASTA).
input_fasta_name = sys.argv[1][:-6]
input_fasta_data = read_fasta[input_fasta_name]
# splitFASTA's result is indexed by 'output_seq_ids' and 'output_seqs'.
input_fasta_splitdata = bioio.splitFASTA(input_fasta_data)
input_fasta_seq_ids = input_fasta_splitdata['output_seq_ids']
input_fasta_seqs = input_fasta_splitdata['output_seqs']
read_txt = bioio.readTXT(sys.argv[2:])
output_combined_data = []

# Reformat each TXT seqid list in turn (read_txt maps filename -> data).
# NOTE(review): iteritems() exists only on Python 2 dicts; confirm the target
# interpreter before running this on Python 3.
for filename,data in read_txt.iteritems():

	# split on greater-than signs
	output_data = bioio.splitLinearSeqids(data)

	# add venom codes based on filename
	output_data = bioio.addVenomCodes(output_data,filename)

	# replace s??? codes with sample info code
	output_data = bioio.replaceSCodes(output_data)

	# write the 'fixed' version of each file
# -*- coding: utf-8 -*-
"""
@author: thatbudakguy
"""

import sys
import os
sys.path.append(os.path.join(sys.path[0],'lib'))
sys.path.append(os.path.join(sys.path[0],'test'))
import bioio
import biomath

# Load the TXT seqid list named by the last command-line argument. Its data
# is stored in readTXT's result under the filename minus the 4-character
# extension.
# NOTE(review): readTXT receives a bare string here, not a list -- confirm
# that it accepts a single filename.
txt_path = sys.argv[-1]
txt_entries = bioio.readTXT(txt_path)[txt_path[:-4]]

# Load the FASTA file named by the second-to-last argument (keyed by the
# filename minus its last 6 characters) and keep only its sequence ids.
fasta_path = sys.argv[-2]
fasta_records = bioio.readFASTA(fasta_path)[fasta_path[:-6]]
fasta_seq_ids = bioio.splitFASTA(fasta_records)['output_seq_ids']

# Compare the two inputs to find the missing lines.
missing_seq_ids = biomath.findMissingSeqs(txt_entries, fasta_seq_ids)

# Write the missing lines to "<txt name>_missing.txt".
bioio.writeTXT(txt_path[:-4] + "_missing.txt", missing_seq_ids)
Exemplo n.º 7
0
# -*- coding: utf-8 -*-
"""
@author: thatbudakguy
"""

import sys
import os
sys.path.append(os.path.join(sys.path[0], 'lib'))
sys.path.append(os.path.join(sys.path[0], 'test'))
import bioio
import biomath

# The TXT seqid list is the last command-line argument; readTXT's result is
# indexed by that filename with its 4-character extension removed.
# NOTE(review): readTXT receives a bare string here, not a list -- confirm
# that it accepts a single filename.
txt_filename = sys.argv[-1]
txt_key = txt_filename[:-4]
seqids_from_txt = bioio.readTXT(txt_filename)[txt_key]

# The FASTA file is the second-to-last argument; readFASTA's result is
# indexed by that filename with its last 6 characters removed.
fasta_filename = sys.argv[-2]
fasta_contents = bioio.readFASTA(fasta_filename)[fasta_filename[:-6]]
seqids_from_fasta = bioio.splitFASTA(fasta_contents)['output_seq_ids']

# Compare the two inputs and write the missing lines to
# "<txt name>_missing.txt".
bioio.writeTXT(
    txt_key + "_missing.txt",
    biomath.findMissingSeqs(seqids_from_txt, seqids_from_fasta))
 def test_writeTXT(self):
     """Round-trip sample1.txt through writeTXT and readTXT.

     The data read from ``sample1.txt`` is written to a scratch file, read
     back, and compared with the original data. The scratch file is
     removed afterwards whether or not the comparison succeeds.
     """
     scratch_file = 'rewritten_sample1.txt'
     read_txt_data = bioio.readTXT(['sample1.txt'])['sample1']
     try:
         bioio.writeTXT(scratch_file, read_txt_data)
         reread = bioio.readTXT([scratch_file])['rewritten_sample1']
         self.assertEqual(reread, read_txt_data)
     finally:
         # Clean up even when the write or the assertion fails, so a
         # failing run does not leave the scratch file behind. The
         # existence check keeps a failed write from being masked by a
         # second error raised from os.remove.
         if os.path.exists(scratch_file):
             os.remove(scratch_file)
 def test_readTXT(self):
     """Check readTXT on the example files and on two rejected file lists.

     The example files must parse to ``expected_txt_input``; each of the
     two file lists below must make readTXT raise ``SystemExit``.
     """
     actual = bioio.readTXT(example_txt_files)
     self.assertDictEqual(actual, expected_txt_input)

     # First list: one name carries a directory component.
     # Second list: one name ends in .csv.
     for bad_files in (['sample1.txt', 'path/sample2.txt'],
                       ['sample1.csv', 'sample2.txt']):
         with self.assertRaises(SystemExit):
             bioio.readTXT(bad_files)