Example #1
	def _get_pdb_file_path(self):
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		pdb_files = file_handlers.find_files(file_paths, 'pdb')
		for pdb_file in pdb_files:
			if (self.filename + '_0001') == file_handlers.get_file_name(pdb_file).split('.')[0]:
				self.file_path = pdb_file
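Every snippet on this page relies on a FileHandlers helper imported from util. Its implementation is not shown here, so the following is only a minimal sketch of the interface, with method bodies inferred from how the examples call them:

import os

class FileHandlers:
	def search_directory(self):
		# Collect the full path of every file under the current working directory
		paths = []
		for dir_path, dir_names, file_names in os.walk(os.getcwd()):
			for name in file_names:
				paths.append(os.path.join(dir_path, name))
		return paths

	def find_files(self, file_paths, extension):
		# Keep only the paths whose extension matches, e.g. 'pdb', 'txt', 'out'
		return [path for path in file_paths if path.split('.')[-1] == extension]

	def get_file_name(self, path):
		# Return just the file name portion of a path, e.g. '1abc_0001.pdb'
		return os.path.basename(path)

	def clean(self, fields):
		# Strip surrounding whitespace and newlines from each field
		return [field.strip() for field in fields]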
Example #2
	def _get_downloaded_file_path(self, pdb_code):
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		ent_files = file_handlers.find_files(file_paths, 'ent')
		for ent_file in ent_files:
			if pdb_code == file_handlers.get_file_name(ent_file).split('.')[0].lstrip('pdb').upper():
				return ent_file
def main():
    file_handlers = FileHandlers()
    file_paths = file_handlers.search_directory()
    pep_files = file_handlers.find_files(file_paths, "pep")
    for pep_file in pep_files:
        file_name = file_handlers.get_file_name(pep_file)
        run_muscle(pep_file, file_name)
def main():
	file_handlers = FileHandlers()
	file_paths = file_handlers.search_directory()
	fasta_files = file_handlers.find_files(file_paths, 'fasta')
	for path in fasta_files:
		file_name = file_handlers.get_file_name(path)
		get_dm_raxml(path, file_name, 4)
Example #5
	def _build_SASA_dict(self):
		file_handlers = FileHandlers()
		self.SASA_dict[self.filename] = {}
		self._run_POPS()
		self._get_data()
		for line in self.data:
			fields = line.split('\t')
			cleaned = file_handlers.clean(fields)
			if len(cleaned) == 9: 
				(position, 
				aa, 
				tot_SA, 
				SASA, 
				frac_SASA, 
				phob, 
				phil) = (cleaned[2],
						cleaned[0],
						cleaned[8],
						cleaned[5],
						cleaned[6],
						cleaned[3],
						cleaned[4])
				self.SASA_dict[self.filename][position] = [aa, 
													tot_SA, 
													SASA, 
													frac_SASA, 
													phob, 
													phil]
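For reference, the dictionary built above is keyed first by file name and then by residue position, with the six parsed columns stored in order; the '1abc' and '42' keys below are placeholders:

# self.SASA_dict = {
# 	'1abc': {'42': [aa, tot_SA, SASA, frac_SASA, phob, phil], ...}
# }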
Example #6
def build_SASA_dict(out_files):
	SASA_dict = {}
	for path in out_files:
		file_handlers = FileHandlers()
		file_name = file_handlers.get_file_name(path)
		SASA_dict[file_name] = {}
		for line in open(path):
			file_handlers2 = FileHandlers()
			fields = line.split('\t')
			cleaned = file_handlers2.clean(fields)
			if len(cleaned) == 9: #and int(cleaned[2]) >= 1:
				(position, 
				aa, 
				tot_SA, 
				SASA, 
				frac_SASA, 
				phob, 
				phil) = (cleaned[2],
						cleaned[0],
						cleaned[8],
						cleaned[5],
						cleaned[6],
						cleaned[3],
						cleaned[4])
				SASA_dict[file_name][position] = [aa, 
													tot_SA, 
													SASA, 
													frac_SASA, 
													phob, 
													phil]
	return SASA_dict
def main():
	file_handlers = FileHandlers()
	file_paths = file_handlers.search_directory()
	fasta_files = file_handlers.find_files(file_paths, 'fasta')
	for path in fasta_files:
		file_name, file_list, interval = find_indices(path)
		write_fasta_new_names(path, file_name, file_list, interval)
class PickleFasta:
	def __init__(self):
		self.file_handlers = FileHandlers()

	def _get_fasta_files(self):
		file_paths = self.file_handlers.search_directory()
		fasta_files = self.file_handlers.find_files(file_paths, 'faa')
		print "There are %d .faa files in this directory" % len(fasta_files)
		return fasta_files

	def pickle_organism_fasta(self):
		fasta_files = self._get_fasta_files()
		fasta_dictionary = {}
		for fasta_file in fasta_files:
			file_name = self.file_handlers.get_file_name(fasta_file)
			name_list = file_name.split('.')
			Data = open(fasta_file)
			D = Data.readlines()
			Data.close()
			for d in D:
				if d.startswith('>'):
					d_list = d.split(' ')
					if name_list[0] in fasta_dictionary:
						fasta_dictionary[name_list[0]].append(d_list[0].lstrip('>'))
					else:
						fasta_dictionary[name_list[0]] = [d_list[0].lstrip('>')]
				else:
					pass
		return fasta_dictionary
def main():
	## THIS WORKED, DON'T ERASE
	## Save organism data to pickled dictionary
	#pickle_fasta = PickleFasta()
	#fasta_dictionary = pickle_fasta.pickle_organism_fasta()
	#print "There are %d entries in the fasta_dictionary" % len(fasta_dictionary)
	#pickle.dump(fasta_dictionary, open('organism_dictionary.pkl', 'wb'))
	
	file_handlers = FileHandlers()
	dm_files = get_dm_files()

	# Load the dictionary back from the pickle file
	print "Loading fasta_dictionary..."
	open_fasta = open('organism_dictionary.pkl', 'rb')
	fasta_dictionary = pickle.load(open_fasta)
	open_fasta.close()
	print "Length of fasta dictionary: ", len(fasta_dictionary)

	## THIS WORKED, DON'T ERASE
	## Build mapping dictionary and pickle
	dm_processing = dmDictionary()
	dm_processing.init_mapping_dictionary(fasta_dictionary)
	for path in dm_files:
		file_name = file_handlers.get_file_name(path)
		print "Opening %s..." % file_name
		dm_dictionary = dm_processing.individual_dm_dictionary(path, file_name)
		print "Length of dm_dictionary for %s is %d" % (file_name, len(dm_dictionary))
		mapping_dictionary = dm_processing.build_mapping_dictionary(fasta_dictionary, dm_dictionary)
	
	open_mapping = open('mapping_dictionary.pkl', 'wb')
	pickle.dump(mapping_dictionary, open_mapping)
	open_mapping.close()
	print mapping_dictionary
Example #10
 def _get_downloaded_file_path(self, pdb_code):
     file_handlers = FileHandlers()
     file_paths = file_handlers.search_directory()
     ent_files = file_handlers.find_files(file_paths, 'ent')
     for ent_file in ent_files:
         if pdb_code == file_handlers.get_file_name(ent_file).split(
                 '.')[0].lstrip('pdb').upper():
             return ent_file
Example #11
 def _build_data_dict(self, file_tag):
     self.data_dict = {}
     self._get_data(file_tag)
     file_handlers = FileHandlers()
     for line in self.data:
         fields = line.split('\t')
         cleaned = file_handlers.clean(fields)
         self.data_dict[cleaned[0]] = float(cleaned[1])
Example #12
	def _build_data_dict(self, file_tag):
		self.data_dict = {}
		self._get_data(file_tag)
		file_handlers = FileHandlers()
		for line in self.data:
			fields = line.split('\t')
			cleaned = file_handlers.clean(fields)
			self.data_dict[cleaned[0]] = float(cleaned[1])
Example #13
	def _get_output(self):
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		out_files = file_handlers.find_files(file_paths, 'out')
		for out_file in out_files:
			TXT = open(out_file)
			data = TXT.readlines()
			TXT.close()
		os.remove(out_file)
		return data
Example #14
 def _get_file_path(self):
     os.chdir("./database/pdbs/pdb")
     file_handlers = FileHandlers()
     file_paths = file_handlers.search_directory()
     pdb_files = file_handlers.find_files(file_paths, 'pdb')
     for pdb_file in pdb_files:
         if self.filename == file_handlers.get_file_name(pdb_file).split(
                 '.')[0]:
             self.file_path = pdb_file
     os.chdir("../../../")
Example #15
	def _get_file_path(self):
		os.chdir("./database/pdbs/pdb")
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		pdb_files = file_handlers.find_files(file_paths, 'pdb')
		for pdb_file in pdb_files:
			if (self.filename + '_0001') == file_handlers.get_file_name(pdb_file).split('.')[0]:
				self.file_path = pdb_file
				self.out_file = file_handlers.get_file_name(pdb_file).split('.')[0] + '_pops.out'
				self.out_file_path = self.dir_path + '/' + self.out_file
Example #16
	def _open_file(self):
		#os.chdir("../src/database/pdbs")
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		pdb_files = file_handlers.find_files(file_paths, 'pdb')
		for pdb_file in pdb_files:
			if self.filename == file_handlers.get_file_name(pdb_file).split('.')[0]:
				Data = open(pdb_file)
				self.data = Data.readlines()
				Data.close()
Example #17
	def _get_pdb(self):
		#os.chdir("./database/pdbs/pdb")
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		pdb_files = file_handlers.find_files(file_paths, 'pdb')
		for pdb_file in pdb_files:
			if self.filename == file_handlers.get_file_name(pdb_file).split('.')[0]:
				PDB = open(pdb_file)
				self.pdb = PDB.readlines()
				PDB.close()
Example #18
 def _get_data(self, filename):
     file_handlers = FileHandlers()
     file_paths = file_handlers.search_directory()
     txt_files = file_handlers.find_files(file_paths, 'txt')
     for txt_file in txt_files:
         if filename == file_handlers.get_file_name(txt_file):
             TXT = open(txt_file)
             data = TXT.readlines()
             TXT.close()
     return data
Example #19
	def _get_data(self, file_tag):
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		txt_files = file_handlers.find_files(file_paths, 'txt')
		for txt_file in txt_files:
			if self.filename == file_handlers.get_file_name(txt_file).split('_')[0]:
				if (file_tag.split('_')[1] + '.txt') == file_handlers.get_file_name(txt_file).split('_')[1]:
					TXT = open(txt_file)
					self.data = TXT.readlines()
					TXT.close()
Example #20
	def _get_data(self, filename):
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		txt_files = file_handlers.find_files(file_paths, 'txt')
		for txt_file in txt_files:
			if filename == file_handlers.get_file_name(txt_file):
				TXT = open(txt_file)
				data = TXT.readlines()
				TXT.close()
		return data
Example #21
 def _get_pdb(self):
     #os.chdir("./database/pdbs/pdb")
     file_handlers = FileHandlers()
     file_paths = file_handlers.search_directory()
     pdb_files = file_handlers.find_files(file_paths, 'pdb')
     for pdb_file in pdb_files:
         if self.filename == file_handlers.get_file_name(pdb_file).split(
                 '.')[0]:
             PDB = open(pdb_file)
             self.pdb = PDB.readlines()
             PDB.close()
Example #22
 def _get_gb_record(self):
     file_handlers = FileHandlers()
     file_paths = file_handlers.search_directory()
     gb_files = file_handlers.find_files(file_paths, 'gb')
     print gb_files
     print self.genbank_id
     for gb_file in gb_files:
         if self.genbank_id == file_handlers.get_file_name(gb_file).split(
                 '.')[0]:
             self.gb_file_path = gb_file
             print self.gb_file_path
Example #23
def find_indices(path):
	file_handlers = FileHandlers()
	#for path in fasta_files:
	indices = []
	open_file = open(path, 'rU')
	file_list = open_file.readlines()
	open_file.close()
	for line in file_list:
		if '>' in line:
			indices.append(file_list.index(line))
	interval = indices[1] - indices[0]
	file_name = file_handlers.get_file_name(path)
	return file_name, file_list, interval
Example #24
def BuildRulesDict():
	"""Construct a dictionary from the .rul file. Each key-value pair is 
	constructed from a single line of the .rul file. The .rul file has the
	following format:

	this	replace_this
	R	A	G
	Y	C	T
	M	A	C
	K	G	T
	S	C	G
	W	A	T
	H	A	C	T
	B	C	G	T
	V	A	C	G
	D	A	G	T
	N	A	C	G	T

	Parameters
	----------
	none

	Returns
	-------
	rules_dict: dict
		dictionary in which the key is a string resulting from joining the 
		nucleotides (A, G, C, T) in columns 2-5 of each line from the .rul
		file and the value corresponds to the string in the first column of
		each line of the .rul file

	Examples
	--------
	>>> rules_dict = BuildRulesDict()
	"""
	file_handlers = FileHandlers()
	rules_file = LoadFiles('rul')
	rules_dict = {}
	try:
		for line in open(rules_file[0]):
			fields = line.split("\t")
			cleaned = file_handlers.clean(fields)
			if 'this' in line and 'replace_this' in line:
				pass
			else:
				if ''.join((cleaned[1:])) not in rules_dict:
					rules_dict[''.join((cleaned[1:]))] = cleaned[0]
				else:
					pass
		return rules_dict
	except IOError:
		print("An error occurred while trying to load the rules file." +
		"Make sure the file is located in your current working directory.")			
Example #25
def BuildUsageDict():
	"""Build a codon usage dictionary based on the user selected codon usage
	file
		
	Useful for downstream calculations involving known codon usage frequencies
	in a given organism

	Parameters
	----------
	none

	Returns
	-------
	usage_dict: dict
		Dictionary of lists of dictionaries for codon usage. Dictionary has the
		following structure:
		{
			F : [{TTT: 0.58}, {TTC: 0.42}],
			L : [{TTA: 0.14}, {TTG: 0.13}, {CTT: 0.12}, {CTC: 0.1}, 
					{CTA: 0.04}, {CTG: 0.47}],
			I : [{ATT: 0.49}, {ATC: 0.39}, {ATA: 0.11}],
			...
			...
			...
			G : [{GGT: 0.35}, {GGC: 0.37}, {GGA: 0.13}, {GGG: 0.15}]
		}

	Examples
	--------
	>>> usage_dict = BuildUsageDict()
	"""
	file_handlers = FileHandlers()
	all_files = LoadFiles('txt')
	selection_int, file_path, file_name = GetDataFile(all_files)
	usage_dict = {}
	try:
		for line in open(file_path):
			fields = line.split("\t")
			cleaned = file_handlers.clean(fields)
			if 'Codon' in line and 'name' in line and 'prob' in line:
				pass
			else:
				if cleaned[1] in usage_dict:
					usage_dict[cleaned[1]].append({cleaned[0]: cleaned[2]})
				else:
					usage_dict[cleaned[1]] = [{cleaned[0]: cleaned[2]}]
		return usage_dict
	except IOError:
		print("An error occurred while trying to load the data file." +
		"Make sure the file is located in your current working directory.")
Example #26
 def _get_outfile(self):
     file_handlers = FileHandlers()
     file_paths = file_handlers.search_directory()
     out_files = file_handlers.find_files(file_paths, 'out')
     if (self.pdb_code != '' and self.psiblast != ''):
         for out_file in out_files:
             if (self.pdb_code + '_mutants') == \
                     file_handlers.get_file_name(out_file).split('.')[0]:
                 self.ddG_results_filepath = out_file
             elif (self.psiblast + '_mutants') == \
                     file_handlers.get_file_name(out_file).split('.')[0]:
                 self.llikelihood_filepath = out_file
     elif (self.pdb_code != '' and self.psiblast == ''):
         for out_file in out_files:
             if (self.pdb_code + '_mutants') == \
                     file_handlers.get_file_name(out_file).split('.')[0]:
                 self.ddG_results_filepath = out_file
                 print "Fetching data from %s ....." % \
                     file_handlers.get_file_name(out_file)
     elif (self.pdb_code == '' and self.psiblast != ''):
         for out_file in out_files:
             if (self.psiblast + '_mutants') == \
                     file_handlers.get_file_name(out_file).split('.')[0]:
                 self.llikelihood_filepath = out_file
     else:
         print "You have not specified any results data to parse."
         exit(1)
Example #27
	def _get_file_path(self, ligand=False, pdb=False):
		#os.chdir("./database/pdbs/pdb")
		self.file_path = ''
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		pdb_files = file_handlers.find_files(file_paths, 'pdb')
		if ligand == True:
			for pdb_file in pdb_files:
				if self.filename == file_handlers.get_file_name(pdb_file).split('.')[0]:
					self.file_path = pdb_file
		elif pdb == True:
			for pdb_file in pdb_files:
				if (self.filename + '_0001') == file_handlers.get_file_name(pdb_file).split('.')[0]:
					self.file_path = pdb_file
Example #28
 def _build_SASA_dict(self):
     file_handlers = FileHandlers()
     self.SASA_dict[self.filename] = {}
     self._run_POPS()
     self._get_data()
     for line in self.data:
         fields = line.split('\t')
         cleaned = file_handlers.clean(fields)
         if len(cleaned) == 9:
             (position, aa, tot_SA, SASA, frac_SASA, phob,
              phil) = (cleaned[2], cleaned[0], cleaned[8], cleaned[5],
                       cleaned[6], cleaned[3], cleaned[4])
             self.SASA_dict[self.filename][position] = [
                 aa, tot_SA, SASA, frac_SASA, phob, phil
             ]
Example #29
	def _parse_ddG_data(self):
		file_handlers = FileHandlers()
		self._get_data()
		ddG_data_map = {}
		for i in range(len(self.ddG_data)):
			fields = self.ddG_data[i].split(' ')
			cleaned = file_handlers.clean(fields)
			while cleaned.count('') > 0:
				cleaned.remove('')
			if len(cleaned[1].split("-")) < 2:	## ignore first line
				pass
			else:
				chain, mutation = cleaned[2].split("-")
				wt_res, position, mut_res = mutation[0], mutation[1:-1], mutation[-1]
				ddG_data_map[(chain, wt_res, position, mut_res)] = cleaned[3]
		self.ddG_data_map = ddG_data_map
Example #30
 def _build_data_structure(self, lines):
     file_handlers = FileHandlers()
     feature_data_dict = {}
     for i in range(len(self.sequence_annotations)):
         feature_data_dict[self.sequence_annotations[i][1]] = \
             [[], self.sequence_annotations[i][3]]
         residues = []
         current_chain = self.sequence_annotations[i][0]
         for line in lines:
             fields = line.split('\t')
             cleaned = file_handlers.clean(fields)
             chain = cleaned[1]
             residue_number = cleaned[2]
             if chain == current_chain:
                 residues.append(residue_number)
         feature_data_dict[self.sequence_annotations[i][1]][0] = residues
     # {gene_name: [ [residue numbers of interest], sequence ]}
     return feature_data_dict
Example #31
 def _get_filename(self):
     file_handlers = FileHandlers()
     file_paths = file_handlers.search_directory()
     pdb_files = file_handlers.find_files(file_paths, "pdb")
     if self.tag == "":
         for pdb_file in pdb_files:
             if self.filename == file_handlers.get_file_name(pdb_file).split(".")[0]:
                 return file_handlers.get_file_name(pdb_file).split(".")[0]
     else:
         for pdb_file in pdb_files:
             if (self.filename + self.tag) == file_handlers.get_file_name(pdb_file).split(".")[0]:
                 return file_handlers.get_file_name(pdb_file).split(".")[0]
Example #32
def GetUserSelection(sorted_dict):
	"""Prompt user for selection of amino acids to remove from list

	Parameters
	----------
	sorted_dict: dict
		Dictionary of lists of dictionaries for codon usage. For example, the
		output of BuildUsageDict() would work as input. In this case, any 
		dictionary that has single letter amino acid symbols as keys would
		work

	Returns
	-------
	aa_list: list
		List of amino acids that the user has entered. Amino acid symbols are
		converted to uppercase and all white space is removed.
	
	Examples
	--------
	>>> selection = GetUserSelection(sorted_dict)
	"""	
	file_handlers = FileHandlers()
	while True:
		selection = raw_input("Choose amino acids to remove (multiple amino " +
							"acids are indicated as a comma-separated list: ")
		aa_list = file_handlers.clean(selection.split(','))
		try:
			for i in range(len(aa_list)):
				if aa_list[i].upper() in sorted_dict:
					aa_list[i] = aa_list[i].upper()
				else:
					raise ValueError()
			return aa_list
		except ValueError:
				print("Invalid entry. You must enter a letter or series of " +
				"comma-separated letters corresponding to the amino acids " + 
				"you wish to omit.")
def build_distance_dict(dm_files, inverse_mapping_dict, organism_pairs):
	file_handlers = FileHandlers()
	for dm_file in dm_files:
		file_name = file_handlers.get_file_name(dm_file)
		print "Opening %s...." % file_name
		data = open(dm_file)
		D = data.readlines()
		data.close()
		for d in D:
			data = d.strip().split(' ')
			enz1 = data[0]
			enz2 = data[1]
			distance = data[-1]
			for key in organism_pairs:
				if enz1 in inverse_mapping_dict and enz2 in inverse_mapping_dict:
					if inverse_mapping_dict[enz1] in key and inverse_mapping_dict[enz2] in key:
						#print inverse_mapping_dict[enz1], inverse_mapping_dict[enz2], key
						organism_pairs[key].append(distance)
						#print organism_pairs[key]
						#print "Length of distance list is %d" % len(organism_pairs[key])
				else:
					print "Could not find %s and %s in mapping_dict" % (enz1, enz2)
		print "Finished parsing %s...." % file_name
	return organism_pairs
Example #34
def LoadFiles(extension):
	"""Get the paths to files with the given extension in the current directory

	Useful when you have a set of custom files that need to be loaded and
	parsed prior to some calculation or data analysis

	Parameters
	----------
	extension: str
		File extension to search for (e.g. 'txt' or 'rul')

	Returns
	-------
	list
		List of strings corresponding to the path/to/file for each matching
		file found in the current directory

	Examples
	--------
	>>> all_files = LoadFiles('txt')
	"""
	file_handlers = FileHandlers()
	file_paths = file_handlers.search_directory()
	files = file_handlers.find_files(file_paths, extension)
	return files
Example #35
 def _get_data(self, file_tag):
     file_handlers = FileHandlers()
     file_paths = file_handlers.search_directory()
     txt_files = file_handlers.find_files(file_paths, 'txt')
     for txt_file in txt_files:
         if self.filename == file_handlers.get_file_name(txt_file).split(
                 '_')[0]:
             if (file_tag.split('_')[1] + '.txt'
                 ) == file_handlers.get_file_name(txt_file).split('_')[1]:
                 TXT = open(txt_file)
                 self.data = TXT.readlines()
                 TXT.close()
Example #36
	def _get_filepath(self, data_file=False, pdb_file=False):
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		if data_file == True:
			files = file_handlers.find_files(file_paths, 'txt')
			for path in files:
				if (self.filename + '_mutant_list') == file_handlers.get_file_name(path).split('.')[0]:
					return path
		elif pdb_file == True:
			files = file_handlers.find_files(file_paths, 'pdb')
			for path in files:
				if (self.filename + '_0001') == file_handlers.get_file_name(path).split('.')[0]:
					return path
		else:
			print "Specify file type"
Example #37
	def _get_pdb(self, rosetta_min=False, refined_pocket=False):
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		pdb_files = file_handlers.find_files(file_paths, 'pdb')
		for pdb_file in pdb_files:
			if rosetta_min == True and refined_pocket == True:
				print "Invalid input"
			elif rosetta_min == True:
				if (self.filename + '_0001') == file_handlers.get_file_name(pdb_file).split('.')[0]:
					print "Found ", (self.filename + '_0001.pdb')
					filepath = pdb_file
			elif refined_pocket == True:
				if ('pocket0') == file_handlers.get_file_name(pdb_file).split('.')[0]:
					print "Found pocket0.pdb"
					filepath = pdb_file
			else:
				if self.filename == file_handlers.get_file_name(pdb_file).split('.')[0]:
					print "Found ", (self.filename + '.pdb')
					filepath = pdb_file
		return filepath
Example #38
import subprocess
from util import FileHandlers
from Bio.Phylo.TreeConstruction import DistanceCalculator
from Bio import AlignIO

file_handlers = FileHandlers()
#file_paths = file_handlers.search_directory()
#fasta_files = file_handlers.find_files(file_paths, 'faa')

#for path in fasta_files:
#	cmd = ['perl ./Scripts/MarkerScanner.pl -Bacteria ' + path]
#	subprocess.call(cmd, shell=True)

file_paths = file_handlers.search_directory()
pep_files = file_handlers.find_files(file_paths, 'pep')

for path in pep_files:
    file_name = file_handlers.get_file_name(path)
    name_list = file_name.split('.')
    out_file = name_list[0] + '_out.' + name_list[1]
    cmd = ['muscle -in ' + path + ' -out ' + out_file]
    subprocess.call(cmd, shell=True)

#aln = AlignIO.read('path/to/alignnment/file', 'format (i.e. phylip)')
#calculator = DistanceCalculator('identity') # 'identity' names the scoring model (matrix) used to calculate distances; it is the default and can be used for both DNA and protein sequences.
#dm = calculator.get_distance(aln)
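Filled in, the commented distance-matrix step above might look like the following; the alignment path and the 'fasta' format are placeholders for whatever MUSCLE actually wrote:

aln = AlignIO.read('path/to/alignment/file', 'fasta')	# placeholder path
calculator = DistanceCalculator('identity')
dm = calculator.get_distance(aln)
print dm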
Example #39
import subprocess
from util import FileHandlers
from Bio.Phylo.TreeConstruction import DistanceCalculator
from Bio import AlignIO

file_handlers = FileHandlers()
file_paths = file_handlers.search_directory()
# fasta_files = file_handlers.find_files(file_paths, 'faa')

# for path in fasta_files:
# 	cmd = ['perl ./Scripts/MarkerScanner.pl -Bacteria ' + path]
# 	subprocess.call(cmd, shell=True)


# file_paths = file_handlers.search_directory()
# pep_files = file_handlers.find_files(file_paths, 'pep')
#
# for path in pep_files:
# 	file_name = file_handlers.get_file_name(path)
# 	name_list = file_name.split('.')
# 	out_file = ''.join([name_list[0] + '_out.' + name_list[1]])
# 	cmd = ['muscle -in ' + path + ' -out ' + out_file]
# 	subprocess.call(cmd, shell=True)


def run_muscle(path):
    file_name = file_handlers.get_file_name(path)
    name_list = file_name.split(".")
    out_file = "".join(name_list[0] + "_out." + name_list[1])
    cmd = ["muscle -in " + path + " -out " + out_file]
    subprocess.call(cmd, shell=True)
Example #40
 def _mkdir(self):
     file_handlers = FileHandlers()
     file_handlers.make_results_folder(self.dir_path.split('/')[-1])
Example #41
import os
import subprocess
from util import FileHandlers

file_handlers = FileHandlers()
file_paths = file_handlers.search_directory()
fasta_files = file_handlers.find_files(file_paths, 'faa')
print len(fasta_files)

#path = '/Users/andrea/repositories/AMPHORA2/all.faa/Aggregatibacter_actinomycetemcomitans_D11S_1_uid41333/NC_013416.faa'
#cmd = ['perl ./Scripts/MarkerScanner.pl -Bacteria ' + path]
#print cmd
#subprocess.call(cmd, shell=True)

for path in fasta_files:
    cmd = ['perl ./Scripts/MarkerScanner.pl -Bacteria ' + path]
    subprocess.call(cmd, shell=True)

#echo = "echo"
#for path in fasta_files:
#	os.system(echo + " perl ./Scripts/MarkerScanner.pl -Bacteria " + path)
#	os.system(echo + "\n")