Пример #1
0
 def __init__(self, pdb_code, sequence_annotations, SurfRes=False,
              pocket=False, lpocket=False):
     """Store the PDB code and locate the optional per-structure text files.

     For every truthy flag, the ``.txt`` files reported by ``FileHandlers``
     are searched for one whose name is ``<pdb_code><suffix>``. The matching
     attribute is set to that path, or to ``''`` when no such file exists.
     Attributes belonging to falsy flags are left undefined.

     Args:
         pdb_code: Stored as ``self.filename``; prefix of the searched names.
         sequence_annotations: Stored unchanged on the instance.
         SurfRes: Look for ``<pdb_code>_SurfRes.txt`` and set
             ``self.surfres_file``.
         pocket: Look for ``<pdb_code>_pocketres.txt`` and set
             ``self.pocketres_file``.
         lpocket: Look for ``<pdb_code>_lpocket.txt`` and set
             ``self.lpocket_file``.
     """
     self.filename = pdb_code
     self.sequence_annotations = sequence_annotations
     if not (SurfRes or pocket or lpocket):
         return

     # Scan the directory once and reuse the listing for every request.
     file_handlers = FileHandlers()
     file_paths = file_handlers.search_directory()
     txt_files = file_handlers.find_files(file_paths, 'txt')

     def locate(suffix):
         # Return the first path named <pdb_code><suffix>, or '' if none.
         # Stopping at the first hit matters: resetting the result on every
         # non-matching file would discard a match unless it came last.
         wanted = self.filename + suffix
         for txt_file in txt_files:
             if file_handlers.get_file_name(txt_file) == wanted:
                 return txt_file
         return ''

     if SurfRes:
         self.surfres_file = locate('_SurfRes.txt')
     if pocket:
         self.pocketres_file = locate('_pocketres.txt')
     if lpocket:
         self.lpocket_file = locate('_lpocket.txt')
Пример #2
0
 def _get_outfile(self):
     """Record the paths of the ``.out`` result files for this run.

     A file whose stem (text before the first '.') equals
     ``<pdb_code>_mutants`` is stored in ``self.ddG_results_filepath``; one
     whose stem equals ``<psiblast>_mutants`` is stored in
     ``self.llikelihood_filepath``. Only the identifiers that are not the
     empty string are searched for. If both are empty, a message is printed
     and the process exits with status 1.
     """
     handlers = FileHandlers()
     out_files = handlers.find_files(handlers.search_directory(), 'out')
     want_ddg = self.pdb_code != ''
     want_llikelihood = self.psiblast != ''

     if not (want_ddg or want_llikelihood):
         print("You have not specified any results data to parse.")
         exit(1)

     for out_file in out_files:
         name = handlers.get_file_name(out_file)
         stem = name.split('.')[0]
         if want_ddg and stem == self.pdb_code + '_mutants':
             self.ddG_results_filepath = out_file
             # The progress message is only emitted in ddG-only mode.
             if not want_llikelihood:
                 print("Fetching data from %s ....." % name)
         elif want_llikelihood and stem == self.psiblast + '_mutants':
             self.llikelihood_filepath = out_file
Пример #3
0
	def _get_file_path(self):
		"""Locate ``<filename>_0001.pdb`` and derive the matching output names.

		Changes the working directory to ``./database/pdbs/pdb`` and then
		inspects the ``.pdb`` files reported by ``FileHandlers``. For each file
		whose stem equals ``self.filename + '_0001'`` it sets ``self.file_path``,
		``self.out_file`` (``<stem>_pops.out``) and ``self.out_file_path``
		(``self.dir_path`` joined to the output name with '/'). The last
		matching file wins; nothing is set when no file matches.
		"""
		os.chdir("./database/pdbs/pdb")
		handlers = FileHandlers()
		candidates = handlers.find_files(handlers.search_directory(), 'pdb')
		for candidate in candidates:
			stem = handlers.get_file_name(candidate).split('.')[0]
			if stem != self.filename + '_0001':
				continue
			self.file_path = candidate
			self.out_file = stem + '_pops.out'
			self.out_file_path = self.dir_path + '/' + self.out_file
Пример #4
0
	def _get_data(self, file_tag):
		"""Load the lines of the ``.txt`` file matching this structure and tag.

		File names are split on '_'. A file matches when its first piece
		equals ``self.filename`` and its second piece equals the second
		'_'-separated piece of *file_tag* followed by ``.txt``. The lines of
		the matching file are stored in ``self.data``; if several files match,
		the last one wins, and ``self.data`` is not touched when none does.

		Args:
			file_tag: Tag containing at least one '_'; only its second piece
				is used.
		"""
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		txt_files = file_handlers.find_files(file_paths, 'txt')
		for txt_file in txt_files:
			name_parts = file_handlers.get_file_name(txt_file).split('_')
			# Names without an underscore have no tag piece and cannot match.
			if len(name_parts) < 2 or name_parts[0] != self.filename:
				continue
			if name_parts[1] == file_tag.split('_')[1] + '.txt':
				# The context manager closes the file even if reading fails.
				with open(txt_file) as handle:
					self.data = handle.readlines()
Пример #5
0
 def _get_data(self, file_tag):
     """Load the lines of the ``.txt`` file matching this structure and tag.

     File names are split on '_'. A file matches when its first piece equals
     ``self.filename`` and its second piece equals the second '_'-separated
     piece of *file_tag* followed by ``.txt``. The lines of the matching file
     are stored in ``self.data``; if several files match, the last one wins,
     and ``self.data`` is not touched when none does.

     Args:
         file_tag: Tag containing at least one '_'; only its second piece is
             used.
     """
     file_handlers = FileHandlers()
     file_paths = file_handlers.search_directory()
     txt_files = file_handlers.find_files(file_paths, 'txt')
     for txt_file in txt_files:
         name_parts = file_handlers.get_file_name(txt_file).split('_')
         # Names without an underscore have no tag piece and cannot match.
         if len(name_parts) < 2 or name_parts[0] != self.filename:
             continue
         if name_parts[1] == file_tag.split('_')[1] + '.txt':
             # The context manager closes the file even if reading fails.
             with open(txt_file) as handle:
                 self.data = handle.readlines()
Пример #6
0
 def _get_file_path(self):
     """Locate ``<filename>.pdb`` and derive the matching output names.

     Changes the working directory to ``./database/pdbs/pdb`` and then
     inspects the ``.pdb`` files reported by ``FileHandlers``. For each file
     whose stem equals ``self.filename`` it sets ``self.file_path``,
     ``self.out_file`` (``<stem>_pops.out``) and ``self.out_file_path``
     (``self.dir_path`` joined to the output name with '/'). The last
     matching file wins; nothing is set when no file matches.
     """
     os.chdir("./database/pdbs/pdb")
     handlers = FileHandlers()
     candidates = handlers.find_files(handlers.search_directory(), 'pdb')
     for candidate in candidates:
         stem = handlers.get_file_name(candidate).split('.')[0]
         if stem != self.filename:
             continue
         self.file_path = candidate
         self.out_file = stem + '_pops.out'
         self.out_file_path = self.dir_path + '/' + self.out_file
Пример #7
0
 def _get_filename(self):
     """Return the stem of the first ``.pdb`` file matching this structure.

     The expected stem is ``self.filename`` when ``self.tag`` is the empty
     string, otherwise ``self.filename + self.tag``. Returns ``None`` when no
     file has that stem.
     """
     handlers = FileHandlers()
     candidates = handlers.find_files(handlers.search_directory(), "pdb")
     untagged = self.tag == ""
     for candidate in candidates:
         stem = handlers.get_file_name(candidate).split(".")[0]
         expected = self.filename if untagged else self.filename + self.tag
         if expected == stem:
             return stem
     return None
Пример #8
0
	def _get_file_path(self, ligand=False, pdb=False):
		"""Set ``self.file_path`` to the requested ``.pdb`` file, or to ''.

		With ``ligand == True`` the file whose stem equals ``self.filename`` is
		selected; otherwise, with ``pdb == True``, the file whose stem equals
		``self.filename + '_0001'``. ``self.file_path`` stays '' when neither
		flag is set or nothing matches; the last matching file wins.
		"""
		#os.chdir("./database/pdbs/pdb")
		self.file_path = ''
		handlers = FileHandlers()
		candidates = handlers.find_files(handlers.search_directory(), 'pdb')
		if ligand == True:
			with_suffix = False
		elif pdb == True:
			with_suffix = True
		else:
			return
		for candidate in candidates:
			stem = handlers.get_file_name(candidate).split('.')[0]
			expected = self.filename + '_0001' if with_suffix else self.filename
			if expected == stem:
				self.file_path = candidate
Пример #9
0
	def _get_filepath(self, data_file=False, pdb_file=False):
		"""Return the path of the requested file for this structure.

		With ``data_file == True`` the first ``.txt`` file whose stem equals
		``self.filename + '_mutant_list'`` is returned; otherwise, with
		``pdb_file == True``, the first ``.pdb`` file whose stem equals
		``self.filename + '_0001'``. When neither flag is set a reminder is
		printed. Returns ``None`` if nothing was selected or found.
		"""
		handlers = FileHandlers()
		listing = handlers.search_directory()
		if data_file == True:
			extension, suffix = 'txt', '_mutant_list'
		elif pdb_file == True:
			extension, suffix = 'pdb', '_0001'
		else:
			print("Specify file type")
			return None
		for candidate in handlers.find_files(listing, extension):
			stem = handlers.get_file_name(candidate).split('.')[0]
			if self.filename + suffix == stem:
				return candidate
		return None
class PickleFasta:
	"""Collect the sequence identifiers of every ``.faa`` file, keyed by file stem."""

	def __init__(self):
		self.file_handlers = FileHandlers()

	def _get_fasta_files(self):
		"""Return the ``.faa`` paths reported by the file handlers and print their count."""
		file_paths = self.file_handlers.search_directory()
		fasta_files = self.file_handlers.find_files(file_paths, 'faa')
		print("There are %d .faa files in this directory" % len(fasta_files))
		return fasta_files

	def pickle_organism_fasta(self):
		"""Build ``{file stem: [identifier, ...]}`` from the FASTA header lines.

		The key is the file name up to its first '.'. For every line starting
		with '>', the identifier is the text before the first space with the
		leading '>' removed. Files without header lines add no key. Despite
		its name this method only returns the dictionary; it does not pickle.
		"""
		fasta_dictionary = {}
		for fasta_file in self._get_fasta_files():
			organism = self.file_handlers.get_file_name(fasta_file).split('.')[0]
			# Stream the file; the context manager closes it even on error.
			with open(fasta_file) as handle:
				for line in handle:
					if not line.startswith('>'):
						continue
					identifier = line.split(' ')[0].lstrip('>')
					fasta_dictionary.setdefault(organism, []).append(identifier)
		return fasta_dictionary
def main():
	## THIS WORKED, DON'T ERASE
	## Save organism data to pickled dictionary
	#pickle_fasta = PickleFasta()
	#fasta_dictionary = pickle_fasta.pickle_organism_fasta()
	#print "There are %d entries in the fasta_dictionary" % len(fasta_dictionary)
	#pickle.dump(fasta_dictionary, open('organism_dictionary.pkl', 'wb'))
	
	file_handlers = FileHandlers()
	dm_files = get_dm_files()

	# Load the dictionary back from the pickle file
	print "Loading fasta_dictionary..."
	open_fasta = open('organism_dictionary.pkl', 'rb')
	fasta_dictionary = pickle.load(open_fasta)
	open_fasta.close()
	print "Length of fasta dictionary: ", len(fasta_dictionary)

	## THIS WORKED, DON'T ERASE
	## Build mapping dictionary and pickle
	dm_processing = dmDictionary()
	dm_processing.init_mapping_dictionary(fasta_dictionary)
	for path in dm_files:
		file_name = file_handlers.get_file_name(path)
		print "Opening %s..." % file_name
		dm_dictionary = dm_processing.individual_dm_dictionary(path, file_name)
		print "Length of dm_dictionary for %s is %d" % (file_name, len(dm_dictionary))
		mapping_dictionary = dm_processing.build_mapping_dictionary(fasta_dictionary, dm_dictionary)
	
	open_mapping = open('mapping_dictionary.pkl', 'wb')
	pickle.dump(mapping_dictionary, open_mapping)
	open_mapping.close()
	print mapping_dictionary
Пример #12
0
	def _get_pdb_file_path(self):
		"""Set ``self.file_path`` to the ``.pdb`` file named ``<filename>_0001``.

		The stem (text before the first '.') of every ``.pdb`` file reported by
		``FileHandlers`` is compared with ``self.filename + '_0001'``. The last
		match wins; the attribute is left untouched when nothing matches.
		"""
		handlers = FileHandlers()
		candidates = handlers.find_files(handlers.search_directory(), 'pdb')
		for candidate in candidates:
			stem = handlers.get_file_name(candidate).split('.')[0]
			if self.filename + '_0001' == stem:
				self.file_path = candidate
def main():
	"""Call ``get_dm_raxml(path, file_name, 4)`` for every ``.fasta`` file found."""
	handlers = FileHandlers()
	listing = handlers.search_directory()
	for fasta_path in handlers.find_files(listing, 'fasta'):
		fasta_name = handlers.get_file_name(fasta_path)
		get_dm_raxml(fasta_path, fasta_name, 4)
Пример #14
0
def build_SASA_dict(out_files):
	"""Parse tab-separated output files into a nested dictionary.

	Each line is split on tabs and passed through ``FileHandlers.clean``;
	only lines that yield exactly nine fields are used.

	Args:
		out_files: Iterable of file paths to parse.

	Returns:
		``{file name: {position: [aa, tot_SA, SASA, frac_SASA, phob, phil]}}``
		where the values are taken from cleaned fields 0, 8, 5, 6, 3 and 4 and
		the position from field 2. A later line with the same position
		replaces the earlier one.
	"""
	# One handler is enough; it used to be rebuilt for every file and line.
	file_handlers = FileHandlers()
	SASA_dict = {}
	for path in out_files:
		file_name = file_handlers.get_file_name(path)
		residues = SASA_dict[file_name] = {}
		# The context manager closes the file, which the bare open() never did.
		with open(path) as handle:
			for line in handle:
				cleaned = file_handlers.clean(line.split('\t'))
				if len(cleaned) != 9: #and int(cleaned[2]) >= 1:
					continue
				position = cleaned[2]
				aa = cleaned[0]
				tot_SA = cleaned[8]
				SASA = cleaned[5]
				frac_SASA = cleaned[6]
				phob = cleaned[3]
				phil = cleaned[4]
				residues[position] = [aa, tot_SA, SASA, frac_SASA, phob, phil]
	return SASA_dict
def main():
    """Call ``run_muscle(path, file_name)`` for every ``.pep`` file found."""
    handlers = FileHandlers()
    listing = handlers.search_directory()
    for pep_path in handlers.find_files(listing, "pep"):
        pep_name = handlers.get_file_name(pep_path)
        run_muscle(pep_path, pep_name)
Пример #16
0
	def _get_downloaded_file_path(self, pdb_code):
		"""Return the path of the downloaded ``.ent`` file for *pdb_code*.

		The stem of each ``.ent`` file (text before the first '.') has a
		leading ``pdb`` prefix removed and is upper-cased before being compared
		with *pdb_code*.

		Args:
			pdb_code: Code to look for, in upper case.

		Returns:
			The first matching path, or ``None`` when no file matches.
		"""
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		ent_files = file_handlers.find_files(file_paths, 'ent')
		for ent_file in ent_files:
			stem = file_handlers.get_file_name(ent_file).split('.')[0]
			# str.lstrip('pdb') removes any run of the characters p/d/b, not
			# the literal prefix, so strip the prefix explicitly instead.
			if stem.startswith('pdb'):
				stem = stem[len('pdb'):]
			if pdb_code == stem.upper():
				return ent_file
		return None
Пример #17
0
 def _get_downloaded_file_path(self, pdb_code):
     """Return the path of the downloaded ``.ent`` file for *pdb_code*.

     The stem of each ``.ent`` file (text before the first '.') has a leading
     ``pdb`` prefix removed and is upper-cased before being compared with
     *pdb_code*.

     Args:
         pdb_code: Code to look for, in upper case.

     Returns:
         The first matching path, or ``None`` when no file matches.
     """
     file_handlers = FileHandlers()
     file_paths = file_handlers.search_directory()
     ent_files = file_handlers.find_files(file_paths, 'ent')
     for ent_file in ent_files:
         stem = file_handlers.get_file_name(ent_file).split('.')[0]
         # str.lstrip('pdb') removes any run of the characters p/d/b, not the
         # literal prefix, so strip the prefix explicitly instead.
         if stem.startswith('pdb'):
             stem = stem[len('pdb'):]
         if pdb_code == stem.upper():
             return ent_file
     return None
Пример #18
0
	def _open_file(self):
		"""Read the ``.pdb`` file named after ``self.filename`` into ``self.data``.

		The stem (text before the first '.') of every ``.pdb`` file reported by
		``FileHandlers`` is compared with ``self.filename``; the lines of a
		matching file are stored in ``self.data``. The last match wins and the
		attribute is left untouched when nothing matches.
		"""
		#os.chdir("../src/database/pdbs")
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		pdb_files = file_handlers.find_files(file_paths, 'pdb')
		for pdb_file in pdb_files:
			if self.filename == file_handlers.get_file_name(pdb_file).split('.')[0]:
				# The old code wrote ``Data.close`` without parentheses, so the
				# file was never closed; the context manager closes it.
				with open(pdb_file) as handle:
					self.data = handle.readlines()
Пример #19
0
	def _get_pdb(self):
		"""Read the ``.pdb`` file named after ``self.filename`` into ``self.pdb``.

		The stem (text before the first '.') of every ``.pdb`` file reported by
		``FileHandlers`` is compared with ``self.filename``; the lines of a
		matching file are stored in ``self.pdb``. The last match wins and the
		attribute is left untouched when nothing matches.
		"""
		#os.chdir("./database/pdbs/pdb")
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		pdb_files = file_handlers.find_files(file_paths, 'pdb')
		for pdb_file in pdb_files:
			if self.filename == file_handlers.get_file_name(pdb_file).split('.')[0]:
				# The context manager closes the file even if reading fails.
				with open(pdb_file) as handle:
					self.pdb = handle.readlines()
Пример #20
0
 def _get_data(self, filename):
     """Return the lines of the ``.txt`` file whose name equals *filename*.

     Args:
         filename: Full file name (including extension) to look for among
             the ``.txt`` files reported by ``FileHandlers``.

     Returns:
         The list of lines of the matching file. If several files share the
         name, the last one found is used.

     Raises:
         IOError: No file with that name was found. This used to surface as
             an UnboundLocalError on the return statement.
     """
     file_handlers = FileHandlers()
     file_paths = file_handlers.search_directory()
     txt_files = file_handlers.find_files(file_paths, 'txt')
     matches = [txt_file for txt_file in txt_files
                if filename == file_handlers.get_file_name(txt_file)]
     if not matches:
         raise IOError("No .txt file named %r was found" % (filename,))
     # The context manager closes the file even if reading fails.
     with open(matches[-1]) as handle:
         return handle.readlines()
Пример #21
0
	def _get_data(self, filename):
		"""Return the lines of the ``.txt`` file whose name equals *filename*.

		Args:
			filename: Full file name (including extension) to look for among
				the ``.txt`` files reported by ``FileHandlers``.

		Returns:
			The list of lines of the matching file. If several files share the
			name, the last one found is used.

		Raises:
			IOError: No file with that name was found. This used to surface as
				an UnboundLocalError on the return statement.
		"""
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		txt_files = file_handlers.find_files(file_paths, 'txt')
		matches = [txt_file for txt_file in txt_files
				if filename == file_handlers.get_file_name(txt_file)]
		if not matches:
			raise IOError("No .txt file named %r was found" % (filename,))
		# The context manager closes the file even if reading fails.
		with open(matches[-1]) as handle:
			return handle.readlines()
Пример #22
0
 def _get_gb_record(self):
     """Set ``self.gb_file_path`` to the ``.gb`` file named after the GenBank id.

     Prints the list of ``.gb`` files and ``self.genbank_id``, then compares
     the stem (text before the first '.') of each file with the id. Every
     match is stored and printed; the last match wins.
     """
     handlers = FileHandlers()
     gb_files = handlers.find_files(handlers.search_directory(), 'gb')
     print(gb_files)
     print(self.genbank_id)
     for candidate in gb_files:
         stem = handlers.get_file_name(candidate).split('.')[0]
         if self.genbank_id != stem:
             continue
         self.gb_file_path = candidate
         print(self.gb_file_path)
Пример #23
0
 def _get_pdb(self):
     """Read the ``.pdb`` file named after ``self.filename`` into ``self.pdb``.

     The stem (text before the first '.') of every ``.pdb`` file reported by
     ``FileHandlers`` is compared with ``self.filename``; the lines of a
     matching file are stored in ``self.pdb``. The last match wins and the
     attribute is left untouched when nothing matches.
     """
     #os.chdir("./database/pdbs/pdb")
     file_handlers = FileHandlers()
     file_paths = file_handlers.search_directory()
     pdb_files = file_handlers.find_files(file_paths, 'pdb')
     for pdb_file in pdb_files:
         if self.filename == file_handlers.get_file_name(pdb_file).split(
                 '.')[0]:
             # The context manager closes the file even if reading fails.
             with open(pdb_file) as handle:
                 self.pdb = handle.readlines()
Пример #24
0
	def _get_pdb(self, rosetta_min=False, refined_pocket=False):
		file_handlers = FileHandlers()
		file_paths = file_handlers.search_directory()
		pdb_files = file_handlers.find_files(file_paths, 'pdb')
		for pdb_file in pdb_files:
			if rosetta_min == True and refined_pocket == True:
				print "Invalid input"
			elif rosetta_min == True:
				if (self.filename + '_0001') == file_handlers.get_file_name(pdb_file).split('.')[0]:
					print "Found ", (self.filename + '_0001.pdb')
					filepath = pdb_file
			elif refined_pocket == True:
				if ('pocket0') == file_handlers.get_file_name(pdb_file).split('.')[0]:
					print "Found pocket0.pdb"
					filepath = pdb_file
			else:
				if self.filename == file_handlers.get_file_name(pdb_file).split('.')[0]:
					print "Found ", (self.filename + '.pdb')
					filepath = pdb_file
		return filepath
def find_indices(path):
	"""Read a file and measure the spacing between its first two '>' lines.

	Args:
		path: Path of the file to read.

	Returns:
		A tuple ``(file_name, file_list, interval)``: the name reported by
		``FileHandlers.get_file_name``, the list of lines, and the number of
		lines between the first and the second line containing '>'.

	Raises:
		IndexError: The file has fewer than two lines containing '>'.
	"""
	file_handlers = FileHandlers()
	#for path in fasta_files:
	# NOTE(review): the 'U' flag is Python 2 universal-newline mode; it must be
	# dropped if this file is ever run on Python 3.11 or later.
	with open(path, 'rU') as open_file:
		file_list = open_file.readlines()
	# enumerate() gives the true position of each header; list.index() returned
	# the first occurrence, which is wrong when two header lines are identical.
	indices = [number for number, line in enumerate(file_list) if '>' in line]
	interval = indices[1] - indices[0]
	file_name = file_handlers.get_file_name(path)
	return file_name, file_list, interval
def build_distance_dict(dm_files, inverse_mapping_dict, organism_pairs):
	"""Append pairwise distances to the organism pairs they belong to.

	Every line of every file is split on single spaces; the first two fields
	are the two identifiers and the last field is the distance (kept as a
	string). When both identifiers are keys of *inverse_mapping_dict*, the
	distance is appended to every entry of *organism_pairs* whose key
	contains both mapped values. Otherwise one warning is printed for the
	line. Lines with fewer than two fields are skipped.

	Args:
		dm_files: Iterable of file paths to parse.
		inverse_mapping_dict: Maps an identifier to the value looked up in the
			pair keys.
		organism_pairs: Maps a pair key to a list of distances; mutated in
			place.

	Returns:
		The same *organism_pairs* object.
	"""
	file_handlers = FileHandlers()
	for dm_file in dm_files:
		file_name = file_handlers.get_file_name(dm_file)
		print("Opening %s...." % file_name)
		with open(dm_file) as handle:
			for line in handle:
				fields = line.strip().split(' ')
				if len(fields) < 2:
					# Blank or malformed line: no identifier pair to process.
					continue
				enz1 = fields[0]
				enz2 = fields[1]
				distance = fields[-1]
				# This test does not depend on the pair key, so it is done once
				# per line; inside the key loop it repeated the warning for
				# every organism pair.
				if enz1 not in inverse_mapping_dict or enz2 not in inverse_mapping_dict:
					print("Could not find %s and %s in mapping_dict" % (enz1, enz2))
					continue
				organism1 = inverse_mapping_dict[enz1]
				organism2 = inverse_mapping_dict[enz2]
				for key in organism_pairs:
					if organism1 in key and organism2 in key:
						organism_pairs[key].append(distance)
		print("Finished parsing %s...." % file_name)
	return organism_pairs
Пример #27
0
import subprocess
from util import FileHandlers
from Bio.Phylo.TreeConstruction import DistanceCalculator
from Bio import AlignIO

# Script body: align every .pep file with MUSCLE, then write an identity
# distance matrix for every .fasta alignment.

# Directory that receives the generated distance-matrix text files.
DM_OUTPUT_DIR = "/Users/andrea/repositories/AMPHORA2/muscle_alignments/"

file_handlers = FileHandlers()
#file_paths = file_handlers.search_directory()
#fasta_files = file_handlers.find_files(file_paths, 'faa')

#for path in fasta_files:
#	cmd = ['perl ./Scripts/MarkerScanner.pl -Bacteria ' + path]
#	subprocess.call(cmd, shell=True)

file_paths = file_handlers.search_directory()
pep_files = file_handlers.find_files(file_paths, 'pep')

for path in pep_files:
    file_name = file_handlers.get_file_name(path)
    name_list = file_name.split('.')
    out_file = name_list[0] + '_out.' + name_list[1]
    # Pass the arguments as a list without a shell so that paths containing
    # spaces or shell metacharacters are handed to muscle literally.
    subprocess.call(['muscle', '-in', path, '-out', out_file])

#aln = AlignIO.read('path/to/alignnment/file', 'format (i.e. phylip)')
#calculator = DistanceCalculator('identity') # identity is the name of the model(scoring matrix) to calculate the distance. The identity model is the default one and can be used both for DNA and protein sequence.
#dm = calculator.get_distance(aln)
# 	sequences = open_file.readlines()
# 	i = 0
# 	while i < len(sequences):
# 		if sequences[i] in temp_dict:
# 			i += 14
# 		else:
# 			temp_dict[sequences[i]] = sequences[i + 1 : i + 13]
# 			new_file.write(str(i) + "\n")
# 			for item in temp_dict[sequences[i]]:
# 				new_file.write(item)
# 			i += 14


fasta_files = file_handlers.find_files(file_paths, "fasta")
# The "identity" model is the default scoring model and can be used for both
# DNA and protein sequences.
calculator = DistanceCalculator("identity")
for path in fasta_files:
    file_name = file_handlers.get_file_name(path)
    print(file_name)
    name_list = file_name.split(".")
    # derep_out_file = ''.join(name_list[0] + '_uniques.fasta')
    dm_out_file = name_list[0] + "_dm.txt"
    # cmd = ['usearch -derep_fulllength ' + path + ' -fastaout ' + derep_out_file]
    # subprocess.call(cmd, shell=True)

    # Compute the matrix before opening the output so that a failed read does
    # not leave an empty result file behind.
    aln = AlignIO.read(path, "fasta")
    dm = calculator.get_distance(aln)
    with open(DM_OUTPUT_DIR + dm_out_file, "w") as new_file:
        # write() needs text; the matrix object has to be converted first.
        new_file.write(str(dm))