def __init__(self, pdb_code, sequence_annotations, SurfRes=False, pocket=False, lpocket=False):
	"""Record the pdb code and annotations, and optionally locate the
	per-structure result files produced by earlier pipeline steps.

	pdb_code             -- structure identifier; also the file-name stem
	sequence_annotations -- annotation data kept as-is on the instance
	SurfRes              -- locate '<pdb_code>_SurfRes.txt'
	pocket               -- locate '<pdb_code>_pocketres.txt'
	lpocket              -- locate '<pdb_code>_lpocket.txt'

	Bug fixed: the original loops reset the found path to '' on every
	NON-matching file, so a match found early in the scan was clobbered
	by any later file. Each attribute is now '' only when no file matches.
	"""
	self.filename = pdb_code
	self.sequence_annotations = sequence_annotations
	if SurfRes:
		self.surfres_file = self._locate_txt_file(self.filename + '_SurfRes.txt')
	if pocket:
		self.pocketres_file = self._locate_txt_file(self.filename + '_pocketres.txt')
	if lpocket:
		self.lpocket_file = self._locate_txt_file(self.filename + '_lpocket.txt')

def _locate_txt_file(self, target_name):
	"""Return the path of the .txt file named target_name, or '' if absent."""
	file_handlers = FileHandlers()
	file_paths = file_handlers.search_directory()
	txt_files = file_handlers.find_files(file_paths, 'txt')
	for txt_file in txt_files:
		if target_name == file_handlers.get_file_name(txt_file):
			return txt_file
	return ''
def _get_outfile(self):
	"""Locate the mutant-results .out file(s) to parse, based on which of
	self.pdb_code / self.psiblast is set.

	Sets self.ddG_results_filepath for '<pdb_code>_mutants.*' files and
	self.llikelihood_filepath for '<psiblast>_mutants.*' files; exits the
	program when neither identifier is provided.
	"""
	file_handlers = FileHandlers()
	file_paths = file_handlers.search_directory()
	out_files = file_handlers.find_files(file_paths, 'out')
	if (self.pdb_code != '' and self.psiblast != ''):
		# Both identifiers given: pick up both result files in one scan.
		for out_file in out_files:
			if (self.pdb_code + '_mutants') == file_handlers.get_file_name(out_file).split('.')[0]:
				self.ddG_results_filepath = out_file
			elif (self.psiblast + '_mutants') == file_handlers.get_file_name(out_file).split('.')[0]:
				self.llikelihood_filepath = out_file
	elif (self.pdb_code != '' and self.psiblast == ''):
		# Only the pdb code: ddG results only.
		for out_file in out_files:
			if (self.pdb_code + '_mutants') == file_handlers.get_file_name(out_file).split('.')[0]:
				self.ddG_results_filepath = out_file
				print "Fetching data from %s ....." % file_handlers.get_file_name(out_file)
	elif (self.pdb_code == '' and self.psiblast != ''):
		# Only the psiblast id: log-likelihood results only.
		for out_file in out_files:
			if (self.psiblast + '_mutants') == file_handlers.get_file_name(out_file).split('.')[0]:
				self.llikelihood_filepath = out_file
	else:
		# Neither identifier set: nothing to parse, bail out.
		print "You have not specified any results data to parse."
		exit(1)
def main():
	"""Run get_dm_raxml (with 4 threads/processes) on every .fasta file
	found below the current directory."""
	handlers = FileHandlers()
	for fasta_path in handlers.find_files(handlers.search_directory(), 'fasta'):
		get_dm_raxml(fasta_path, handlers.get_file_name(fasta_path), 4)
def _get_downloaded_file_path(self, pdb_code):
	"""Return the path to the downloaded '.ent' file for pdb_code, or None.

	Downloaded files are named like 'pdb<code>.ent'; the stem is stripped
	of its leading 'pdb' characters and upper-cased before comparison.
	"""
	handlers = FileHandlers()
	ent_files = handlers.find_files(handlers.search_directory(), 'ent')
	for candidate in ent_files:
		stem = handlers.get_file_name(candidate).split('.')[0]
		if stem.lstrip('pdb').upper() == pdb_code:
			return candidate
class PickleFasta: def __init__(self): self.file_handlers = FileHandlers() def _get_fasta_files(self): file_paths = self.file_handlers.search_directory() fasta_files = self.file_handlers.find_files(file_paths, 'faa') print "There are %d .faa files in this directory" % len(fasta_files) return fasta_files def pickle_organism_fasta(self): fasta_files = self._get_fasta_files() fasta_dictionary = {} for fasta_file in fasta_files: file_name = self.file_handlers.get_file_name(fasta_file) name_list = file_name.split('.') Data = open(fasta_file) D = Data.readlines() Data.close() for d in D: if d.startswith('>'): d_list = d.split(' ') if name_list[0] in fasta_dictionary: fasta_dictionary[name_list[0]].append(d_list[0].lstrip('>')) else: fasta_dictionary[name_list[0]] = [d_list[0].lstrip('>')] else: pass return fasta_dictionary
def main():
	"""For every .fasta file below the current directory, compute its
	naming indices and rewrite it with new sequence names."""
	handlers = FileHandlers()
	fasta_paths = handlers.find_files(handlers.search_directory(), 'fasta')
	for fasta_path in fasta_paths:
		name, entries, interval = find_indices(fasta_path)
		write_fasta_new_names(fasta_path, name, entries, interval)
def _get_pdb_file_path(self):
	"""Locate '<filename>_0001.pdb' (the '_0001' suffix matches the
	convention used elsewhere in this project for minimized models) and
	store its path in self.file_path."""
	handlers = FileHandlers()
	target = self.filename + '_0001'
	for pdb_path in handlers.find_files(handlers.search_directory(), 'pdb'):
		if handlers.get_file_name(pdb_path).split('.')[0] == target:
			self.file_path = pdb_path
def main():
	"""Run MUSCLE on every .pep file found below the current directory."""
	handlers = FileHandlers()
	for pep_path in handlers.find_files(handlers.search_directory(), "pep"):
		run_muscle(pep_path, handlers.get_file_name(pep_path))
def _get_downloaded_file_path(self, pdb_code):
	"""Return the '.ent' file downloaded for pdb_code, or None when absent.

	Compares pdb_code against each file stem with its leading 'pdb'
	characters removed and the remainder upper-cased.
	"""
	file_handlers = FileHandlers()
	paths = file_handlers.search_directory()
	for ent_file in file_handlers.find_files(paths, 'ent'):
		base = file_handlers.get_file_name(ent_file).split('.')[0]
		code = base.lstrip('pdb').upper()
		if code == pdb_code:
			return ent_file
def _open_file(self):
	"""Read '<filename>.pdb' into self.data as a list of lines.

	Bug fixed: the original ended with 'Data.close' (no parentheses), so
	the method was referenced but never called and the handle leaked; a
	with-block now guarantees the file is closed.
	"""
	#os.chdir("../src/database/pdbs")
	file_handlers = FileHandlers()
	file_paths = file_handlers.search_directory()
	pdb_files = file_handlers.find_files(file_paths, 'pdb')
	for pdb_file in pdb_files:
		if self.filename == file_handlers.get_file_name(pdb_file).split('.')[0]:
			with open(pdb_file) as handle:
				self.data = handle.readlines()
def _get_file_path(self):
	"""Find '<filename>.pdb' under ./database/pdbs/pdb, store its path in
	self.file_path, then restore the working directory."""
	os.chdir("./database/pdbs/pdb")
	handlers = FileHandlers()
	candidates = handlers.find_files(handlers.search_directory(), 'pdb')
	for candidate in candidates:
		if handlers.get_file_name(candidate).split('.')[0] == self.filename:
			self.file_path = candidate
	os.chdir("../../../")
def _get_data(self, filename):
	"""Return the lines of the .txt file whose name equals filename, or
	None when no such file exists.

	Fixed: the file is now opened in a with-block so the handle is closed
	even if readlines() raises (the original closed only on success).
	"""
	file_handlers = FileHandlers()
	file_paths = file_handlers.search_directory()
	txt_files = file_handlers.find_files(file_paths, 'txt')
	for txt_file in txt_files:
		if filename == file_handlers.get_file_name(txt_file):
			with open(txt_file) as handle:
				return handle.readlines()
def _get_pdb(self):
	"""Read '<filename>.pdb' into self.pdb as a list of lines.

	Fixed: with-block replaces open/readlines/close so the handle is
	closed even when reading raises.
	"""
	#os.chdir("./database/pdbs/pdb")
	file_handlers = FileHandlers()
	file_paths = file_handlers.search_directory()
	pdb_files = file_handlers.find_files(file_paths, 'pdb')
	for pdb_file in pdb_files:
		if self.filename == file_handlers.get_file_name(pdb_file).split('.')[0]:
			with open(pdb_file) as handle:
				self.pdb = handle.readlines()
def _get_data(self, file_tag):
	"""Load '<filename>_<tag>.txt' into self.data.

	file_tag is expected to contain an underscore; the token after the
	first '_' selects which '<filename>_<suffix>.txt' file to read
	(TODO confirm expected file_tag format against callers).

	Fixed: with-block replaces open/readlines/close so the handle is
	closed even when reading raises.
	"""
	file_handlers = FileHandlers()
	file_paths = file_handlers.search_directory()
	txt_files = file_handlers.find_files(file_paths, 'txt')
	for txt_file in txt_files:
		txt_name = file_handlers.get_file_name(txt_file)
		if self.filename == txt_name.split('_')[0]:
			if (file_tag.split('_')[1] + '.txt') == txt_name.split('_')[1]:
				with open(txt_file) as handle:
					self.data = handle.readlines()
def _get_output(self):
	"""Read and delete every .out file; return the lines of the last one
	read.

	NOTE(review): when several .out files exist, all are deleted but only
	the LAST file's lines are returned -- behavior preserved from the
	original; confirm callers expect at most one .out file.

	Fixed: returns [] instead of raising NameError when no .out file
	exists; with-block closes each handle before the file is removed.
	"""
	file_handlers = FileHandlers()
	file_paths = file_handlers.search_directory()
	out_files = file_handlers.find_files(file_paths, 'out')
	data = []
	for out_file in out_files:
		with open(out_file) as handle:
			data = handle.readlines()
		os.remove(out_file)
	return data
def _get_file_path(self):
	"""Locate '<filename>_0001.pdb' under ./database/pdbs/pdb and record
	its path plus the name and full path of the matching '_pops.out' file.

	NOTE: the working directory is changed and not restored here --
	preserved from the original.
	"""
	os.chdir("./database/pdbs/pdb")
	handlers = FileHandlers()
	target = self.filename + '_0001'
	for pdb_path in handlers.find_files(handlers.search_directory(), 'pdb'):
		stem = handlers.get_file_name(pdb_path).split('.')[0]
		if stem == target:
			self.file_path = pdb_path
			self.out_file = stem + '_pops.out'
			self.out_file_path = self.dir_path + '/' + self.out_file
def _get_gb_record(self): file_handlers = FileHandlers() file_paths = file_handlers.search_directory() gb_files = file_handlers.find_files(file_paths, 'gb') print gb_files print self.genbank_id for gb_file in gb_files: if self.genbank_id == file_handlers.get_file_name(gb_file).split( '.')[0]: self.gb_file_path = gb_file print self.gb_file_path
def _get_pdb(self):
	"""Read '<filename>.pdb' into self.pdb as a list of lines."""
	#os.chdir("./database/pdbs/pdb")
	handlers = FileHandlers()
	all_paths = handlers.search_directory()
	for pdb_path in handlers.find_files(all_paths, 'pdb'):
		if handlers.get_file_name(pdb_path).split('.')[0] == self.filename:
			handle = open(pdb_path)
			self.pdb = handle.readlines()
			handle.close()
def _get_filename(self):
	"""Return the stem of the .pdb file named '<filename><tag>', or None.

	When self.tag is "" the target is simply self.filename, so the two
	branches of the original collapse into a single loop.
	"""
	handlers = FileHandlers()
	pdb_paths = handlers.find_files(handlers.search_directory(), "pdb")
	target = self.filename + self.tag
	for pdb_path in pdb_paths:
		stem = handlers.get_file_name(pdb_path).split(".")[0]
		if stem == target:
			return stem
def _get_data(self, file_tag):
	"""Populate self.data with the lines of '<filename>_<suffix>.txt',
	where <suffix> is the token after the first '_' in file_tag."""
	handlers = FileHandlers()
	txt_paths = handlers.find_files(handlers.search_directory(), 'txt')
	wanted_suffix = file_tag.split('_')[1] + '.txt'
	for txt_path in txt_paths:
		parts = handlers.get_file_name(txt_path).split('_')
		if parts[0] == self.filename and parts[1] == wanted_suffix:
			stream = open(txt_path)
			self.data = stream.readlines()
			stream.close()
def _get_file_path(self, ligand=False, pdb=False):
	"""Store in self.file_path the path of this structure's .pdb file.

	ligand -- match '<filename>.pdb' (the unminimized/ligand structure)
	pdb    -- match '<filename>_0001.pdb' ('_0001' naming convention used
	          elsewhere in this project for minimized models)

	self.file_path is left as '' when neither flag is set or no file
	matches. Fixed: '== True' comparisons replaced with plain truthiness,
	and the directory scan is skipped entirely when neither flag is set.
	"""
	#os.chdir("./database/pdbs/pdb")
	self.file_path = ''
	if ligand:
		target = self.filename
	elif pdb:
		target = self.filename + '_0001'
	else:
		return
	file_handlers = FileHandlers()
	file_paths = file_handlers.search_directory()
	for pdb_file in file_handlers.find_files(file_paths, 'pdb'):
		if target == file_handlers.get_file_name(pdb_file).split('.')[0]:
			self.file_path = pdb_file
def _get_filepath(self, data_file=False, pdb_file=False):
	"""Return the path of this structure's mutant-list .txt file
	(data_file=True) or its '_0001' .pdb file (pdb_file=True).

	Returns None when the requested file is not found; prints a message
	and returns None when neither flag is set.
	"""
	file_handlers = FileHandlers()
	file_paths = file_handlers.search_directory()
	if data_file == True:
		# '<filename>_mutant_list.txt' holds this structure's mutation list.
		files = file_handlers.find_files(file_paths, 'txt')
		for path in files:
			if (self.filename + '_mutant_list') == file_handlers.get_file_name(path).split('.')[0]:
				return path
	elif pdb_file == True:
		# '<filename>_0001.pdb' -- the '_0001' suffix is the naming
		# convention used elsewhere in this project for minimized models.
		files = file_handlers.find_files(file_paths, 'pdb')
		for path in files:
			if (self.filename + '_0001') == file_handlers.get_file_name(path).split('.')[0]:
				return path
	else:
		print "Specify file type"
def _get_pdb(self, rosetta_min=False, refined_pocket=False): file_handlers = FileHandlers() file_paths = file_handlers.search_directory() pdb_files = file_handlers.find_files(file_paths, 'pdb') for pdb_file in pdb_files: if rosetta_min == True and refined_pocket == True: print "Invalid input" elif rosetta_min == True: if (self.filename + '_0001') == file_handlers.get_file_name(pdb_file).split('.')[0]: print "Found ", (self.filename + '_0001.pdb') filepath = pdb_file elif refined_pocket == True: if ('pocket0') == file_handlers.get_file_name(pdb_file).split('.')[0]: print "Found pocket0.pdb" filepath = pdb_file else: if self.filename == file_handlers.get_file_name(pdb_file).split('.')[0]: print "Found ", (self.filename + '.pdb') filepath = pdb_file return filepath
def LoadFiles(extention):
	"""Get the paths to files with the given extension in the current directory

	Useful when you have a set of custom files that need to be loaded and
	parsed prior to some calculation or data analysis

	Parameters
	----------
	extention : str
		File extension to match, without the leading dot (e.g. 'txt')

	Returns
	-------
	list
		List of strings corresponding to the path/to/file for each
		matching file found in the current directory

	Examples
	--------
	>>> all_files = LoadFiles('txt')
	"""
	file_handlers = FileHandlers()
	file_paths = file_handlers.search_directory()
	files = file_handlers.find_files(file_paths, extention)
	return files
def get_dm_files():
	"""Return the paths of all .dm files found below the current directory."""
	handlers = FileHandlers()
	return handlers.find_files(handlers.search_directory(), 'dm')
import subprocess

from util import FileHandlers
from Bio.Phylo.TreeConstruction import DistanceCalculator
from Bio import AlignIO

# Module-level handler: run_muscle reads this global.
file_handlers = FileHandlers()
file_paths = file_handlers.search_directory()


def run_muscle(path):
	"""Align the sequences in *path* with MUSCLE, writing the result to
	'<name>_out.<ext>' next to the current working directory.

	Fixed: the original wrapped an already-concatenated string in
	''.join(...) (a no-op on a str); plain concatenation states the
	intent.
	"""
	file_name = file_handlers.get_file_name(path)
	name_list = file_name.split(".")
	out_file = name_list[0] + "_out." + name_list[1]
	# NOTE(review): shell=True with a command string built from `path` is a
	# shell-injection risk if paths are untrusted; prefer
	# subprocess.call(['muscle', '-in', path, '-out', out_file]).
	cmd = ["muscle -in " + path + " -out " + out_file]
	subprocess.call(cmd, shell=True)
import subprocess

from util import FileHandlers
from Bio.Phylo.TreeConstruction import DistanceCalculator
from Bio import AlignIO

file_handlers = FileHandlers()

#file_paths = file_handlers.search_directory()
#fasta_files = file_handlers.find_files(file_paths, 'faa')
#for path in fasta_files:
#	cmd = ['perl ./Scripts/MarkerScanner.pl -Bacteria ' + path]
#	subprocess.call(cmd, shell=True)

# Bug fixed: the original read 'file_paths = file_paths = ...', a
# duplicated assignment target.
file_paths = file_handlers.search_directory()
pep_files = file_handlers.find_files(file_paths, 'pep')

# Align each .pep file with MUSCLE, writing '<name>_out.<ext>'.
# NOTE(review): shell=True with a string built from `path` is a
# shell-injection risk if paths are untrusted.
for path in pep_files:
	file_name = file_handlers.get_file_name(path)
	name_list = file_name.split('.')
	out_file = ''.join([name_list[0] + '_out.' + name_list[1]])
	cmd = ['muscle -in ' + path + ' -out ' + out_file]
	subprocess.call(cmd, shell=True)

#aln = AlignIO.read('path/to/alignnment/file', 'format (i.e. phylip)')
#calculator = DistanceCalculator('identity')  # 'identity' model works for both DNA and protein.
#dm = calculator.get_distance(aln)
def load_out_files():
	"""Return the paths of all .out files found below the current directory."""
	handlers = FileHandlers()
	return handlers.find_files(handlers.search_directory(), 'out')