def _get_downloaded_file_path(self, pdb_code):
    """Return the path of the downloaded .ent file matching *pdb_code*.

    Scans the working tree for '.ent' files and compares each basename
    (with the leading 'pdb' prefix stripped, upper-cased) against
    *pdb_code*. Returns the first matching path, or None if no file
    matches.
    """
    handlers = FileHandlers()
    ent_paths = handlers.find_files(handlers.search_directory(), 'ent')
    for ent_path in ent_paths:
        base = handlers.get_file_name(ent_path).split('.')[0]
        if base.lstrip('pdb').upper() == pdb_code:
            return ent_path
def _build_data_dict(self, file_tag):
    """Rebuild self.data_dict from the tab-separated lines in self.data.

    Calls self._get_data(file_tag) to (re)load self.data, then maps the
    first cleaned field of each line to the second as a float.
    """
    self.data_dict = {}
    self._get_data(file_tag)
    handlers = FileHandlers()
    for record in self.data:
        cleaned = handlers.clean(record.split('\t'))
        key, value = cleaned[0], cleaned[1]
        self.data_dict[key] = float(value)
def _get_data(self, filename):
    """Return all lines of the .txt file whose name equals *filename*.

    Searches the working tree for '.txt' files and reads the first one
    whose reported file name matches exactly. Returns the list of lines,
    or None when no file matches (implicit fall-through) — callers must
    handle the None case.
    """
    file_handlers = FileHandlers()
    file_paths = file_handlers.search_directory()
    txt_files = file_handlers.find_files(file_paths, 'txt')
    for txt_file in txt_files:
        if filename == file_handlers.get_file_name(txt_file):
            # Context manager guarantees the handle is closed even if
            # readlines() raises (the original leaked on error).
            with open(txt_file) as handle:
                return handle.readlines()
def _get_file_path(self):
    """Locate the .pdb file matching self.filename and store it in self.file_path.

    Side effect: temporarily changes into ./database/pdbs/pdb to search,
    then returns to the original directory. The restore is in a finally
    block so an exception during the search cannot leave the process in
    the wrong working directory (the original did not restore on error).
    """
    os.chdir("./database/pdbs/pdb")
    try:
        file_handlers = FileHandlers()
        file_paths = file_handlers.search_directory()
        pdb_files = file_handlers.find_files(file_paths, 'pdb')
        for pdb_file in pdb_files:
            # Match on the basename with its extension stripped.
            if self.filename == file_handlers.get_file_name(pdb_file).split(
                    '.')[0]:
                self.file_path = pdb_file
    finally:
        os.chdir("../../../")
def _get_gb_record(self):
    """Find the GenBank (.gb) file matching self.genbank_id and record its path.

    Stores the matching path in self.gb_file_path. The print calls are
    debug output; the parenthesized single-argument form behaves
    identically under Python 2 and is valid Python 3 (the original
    Python-2-only `print x` statements were a porting hazard).
    """
    file_handlers = FileHandlers()
    file_paths = file_handlers.search_directory()
    gb_files = file_handlers.find_files(file_paths, 'gb')
    print(gb_files)
    print(self.genbank_id)
    for gb_file in gb_files:
        if self.genbank_id == file_handlers.get_file_name(gb_file).split(
                '.')[0]:
            self.gb_file_path = gb_file
            # NOTE(review): assumed to sit inside the match branch, as in
            # the original flow — confirm against version history.
            print(self.gb_file_path)
def _get_pdb(self):
    """Load the .pdb file matching self.filename into self.pdb as a line list.

    Searches the working tree for '.pdb' files and reads the one whose
    basename (extension stripped) equals self.filename. Leaves self.pdb
    untouched when no file matches.
    """
    file_handlers = FileHandlers()
    file_paths = file_handlers.search_directory()
    pdb_files = file_handlers.find_files(file_paths, 'pdb')
    for pdb_file in pdb_files:
        if self.filename == file_handlers.get_file_name(pdb_file).split(
                '.')[0]:
            # Context manager replaces the manual open/close pair, so the
            # handle is released even if readlines() raises.
            with open(pdb_file) as handle:
                self.pdb = handle.readlines()
def _get_data(self, file_tag):
    """Load self.data from the matching underscore-tagged .txt file.

    A file matches when its name's first underscore-delimited token
    equals self.filename AND its second token equals file_tag's second
    token with '.txt' appended. Leaves self.data untouched when nothing
    matches.
    """
    file_handlers = FileHandlers()
    file_paths = file_handlers.search_directory()
    txt_files = file_handlers.find_files(file_paths, 'txt')
    for txt_file in txt_files:
        # Hoisted: the original called get_file_name() twice per file.
        name = file_handlers.get_file_name(txt_file)
        if self.filename == name.split('_')[0]:
            if (file_tag.split('_')[1] + '.txt') == name.split('_')[1]:
                # Context manager guarantees the handle is closed even on
                # error (the original leaked if readlines() raised).
                with open(txt_file) as handle:
                    self.data = handle.readlines()
def _build_SASA_dict(self):
    """Populate self.SASA_dict[self.filename] from POPS output in self.data.

    Runs POPS, reloads self.data, and for every 9-field line records:
    position -> [aa, total SA, SASA, fractional SASA, phobic, philic].
    """
    handlers = FileHandlers()
    self.SASA_dict[self.filename] = {}
    self._run_POPS()
    self._get_data()
    for row in self.data:
        cleaned = handlers.clean(row.split('\t'))
        # Only complete 9-field records carry SASA data; skip the rest.
        if len(cleaned) != 9:
            continue
        position = cleaned[2]
        self.SASA_dict[self.filename][position] = [
            cleaned[0],  # amino acid
            cleaned[8],  # total surface area
            cleaned[5],  # SASA
            cleaned[6],  # fractional SASA
            cleaned[3],  # hydrophobic
            cleaned[4],  # hydrophilic
        ]
import subprocess

from util import FileHandlers
from Bio.Phylo.TreeConstruction import DistanceCalculator
from Bio import AlignIO

# Align every .pep file found under the working directory with MUSCLE,
# writing <name>_out.<ext> alongside each input.
file_handlers = FileHandlers()

#file_paths = file_handlers.search_directory()
#fasta_files = file_handlers.find_files(file_paths, 'faa')
#for path in fasta_files:
#    cmd = ['perl ./Scripts/MarkerScanner.pl -Bacteria ' + path]
#    subprocess.call(cmd, shell=True)

# Fixed: the original had a duplicated `file_paths = file_paths = ...`
# assignment (harmless but clearly a typo).
file_paths = file_handlers.search_directory()
pep_files = file_handlers.find_files(file_paths, 'pep')
for path in pep_files:
    file_name = file_handlers.get_file_name(path)
    name_list = file_name.split('.')
    out_file = name_list[0] + '_out.' + name_list[1]
    # Argument list with shell=False (the default) instead of a
    # concatenated shell string: immune to shell injection / word
    # splitting when file names contain spaces or metacharacters.
    cmd = ['muscle', '-in', path, '-out', out_file]
    subprocess.call(cmd)

#aln = AlignIO.read('path/to/alignnment/file', 'format (i.e. phylip)')
#calculator = DistanceCalculator('identity')  # 'identity' is the default model; usable for both DNA and protein.
#dm = calculator.get_distance(aln)
def _mkdir(self):
    """Create a results folder named after the last component of self.dir_path."""
    folder_name = self.dir_path.split('/')[-1]
    FileHandlers().make_results_folder(folder_name)