def __init__(self, folderName):
    """Record the working paths for ``folderName`` and request their creation.

    The base path is ``FLAGS.workplace`` joined with ``folderName``; the
    ``temp`` and ``dock`` subfolder paths are derived from it. The thread
    and process counts are copied from ``FLAGS``.

    Args:
        folderName: name of the subfolder of ``FLAGS.workplace`` to use.
    """
    root = os.path.join(FLAGS.workplace, folderName)
    self.basePath = root
    self.tempFolderPath = os.path.join(root, 'temp')
    self.dockingFolderPath = os.path.join(root, 'dock')

    self.thread_num = FLAGS.thread_num
    self.process_num = FLAGS.process_num

    # try_create_chain_folder is defined elsewhere; presumably it creates
    # the folder and any missing parents -- confirm against the helper.
    for folder in (self.tempFolderPath, self.dockingFolderPath):
        try_create_chain_folder(folder)
def __init__(self):
    """Run ``self.parse()`` and derive the paths used under ``blast``.

    A fresh temporary directory is obtained from ``tempfile.mkdtemp()``.
    All other paths hang off ``<self.workplace>/blast``: the ``forms``
    folder, the ``merged_forms`` folder and the ``err.log`` file.
    ``self.workplace`` is not assigned here, so it is presumably set by
    ``parse()`` -- confirm.
    """
    self.parse()

    self.tempFolderPath = tempfile.mkdtemp()

    blast_root = os.path.join(self.workplace, 'blast')
    self.basePath = blast_root
    self.formsPath = os.path.join(blast_root, 'forms')
    self.err_log_file = os.path.join(blast_root, 'err.log')
    self.mergedPath = os.path.join(blast_root, 'merged_forms')

    # Only the temp and forms folders are requested here; the merged
    # folder path is recorded but not created in this method.
    for folder in (self.tempFolderPath, self.formsPath):
        try_create_chain_folder(folder)
def downloads(self, item):
    """Fetch one PDB entry and split it into a receptor file and ligand files.

    If ``<item>.pdb`` is already present in ``FLAGS.rowdata_folder`` the
    entry is skipped entirely (no download and no splitting). Otherwise the
    file is fetched with ``wget``, parsed with ProDy, and:

    * every selected hetero residue with more than 10 atoms is written to
      ``FLAGS.splited_ligand_folder/<pdb>/<pdb>_<resindex>_ligand.pdb``;
    * if at least one such residue was written, the ``protein or nucleic``
      selection is written to ``FLAGS.splited_receptor_folder/<pdb>.pdb``.

    ``<pdb>`` is ``item`` lower-cased. Failures are reported through
    ``self.error_log`` rather than raised.

    Args:
        item: PDB identifier. Used as-is for the raw file name and
            lower-cased for the output names.

    Returns:
        None in every case.
    """
    # Local import: only this method needs it and the file's import block
    # is not visible from here.
    import subprocess

    download_address = self.get_address(item)
    raw_pdb_path = os.path.join(FLAGS.rowdata_folder, item + '.pdb')

    if os.path.exists(raw_pdb_path):
        print("%s  exists" % item)
        return None

    print("download  %s" % item)
    # An argument list (instead of a shell string) keeps folders with
    # spaces and URLs containing '&' or '?' intact.
    try:
        subprocess.call(['wget', '-P', FLAGS.rowdata_folder, download_address])
    except OSError as err:
        self.error_log('can not download {}: {}.\n'.format(item, err))
        return None

    pdbname = item.lower()
    ligand_folder = os.path.join(FLAGS.splited_ligand_folder, pdbname)
    try_create_chain_folder(ligand_folder)

    try:
        parsed = prody.parsePDB(raw_pdb_path)
    except Exception:
        self.error_log('can not parse {}.\n'.format(item))
        return None

    # The nucleotide names are listed explicitly in addition to the generic
    # hetero selection. (The original spelled one keyword "sesname", which
    # is not a valid selection keyword.)
    hetero = parsed.select(
        '(hetero and not water) or resname ATP or resname ADP '
        'or resname AMP or resname GTP or resname GDP or resname GMP')
    receptor = parsed.select('protein or nucleic')

    if receptor is None:
        self.error_log("{} doesn't have receptor.\n".format(item))
        return None
    if hetero is None:
        self.error_log("{} doesn't have ligand.\n".format(item))
        return None

    ligand_flags = False
    for each in prody.HierView(hetero).iterResidues():
        # Residues with 10 atoms or fewer are not kept as ligands.
        if each.numAtoms() <= 10:
            continue
        ligand_flags = True
        ResId = each.getResindex()
        ligand_path = os.path.join(
            FLAGS.splited_ligand_folder, pdbname,
            "{}_{}_ligand.pdb".format(pdbname, ResId))
        try_create_chain_parent_folder(ligand_path)
        prody.writePDB(ligand_path, each)

    if ligand_flags:
        receptor_path = os.path.join(FLAGS.splited_receptor_folder,
                                     pdbname + '.pdb')
        # Same treatment as the ligand files: make sure the destination
        # folder exists before writing.
        try_create_chain_parent_folder(receptor_path)
        prody.writePDB(receptor_path, receptor)
    else:
        self.error_log(
            "{} doesn't convert, not ligand have more than 10 atoms.\n".format(item))
def __init__(self):
    """Run ``self.parse()`` and request creation of the forms folder.

    ``self.formsPath`` is not assigned in this method, so it is presumably
    set by ``parse()`` or at class level -- confirm.
    """
    self.parse()
    forms_folder = self.formsPath
    try_create_chain_folder(forms_folder)
def convert_database_to_av4(database_path, positives_folder=None,
                            decoys_folder=None, receptors_folder=None):
    """Convert a database of PDB receptors and ligands to binary ``.av4`` files.

    Expected layout under ``database_path``::

        actives/<receptor>/<ligand_name>/<one PDB file per pose>
        receptors/<receptor>.pdb

    For every ``<receptor>/<ligand_name>`` pair two files are written to
    ``<database_path>_av4/<receptor>/``: ``<receptor>.av4`` and
    ``<ligand_name>.av4``. When a ligand folder holds several PDB files
    they are stored as frames of one multi-frame ligand, and every frame is
    labelled 1. Pairs whose ligand ``.av4`` already exists are skipped, and
    empty ligand folders are appended to ``<database_path>/empty.txt``.

    An ``.av4`` record is the concatenation of: the number of labels
    (one int32), the labels (int32), the atom tags (int32) and the
    coordinates (float32).

    When ``FLAGS.orchestra_arrayjob`` is set, only the pairs whose running
    index matches ``FLAGS.orchestra_jobid`` modulo
    ``FLAGS.orchestra_jobsize`` are processed.

    Args:
        database_path: root folder of the database.
        positives_folder: unused; kept for interface compatibility.
        decoys_folder: unused; kept for interface compatibility.
        receptors_folder: unused; kept for interface compatibility.
    """
    # make a directory where the av4 form of the output will be written
    output_path = str(database_path + '_av4')
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    def save_av4(filepath, labels, elements, multiframe_coords):
        """Validate one record and write it to ``filepath + '.av4'``."""
        labels = np.asarray(labels, dtype=np.int32)
        elements = np.asarray(elements, dtype=np.int32)
        multiframe_coords = np.asarray(multiframe_coords, dtype=np.float32)

        if len(multiframe_coords[:, 0]) != len(elements):
            raise Exception(
                'Number of atom elements is not equal to the number of coordinates'
            )

        if multiframe_coords.ndim == 2:
            # A single frame must carry exactly one label.
            if len(labels) != 1:
                raise Exception(
                    'Number labels is not equal to the number of coordinate frames'
                )
        elif len(multiframe_coords[0, 0, :]) != len(labels):
            # Frames are stacked along the third axis: one label per frame.
            raise Exception(
                'Number labels is not equal to the number of coordinate frames'
            )

        number_of_examples = np.array([len(labels)], dtype=np.int32)
        av4_record = b''.join((
            number_of_examples.tobytes(),
            labels.tobytes(),
            elements.tobytes(),
            multiframe_coords.tobytes(),
        ))
        # The record is raw bytes, so the file must be opened in binary mode.
        with open(filepath + ".av4", 'wb') as f:
            f.write(av4_record)

    def atom_to_number(atomname):
        """Map an element name to its tag via ``atom_dictionary.ATM``."""
        return atom_dictionary.ATM[atomname.lower()]

    count = 0
    database_ligand_path = os.path.join(database_path, 'actives')
    database_receptor_path = os.path.join(database_path, 'receptors')

    for receptor in os.listdir(database_ligand_path):
        for ligand_name in os.listdir(
                os.path.join(database_ligand_path, receptor)):
            count += 1

            # Skip ligands converted by an earlier run.
            destFile = os.path.join(output_path, receptor,
                                    ligand_name + ".av4")
            if os.path.exists(destFile):
                continue

            # In array-job mode each job only takes its share of the pairs.
            if (FLAGS.orchestra_arrayjob
                    and FLAGS.orchestra_jobid % FLAGS.orchestra_jobsize
                    != count % FLAGS.orchestra_jobsize):
                continue

            ligand_folder = os.path.join(database_ligand_path, receptor,
                                         ligand_name)
            splited_ligands = os.listdir(ligand_folder)
            if not splited_ligands:
                with open(os.path.join(database_path, 'empty.txt'),
                          'a') as fout:
                    fout.write(ligand_folder + '\n')
                continue

            path_to_receptor = os.path.join(database_receptor_path,
                                            receptor + '.pdb')
            path_to_first_ligand = os.path.join(ligand_folder,
                                                splited_ligands[0])
            try:
                prody_receptor = prody.parsePDB(path_to_receptor)
                prody_first_ligand = prody.parsePDB(path_to_first_ligand)
                first_elements = np.asarray(prody_first_ligand.getElements())
                multiframe_ligand_coords = prody_first_ligand.getCoords()

                # if have more than one ligands, write them as one
                # multiframe ligand
                rest_coords = []
                for rest_ligand in splited_ligands[1:]:
                    prody_rest = prody.parsePDB(
                        os.path.join(ligand_folder, rest_ligand))
                    # every frame must list the same atoms in the same
                    # order as the first one; array_equal also copes with
                    # frames that differ in atom count
                    if not np.array_equal(
                            np.asarray(prody_rest.getElements()),
                            first_elements):
                        raise Exception(
                            'attempting to add ligand with different order of atoms'
                        )
                    rest_coords.append(prody_rest.getCoords())
                if rest_coords:
                    # One stack call instead of re-copying the growing
                    # array for every additional frame.
                    multiframe_ligand_coords = np.dstack(
                        [multiframe_ligand_coords] + rest_coords)

                # These are actives, so every frame is labelled 1
                # (a decoy set would use 0 instead).
                labels = np.array([1] * len(splited_ligands))
            except Exception as e:
                print(e)
                stats.ligands_failed += 1
                print("ligands parsed: {} ligands failed: {}".format(
                    stats.ligands_parsed, stats.ligands_failed))
                continue

            stats.ligands_parsed += 1
            print("ligands parsed: {} ligands failed: {}".format(
                stats.ligands_parsed, stats.ligands_failed))

            # create an output path to write binaries for protein and ligands
            path_to_pdb_subfolder = os.path.join(output_path, receptor)
            try_create_chain_folder(path_to_pdb_subfolder)

            # convert atomnames to tags and write the data to disk
            print(prody_receptor.getElements())
            receptor_elements = [
                atom_to_number(name) for name in prody_receptor.getElements()
            ]
            ligand_elements = [
                atom_to_number(name)
                for name in prody_first_ligand.getElements()
            ]

            receptor_output_path = os.path.join(path_to_pdb_subfolder,
                                                receptor)
            save_av4(receptor_output_path, [0], receptor_elements,
                     prody_receptor.getCoords())

            ligand_output_path = os.path.join(path_to_pdb_subfolder,
                                              ligand_name)
            save_av4(ligand_output_path, labels, ligand_elements,
                     multiframe_ligand_coords)