def run(self):
    """Transfer this task's data file, then set its status_code to 2.

    ``db_functions.transfer_data`` is called on ``self.data_file`` first;
    afterwards every ``soakdb_files`` row whose filename matches
    ``self.data_file`` gets ``status_code=2`` and the change is committed.
    """
    db_functions.transfer_data(self.data_file)

    mark_statement = 'UPDATE soakdb_files SET status_code=2 where filename like %s;'
    connection, cursor = db_functions.connectDB()
    cursor.execute(mark_statement, (self.data_file, ))
    connection.commit()
def requires(self):
    """Choose the upstream task depending on whether soakdb_files exists.

    Returns ``FindSoakDBFiles()`` when the ``soakdb_files`` table is present
    and ``TransferAllFedIDsAndDatafiles()`` when it is not.
    """
    connection, cursor = db_functions.connectDB()
    if db_functions.table_exists(cursor, 'soakdb_files'):
        return FindSoakDBFiles()
    # the table is missing
    return TransferAllFedIDsAndDatafiles()
def run(self):
    """Push every listed soakDB file, then every known proposal, to postgres.

    Each line of the input target is passed to ``db_functions.pop_soakdb``.
    Afterwards the distinct ``proposal`` values stored in ``soakdb_files``
    are each passed to ``db_functions.pop_proposals`` and a marker string is
    written to the output target.
    """
    # connect to central postgres db
    connection, cursor = db_functions.connectDB()

    # set up logging (the basicConfig call itself is currently disabled)
    logfile = self.date.strftime('transfer_logs/fedids_%Y%m%d.txt')

    # use list from previous step as input to write to postgres
    with self.input().open('r') as database_list:
        for listed_path in database_list.readlines():
            soakdb_path = listed_path.replace('\n', '')
            out, err, proposal = db_functions.pop_soakdb(soakdb_path)

    cursor.execute('SELECT proposal FROM soakdb_files')
    unique_proposals = set(str(record[0]) for record in cursor.fetchall())
    for proposal_number in unique_proposals:
        db_functions.pop_proposals(proposal_number)

    cursor.close()

    with self.output().open('w') as marker:
        marker.write('TransferFeDIDs DONE')
def on_the_fly_analysis():
    """Run ``giant.score_model`` for every proasis hit that has a strucid.

    For each ``proasis_hits`` row with a non-empty ``strucid`` a shell command
    is built that sets up ccp4 and scores the row's bound pdb against
    ``refine.mtz`` in the crystal's proasis directory. The command's stdout
    and stderr are printed.

    Changes from the previous version:
      * ``subprocess`` is imported locally like the other modules this
        function uses, so the call no longer depends on a module-level import;
      * stderr is piped, so ``err`` holds the command's error output rather
        than always being ``None``;
      * directory values that were computed but never used were removed.
    """
    import db_functions
    import subprocess

    # set filepaths
    hit_directory = '/dls/science/groups/proasis/LabXChem/'

    conn, c = db_functions.connectDB()
    c.execute(
        "SELECT bound_conf, crystal_name, protein, strucid FROM proasis_hits WHERE strucid !=''"
    )

    for row in c.fetchall():
        bound_pdb = str(row[0])
        crystal = str(row[1])
        protein_name = str(row[2])
        strucid = str(row[3])

        # directory where the files for this crystal are stored (for proasis)
        proasis_crystal_directory = (
            hit_directory + '/' + protein_name + '/' + crystal + '/')

        print('Analysing ' + crystal + ' (' + strucid + ')')

        giant_score_string = (
            'module unload ccp4; '
            'source /dls/science/groups/i04-1/software/pandda-update'
            '/ccp4/ccp4-7.0/bin/ccp4.setup-sh; giant.score_model ' +
            bound_pdb + ' ' + proasis_crystal_directory + '/refine.mtz')

        # communicate() only returns stderr when it has been piped
        process = subprocess.Popen(
            giant_score_string,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            shell=True)
        out, err = process.communicate()
        print(out)
        print(err)
def get_file_list(self, status_code):
    """Pair up filename and id for soakdb files with the given status.

    Queries ``soakdb_files`` for rows whose ``status_code`` equals the
    argument and returns ``zip(filenames, ids)``, with both values converted
    to ``str``.
    """
    connection, cursor = db_functions.connectDB()
    cursor.execute(
        'SELECT filename, id FROM soakdb_files WHERE status_code = %s',
        (status_code, ))
    records = cursor.fetchall()

    filenames = [str(record[0]) for record in records]
    file_ids = [str(record[1]) for record in records]
    return zip(filenames, file_ids)
def check_modification_date(self, filename):
    """Drop the proasis structure for this hit if the copied file is out of date.

    Nothing happens unless ``filename`` exists and its modification date (as
    reported by ``misc_functions.get_mod_date``) differs from
    ``self.mod_date``. In that case every ``strucid`` recorded in
    ``proasis_hits`` for ``self.bound_pdb`` / ``self.mod_date`` is deleted
    through ``proasis_api_funcs.delete_structure`` and the column is reset to
    NULL.

    The reset is now committed; previously the UPDATE was executed without a
    commit, unlike the other UPDATE statements in this file.
    """
    if not os.path.isfile(filename):
        return

    proasis_file_date = misc_functions.get_mod_date(filename)
    modification_date = self.mod_date
    if proasis_file_date == modification_date:
        return

    conn, c = db_functions.connectDB()
    c.execute(
        'SELECT strucid FROM proasis_hits WHERE bound_conf = %s and modification_date = %s',
        (self.bound_pdb, modification_date))

    # fetchall() materialises the rows, so the cursor can be reused below
    for row in c.fetchall():
        strucid = str(row[0])
        if len(strucid) > 1:
            proasis_api_funcs.delete_structure(strucid)
            c.execute(
                'UPDATE proasis_hits SET strucid = NULL WHERE bound_conf = %s and modification_date = %s',
                (self.bound_pdb, modification_date))

    # persist the strucid reset
    conn.commit()
def run(self):
    """Remove this file's previously transferred rows and transfer it again.

    Rows belonging to ``self.file_id`` are deleted from the ``lab``,
    ``refinement``, ``dimple`` and ``data_processing`` tables (committing
    after each delete). The data file is then transferred again and its
    ``soakdb_files`` row is set to ``status_code=2``.
    """
    delete_statements = (
        'delete from lab where file_id=%s',
        'delete from refinement where file_id=%s',
        'delete from dimple where file_id=%s',
        'delete from data_processing where file_id=%s',
    )

    connection, cursor = db_functions.connectDB()
    for statement in delete_statements:
        cursor.execute(statement, (self.file_id, ))
        connection.commit()

    db_functions.transfer_data(self.data_file)

    cursor.execute(
        'UPDATE soakdb_files SET status_code=2 where filename like %s;',
        (self.data_file, ))
    connection.commit()
def get_list(self):
    """Return (pandda_path, protein, reference_pdb) triples for usable leads.

    Only ``proasis_leads`` rows whose ``pandda_path`` and ``reference_pdb``
    are neither empty nor the string 'None' are selected. Every value is
    converted to ``str`` and the three columns are combined with ``zip``.
    """
    lead_query = (
        "SELECT pandda_path, protein, reference_pdb FROM proasis_leads "
        "WHERE pandda_path !='' and pandda_path !='None' "
        "and reference_pdb !='' and reference_pdb !='None' "
    )

    connection, cursor = db_functions.connectDB()
    cursor.execute(lead_query)
    records = cursor.fetchall()

    pandda_paths = [str(record[0]) for record in records]
    proteins = [str(record[1]) for record in records]
    references = [str(record[2]) for record in records]
    return zip(pandda_paths, proteins, references)
def requires(self):
    """Require the hit transfers plus one EdstatsScores task per stored hit.

    Every ``proasis_hits`` row with a non-empty ``strucid`` contributes an
    ``EdstatsScores`` task built from its crystal name and strucid (both as
    ``str``). The return value is a tuple of
    ``data_in_proasis.StartHitTransfers()`` and the list of those tasks.
    """
    connection, cursor = dbf.connectDB()
    cursor.execute("select crystal_name, strucid from proasis_hits where strucid !=''")
    hits = [(str(record[0]), str(record[1])) for record in cursor.fetchall()]

    hit_transfers = data_in_proasis.StartHitTransfers()
    score_tasks = [
        EdstatsScores(crystal=crystal_name, strucid=strucid_no)
        for crystal_name, strucid_no in hits
    ]
    return hit_transfers, score_tasks
def get_list(self):
    """Return per-hit tuples of the five columns needed to transfer a hit.

    Selects ``bound_conf``, ``crystal_name``, ``protein``, ``smiles`` and
    ``modification_date`` from ``proasis_hits`` for rows whose ``bound_conf``
    and ``modification_date`` are neither empty nor the string 'None'.
    All values are converted to ``str``; the columns are combined with
    ``zip`` in the order listed above.
    """
    hit_query = (
        "SELECT bound_conf, crystal_name, protein, smiles, modification_date "
        "FROM proasis_hits WHERE bound_conf !='' and bound_conf !='None' "
        "and modification_date !='' and modification_date !='None' "
    )

    connection, cursor = db_functions.connectDB()
    cursor.execute(hit_query)
    records = cursor.fetchall()

    # one list per selected column, in select order
    columns = [[str(record[position]) for record in records]
               for position in range(5)]
    return zip(*columns)
def run(self):
    """Write all listed soakDB files and all stored proposals to postgres.

    The input target holds one soakDB file path per line; each is handed to
    ``db_functions.pop_soakdb``. The distinct ``proposal`` values found in
    ``soakdb_files`` are then handed to ``db_functions.pop_proposals`` and a
    marker string is written to the output target.
    """
    # connect to central postgres db
    connection, cursor = db_functions.connectDB()

    # use list from previous step as input to write to postgres
    with self.input().open('r') as database_list:
        soakdb_paths = [
            entry.replace('\n', '') for entry in database_list.readlines()
        ]
        for soakdb_path in soakdb_paths:
            out, err, proposal = db_functions.pop_soakdb(soakdb_path)

    cursor.execute('SELECT proposal FROM soakdb_files')
    proposals = {str(record[0]) for record in cursor.fetchall()}
    for proposal_number in proposals:
        db_functions.pop_proposals(proposal_number)

    cursor.close()

    with self.output().open('w') as done_file:
        done_file.write('TransferFeDIDs DONE')
def run(self):
    """Copy a hit into the proasis directories, submit it and store its strucid.

    Steps, in order:
      * build the protein and crystal directories under ``self.hit_directory``
        and let ``check_modification_date`` handle a previously copied pdb;
      * copy the bound pdb and, when they exist, the 2fofc/fofc maps;
      * write an sdf for ``self.smiles`` via ``misc_functions.create_sd_file``;
      * collect the unique ligand identifiers (``LIG`` plus the following
        seven characters) from the copied pdb and submit the structure with
        ``submitStructure.py``;
      * attach both maps with ``addnewfile.py`` and store the strucid returned
        by ``submit_proasis_job_string`` in ``proasis_hits``.

    Changes from the previous version: the pdb file is closed after reading,
    lines containing ``LIG`` with fewer than seven characters after it are
    skipped instead of raising ``AttributeError``, and a missing ligand
    raises a descriptive exception instead of a ``NameError`` on ``strucid``.

    Raises:
        Exception: if no ligand identifier is found in the copied pdb file.
    """
    # set up directory paths for where files will be stored (for proasis)
    proasis_protein_directory = str(
        str(self.hit_directory) + '/' + str(self.protein_name) + '/')
    proasis_crystal_directory = str(
        str(self.hit_directory) + '/' + str(self.protein_name) + '/' +
        str(self.crystal) + '/')

    # find the name of the file, and create filepath name in proasis directories
    pdb_file_name = str(self.bound_pdb).split('/')[-1]
    proasis_bound_pdb = str(proasis_crystal_directory + pdb_file_name)

    self.check_modification_date(proasis_bound_pdb)

    # copy refinement pdb specified in datasource to proasis directories
    print('Copying refinement pdb...')
    # create the project (protein) dir first, then the crystal dir, if missing
    for directory in (proasis_protein_directory, proasis_crystal_directory):
        if not os.path.isdir(directory):
            print('not a directory')
            os.system(str('mkdir ' + directory))

    # copy the file to the proasis directories
    os.system(
        str('cp ' + str(self.bound_pdb) + ' ' + proasis_crystal_directory))

    # if the bound pdb is in a refinement folder, change the path to find the map files
    if 'Refine' in self.bound_pdb.replace(pdb_file_name, ''):
        remove_string = str(
            str(self.bound_pdb).split('/')[-2] + '/' + pdb_file_name)
        map_directory = str(self.bound_pdb).replace(remove_string, '')
    else:
        map_directory = str(self.bound_pdb).replace(pdb_file_name, '')

    # copy the 2fofc and fofc maps over to the proasis directories
    for map_name in ('2fofc.map', 'fofc.map'):
        map_path = str(map_directory + '/' + map_name)
        if os.path.isfile(map_path):
            os.system(
                str('cp ' + map_path + ' ' + proasis_crystal_directory))

    print(map_directory)

    # create 2D sdf files for all ligands from SMILES string
    sdf_path = str(
        os.path.join(proasis_crystal_directory, str(self.crystal) + '.sdf'))
    misc_functions.create_sd_file(self.crystal, self.smiles, sdf_path)

    # look for ligands in the pdb file
    print('detecting ligand for ' + str(self.crystal))
    ligands = []
    with open(proasis_bound_pdb, 'r') as pdb_file:
        for line in pdb_file:
            # no match means "LIG" is absent or too close to the end of line
            match = re.search(r"LIG.......", line)
            if match:
                ligands.append(str(match.group()))

    # find all unique ligands
    ligands = list(set(ligands))
    if not ligands:
        raise Exception(
            'No ligand was detected in ' + str(proasis_bound_pdb))

    # any ligands beyond the first go into one comma-separated -o argument
    # (the doubled space reproduces the command line built previously)
    other_ligands = ''
    if len(ligands) > 1:
        other_ligands = "  -o '" + ','.join(ligands[1:]) + "'"

    # create the submission string for proasis
    print('submission string:\n')
    submit_to_proasis = str(
        "/usr/local/Proasis2/utils/submitStructure.py -d 'admin' -f " + "'" +
        str(proasis_bound_pdb) + "' -l '" + ligands[0] + "'" + other_ligands +
        " -m " + sdf_path + " -p " + str(self.protein_name) + " -t " +
        str(self.crystal) + " -x XRAY -N")
    print(submit_to_proasis)

    # submit the structure to proasis
    strucid = self.submit_proasis_job_string(submit_to_proasis)

    # attach the 2fofc and fofc maps to the submitted structure
    map_commands = [
        str('/usr/local/Proasis2/utils/addnewfile.py -i ' + map_type +
            '_c -f ' + proasis_crystal_directory + '/' + map_type +
            '.map -s ' + strucid + ' -t ' + "'" + str(self.crystal) + '_' +
            map_type + "'") for map_type in ('2fofc', 'fofc')
    ]
    for map_command in map_commands:
        os.system(map_command)

    # add strucid to database
    conn, c = db_functions.connectDB()
    c.execute(
        'UPDATE proasis_hits SET strucid = %s where bound_conf = %s and modification_date = %s',
        (strucid, self.bound_pdb, self.mod_date))
    conn.commit()

    with self.output().open('wb') as f:
        f.write('')
def run(self):
    """Submit the reference structure to proasis as a lead and record its strucid.

    Reads the ``native_centroid`` column of the pandda sites csv, looks up
    residues within 20A of a site centroid in the reference structure, copies
    the reference structure into the proasis project directory, submits it
    with ``submitStructure.py`` and registers it with ``addnewlead.py``. The
    resulting strucid is written to ``proasis_leads``.

    Raises:
        Exception: if ``err`` from either subprocess is truthy, or if the
            id string extracted from the submission output is shorter than
            five characters.

    NOTE(review): the nesting below was reconstructed from source whose
    indentation had been lost; confirm that everything from the
    ``res_list = (list(set(res_list)))`` line onwards belongs after the
    centroid loop rather than inside it.
    """
    pandda_analyse_centroids = str(self.pandda_directory +
                                   '/analyses/pandda_analyse_sites.csv')
    if os.path.isfile(pandda_analyse_centroids):
        site_list = pandas.read_csv(
            str(pandda_analyse_centroids))['native_centroid']
        print(' Searching for residue atoms for ' + str(len(site_list)) +
              ' site centroids \n')
        print(
            ' NOTE: 3 residue atoms are required for each site centroid \n'
        )
        print site_list
    else:
        # NOTE(review): site_list is not assigned on this path, so the loop
        # below would raise a NameError - confirm whether that is intended
        print('file does not exist!')
    no = 0
    for centroid in site_list:
        # print('next centroid')
        structure = PDBParser(PERMISSIVE=0).get_structure(
            str(self.name), str(self.reference_structure))
        no += 1
        res_list = []
        # initial distance for nearest neighbor (NN) search is 20A
        neighbor_distance = 20
        # turn the "(x, y, z)" text into "[x, y, z]" and evaluate it
        # NOTE(review): eval() runs whatever the csv cell contains; consider
        # ast.literal_eval if the csv is not fully trusted
        centroid_coordinates = centroid.replace('(', '[')
        centroid_coordinates = centroid_coordinates.replace(')', ']')
        centroid_coordinates = eval(str(centroid_coordinates))
        # define centroid as an atom object for NN search
        centroid_atom = Atom.Atom('CEN', centroid_coordinates, 0, 0, 0, 0,
                                  9999, 'C')
        atoms = list(structure.get_atoms())
        center = np.array(centroid_atom.get_coord())
        ns = NeighborSearch(atoms)
        # calculate NN list
        neighbors = ns.search(center, neighbor_distance)
        # res_list is reset for every centroid, so only the residues found
        # for the last centroid are still available after the loop
        res_list = []
        # for each atom in the NN list
        for neighbor in neighbors:
            try:
                # get the residue that the neighbor belongs to
                parent = Atom.Atom.get_parent(neighbor)
                # if the residue is not a water etc. (amino acids have blank)
                if parent.get_id()[0] == ' ':
                    # get the chain that the residue belongs to
                    chain = Residue.Residue.get_parent(parent)
                    # if statements for fussy proasis formatting
                    # NOTE(review): both branches assign the same string as
                    # written here - confirm the intended padding. `space`
                    # is only (re)assigned for 2- or 3-character residue
                    # numbers; for other lengths it is either unbound (the
                    # NameError is swallowed by the bare except below) or
                    # left over from an earlier neighbor.
                    if len(str(parent.get_id()[1])) == 3:
                        space = ' '
                    if len(str(parent.get_id()[1])) == 2:
                        space = ' '
                    if 'HOH' not in str(parent.get_resname()):
                        res = (str(parent.get_resname()) + ' ' +
                               str(chain.get_id()) + space +
                               str(parent.get_id()[1]))
                        res_list.append(res)
            except:
                # any error for this neighbor is ignored and the atom skipped
                continue
    # de-duplicate the residue strings; the resulting order is that of a set
    res_list = (list(set(res_list)))
    print res_list
    # the first three residues form the -l argument; fewer than three
    # residues raises an IndexError here
    lig1 = str("'" + str(res_list[0]) + ' :' + str(res_list[1]) + ' :' +
               str(res_list[2]) + " ' ")
    print lig1
    # some faff to get rid of waters and add remaining ligands in multiples of 3 - proasis is fussy
    # NOTE(review): res_string is built here, but the active submit command
    # below does not use it (only the commented-out variant does)
    alt_lig_option = " -o '"
    res_string = ""
    full_res_string = ''
    # count the residues after the first three
    count = 0
    for i in range(3, len(res_list)):
        count += 1
    # this block uses print statements, so under Python 2 `count / 3` is
    # already an integer division
    multiple = int(round(count / 3) * 3)
    count = 0
    for i in range(3, multiple):
        if count == 0:
            res_string += alt_lig_option
        if count <= 1:
            res_string += str(res_list[i] + ' ,')
            count += 1
        elif count == 2:
            res_string += str(res_list[i] + " '")
            # the result of join() is discarded; full_res_string stays ''
            full_res_string.join(res_string)
            count = 0
    # copy reference structure to proasis directories
    ref_structure_file_name = str(self.reference_structure).split('/')[-1]
    proasis_project_directory = str(
        str(self.proasis_directory) + '/' + str(self.name))
    proasis_reference_directory = str(
        str(proasis_project_directory) + '/reference/')
    proasis_reference_structure = str(proasis_reference_directory + '/' +
                                      str(ref_structure_file_name))
    if not os.path.isdir(str(proasis_project_directory)):
        os.system(str('mkdir ' + str(proasis_project_directory)))
    if not os.path.isdir(proasis_reference_directory):
        os.system(str('mkdir ' + str(proasis_reference_directory)))
    os.system(
        str('cp ' + str(self.reference_structure) + ' ' +
            str(proasis_reference_structure)))
    # previous form of the command, which also passed res_string:
    #submit_to_proasis = str('/usr/local/Proasis2/utils/submitStructure.py -p ' + str(self.name) + ' -t ' + str(self.name) + '_lead -d admin -f ' + str(proasis_reference_structure) + ' -l ' + str(lig1) + str(res_string) + " -x XRAY -n")
    submit_to_proasis = str(
        '/usr/local/Proasis2/utils/submitStructure.py -p ' + str(self.name) +
        ' -t ' + str(self.name) + '_lead -d admin -f ' +
        str(proasis_reference_structure) + ' -l ' + str(lig1) +
        "-x XRAY -n")
    print submit_to_proasis
    # NOTE(review): stderr is not piped, so communicate() returns None for
    # err and the `if err` checks below cannot trigger - confirm intent
    process = subprocess.Popen(submit_to_proasis,
                               stdout=subprocess.PIPE,
                               shell=True)
    out, err = process.communicate()
    print(out)
    if err:
        raise Exception('There was a problem submitting this lead: ' +
                        str(err))
    # extract the structure id from the submission output
    strucidstr = misc_functions.get_id_string(out)
    if len(strucidstr) < 5:
        raise Exception('No strucid was detected!')
    # register the submitted structure as a lead for this project
    add_lead = str('/usr/local/Proasis2/utils/addnewlead.py -p ' +
                   str(self.name) + ' -s ' + str(strucidstr))
    process = subprocess.Popen(add_lead, stdout=subprocess.PIPE, shell=True)
    out, err = process.communicate()
    print(out)
    if err:
        raise Exception('There was a problem submitting this lead: ' +
                        str(err))
    # store the strucid against this lead
    conn, c = db_functions.connectDB()
    c.execute(
        'UPDATE proasis_leads SET strucid = %s WHERE reference_pdb = %s and pandda_path = %s',
        (strucidstr, self.reference_structure, self.pandda_directory))
    conn.commit()
    # empty output file marks the task as done
    with self.output().open('wb') as f:
        f.write('')
def run(self):
    """Classify every listed soakDB file as new, changed or unchanged.

    For each filename in the input target, the stored modification date in
    ``soakdb_files`` is compared with the file's current one
    (``misc_functions.get_mod_date``) and ``status_code`` is updated:

      * 0 = new      (no matching row; the file is added via ``pop_soakdb``)
      * 1 = changed  (the stored modification date is updated as well)
      * 2 = not changed

    If the ``lab`` table does not exist, every row is reset to status 0.
    An empty output file is written at the end.

    The status 0 update for new files is now committed straight away, like
    the updates for changed/unchanged files; previously it was only
    persisted if a later commit happened to run on the same connection.
    Logging is currently disabled in this task.
    """
    conn, c = db_functions.connectDB()
    exists = db_functions.table_exists(c, 'soakdb_files')

    checked = []

    if exists:
        with self.input().open('r') as f:
            files = f.readlines()

        for filename in files:
            filename_clean = filename.rstrip('\n')

            c.execute(
                'select filename, modification_date from soakdb_files where filename like %s;',
                (filename_clean, ))

            # fetchall() materialises the rows, so the cursor can be reused
            for row in c.fetchall():
                data_file = str(row[0])
                checked.append(data_file)
                old_mod_date = str(row[1])
                current_mod_date = misc_functions.get_mod_date(data_file)

                if current_mod_date > old_mod_date:
                    # file has changed
                    c.execute(
                        'UPDATE soakdb_files SET status_code = 1 where filename like %s;',
                        (filename_clean, ))
                    c.execute(
                        'UPDATE soakdb_files SET modification_date = %s where filename like %s;',
                        (current_mod_date, filename_clean))
                else:
                    # file has not changed
                    c.execute(
                        'UPDATE soakdb_files SET status_code = 2 where filename like %s;',
                        (filename_clean, ))
                conn.commit()

            if filename_clean not in checked:
                # new file: add it and its proposal, then flag it as new
                out, err, proposal = db_functions.pop_soakdb(filename_clean)
                db_functions.pop_proposals(proposal)
                c.execute(
                    'UPDATE soakdb_files SET status_code = 0 where filename like %s;',
                    (filename_clean, ))
                conn.commit()

        # rows in the database that were not part of the input list
        c.execute('select filename from soakdb_files;')
        for row in c.fetchall():
            if str(row[0]) not in checked:
                data_file = str(row[0])
                # result is only needed by the (disabled) logging calls
                file_exists = os.path.isfile(data_file)

    exists = db_functions.table_exists(c, 'lab')
    if not exists:
        # nothing has been transferred yet, so treat every file as new
        c.execute('UPDATE soakdb_files SET status_code = 0;')
        conn.commit()

    with self.output().open('w') as f:
        f.write('')
def run(self):
    """Collect hit and lead information and hand it to ``add_to_postgres``.

    Crystals are taken from the ``refinement`` table when their ``outcome``
    matches ``outcome_string``. For each one, smiles/protein are read from
    ``lab`` and pandda_path/reference_pdb from ``dimple``; the values are
    gathered in two dicts of lists, turned into pandas DataFrames and passed
    to ``self.add_to_postgres`` once per protein for the ``proasis_leads``
    and ``proasis_hits`` tables. An empty output file is written at the end.

    NOTE(review): the nesting below was reconstructed from source whose
    indentation had been lost; confirm that the ``dimple`` query runs once
    per refinement row (after the loop over ``lab_table``) rather than once
    per lab entry.
    """
    # all data necessary for uploading hits
    crystal_data_dump_dict = {
        'crystal_name': [],
        'protein': [],
        'smiles': [],
        'bound_conf': [],
        'modification_date': [],
        'strucid': []
    }
    # all data necessary for uploading leads
    project_data_dump_dict = {
        'protein': [],
        'pandda_path': [],
        'reference_pdb': [],
        'strucid': []
    }
    # SIMILAR TO pattern: outcome contains a 3, 4, 5 or 6
    outcome_string = '(%3%|%4%|%5%|%6%)'
    conn, c = db_functions.connectDB()
    c.execute(
        '''SELECT crystal_id, bound_conf FROM refinement WHERE outcome SIMILAR TO %s''',
        (str(outcome_string), ))
    rows = c.fetchall()
    print(
        str(len(rows)) + ' crystals were found to be in refinement or above')
    # row[0] is the crystal_id, row[1] the bound_conf path
    for row in rows:
        c.execute(
            '''SELECT smiles, protein FROM lab WHERE crystal_id = %s''',
            (str(row[0]), ))
        lab_table = c.fetchall()
        # skip crystal ids shorter than three characters
        if len(str(row[0])) < 3:
            continue
        if len(lab_table) > 1:
            print('WARNING: ' + str(row[0]) +
                  ' has multiple entries in the lab table')
            # print lab_table
        # entry[0] is the smiles string, entry[1] the protein name
        for entry in lab_table:
            # fall back to the part of the crystal id before the first '-'
            # when the lab table has no usable protein name
            if len(str(entry[1])) < 2 or 'None' in str(entry[1]):
                protein_name = str(row[0]).split('-')[0]
            else:
                protein_name = str(entry[1])
            # a bound_conf shorter than five characters is treated as missing
            if len(str(row[1])) < 5:
                print('No bound conf for ' + str(row[0]))
                continue
            crystal_data_dump_dict['protein'].append(protein_name)
            crystal_data_dump_dict['smiles'].append(entry[0])
            crystal_data_dump_dict['crystal_name'].append(row[0])
            crystal_data_dump_dict['bound_conf'].append(row[1])
            crystal_data_dump_dict['strucid'].append('')
            # any failure to read the modification date yields an empty value
            try:
                modification_date = misc_functions.get_mod_date(str(
                    row[1]))
            except:
                modification_date = ''
            crystal_data_dump_dict['modification_date'].append(
                modification_date)
        c.execute(
            '''SELECT pandda_path, reference_pdb FROM dimple WHERE crystal_id = %s''',
            (str(row[0]), ))
        pandda_info = c.fetchall()
        # NOTE(review): protein_name is whatever the lab loop above last
        # assigned; if lab_table was empty it is stale from an earlier row
        # or unbound - confirm
        for pandda_entry in pandda_info:
            # project_data_dump_dict['crystal_name'].append(row[0])
            project_data_dump_dict['protein'].append(protein_name)
            project_data_dump_dict['pandda_path'].append(pandda_entry[0])
            project_data_dump_dict['reference_pdb'].append(pandda_entry[1])
            project_data_dump_dict['strucid'].append('')
    project_table = pandas.DataFrame.from_dict(project_data_dump_dict)
    crystal_table = pandas.DataFrame.from_dict(crystal_data_dump_dict)
    # proteins are taken from the lead data only
    protein_list = set(list(project_data_dump_dict['protein']))
    print protein_list
    for protein in protein_list:
        self.add_to_postgres(project_table, protein, ['reference_pdb'],
                             project_data_dump_dict, 'proasis_leads')
        self.add_to_postgres(crystal_table, protein,
                             ['crystal_name', 'smiles', 'bound_conf'],
                             crystal_data_dump_dict, 'proasis_hits')
    # empty output file marks the task as done
    with self.output().open('wb') as f:
        f.write('')
def run(self):
    """Update the status code of every soakDB file named in the input target.

    Status codes written to ``soakdb_files.status_code``:

      * 0 = new      (no row yet; added through ``db_functions.pop_soakdb``)
      * 1 = changed  (current modification date is later than the stored one,
                      which is updated too)
      * 2 = not changed

    When the ``lab`` table is missing, all rows are reset to status 0.
    The task finishes by writing an empty output file.

    Compared with the previous version, the status 0 update for a new file is
    committed immediately, matching the handling of the other two status
    codes; it used to be left uncommitted unless the ``lab``-table reset
    below happened to commit afterwards.
    """
    conn, c = db_functions.connectDB()
    soakdb_table_exists = db_functions.table_exists(c, 'soakdb_files')

    # filenames that already have a row in soakdb_files
    checked = []

    if soakdb_table_exists:
        with self.input().open('r') as f:
            files = f.readlines()

        for filename in files:
            filename_clean = filename.rstrip('\n')

            c.execute(
                'select filename, modification_date from soakdb_files where filename like %s;',
                (filename_clean, ))
            known_rows = c.fetchall()

            for row in known_rows:
                data_file = str(row[0])
                checked.append(data_file)
                old_mod_date = str(row[1])
                current_mod_date = misc_functions.get_mod_date(data_file)

                if current_mod_date > old_mod_date:
                    c.execute(
                        'UPDATE soakdb_files SET status_code = 1 where filename like %s;',
                        (filename_clean, ))
                    c.execute(
                        'UPDATE soakdb_files SET modification_date = %s where filename like %s;',
                        (current_mod_date, filename_clean))
                else:
                    c.execute(
                        'UPDATE soakdb_files SET status_code = 2 where filename like %s;',
                        (filename_clean, ))
                conn.commit()

            if filename_clean not in checked:
                out, err, proposal = db_functions.pop_soakdb(filename_clean)
                db_functions.pop_proposals(proposal)
                c.execute(
                    'UPDATE soakdb_files SET status_code = 0 where filename like %s;',
                    (filename_clean, ))
                # persist the "new" flag like the other status updates
                conn.commit()

        c.execute('select filename from soakdb_files;')
        for row in c.fetchall():
            if str(row[0]) not in checked:
                data_file = str(row[0])
                # computed for database rows missing from the input list;
                # the result is not acted on here
                file_exists = os.path.isfile(data_file)

    lab_table_exists = db_functions.table_exists(c, 'lab')
    if not lab_table_exists:
        c.execute('UPDATE soakdb_files SET status_code = 0;')
        conn.commit()

    with self.output().open('w') as f:
        f.write('')