def get_structure(code, cutoff, dir_search, BRENDA_PARSER):
    """Download the PubChem JSON record for an enzyme's most common substrate.

    The proteins for ``code`` are read from ``BRENDA_PARSER``, the substrate
    is picked by ``choose_substrate``, its name is resolved to a PubChem
    identifier and the record is saved as ``<dir_search>/<CID>.json``.

    Args:
        code: Identifier passed to ``BRENDA_PARSER.get_proteins``.
        cutoff: Size limit forwarded to ``check_size_of_substrate``; a falsy
            value disables the size check.
        dir_search: Directory the JSON file is written to.
        BRENDA_PARSER: Object exposing ``get_proteins(code)``.

    Returns:
        True if the file was downloaded, False if any step failed or the
        substrate was rejected.
    """
    proteins = BRENDA_PARSER.get_proteins(code)
    substrate, _counted = choose_substrate(proteins)
    if substrate is None:
        print("No suitable substrate found, skip..")
        return False

    print("\nmost common: ", substrate)

    try:
        # An empty result list raises IndexError here, which is reported the
        # same way as a failed lookup.
        CID = pcp.get_cids(substrate, 'name', 'substance',
                           list_return='flat')[0]
    except Exception:
        # Best-effort lookup; unlike a bare ``except`` this lets
        # KeyboardInterrupt / SystemExit propagate.
        print("CID not found..")
        return False

    # ``== False`` is kept on purpose: only an explicit False (or 0) from the
    # helper rejects the substrate; a None result does not.
    if cutoff and check_size_of_substrate(CID, cutoff) == False:
        return False

    file = f'{dir_search}/{CID}.json'
    try:
        pcp.download('JSON', file, CID, 'cid')
    except Exception:
        return False
    return True
def get_structure(code, cutoff, dir_search, BRENDA_PARSER):
    """Download the PubChem JSON record for an enzyme's most common substrate.

    The substrate is looked up on PubChem by name and stored as
    ``<dir_search>/<substrate>.json`` (substrate name stripped of
    surrounding whitespace).

    Args:
        code: Identifier passed to ``BRENDA_PARSER.get_proteins``.
        cutoff: Size limit forwarded to ``check_size_of_substrate``.
        dir_search: Directory the JSON file is written to.
        BRENDA_PARSER: Object exposing ``get_proteins(code)``.

    Returns:
        True if the file was downloaded, False if no usable substrate was
        found, it was rejected, or the download failed.
    """
    print("GET STRUCTURE FILES...")
    proteins = BRENDA_PARSER.get_proteins(code)
    substrate, counted = choose_substrate(proteins)
    if substrate is None:
        print("No suitable substrate found, skip..")
        return False

    for s in counted:
        print(str(counted[s]), " : \t", s)
    print("\nmost common: ", substrate)

    # Both placeholder values are skipped with the same message.
    if substrate in ("several substrates", "more"):
        print("\nProtein uses several substrates, skip it..")
        return False

    # ``== False`` is kept on purpose: only an explicit False (or 0) rejects.
    if check_size_of_substrate(substrate, cutoff) == False:
        return False

    file = f'{dir_search}/{str(substrate).strip()}.json'
    try:
        pcp.download('JSON', file, substrate, 'name')
    except Exception:
        # Best-effort download; unlike a bare ``except`` this lets
        # KeyboardInterrupt / SystemExit propagate.
        print("substrate not found..")
        return False
    return True
def import_from_pubchem():
    """Look up every compound from ``read_csv()`` on PubChem and export it.

    For each input row a PNG of the structure is saved under
    ``CURR_PATH/images`` and one spreadsheet row per PubChem hit is written
    to the ``results`` sheet of ``to_database.xlsx``.
    """
    compounds = read_csv()

    # Create the output workbook (relative path, so it is written to the
    # current working directory) with a single sheet.
    workbook = xlsxwriter.Workbook(filename='to_database.xlsx')
    worksheet = workbook.add_worksheet(name='results')

    # Row 0 is left empty; data starts on the second spreadsheet row.
    row = 1

    print('\nEstabelecendo conexão com o PubChem...')

    for comp in compounds:
        # comp[0] is used as a SMILES string; comp[1] is presumably the aroma
        # description (per the original comments) - TODO confirm.
        results = pcp.get_compounds(comp[0], 'smiles')

        # Save the structure image of the compound.
        pcp.download('PNG',
                     os.path.join(CURR_PATH, 'images', comp[0] + '.png'),
                     comp[0], 'smiles', overwrite=True)

        # One row per hit: input SMILES, second input column, IUPAC name,
        # molecular formula.
        for c in results:
            # str() guards against compounds without an IUPAC name, whose
            # ``iupac_name`` is None and would break the concatenation.
            print('\nComposto ' + str(c.iupac_name))
            worksheet.write(row, 0, comp[0])
            worksheet.write(row, 1, comp[1])
            worksheet.write(row, 2, c.iupac_name)
            worksheet.write(row, 3, c.molecular_formula)
            row += 1

    workbook.close()

    print('Pronto! Compostos Atualizados')
def getSMILES():
    """Download canonical SMILES CSVs for every drug in ``drug_list.txt``.

    Each input line is ``#``-separated; field 1 is used as the drug id,
    field 2 as the name and field 3 as the PubChem CID.  For every drug two
    lookups are attempted (by name and by CID) and stored under ``temp/``.
    Drugs for which at least one lookup failed are listed in
    ``drug_miss.txt``.
    """
    import pubchempy as pcp

    # ``with`` closes both files even if a line is malformed and raises.
    with open("drug_list.txt", "r") as f_r, open("drug_miss.txt", "w") as f_e:
        for line in f_r:
            items = line.strip().split("#")
            error = items[1]
            print("downloading " + items[1] + " " + items[2])

            try:
                pcp.download('CSV', "temp/" + items[1] + "-name.csv",
                             items[2], 'name',
                             operation='property/CanonicalSMILES')
            except Exception:
                # Best-effort: record the failure and carry on.
                print("name " + items[2])
                error += " name " + items[2]

            try:
                # int() may raise for a non-numeric CID; that is recorded as
                # a failed CID lookup as well.
                pcp.download('CSV', "temp/" + items[1] + "-cid.csv",
                             [int(items[3])],
                             operation='property/CanonicalSMILES')
            except Exception:
                print("cid " + items[3])
                error += ", \t\t cid " + items[3]

            # ``error`` only differs from the id when something failed.
            if error != items[1]:
                f_e.write(error + "\n")
def _obtain_entry_api(self, search_text, name, output_format) -> Optional[str]:
    """Resolve ``search_text`` on PubChem and fetch the first 3D match.

    Depending on ``output_format`` (case-insensitive):

    * ``"smiles"`` - the canonical SMILES is printed, nothing is saved;
    * ``"pdb"`` - the 3D SDF is downloaded into ``self.write_dir`` and
      converted with ``sdf_to_pdb``;
    * anything else - the record is downloaded in that format.

    Files are named ``<name>_<cid>.<ext>``.

    Returns:
        The CID of the first match as a string, or None when PubChem
        returned no match.
    """
    matches = pcp.get_cids(search_text, "name", record_type="3d")
    if len(matches) == 0:
        print("No exact match found, please try the web search")
        return None

    cid = str(matches[0])
    fmt = output_format.lower()

    if fmt == "smiles":
        compound = pcp.Compound.from_cid(int(cid))
        print("SMILES code:", compound.canonical_smiles)
        return cid

    stem = name + "_" + cid
    if fmt == "pdb":
        # PubChem is asked for SDF here; the PDB file is produced locally
        # by converting it.
        sdf_file = os.path.join(self.write_dir, stem + ".sdf")
        pdb_file = os.path.join(self.write_dir, stem + ".pdb")
        pcp.download("SDF", sdf_file, cid, record_type="3d", overwrite=True)
        sdf_to_pdb(sdf_file, pdb_file)
        return cid

    target = os.path.join(self.write_dir, stem + "." + fmt)
    pcp.download(output_format.upper(), target, cid,
                 record_type="3d", overwrite=True)
    return cid
def get_structure(BRENDA_PARSER, code, cutoff, dir_search):
    """Download the PubChem JSON record for an enzyme's most common substrate.

    The record is stored as ``<script dir>/<dir_search>/<code>_<substrate>.json``.

    Args:
        BRENDA_PARSER: Object exposing ``get_proteins(code)``.
        code: Identifier passed to ``BRENDA_PARSER.get_proteins``; also used
            as the file-name prefix.
        cutoff: Size limit forwarded to ``check_size_of_substrate``.
        dir_search: Sub-directory (relative to this script) for the output.

    Returns:
        True if the file was downloaded, False if no usable substrate was
        found, it was rejected, or the download failed.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    print("GET STRUCTURE FILES...")
    proteins = BRENDA_PARSER.get_proteins(code)
    substrate, counted = choose_substrate(proteins)
    if substrate is None:
        print("No suitable substrate found, skip..")
        return False

    for s in counted:
        print(str(counted[s]), " : \t", s)
    print("\nmost common: ", substrate)

    # Both placeholder values are skipped with the same message.
    if substrate in ("several substrates", "more"):
        print("\nProtein uses several substrates, skip it..")
        return False

    # ``== False`` is kept on purpose: only an explicit False (or 0) rejects.
    if check_size_of_substrate(substrate, cutoff) == False:
        print('\nStrukture of Substrate is too big, skip it..')
        return False

    file = f'{code}_{substrate}.json'
    # Write straight to the final location.  The previous download-then-move
    # wrote to the current working directory but moved from the script
    # directory, which only worked when the two were the same.
    # overwrite=True keeps the old "replace an existing file" result.
    target = f"{dir_path}/{dir_search}/{file}"
    try:
        pcp.download('JSON', target, substrate, 'name', overwrite=True)
    except Exception:
        # Best-effort download; unlike a bare ``except`` this lets
        # KeyboardInterrupt / SystemExit propagate.
        print("substrate not found..")
        return False
    return True
def download_sdf_from_cid(cid, save_dir):
    """Download the SDF of a PubChem compound unless it is already on disk.

    The file is stored as ``<save_dir>/CID_<cid>.sdf``.  This is a
    best-effort helper: any error (bad ``cid`` type, network failure, ...)
    is swallowed and the function simply returns.

    Args:
        cid: Integer PubChem compound id (formatted with ``%d``).
        save_dir: Directory the SDF file is written to.

    Returns:
        None, always.
    """
    try:
        save_file = "%s/CID_%d.sdf" % (save_dir, cid)
        if not os.path.exists(save_file):
            pcp.download('SDF', save_file, cid, 'cid')
    except Exception:
        # Deliberately silent, but unlike a bare ``except`` this no longer
        # traps KeyboardInterrupt / SystemExit.
        return
def img_downloader(cid, nsc):
    """
    Download the PNG image of compound ``cid`` from PubChem.

    The image is saved as ``graph/<nsc>.png`` (overwriting an existing file)
    unless ``nsc`` is the placeholder string ``'not_found'``.  Progress is
    reported through the module-level counter ``cnt`` (a one-element list).
    """
    if (nsc != 'not_found'):
        pcp.download('PNG', 'graph/' + nsc + '.png', cid, overwrite=True)
    # NOTE(review): the source formatting was lost; these two lines are assumed
    # to run for every item (also skipped ones) - confirm against the original.
    print("Item " + str(cnt[0]) + " is completed")
    cnt[0] += 1
def downloadData():
    """Download PubChem properties for CIDs CID_LOW..CID_HIGH in CSV chunks.

    Every chunk covers 500 consecutive CIDs and is stored as
    ``<CSVFILE>-<n>.csv`` (n counting from 1).  The requested properties come
    from ``LABELS``; progress is printed after each chunk.
    """
    for ctr, start in enumerate(range(CID_LOW, CID_HIGH + 1, 500), 1):
        chunk_file = '{}-{}.csv'.format(CSVFILE, ctr)
        pcp.download('CSV', chunk_file, range(start, start + 500),
                     operation='property/{}'.format(LABELS))
        # The denominator uses STEP, while the chunk size above is the
        # literal 500.
        print('step {}/{}'.format(ctr, int((CID_HIGH - CID_LOW) / STEP)))
def _get_mol_image(self, mol_name, path='.', image_size=200):
    """Download a square PNG depiction of ``mol_name`` from PubChem.

    Args:
        mol_name: Compound name used for the PubChem lookup and as the file
            stem.
        path: Directory the image is written to.
        image_size: Edge length in pixels; sent to PubChem as
            ``"<size>x<size>"``.

    The image is saved as ``<path>/<mol_name>.png``; a previous file at that
    location is replaced.
    """
    file_path = os.path.join(path, mol_name + '.png')
    image_size_str = str(image_size) + "x" + str(image_size)
    # Remove a stale image at the *target* location.  Previously
    # ``mol_name + '.png'`` was removed from the current directory, which is
    # the wrong file whenever ``path`` is not '.'.
    try:
        os.remove(file_path)
    except OSError:
        # Nothing to remove (or not removable) - let the download decide.
        pass
    pcp.download('PNG', file_path, mol_name, 'name', image_size=image_size_str)
def pubchem_cid_to_sdf(cid, cleanup_3d=True):
    """
    Go from a PubChem CID to an RDKit molecule with explicit hydrogens.

    The SDF record is downloaded into a temporary directory, the first
    molecule in it is read (sanitized) and hydrogens are added.  With
    ``cleanup_3d`` the molecule is additionally embedded using ETKDG.
    """
    with tempfile.TemporaryDirectory() as scratch:
        sdf_path = f'{scratch}/test.sdf'
        pcp.download('SDF', sdf_path, cid, 'cid', overwrite=True)
        supplier = Chem.SDMolSupplier(sdf_path, sanitize=True)
        mol = Chem.AddHs(supplier[0])
        if cleanup_3d:
            AllChem.EmbedMolecule(mol, AllChem.ETKDG())
        return mol
def getPubChemKey():
    """Download InChIKeys for the two CID columns of ``smile_inconsistant.csv``.

    Columns 1 and 2 of the comma-separated file are read as PubChem CIDs
    (surrounding double quotes removed).  The keys for column 1 are written
    to ``name_keys.csv`` and those for column 2 to ``cid_keys.csv``.
    """
    import pubchempy as pcp

    cid1 = []
    cid2 = []
    # ``with`` guarantees the input file is closed, also on malformed lines.
    with open("smile_inconsistant.csv", "r") as f:
        for line in f:
            items = line.strip().split(",")
            cid1.append(items[1].strip("\""))
            cid2.append(items[2].strip("\""))

    pcp.download('CSV', 'name_keys.csv', cid1, operation='property/InChIKey')
    pcp.download('CSV', 'cid_keys.csv', cid2, operation='property/InChIKey')
def get_sdf(study_location, cid, iupac, sdf_file_list, final_inchi):
    """Download the SDF for ``cid`` into the study folder and record it.

    Nothing happens unless both ``study_location`` and ``cid`` are truthy.
    Otherwise the file ``<cid> - <iupac>.sdf`` is downloaded (overwriting an
    existing one) and ``[file_name, classyfire_id]`` is appended to
    ``sdf_file_list``; the id is looked up with ``classyfire`` only when
    ``final_inchi`` is given and is ``''`` otherwise.

    Returns:
        ``sdf_file_list`` (the same list object that was passed in).
    """
    if not (study_location and cid):
        return sdf_file_list

    if not iupac or len(iupac) < 1:
        iupac = 'no name given'

    message = " -- Getting SDF for CID " + str(cid) + " for name: " + iupac
    logger.info(message)
    print(message)

    file_name = str(cid) + ' - ' + iupac + '.sdf'
    pcp.download('SDF', study_location + '/' + file_name, cid, overwrite=True)

    classifyre_id = classyfire(final_inchi) if final_inchi else ''
    sdf_file_list.append([file_name, classifyre_id])
    return sdf_file_list
def pubchemsearch(ID, key):
    """Search PubChem for ``ID`` in namespace ``key`` and report every hit.

    A result directory is created (its name ends in the value of the
    module-level ``i``).  For each hit the SMILES, formula, weight, IUPAC
    name, fingerprint and synonyms are printed, appended to
    ``<dir>/<hit>.txt`` and a PNG is downloaded next to it.

    Written so that it runs unchanged on the Python 2 interpreter the
    original targeted (single-argument ``print(...)``, ``0o755`` literal).
    """
    newdir = "/Users/ahmed.mahmoud/Documents/" + ID + "_" + str(i)
    os.makedirs(newdir, 0o755)
    results = pcp.get_compounds(ID, key)
    print('There are ' + str(len(results)) + " Hits That Match to " + ID + ": ")
    print(results)

    banner = "###########################################\n"
    dash = "------------------------------------------\n"

    for count, c in enumerate(results, 1):
        # Every section heading has the shape " Hit <n>: <label> for <hit>\n".
        prefix = " Hit " + str(count) + ": "
        suffix = " for " + str(c) + "\n"

        # Console report.
        print(banner)
        print(prefix + "SMILES Annotation" + suffix)
        print(str(c.isomeric_smiles) + "\n")
        print(dash)
        print(prefix + "Formula" + suffix)
        print(str(c.molecular_formula) + "\n")
        print(dash)
        print(prefix + "Weight" + suffix)
        print(str(c.molecular_weight) + "\n")
        print(dash)
        print(prefix + "IUPAC" + suffix)
        print(str(c.iupac_name) + "\n")
        print(dash)
        print(prefix + "Fingerprint" + suffix)
        print(str(c.fingerprint) + "\n")
        print(prefix + "Synonyms" + suffix)
        print(str(c.synonyms) + "\n")
        print(banner)

        # File report.  Its layout differs slightly from the console one
        # (the dash precedes the IUPAC value, no trailing banner).
        text = "".join([
            banner,
            prefix + "SMILES Annotation" + suffix,
            str(c.isomeric_smiles), "\n", dash,
            prefix + "Formula" + suffix,
            str(c.molecular_formula), "\n", dash,
            prefix + "Weight" + suffix,
            str(c.molecular_weight), "\n", dash,
            prefix + "IUPAC" + suffix,
            dash, str(c.iupac_name), "\n",
            prefix + "Fingerprint" + suffix,
            str(c.fingerprint), "\n",
            prefix + "Synonyms" + suffix,
            str(c.synonyms),
        ])
        with open(newdir + "/" + str(c) + ".txt", 'ab') as out:
            out.write(newdir + text)

        pcp.download('PNG', newdir + "/" + str(c) + '.png', ID, key)
def download_files_rest(self, cids, pngs=True, download_parents=False):
    """
    Download SDF (and optionally PNG) files from PubChem for every CID in
    ``cids``, one ``pcp.download`` call per file.

    Files are written to ``self.base_dir`` or, when ``self.sub_dirs`` is
    set, to the sub-directory registered there for the category. File names
    have the form ``<cid>_<order>.<ext>``.

    :param cids: A dict {"category": {id:[ids]}}, where each category is a
        molecular type of interest.
    :param pngs: If True, PNG files will be downloaded alongside SDF files.
    :param download_parents: If True, then files will be downloaded for
        parent molecules, in addition to the molecules returned from their
        queries.
    :return:
    """
    order = 0
    formats = ["SDF"]
    if pngs:
        formats.append("PNG")
    for cat in cids.keys():
        # Flat list of everything to fetch for this category.
        download_ids = []
        if self.sub_dirs is not None:
            cat_path = os.path.join(self.base_dir, self.sub_dirs[cat])
        else:
            cat_path = os.path.join(self.base_dir)
        for parent in cids[cat].keys():
            if download_parents:
                download_ids.append(parent)
            for cid in cids[cat][parent]:
                download_ids.append(cid)
        for format in formats:
            for cid in download_ids:
                filename = str(cid) + "_" + str(
                    order) + "." + format.lower()
                filepath = os.path.join(cat_path, filename)
                pcp.download(format, filepath, cid, overwrite=True)
                # NOTE(review): source indentation was lost; the counter is
                # assumed to advance once per downloaded file - confirm.
                order += 1
def _download_png_list(list_path, out_prefix):
    """Download one PNG per usable line of the comma-separated ``list_path``.

    Field 0 of a line is the output name (appended to ``out_prefix``) and
    field 1 the PubChem CID.  Lines without a CID are skipped.
    """
    # ``with`` closes the list file even if a download raises.
    with open(list_path, 'r') as f:
        for line in f:
            items = line.strip().split(",")
            print(items[0])
            # Blank lines and lines without a comma have no second field;
            # they are skipped instead of raising IndexError.
            if len(items) < 2 or items[1].strip() == '':
                continue
            pcp.download("PNG", out_prefix + items[0].strip(),
                         items[1].strip(), 'cid')


def downloadPNG():
    """Download structure images for both SMILES lists under ``data/``.

    ``data/SmileByName.txt`` is downloaded into ``data/ByName/`` and
    ``data/SmileByCid.txt`` into ``data/ByCid/``; both lists are looked up
    by CID.
    """
    _download_png_list("data/SmileByName.txt", "data/ByName/")
    print("ByName finished")

    _download_png_list("data/SmileByCid.txt", "data/ByCid/")
    print("ByCid finished")
def get_descriptors_pubchem(cid):
    """Compute RDKit descriptors for the PubChem compound ``cid``.

    The SDF is cached as ``data/<cid>.sdf`` and only downloaded when it is
    missing.  Descriptors are gathered from ``Descriptors`` and
    ``Descriptors3D`` via ``run_all_functions_in_module``.

    Returns:
        The merged descriptor mapping, or ``0`` when PubChem returns no
        compound for ``cid``.
    """
    if not pcp.get_compounds(cid):
        return 0

    sdf_pth = 'data/{}.sdf'.format(cid)
    already_cached = os.path.exists(sdf_pth)
    if not already_cached:
        pcp.download('SDF', sdf_pth, cid, 'cid')

    # Only the first molecule of the file is used.
    mol = next(Chem.SDMolSupplier(sdf_pth))

    descriptors = run_all_functions_in_module(
        Descriptors, mol, ['PropertyFunctor'], '_')
    descriptors_3d = run_all_functions_in_module(
        Descriptors3D, mol, None, '_')
    descriptors.update(descriptors_3d)
    return descriptors
def calculate_drug_pixel_data(drugs_with_smiles_file):
    """Turn PubChem structure images into a table of 100x100 pixel features.

    Args:
        drugs_with_smiles_file: CSV file with (at least) the columns ``cid``
            and ``drug_id``.

    For every CID the PubChem PNG is downloaded to ``drug.png``, binarised,
    downsampled to 100x100, flattened and passed through ``scale_array``.
    The result is written to ``./Data/Clean/drug_pixels.csv`` with the
    columns ``drug_id``, ``cid``, ``pixel0`` ... ``pixel9999``.
    """
    print("Calculating pixel data...")
    drugs_with_smiles_df = pd.read_csv(drugs_with_smiles_file)
    cid_lst = list(drugs_with_smiles_df['cid'])

    side = 100  # edge length of the downsampled image
    pixels_dict = OrderedDict(
        (f'pixel{i}', []) for i in range(side * side))

    for cid in cid_lst:
        # Download the picture of the compound from PubChem
        pcp.download('PNG', 'drug.png', int(cid), 'cid', overwrite=True)
        # Convert to single-channel greyscale; the context manager releases
        # the file handle before the next iteration overwrites drug.png.
        with Image.open('drug.png') as raw:
            img = raw.convert('L')
        pixels = np.array(img)
        # The background for these images is grey (245) and not white:
        # turn all background pixels into white pixels ...
        pixels[pixels == 245] = 255
        # ... and make everything darker completely black, so that all atoms
        # and bonds have the same pixel intensity.
        pixels[pixels < 245] = 0
        # Downsample with antialiasing.  Image.LANCZOS is the same filter as
        # the old Image.ANTIALIAS alias, which was removed in Pillow 10.
        img = Image.fromarray(pixels)
        img = img.resize((side, side), Image.LANCZOS)
        # Flatten and scale the downsampled pixel data
        pixels = scale_array(np.array(img).flatten())
        for i, pixel in enumerate(pixels):
            pixels_dict[f'pixel{i}'].append(pixel)

    drug_id_lst = list(drugs_with_smiles_df['drug_id'])
    # Put the identifier columns in front: drug_id first, then cid.
    pixels_dict['cid'] = cid_lst
    pixels_dict.move_to_end('cid', last=False)
    pixels_dict['drug_id'] = drug_id_lst
    pixels_dict.move_to_end('drug_id', last=False)

    drug_pixel_df = pd.DataFrame(data=pixels_dict)
    drug_pixel_df.to_csv('./Data/Clean/drug_pixels.csv', index=False)
import pandas as pd
import sys
import matplotlib.pyplot as plt
from functools import reduce
import pubchempy

# look for specific structures based on fingerprints

# import and drop bad columns
invitrodbv2_fp = pd.read_csv(
    '/home/rlougee/Desktop/primary_data/invitrodbv2_fullfp.tsv', sep='\t')
invitrodbv2_fp = invitrodbv2_fp.dropna(axis=1)


def smile_from_txp(txplist):
    """Return the rows of ``invitrodbv2_fp`` where every listed column is 1.

    Args:
        txplist: Iterable of fingerprint column names (e.g. ``'Txp-338'``).

    Returns:
        The filtered DataFrame.  With an empty ``txplist`` there is nothing
        to filter on and the unfiltered frame is returned (the previous
        string/``eval`` implementation raised SyntaxError in that case).
    """
    # Combine real boolean masks instead of assembling source text for
    # ``eval``: a column name containing a quote can no longer break (or
    # inject into) the expression, and the builtin ``str`` is not shadowed.
    masks = [invitrodbv2_fp[column] == 1 for column in txplist]
    if not masks:
        return invitrodbv2_fp
    return invitrodbv2_fp[reduce(lambda left, right: left & right, masks)]


# print(invitrodbv2_fp.columns)
# print(smile_from_txp(['Txp-123', 'Txp-124']).columns)

# Save one PNG per matching structure, numbered in row order.
for n, i in enumerate(smile_from_txp(['Txp-338'])['smiles']):
    pubchempy.download('png',
                       '/home/rlougee/Desktop/CID_pix/{}.png'.format(n), i,
                       'smiles')
label) #save Ensembl protein ID (ENSP) for applying to node2vec with open(args.input + '/cv_' + str(i) + args.data + '_proIDs.txt', mode='w') as f: f.write('\n'.join(data['protein'])) #save pubchem ID for applying to node2vec cid = np.array(data['chemical'], dtype='int32') np.save(args.input + '/cv_' + str(i) + args.data + '_chemIDs.npy', cid) #convert pubchem ID to CanonicalSMILES c_id = data.chemical.tolist() pcp.download('CSV', args.input + '/cv_' + str(i) + '/ismilesref.csv', c_id, operation='property/IsomericSMILES', overwrite=True) smileb = pd.read_csv(args.input + '/cv_' + str(i) + '/ismilesref.csv') smib = [] for j in smileb['IsomericSMILES']: smib.append( Chem.MolToSmiles(Chem.MolFromSmiles(j), kekuleSmiles=False, isomericSmiles=True)) with open(args.input + '/cv_' + str(i) + args.data + '.smiles', mode='w') as f: f.write('\n'.join(smib)) #convert CanonicalSMILES to ecfp file_smiles = args.input + '/cv_' + str(i) + args.data + '.smiles' smi = Chem.SmilesMolSupplier(file_smiles, delimiter='\t', titleLine=False)
a = pcp.get_compounds(i, 'name') #Retrieve the PUBCHEM ID of the compounds Compounds[i] = a len(Compounds) no_result = [] for i in (Compounds): if (Compounds[i] == []): no_result.append( i) #List of compounds for whom PUBCHEM IDs were not found len(no_result) for i in deque( Compounds.keys() ): #Deque helps to remove elements from any part of the dictionary; remove the no_result IDs from original dictionary for j in no_result: if i == j: del Compounds[i] len(Compounds) for i in Compounds: try: pcp.download( 'SDF', f'{i}.sdf', i, 'name', record_type='3d' ) #Download the .sdf format of the compounds for whom 3D structure is available except: print(i) no_result #Print out the compouds for which there was no result from pubchempy search
# Task 1 import pubchempy as pcp import sys from rdkit import Chem cids_str = sys.argv cids = [] # Convert input cid list to integer for cid in cids_str[1:]: cid = int(cid) cids.append(cid) # Download file in SDF format pcp.download('SDF', 'output.sdf', cids, 'cid', overwrite=True) # Download file in CSV format with some chosen features pcp.download('CSV', 'output.csv', cids, operation='property/\ MolecularFormula,\ MolecularWeight,\ CanonicalSMILES,\ IUPACName,\ XLogP,\ ExactMass,\ MonoisotopicMass,\ TPSA,\ Complexity,\
.format(x)) df = pd.read_csv('{}_swissADME.csv'.format(x)) count = 0 for i in range(len(df)): smiles = df['SMILES'][i] try: cid = pcp.get_compounds(identifier=smiles, namespace='smiles')[0].cid if type(df['Similar Molecule'][i]) == float: name = str(smiles) + '.sdf' else: name = str(df['Similar Molecule'][i]) + '.sdf' print(i, name) except: print('error: {}'.format(str(df['Similar Molecule'][i]))) continue try: pcp.download('SDF', name, cid, 'cid') except: print('error - Duplicate: {}'.format(df['Similar Molecule'][i])) continue #x = pcp.get_compounds(identifier = 'Nc1nc2n(COCCO)cnc2c(=O)[nH]1', # namespace = 'smiles')[0] #cid = x.cid #y = pcp.get_sdf(identifier = str(cid)) #pcp.download('SDF' , 'Acyclovir.sdf' , cid , 'cid')
def _print_compound_table(compound):
    """Print formula, weight, IUPAC name and isomeric SMILES of ``compound``."""
    from prettytable import PrettyTable

    table = PrettyTable(['Property', 'Value'])
    table.add_row(['Molecular Formula', compound.molecular_formula])
    table.add_row(['Molecular Weight', compound.molecular_weight])
    # ``or ''`` guards against records without a name/SMILES (None), which
    # textwrap.fill cannot handle.
    table.add_row(['IUPAC name', textwrap.fill(compound.iupac_name or '', 25)])
    table.add_row(['Isomeric smiles',
                   textwrap.fill(compound.isomeric_smiles or '', 25)])
    print(table)


def detect_fg(img_path=None, out_path='detected_img.png', save_file=False,
              threshold=0.50, save_fig=False, coumpound_id_number=None):
    """Detect functional groups on a molecule image and report them.

    Args:
        img_path: Image to analyse.  If None, a random PubChem compound is
            downloaded to ``/content/random_compound.png`` and used instead.
        out_path: Output file used by both ``save_file`` and ``save_fig``.
        save_file: If true, write the annotated image array with
            ``cv2.imwrite``.
        threshold: Minimum detection score for a box to be listed and drawn.
        save_fig: If true, save the side-by-side matplotlib figure.
        coumpound_id_number: Optional PubChem CID whose details are printed.
            When it is not given and the image file name is purely numeric,
            the file name is used as the CID.

    The function prints its results and shows a figure; it returns None.
    """
    script_start = time.time()
    from prettytable import PrettyTable

    if img_path is None:
        # Pick a random CID between a random lower and a random upper bound.
        cid = random.choice(range(random.choice(range(25, 15000)),
                                  random.choice(range(15000, 150000))))
        c = pcp.Compound.from_cid(cid)
        print('No image path given, selecting a random molecule from pubchem with cid : ' + str(cid) + '\n\n Here are some details about the compound : \n')
        pcp.download('PNG', '/content/random_compound.png', cid, overwrite=True)
        img_path = '/content/random_compound.png'
        _print_compound_table(c)

    # A purely numeric file name is interpreted as a CID.
    file_stem = img_path.split('/')[-1].split('.')[0]
    if file_stem.isdigit() and coumpound_id_number is None:
        cid = int(file_stem)
        print('Getting compound details for the cid number : ' + str(cid) + ' from PubChem.')
        _print_compound_table(pcp.Compound.from_cid(cid))

    if coumpound_id_number is not None:
        cid = coumpound_id_number
        c = pcp.Compound.from_cid(cid)
        print('Compound details for cid number : ' + str(cid) + ' obtained from PubChem\n')
        _print_compound_table(c)

    # starting detection
    print('\nDetecting on ' + img_path.split('/')[-1] + '\n')
    img = keras.preprocessing.image.load_img(img_path, target_size=None)
    image_np = keras.preprocessing.image.img_to_array(img)

    # Convert image to grayscale (channel mean replicated to 3 channels)
    image_np = np.tile(
        np.mean(image_np, 2, keepdims=True), (1, 1, 3)).astype(np.uint8)

    # generate the input tensor (batch of one image)
    input_tensor = tf.convert_to_tensor(
        np.expand_dims(image_np, 0), dtype=tf.float32)

    # Only the model call itself is timed as "detection".
    start_time = time.time()
    detections, predictions_dict, shapes = detect_fn(input_tensor)
    time_taken = time.time() - start_time

    # The visualisation draws in place, so work on a copy.
    label_id_offset = 1
    image_np_with_detections = image_np.copy()

    # Convert the first batch entry to numpy once instead of per detection.
    boxes = detections['detection_boxes'][0].numpy()
    classes = detections['detection_classes'][0].numpy()
    scores = detections['detection_scores'][0].numpy()

    # Making a table of detections
    table1 = PrettyTable(['Functional Group', 'Confidence (%)',
                          'Bounding Box Coordinates'])
    for score, cls, box in zip(scores, classes, boxes):
        if score > threshold:
            table1.add_row([category_index[cls + label_id_offset]['name'],
                            score * 100,
                            str(np.round(box * 100, 3))])
    print(table1)

    # plotting
    img_detected = viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_detections,
        boxes,
        (classes + label_id_offset).astype(int),
        scores,
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=200,
        min_score_thresh=threshold,
        agnostic_mode=False,
    )
    if save_file:
        cv2.imwrite(out_path, img_detected)

    fig, ax = plt.subplots(1, 2, figsize=(14, 6))
    ax[0].imshow(img)
    ax[0].set_title('Original Image')
    ax[1].imshow(image_np_with_detections)
    ax[1].set_title('Image with detections')
    if save_fig:
        fig.savefig(out_path)
    plt.show()

    script_end = time.time()
    print('Time taken for detection : ' + str(round(time_taken, 4)) + 's\n')
    print('Time taken for whole script : ' + str(round(script_end - script_start, 4)) + 's')
## GET TRAINING DATA FEATURES get_sdf = True predictor_dict = {} try: os.mkdir(PATH+'/temp_training_cpd_sdf/') except OSError: pass with open(_training_data) as fin: header = fin.readline().strip() for line in fin: line = line.strip() larray = line.split('\t') # print (line) if get_sdf: try: pcp.download('SDF', PATH+'/temp_training_cpd_sdf/{}.sdf'.format(larray[1]), larray[1], overwrite=True) predictor_dict.setdefault(larray[1], float(larray[2])) except (pcp.PubChemHTTPError, httplib.BadStatusLine, urllib2.URLError): print line + ' passed' pass ###MEDIAN, MEAN, AND STANDARD DEVIATION OF TRAINING VALUES median_value_training = statistics.median(predictor_dict.values()) mean_value_training = statistics.mean(predictor_dict.values()) stdev_value_training = statistics.stdev(predictor_dict.values()) print "The median value is: ", median_value_training print "The mean value is: ", mean_value_training print "The standard deviation value is: ", stdev_value_training
def get_mol_SDF(self, mol_name, path='.'):
    """Fetch the SDF record of ``mol_name`` (looked up by name) from PubChem.

    The file is written to ``<path>/<mol_name>.sdf`` and replaces an
    existing file of that name.
    """
    sdf_name = mol_name + '.sdf'
    destination = os.path.join(path, sdf_name)
    pcp.download('SDF', destination, mol_name, 'name', overwrite=True)
def load_img(self):
    """Return the media-relative image path of this object's compound.

    The PNG is downloaded from PubChem into ``./media/chemical_pics/`` the
    first time it is requested; later calls reuse the file on disk.
    """
    cid = self.compound.cid
    img_path = f'/chemical_pics/{cid}.png'
    local_file = './media' + img_path
    if os.path.isfile(local_file):
        return img_path
    pcp.download('PNG', local_file, cid, 'cid')
    return img_path
import pubchempy as pcp
from PIL import Image
import numpy as np

# Download the PubChem depiction of CID 10096043, replacing any old file.
pcp.download('PNG', 'drug.png', 10096043, 'cid', overwrite=True)
# Convert to single-channel greyscale and keep a copy on disk.
img = Image.open('drug.png').convert('L')
img.save('./greyscale.png')
pixels = np.array(img)
# Binarise: value 245 becomes white (255), everything darker becomes black.
# NOTE(review): 245 is presumably the image background grey - confirm.
pixels[pixels == 245] = 255
pixels[pixels < 245] = 0
# Flatten to a 1-D vector.
pixels = pixels.flatten()
#print(cids) #c = pcp.Compound.from_cid(cids[0]) #structure = c.inchi #print(structure) for cmp in cmps: print(cmp) # We'll just grab the first cid cid = pcp.get_cids(cmp, 'name')[0] c = pcp.Compound.from_cid(cid) print(c.cid) pcp.download('PNG', 'images/' + cmp.replace(" ", "_") + '.png', c.cid, 'cid', overwrite=True) m = Chem.MolFromInchi(c.inchi) #atoms_list = list(m.GetAtoms()) #atoms = [] #for i in range(len(atoms_list)): # atoms.append(atoms_list[i]) #print("Atoms: ", atoms) print("Alcohol: ", id_fg.is_alcohol(m)) print("COOH: ", id_fg.is_cooh(m)) print("Ketone: ", id_fg.is_ketone(m)) print("Ether: ", id_fg.is_ether(m)) print('Ester: ', id_fg.is_ester(m)) print("Anhydride: ", id_fg.is_anhydride(m)) print("Aldehyde: ", id_fg.is_aldehyde(m))
""" Fix/clean the FAb apo protein and save it """
# Repair the structure with PDBFixer: replace non-standard residues, remove
# heterogens (the True argument is passed to removeHeterogens), add missing
# atoms and add hydrogens with the argument 7.0.
fixer = PDBFixer(PDB_DIR + '/' + SEED_PDB + '.fab.pdb')
fixer.findMissingResidues()
fixer.findNonstandardResidues()
fixer.replaceNonstandardResidues()
fixer.removeHeterogens(True)
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.addMissingHydrogens(7.0)
with open(f'{PDB_DIR}/{SEED_PDB}.fab.fixed.pdb', 'w+') as outfile:
    PDBFile.writeFile(fixer.topology, fixer.positions, outfile)

""" Download/save target ligand (PubChem CID: 2978) """
# cpd_2978 = pcp.Compound.from_cid(TARGET_CID)
# The SDF is stored as <SDF_DIR>/<TARGET_CID>.sdf, replacing an old copy.
pcp.download(
    'SDF', f'{SDF_DIR}/{TARGET_CID}.sdf', TARGET_CID, overwrite=True)

""" Align target with 1MFA ligand by substructure match """
target_2978 = PandasTools.LoadSDF(
    f'{SDF_DIR}/{TARGET_CID}.sdf', smilesName='SMILES', molColName='Mol')

# Reference: ligand from the seed PDB; probe: first molecule of the
# downloaded SDF with hydrogens added.
molREFRC = AllChem.MolFromPDBFile(PDB_DIR + '/' + SEED_PDB + '.lig.pdb')
molPROBE = Chem.AddHs(target_2978.Mol[0])

# Embed 3D coordinates for the probe and optimise them with UFF.
AllChem.EmbedMolecule(molPROBE)
AllChem.UFFOptimizeMolecule(molPROBE)

# Maximum common substructure of reference and probe, turned into a SMARTS
# query molecule.
mols = [molREFRC, molPROBE]
mcs = rdFMCS.FindMCS(
    mols, threshold=0.8, completeRingsOnly=True, ringMatchesRingOnly=True)
mcsPattern = Chem.MolFromSmarts(mcs.smartsString)