Пример #1
0
def get_structure(code, cutoff, dir_search, BRENDA_PARSER):
    """Download the PubChem JSON record of an enzyme's most common substrate.

    The substrate is chosen by ``choose_substrate`` from the proteins that
    ``BRENDA_PARSER`` returns for ``code``. Its name is resolved to a PubChem
    CID and the record is written to ``<dir_search>/<CID>.json``.

    Args:
        code: Identifier passed to ``BRENDA_PARSER.get_proteins``.
        cutoff: Limit forwarded to ``check_size_of_substrate``; a falsy value
            disables the size check.
        dir_search: Directory that receives the JSON file.
        BRENDA_PARSER: Object exposing ``get_proteins(code)``.

    Returns:
        True if the file was downloaded, False if no substrate or CID was
        found, the size check rejected it, or the download failed.
    """
    proteins = BRENDA_PARSER.get_proteins(code)
    substrate, _ = choose_substrate(proteins)

    if substrate is None:
        print("No suitable substrate found, skip..")
        return False
    print("\nmost common: ", substrate)

    try:
        # An empty result list raises IndexError here and is reported the
        # same way as a failed request.
        CID = pcp.get_cids(substrate, 'name', 'substance',
                           list_return='flat')[0]
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still stop a
        # long batch run.
        print("CID not found..")
        return False

    if cutoff and check_size_of_substrate(CID, cutoff) == False:
        return False

    file = f'{dir_search}/{CID}.json'
    try:
        # No overwrite is requested, so an already existing target file also
        # ends up in the failure branch.
        pcp.download('JSON', file, CID, 'cid')
    except Exception:
        return False
    return True
Пример #2
0
def get_structure(code,cutoff,dir_search,BRENDA_PARSER):
    """Download the PubChem JSON record of an enzyme's most common substrate.

    The substrate is chosen by ``choose_substrate`` from the proteins that
    ``BRENDA_PARSER`` returns for ``code`` and is looked up on PubChem by
    name. The record is written to ``<dir_search>/<substrate>.json``.

    Args:
        code: Identifier passed to ``BRENDA_PARSER.get_proteins``.
        cutoff: Limit forwarded to ``check_size_of_substrate``.
        dir_search: Directory that receives the JSON file.
        BRENDA_PARSER: Object exposing ``get_proteins(code)``.

    Returns:
        True if the file was downloaded, False if the substrate is missing,
        is one of the placeholder names, fails the size check, or the
        download failed.
    """
    print("GET STRUCTURE FILES...")
    proteins = BRENDA_PARSER.get_proteins(code)
    substrate, counted = choose_substrate(proteins)
    if substrate is None:
        print("No suitable substrate found, skip..")
        return False
    for s in counted:
        print(str(counted[s]), " : \t", s )
    print("\nmost common: ", substrate)
    # Both values are placeholders rather than compound names and were
    # handled by two identical branches before.
    if substrate in ("several substrates", "more"):
        print("\nProtein uses several substrates, skip it..")
        return False
    if check_size_of_substrate(substrate,cutoff) == False:
        return False
    file = f'{dir_search}/{str(substrate).strip()}.json'

    try:
        pcp.download('JSON', file, substrate, 'name')
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still stop a
        # long batch run.
        print("substrate not found..")
        return False
    return True
Пример #3
0
def import_from_pubchem():
    """Look up every compound from ``read_csv()`` on PubChem and export the hits.

    For each input row the first field is used as a SMILES string: its
    structure image is saved to ``<CURR_PATH>/images/<SMILES>.png`` and one
    spreadsheet row per PubChem hit is written to the ``results`` sheet of
    ``to_database.xlsx`` (SMILES, second input field, IUPAC name, molecular
    formula). Spreadsheet row 0 is left empty.
    """
    compounds = read_csv()
    # The workbook path is relative, so it is created in the current
    # working directory.
    workbook = xlsxwriter.Workbook(filename='to_database.xlsx')
    # Single sheet that receives all results.
    worksheet = workbook.add_worksheet(name='results')
    row = 1

    print('\nEstabelecendo conexão com o PubChem...')
    # One PubChem query per compound of the input table.
    for comp in compounds:

        # Fetch the compound data from PubChem.
        results = pcp.get_compounds(comp[0], 'smiles')
        # Download the structure image of the compound.
        pcp.download('PNG',
                     os.path.join(CURR_PATH, 'images', comp[0] + '.png'),
                     comp[0],
                     'smiles',
                     overwrite=True)

        # For every hit, write the SMILES, the aroma (second input field),
        # the IUPAC name and the molecular formula to the new sheet.
        for c in results:

            # A record without an IUPAC name yields None, which cannot be
            # concatenated to the message; print an empty name instead.
            print('\nComposto ' + (c.iupac_name or ''))
            worksheet.write(row, 0, comp[0])
            worksheet.write(row, 1, comp[1])
            worksheet.write(row, 2, c.iupac_name)
            worksheet.write(row, 3, c.molecular_formula)

            row += 1

    workbook.close()
    print('Pronto! Compostos Atualizados')
def getSMILES():
    """Download CanonicalSMILES tables for every drug in ``drug_list.txt``.

    Each input line is split on ``#``; field 1 names the output files,
    field 2 is the drug name and field 3 its PubChem CID. Two CSV files are
    requested per drug (``temp/<id>-name.csv`` by name and
    ``temp/<id>-cid.csv`` by CID). Every drug with at least one failed
    request gets a line in ``drug_miss.txt`` listing what failed.
    """
    import pubchempy as pcp
    # Context managers close both files even if a malformed line raises.
    with open("drug_list.txt", "r") as f_r, open("drug_miss.txt", "w") as f_e:
        for line in f_r:
            items = line.strip().split("#")
            error = items[1]
            print("downloading " + items[1] + "  " + items[2])
            try:
                pcp.download('CSV',
                             "temp/" + items[1] + "-name.csv",
                             items[2],
                             'name',
                             operation='property/CanonicalSMILES')
            except Exception:
                # Exception (not a bare except) keeps the run interruptible.
                print("name " + items[2])
                error += " name " + items[2]
            try:
                pcp.download('CSV',
                             "temp/" + items[1] + "-cid.csv", [int(items[3])],
                             operation='property/CanonicalSMILES')
            except Exception:
                print("cid " + items[3])
                error += ", \t\t cid " + items[3]
            # `error` only differs from the bare id when something failed.
            if error != items[1]:
                f_e.write(error + "\n")
Пример #5
0
 def _obtain_entry_api(self, search_text, name,
                       output_format) -> Optional[str]:
     """Resolve ``search_text`` by name on PubChem and fetch its first 3D hit.

     Depending on ``output_format`` (case-insensitive) the method prints
     the canonical SMILES (``smiles``), downloads an SDF and converts it
     with ``sdf_to_pdb`` (``pdb``), or downloads the record directly in the
     requested format. Files are written to ``self.write_dir`` as
     ``<name>_<cid>.<ext>``.

     Returns:
         The CID of the first hit as a string, or None when PubChem
         returned no CID.
     """
     matches = pcp.get_cids(search_text, "name", record_type="3d")
     if len(matches) == 0:
         print("No exact match found, please try the web search")
         return None

     cid = str(matches[0])
     wanted = output_format.lower()

     if wanted == "smiles":
         compound = pcp.Compound.from_cid(int(cid))
         print("SMILES code:", compound.canonical_smiles)
         return cid

     stem = name + "_" + cid
     if wanted == "pdb":
         # PDB is produced locally from the downloaded SDF.
         sdf_file = os.path.join(self.write_dir, stem + ".sdf")
         pdb_file = os.path.join(self.write_dir, stem + ".pdb")
         pcp.download("SDF", sdf_file, cid, record_type="3d", overwrite=True)
         sdf_to_pdb(sdf_file, pdb_file)
         return cid

     # Any other format is requested from PubChem as-is.
     target = os.path.join(self.write_dir, stem + "." + wanted)
     pcp.download(output_format.upper(),
                  target,
                  cid,
                  record_type="3d",
                  overwrite=True)
     return cid
Пример #6
0
def get_structure(BRENDA_PARSER, code, cutoff, dir_search):
    """Download the PubChem JSON record of an enzyme's most common substrate.

    The substrate is chosen by ``choose_substrate`` from the proteins that
    ``BRENDA_PARSER`` returns for ``code`` and is looked up on PubChem by
    name. The file ``<code>_<substrate>.json`` is downloaded next to this
    script and then moved into ``<script dir>/<dir_search>/``.

    Args:
        BRENDA_PARSER: Object exposing ``get_proteins(code)``.
        code: Identifier passed to ``BRENDA_PARSER.get_proteins``; also the
            prefix of the file name.
        cutoff: Limit forwarded to ``check_size_of_substrate``.
        dir_search: Target directory, relative to this script's directory.

    Returns:
        True if the file was downloaded and moved, False if the substrate is
        missing, is a placeholder name, fails the size check, or the
        download failed.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    print("GET STRUCTURE FILES...")
    proteins = BRENDA_PARSER.get_proteins(code)
    substrate, counted = choose_substrate(proteins)
    if substrate is None:
        print("No suitable substrate found, skip..")
        return False
    for s in counted:
        print(str(counted[s]), " : \t", s)
    print("\nmost common: ", substrate)
    # Both values are placeholders rather than compound names.
    if substrate in ("several substrates", "more"):
        print("\nProtein uses several substrates, skip it..")
        return False
    if check_size_of_substrate(substrate, cutoff) == False:
        print('\nStrukture of Substrate is too big, skip it..')
        return False
    file = f'{code}_{substrate}.json'
    # Download to the script directory explicitly: the move below reads from
    # there, so a cwd-relative download breaks whenever the process is
    # started from another directory.
    download_path = f"{dir_path}/{file}"
    try:
        pcp.download('JSON', download_path, substrate, 'name')
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still stop a
        # long batch run.
        print("substrate not found..")
        return False
    os.replace(download_path, f"{dir_path}/{dir_search}/{file}")
    return True
Пример #7
0
def download_sdf_from_cid(cid, save_dir):
    """Download the SDF record of ``cid`` to ``<save_dir>/CID_<cid>.sdf``.

    The download is skipped when the target file already exists. This is a
    best-effort helper: any error (bad ``cid`` type, network failure,
    unwritable directory) is swallowed and the function returns None.

    Args:
        cid: Integer PubChem compound id (formatted with ``%d``).
        save_dir: Directory that receives the file.
    """
    try:
        save_file = "%s/CID_%d.sdf" % (save_dir, cid)
        if not os.path.exists(save_file):
            pcp.download('SDF', save_file, cid, 'cid')
    except Exception:
        # Deliberately silent, but Exception instead of a bare except keeps
        # KeyboardInterrupt / SystemExit working when this runs in a loop.
        return
 def img_downloader(cid, nsc):
     """
         Save the PubChem PNG of ``cid`` as ``graph/<nsc>.png``.

         Nothing happens when ``nsc`` is the marker string 'not_found'.
         After a download the shared counter ``cnt[0]`` is reported and
         incremented.
     """
     if nsc == 'not_found':
         return
     target = 'graph/' + nsc + '.png'
     pcp.download('PNG', target, cid, overwrite=True)
     print("Item " + str(cnt[0]) + " is completed")
     cnt[0] += 1
Пример #9
0
def downloadData():
    """Download PubChem property tables for CID_LOW..CID_HIGH in CSV chunks.

    The CID range is walked in chunks of ``STEP`` ids. Chunk number ``ctr``
    is written to ``<CSVFILE>-<ctr>.csv`` and contains the properties named
    in ``LABELS``. A progress line is printed after every chunk.
    """
    i = CID_LOW
    ctr = 1
    while i <= CID_HIGH:
        # The chunk size was hard-coded as 500 while the progress message
        # divided by STEP; use STEP for both. The last chunk is clamped so
        # no CIDs above CID_HIGH are requested.
        upper = min(i + STEP, CID_HIGH + 1)
        pcp.download('CSV', '{}-{}.csv'.format(CSVFILE, ctr), range(i, upper),
                operation = 'property/{}'.format(LABELS))
        # Single-argument print() behaves the same on Python 2 and 3.
        print('step {}/{}'.format(ctr, int((CID_HIGH - CID_LOW) / STEP)))
        i += STEP
        ctr += 1
Пример #10
0
 def _get_mol_image(self, mol_name, path='.', image_size=200):
     file_path = os.path.join(path, mol_name + '.png')
     image_size_str = str(image_size) + "x" + str(image_size)
     try:
         os.remove(mol_name + '.png')
     except:
         pass
     pcp.download('PNG',
                  file_path,
                  mol_name,
                  'name',
                  image_size=image_size_str)
Пример #11
0
def pubchem_cid_to_sdf(cid, cleanup_3d=True):
    """
    Fetch the SDF record of a PubChem CID and return it as an RDKit molecule.

    The record is downloaded into a temporary directory, its first entry is
    read with sanitization enabled and explicit hydrogens are added. With
    ``cleanup_3d`` the molecule is additionally embedded with ETKDG.
    """
    with tempfile.TemporaryDirectory() as workdir:
        sdf_path = f'{workdir}/test.sdf'
        pcp.download('SDF', sdf_path, cid, 'cid', overwrite=True)
        first_record = Chem.SDMolSupplier(sdf_path, sanitize=True)[0]
        with_hydrogens = Chem.AddHs(first_record)
        if cleanup_3d:
            AllChem.EmbedMolecule(with_hydrogens, AllChem.ETKDG())
        return with_hydrogens
def getPubChemKey():
    """Download InChIKey tables for the two CID columns of the input CSV.

    ``smile_inconsistant.csv`` is split on commas; columns 1 and 2 (with
    surrounding double quotes stripped) are collected as two CID lists.
    The InChIKeys of the first list are written to ``name_keys.csv`` and
    those of the second to ``cid_keys.csv``.
    """
    cid1 = []
    cid2 = []
    # The context manager closes the file, which was left open before.
    with open("smile_inconsistant.csv", "r") as f:
        for line in f:
            items = line.strip().split(",")
            cid1.append(items[1].strip("\""))
            cid2.append(items[2].strip("\""))
    import pubchempy as pcp
    pcp.download('CSV', 'name_keys.csv', cid1, operation='property/InChIKey')
    pcp.download('CSV', 'cid_keys.csv', cid2, operation='property/InChIKey')
Пример #13
0
def get_sdf(study_location, cid, iupac, sdf_file_list, final_inchi):
    """Download the SDF of ``cid`` into ``study_location`` and record it.

    When both ``study_location`` and ``cid`` are truthy, the file
    ``"<cid> - <iupac>.sdf"`` is downloaded (overwriting any existing copy)
    and ``[file_name, classyfire_id]`` is appended to ``sdf_file_list``.
    The id comes from ``classyfire(final_inchi)`` and is ``''`` when no
    InChI is given. A missing name is replaced by 'no name given'.

    Returns:
        ``sdf_file_list`` (the same list object, possibly extended).
    """
    if not (study_location and cid):
        return sdf_file_list

    if not iupac or len(iupac) < 1:
        iupac = 'no name given'

    message = "    -- Getting SDF for CID " + str(cid) + " for name: " + iupac
    logger.info(message)
    print(message)

    file_name = str(cid) + ' - ' + iupac + '.sdf'
    pcp.download('SDF', study_location + '/' + file_name, cid, overwrite=True)

    classifyre_id = classyfire(final_inchi) if final_inchi else ''
    sdf_file_list.append([file_name, classifyre_id])

    return sdf_file_list
Пример #14
0
def pubchemsearch(ID, key):
    """Query PubChem for ``ID`` and dump a text report and image per hit.

    A result directory is created, the hits for ``ID`` in namespace ``key``
    are fetched, and for every hit the SMILES, formula, weight, IUPAC name,
    fingerprint and synonyms are printed. The same fields are appended to
    ``<newdir>/<hit>.txt`` and a PNG is saved as ``<newdir>/<hit>.png``.

    The syntax is valid on both Python 2 and Python 3 (``0o755`` literal,
    single-argument ``print()``, bytes written to the binary file).

    Args:
        ID: Identifier to search for; also part of the directory name.
        key: PubChem namespace of ``ID`` (passed to ``pcp.get_compounds``).
    """
    # NOTE(review): `i` is not defined in this function; it is read from
    # module scope — confirm where it is set.
    newdir = "/Users/ahmed.mahmoud/Documents/" + ID + "_" + str(i)
    os.makedirs(newdir, 0o755)
    results = pcp.get_compounds(ID, key)
    print('There are ' + str(len(results)) + " Hits That Match to " + ID + ": ")
    print(results)
    dash = "------------------------------------------\n"
    banner = "###########################################\n"
    for count, c in enumerate(results, 1):
        hit = " Hit " + str(count)
        label = str(c)
        # Every piece is built once and reused for console and file output.
        smiles_head = hit + ": SMILES Annotation for " + label + "\n"
        smiles = str(c.isomeric_smiles) + "\n"
        formula_head = hit + ": Formula for " + label + "\n"
        formula = str(c.molecular_formula) + "\n"
        weight_head = hit + ": Weight for " + label + "\n"
        weight = str(c.molecular_weight) + "\n"
        iupac_head = hit + ": IUPAC for " + label + "\n"
        iupac = str(c.iupac_name) + "\n"
        fingerprint_head = hit + ": Fingerprint for " + label + "\n"
        fingerprint = str(c.fingerprint) + "\n"
        synonyms_head = hit + ": Synonyms for " + label + "\n"
        synonyms = str(c.synonyms)

        console_chunks = (
            banner,
            smiles_head, smiles, dash,
            formula_head, formula, dash,
            weight_head, weight, dash,
            iupac_head, iupac, dash,
            fingerprint_head, fingerprint,
            synonyms_head, synonyms + "\n",
            banner,
        )
        for chunk in console_chunks:
            print(chunk)

        # The file layout is kept exactly as before; it differs slightly
        # from the console output (dash before the IUPAC name, no closing
        # banner).
        text = (banner
                + smiles_head + smiles + dash
                + formula_head + formula + dash
                + weight_head + weight + dash
                + iupac_head + dash + iupac
                + fingerprint_head + fingerprint
                + synonyms_head + synonyms)
        payload = newdir + text
        if not isinstance(payload, bytes):
            # The file is opened in binary mode; text must be encoded on
            # Python 3 (and for unicode input on Python 2).
            payload = payload.encode('utf-8')
        with open(newdir + "/" + label + ".txt", 'ab') as out:
            out.write(payload)
        # NOTE(review): the image is requested for ID/key, not for the
        # individual hit, so every hit gets the same picture — confirm
        # whether that is intended.
        pcp.download('PNG', newdir + "/" + label + '.png', ID, key)
Пример #15
0
    def download_files_rest(self, cids, pngs=True, download_parents=False):
        """
        Download an SDF (and optionally a PNG) for every CID in ``cids``
        through ``pcp.download``.

        Files are named ``<cid>_<n>.<ext>``, where ``n`` is a counter that
        increases with every downloaded file across all categories and
        formats. They are written to ``self.base_dir``, or to the
        category's entry of ``self.sub_dirs`` below it when ``sub_dirs`` is
        set. Existing files are overwritten.

        :param cids: A dict {"category": {id:[ids]}}, where each category is a
        molecular type of interest.
        :param pngs: If True, PNG files will be downloaded alongside SDF files.
        :param download_parents: If True, then files will be downloaded for
        parent molecules, in addition to the molecules returned from their
        queries.
        :return: None
        """
        wanted_formats = ["SDF", "PNG"] if pngs else ["SDF"]
        order = 0

        for cat, by_parent in cids.items():
            if self.sub_dirs is None:
                cat_path = os.path.join(self.base_dir)
            else:
                cat_path = os.path.join(self.base_dir, self.sub_dirs[cat])

            # Flatten the category into one download list, parents first.
            download_ids = []
            for parent, children in by_parent.items():
                if download_parents:
                    download_ids.append(parent)
                download_ids.extend(children)

            for fmt in wanted_formats:
                extension = fmt.lower()
                for cid in download_ids:
                    filename = str(cid) + "_" + str(order) + "." + extension
                    pcp.download(fmt,
                                 os.path.join(cat_path, filename),
                                 cid,
                                 overwrite=True)
                    order += 1
Пример #16
0
def _download_pngs_from_list(list_path, out_dir):
    """Download one PNG per ``name,cid`` line of ``list_path`` into ``out_dir``."""
    # The context manager closes the list file even if a download raises.
    with open(list_path, 'r') as f:
        for line in f:
            items = line.strip().split(",")
            print(items[0])
            # Skip rows without a CID. A blank line or a row without a comma
            # has no second field at all and used to raise IndexError.
            if len(items) < 2 or items[1].strip() == '':
                continue
            pcp.download("PNG", out_dir + items[0].strip(), items[1].strip(), 'cid')


def downloadPNG():
    """Download PubChem images for the compounds of both SMILES lists.

    ``data/SmileByName.txt`` and ``data/SmileByCid.txt`` hold one
    ``name,cid`` pair per line. The first field is printed and used as the
    file name inside ``data/ByName/`` and ``data/ByCid/`` respectively; the
    second field is the CID to download. Rows without a CID are skipped.
    """
    _download_pngs_from_list("data/SmileByName.txt", "data/ByName/")
    print("ByName finished")
    _download_pngs_from_list("data/SmileByCid.txt", "data/ByCid/")
    print("ByCid finished")
Пример #17
0
def get_descriptors_pubchem(cid):
    """Compute descriptor values for the PubChem compound ``cid``.

    The SDF record is cached as ``data/<cid>.sdf`` (downloaded only when
    the file is missing). Its first molecule is passed to
    ``run_all_functions_in_module`` for ``Descriptors`` and
    ``Descriptors3D`` and the two results are merged.

    Returns:
        The merged result, or 0 when PubChem returns no compound for
        ``cid``.
    """
    if not pcp.get_compounds(cid):
        return 0

    sdf_pth = 'data/{}.sdf'.format(cid)
    if not os.path.exists(sdf_pth):
        pcp.download('SDF', sdf_pth, cid, 'cid')

    # Only the first record of the file is used.
    mol = next(Chem.SDMolSupplier(sdf_pth))

    descriptors = run_all_functions_in_module(Descriptors, mol,
                                              ['PropertyFunctor'], '_')
    descriptors_3d = run_all_functions_in_module(Descriptors3D, mol, None, '_')
    descriptors.update(descriptors_3d)
    return descriptors
Пример #18
0
def calculate_drug_pixel_data(drugs_with_smiles_file):
    """Turn the PubChem image of every drug into a row of 10000 pixel values.

    For each ``cid`` of the input CSV the PNG is downloaded to ``drug.png``,
    converted to greyscale, binarised, downsampled to 100x100, flattened and
    passed through ``scale_array``. The result, with columns ``drug_id``,
    ``cid`` and ``pixel0`` .. ``pixel9999``, is written to
    ``./Data/Clean/drug_pixels.csv``.

    Args:
        drugs_with_smiles_file: CSV file with at least the columns ``cid``
            and ``drug_id``.
    """
    print("Calculating pixel data...")
    drugs_with_smiles_df = pd.read_csv(drugs_with_smiles_file)
    cid_lst = list(drugs_with_smiles_df['cid'])
    pixels_dict = OrderedDict()
    for i in range(10000):
        pixels_dict[f'pixel{i}'] = []
    for cid in cid_lst:
        # Download the picture of the compound from PubChem
        pcp.download('PNG', 'drug.png', int(cid), 'cid', overwrite=True)
        # Convert to single-channel greyscale
        img = Image.open('drug.png').convert('L')
        # Get the pixel data as a numpy array
        pixels = np.array(img)
        # The background for these images is grey and not white
        # Turn all grey pixels into white pixels
        pixels[pixels == 245] = 255
        # Make any non-grey pixel completely black
        # This ensures that all atoms and bonds have the same pixel intensity
        pixels[pixels < 245] = 0
        # Downsample using antialiasing to 100 by 100 pixels.
        # Image.ANTIALIAS was an alias of LANCZOS and was removed in
        # Pillow 10; Image.LANCZOS selects the same filter on old and new
        # releases.
        img = Image.fromarray(pixels)
        img = img.resize((100, 100), Image.LANCZOS)
        # Grab pixel data again
        pixels = np.array(img)
        # Flatten
        pixels = pixels.flatten()
        # Scale
        pixels = scale_array(pixels)
        for i, pixel in enumerate(pixels):
            pixels_dict[f'pixel{i}'].append(pixel)

    drug_id_lst = list(drugs_with_smiles_df['drug_id'])
    # The id columns are added last and then moved to the front, giving the
    # column order drug_id, cid, pixel0, ...
    pixels_dict['cid'] = cid_lst
    pixels_dict.move_to_end('cid', last=False)
    pixels_dict['drug_id'] = drug_id_lst
    pixels_dict.move_to_end('drug_id', last=False)
    drug_pixel_df = pd.DataFrame(data=pixels_dict)
    drug_pixel_df.to_csv('./Data/Clean/drug_pixels.csv', index=False)
Пример #19
0
import pandas as pd
import sys
import matplotlib.pyplot as plt
from functools import reduce
import pubchempy

# look for specific structures based on fingerprints

# Load the tab-separated fingerprint table, then drop every column that
# contains at least one missing value.
invitrodbv2_fp = pd.read_csv(
    '/home/rlougee/Desktop/primary_data/invitrodbv2_fullfp.tsv', sep='\t')
invitrodbv2_fp = invitrodbv2_fp.dropna(axis=1)


def smile_from_txp(txplist):
    """Return the rows of ``invitrodbv2_fp`` in which every listed column is 1.

    Args:
        txplist: Column names of ``invitrodbv2_fp`` that must all equal 1.

    Returns:
        The matching rows as a DataFrame. An empty ``txplist`` imposes no
        condition and returns a copy of the whole table.
    """
    # Combine the per-column masks directly instead of assembling source
    # text for eval(): that broke on an empty list, on column names
    # containing a quote, and shadowed the builtin ``str``.
    conditions = [invitrodbv2_fp[column] == 1 for column in txplist]
    if not conditions:
        return invitrodbv2_fp.copy()
    return invitrodbv2_fp[reduce(lambda left, right: left & right, conditions)]


# print(invitrodbv2_fp.columns)
# print(smile_from_txp(['Txp-123', 'Txp-124']).columns)
# Download a PNG for every SMILES of the rows that have 'Txp-338' set; the
# files are numbered by their position in the filtered table.
for n, i in enumerate(smile_from_txp(['Txp-338'])['smiles']):
    pubchempy.download('png', '/home/rlougee/Desktop/CID_pix/{}.png'.format(n),
                       i, 'smiles')
Пример #20
0
            label)

    # Save the Ensembl protein IDs (ENSP) of this fold, one per line, for
    # use with node2vec.
    with open(args.input + '/cv_' + str(i) + args.data + '_proIDs.txt',
              mode='w') as f:
        f.write('\n'.join(data['protein']))

    # Save the PubChem IDs of this fold as an int32 array for use with
    # node2vec.
    cid = np.array(data['chemical'], dtype='int32')
    np.save(args.input + '/cv_' + str(i) + args.data + '_chemIDs.npy', cid)

    # Fetch the IsomericSMILES property of every PubChem ID as a CSV table.
    # NOTE(review): this path has a '/' after the fold number while the
    # other outputs append args.data directly — confirm the directory exists.
    c_id = data.chemical.tolist()
    pcp.download('CSV',
                 args.input + '/cv_' + str(i) + '/ismilesref.csv',
                 c_id,
                 operation='property/IsomericSMILES',
                 overwrite=True)
    smileb = pd.read_csv(args.input + '/cv_' + str(i) + '/ismilesref.csv')
    # Round-trip every SMILES through RDKit to get its isomeric,
    # non-kekulized form.
    smib = []
    for j in smileb['IsomericSMILES']:
        smib.append(
            Chem.MolToSmiles(Chem.MolFromSmiles(j),
                             kekuleSmiles=False,
                             isomericSmiles=True))
    with open(args.input + '/cv_' + str(i) + args.data + '.smiles',
              mode='w') as f:
        f.write('\n'.join(smib))
    # Read the written SMILES back as an RDKit supplier (presumably the
    # input of an ECFP step that is not visible here).
    file_smiles = args.input + '/cv_' + str(i) + args.data + '.smiles'
    smi = Chem.SmilesMolSupplier(file_smiles, delimiter='\t', titleLine=False)
    a = pcp.get_compounds(i, 'name')  #Retrieve the PUBCHEM ID of the compounds
    Compounds[i] = a
len(Compounds)

# Names for which the PubChem lookup returned no compound.
no_result = [name for name in Compounds if Compounds[name] == []]

len(no_result)

# Remove the names without a hit. Iterating the separate no_result list
# lets the dictionary be modified safely and replaces the nested loop over
# every key/name pair.
for name in no_result:
    del Compounds[name]

len(Compounds)

# Download the 3D SDF of every remaining compound; names without an
# available 3D structure (or with any other download error) are printed.
for name in Compounds:
    try:
        pcp.download('SDF', f'{name}.sdf', name, 'name', record_type='3d')
    except Exception:
        # Exception (not a bare except) keeps the loop interruptible.
        print(name)

no_result  # Display the compounds for which PubChemPy found no result
# Task 1

import pubchempy as pcp
import sys
from rdkit import Chem

# Every command-line argument after the script name is one PubChem CID.
cids_str = sys.argv
cids = []
for cid in map(int, cids_str[1:]):
    cids.append(cid)

# Fetch all requested compounds into a single SDF file.
pcp.download('SDF', 'output.sdf', cids, 'cid', overwrite=True)

# Download file in CSV format with some chosen features
pcp.download('CSV',
             'output.csv',
             cids,
             operation='property/\
MolecularFormula,\
MolecularWeight,\
CanonicalSMILES,\
IUPACName,\
XLogP,\
ExactMass,\
MonoisotopicMass,\
TPSA,\
Complexity,\
    .format(x))

# Download one SDF per row of the SwissADME table: the SMILES is resolved
# to a CID and the file is named after the "Similar Molecule" column, or
# after the SMILES itself when that cell is empty.
df = pd.read_csv('{}_swissADME.csv'.format(x))
count = 0
for i, (smiles, similar) in enumerate(zip(df['SMILES'], df['Similar Molecule'])):
    try:
        cid = pcp.get_compounds(identifier=smiles, namespace='smiles')[0].cid
    except Exception:
        # Exception (not a bare except) keeps the loop interruptible.
        print('error: {}'.format(str(similar)))
        continue

    # Empty cells are read as NaN. pd.isna also recognises numpy float
    # NaN, which the former `type(...) == float` comparison missed when the
    # whole column was empty.
    if pd.isna(similar):
        name = str(smiles) + '.sdf'
    else:
        name = str(similar) + '.sdf'
    print(i, name)

    try:
        # No overwrite is requested, so an existing file of the same name
        # also ends up in the error branch.
        pcp.download('SDF', name, cid, 'cid')
    except Exception:
        print('error - Duplicate: {}'.format(similar))

#x = pcp.get_compounds(identifier = 'Nc1nc2n(COCCO)cnc2c(=O)[nH]1',
#                      namespace = 'smiles')[0]
#cid = x.cid

#y = pcp.get_sdf(identifier = str(cid))

#pcp.download('SDF' , 'Acyclovir.sdf' , cid , 'cid')
Пример #24
0
def _print_compound_table(c):
  """Print a table with formula, weight, IUPAC name and SMILES of compound ``c``."""
  from prettytable import PrettyTable
  table0 = PrettyTable(['Property','Value'])
  table0.add_row(['Molecular Formula',c.molecular_formula])
  table0.add_row(['Molecular Weight',c.molecular_weight])
  # textwrap.fill needs a string; a record without a name or SMILES yields
  # None, so an empty cell is shown instead of crashing.
  table0.add_row(['IUPAC name',textwrap.fill(c.iupac_name or '',25)])
  table0.add_row(['Isomeric smiles',textwrap.fill(c.isomeric_smiles or '',25)])
  print(table0)


def detect_fg(img_path=None,out_path = 'detected_img.png',save_file=False,threshold=0.50,save_fig=False,coumpound_id_number=None):
  """Run the functional-group detector on a molecule image and show the result.

  Without ``img_path`` a random PubChem compound is downloaded to
  ``/content/random_compound.png`` and used as input. Compound details are
  printed when the CID is known: for the random compound, when the image
  file name is purely numeric (and no explicit CID is given), and when
  ``coumpound_id_number`` is passed. The image is converted to greyscale,
  passed to ``detect_fn``, and every detection above ``threshold`` is
  listed in a table and drawn onto a copy of the image. Both images are
  shown side by side with matplotlib.

  Args:
    img_path: Path of the image to analyse, or None for a random compound.
    out_path: Output path used by ``save_file`` and ``save_fig``.
    save_file: If true, write the annotated image to ``out_path`` with cv2.
    threshold: Minimum detection score (0-1) for a detection to be listed
      and drawn.
    save_fig: If true, save the two-panel matplotlib figure to ``out_path``.
    coumpound_id_number: Optional PubChem CID whose details are printed.

  Returns:
    None.
  """
  script_start = time.time()

  from prettytable import PrettyTable

  # selecting random molecule if img_path is None
  if img_path is None:

    ## getting a random cid between a random lower and a random upper bound.
    cid = random.randrange(random.randrange(25,15000),random.randrange(15000,150000))
    c = pcp.Compound.from_cid(cid)

    ## some text printed
    print('No image path given, selecting a random molecule from pubchem with cid : '
    +str(cid)
    +'\n\n Here are some details about the compound : \n')

    # the random molecule is downloaded.
    pcp.download('PNG', '/content/random_compound.png', cid, overwrite=True)

    # image path is set.
    img_path = '/content/random_compound.png'

    # table with chemical characteristics of the compound.
    _print_compound_table(c)

  # file name without directory and extension
  file_stem = img_path.split('/')[-1].split('.')[0]

  # if the file name is only a number, treat it as the cid.
  if file_stem.isdigit() and coumpound_id_number is None:
    cid = int(file_stem)
    print('Getting compound details for the cid number : '+str(cid)+' from PubChem.')
    c = pcp.Compound.from_cid(cid)
    _print_compound_table(c)

  if coumpound_id_number is not None:
    cid = coumpound_id_number
    c = pcp.Compound.from_cid(cid)
    print('Compound details for cid number : '+str(cid)+' obtained from PubChem\n')
    _print_compound_table(c)

  # starting detection
  print('\nDetecting on '+img_path.split('/')[-1]+'\n')

  img = keras.preprocessing.image.load_img(img_path, target_size=None)
  image_np = keras.preprocessing.image.img_to_array(img)

  # Convert image to grayscale: average the channels and repeat the result
  # three times so the array keeps three channels.
  image_np = np.tile(
      np.mean(image_np, 2, keepdims=True), (1, 1, 3)).astype(np.uint8)

  #generate the input tensor (adds a leading batch axis)
  input_tensor = tf.convert_to_tensor(
      np.expand_dims(image_np, 0), dtype=tf.float32)

  #detect (only this call is timed as detection time)
  start_time = time.time()
  detections, predictions_dict, shapes = detect_fn(input_tensor)
  end_time = time.time()

  time_taken = end_time-start_time

  # class ids from the detector are shifted by this offset before they are
  # looked up in category_index
  label_id_offset = 1
  #making a copy of image to draw on
  image_np_with_detections = image_np.copy()

  # first (only) batch entry of each detection output
  detection_scores = detections['detection_scores'][0]
  detection_classes = detections['detection_classes'][0]
  detection_boxes = detections['detection_boxes'][0].numpy()

  # Making a table of detections
  table1 = PrettyTable(['Functional Group',
                        'Confidence (%)',
                        'Bounding Box Coordinates'])

  for i in range(len(detection_scores)):
    score = detection_scores[i].numpy()
    if score > threshold:
      table1.add_row([category_index[detection_classes[i].numpy()+label_id_offset]['name'],
                      score*100,
                      str(np.round(detection_boxes[i]*100,3))])

  print(table1)

  #plotting (draws the boxes onto image_np_with_detections)
  img_detected = viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_detections,
        detection_boxes,
        (detection_classes.numpy() + label_id_offset).astype(int),
        detection_scores.numpy(),
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=200,
        min_score_thresh=threshold,
        agnostic_mode=False,
  )

  if save_file:
    cv2.imwrite(out_path,img_detected)

  fig,ax = plt.subplots(1,2,figsize=(14,6))
  ax[0].imshow(img)
  ax[0].set_title('Original Image')
  ax[1].imshow(image_np_with_detections)
  ax[1].set_title('Image with detections')
  # NOTE(review): save_fig writes to the same out_path as save_file, so
  # enabling both overwrites the annotated image with the figure.
  if save_fig:
    fig.savefig(out_path)
  plt.show()
  script_end = time.time()
  print('Time taken for detection : '+str(round(time_taken,4))+'s\n')
  print('Time taken for whole script : '+str(round(script_end-script_start,4))+'s')
Пример #25
0
## GET TRAINING DATA FEATURES
# Python 2 script section (print statements, httplib/urllib2).
# Switch for the per-compound SDF download in the loop below.
# NOTE(review): predictor_dict is only filled inside that branch, so with
# get_sdf = False it stays empty and the statistics below fail.
get_sdf = True
# Maps the second tab-separated field of each training line to the float
# value of its third field.
predictor_dict = {}
# Create the download directory; an OSError (e.g. it already exists) is
# ignored.
try:
    os.mkdir(PATH+'/temp_training_cpd_sdf/')
except OSError:
    pass
with open(_training_data) as fin:
    # The first line is read separately and is not parsed as data.
    header = fin.readline().strip()
    for line in fin:
        line = line.strip()
        larray = line.split('\t')
        # print (line)
        if get_sdf:
            try:
                # The value is recorded only after a successful download;
                # setdefault keeps the first value for a repeated compound.
                pcp.download('SDF', PATH+'/temp_training_cpd_sdf/{}.sdf'.format(larray[1]), larray[1], overwrite=True)
                predictor_dict.setdefault(larray[1], float(larray[2]))
            except (pcp.PubChemHTTPError, httplib.BadStatusLine, urllib2.URLError):
                # Compounds whose download fails are reported and skipped.
                print line + ' passed'
                pass

###MEDIAN, MEAN, AND STANDARD DEVIATION OF TRAINING VALUES
median_value_training = statistics.median(predictor_dict.values())
mean_value_training = statistics.mean(predictor_dict.values()) 
stdev_value_training = statistics.stdev(predictor_dict.values()) 

print "The median value is: ", median_value_training 
print "The mean value is: ", mean_value_training 
print "The standard deviation value is: ", stdev_value_training

Пример #26
0
 def get_mol_SDF(self, mol_name, path='.'):
     """Download the SDF of ``mol_name`` (looked up by name) to ``<path>/<mol_name>.sdf``.

     An existing file at that location is overwritten.
     """
     sdf_name = mol_name + '.sdf'
     target = os.path.join(path, sdf_name)
     pcp.download('SDF', target, mol_name, 'name', overwrite=True)
Пример #27
0
 def load_img(self):
     """Return the media-relative path of this compound's PNG, downloading it if missing.

     The image lives at ``./media/chemical_pics/<cid>.png``; it is fetched
     from PubChem only when no file exists there. The returned path omits
     the ``./media`` prefix.
     """
     img_path = f'/chemical_pics/{self.compound.cid}.png'
     local_file = './media' + img_path
     if os.path.isfile(local_file):
         return img_path
     pcp.download('PNG', local_file, self.compound.cid, 'cid')
     return img_path
Пример #28
0
import pubchempy as pcp
from PIL import Image
import numpy as np

# Fetch the depiction of CID 10096043 and load it as single-channel
# greyscale; a copy of the greyscale image is kept for inspection.
pcp.download('PNG', 'drug.png', 10096043, 'cid', overwrite=True)
img = Image.open('drug.png')
img = img.convert('L')
img.save('./greyscale.png')

# Binarise: the value 245 becomes white, everything darker becomes black.
pixels = np.array(img)
background = pixels == 245
pixels[background] = 255
foreground = pixels < 245
pixels[foreground] = 0

# One flat vector of pixel values.
pixels = pixels.flatten()

Пример #29
0
#print(cids)

#c = pcp.Compound.from_cid(cids[0])

#structure = c.inchi
#print(structure)

# For every compound name: resolve it on PubChem, save its picture and
# report which functional groups id_fg recognises.
for cmp in cmps:
    print(cmp)
    # Only the first CID returned for the name is used.
    cid = pcp.get_cids(cmp, 'name')[0]
    c = pcp.Compound.from_cid(cid)
    print(c.cid)
    image_path = 'images/' + cmp.replace(" ", "_") + '.png'
    pcp.download('PNG', image_path, c.cid, 'cid', overwrite=True)
    m = Chem.MolFromInchi(c.inchi)
    # (printed label, name of the id_fg predicate) in output order.
    for label, predicate_name in (
            ("Alcohol: ", "is_alcohol"),
            ("COOH: ", "is_cooh"),
            ("Ketone: ", "is_ketone"),
            ("Ether: ", "is_ether"),
            ('Ester: ', "is_ester"),
            ("Anhydride: ", "is_anhydride"),
            ("Aldehyde: ", "is_aldehyde"),
    ):
        print(label, getattr(id_fg, predicate_name)(m))
Пример #30
0
""" Fix/clean the FAb apo protein and save it """
fixer = PDBFixer(f'{PDB_DIR}/{SEED_PDB}.fab.pdb')
# Repair residues first, then strip heterogens, then rebuild atoms.
fixer.findMissingResidues()
fixer.findNonstandardResidues()
fixer.replaceNonstandardResidues()
fixer.removeHeterogens(True)
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.addMissingHydrogens(7.0)

fixed_pdb_path = f'{PDB_DIR}/{SEED_PDB}.fab.fixed.pdb'
with open(fixed_pdb_path, 'w+') as outfile:
    PDBFile.writeFile(fixer.topology, fixer.positions, outfile)

# Download/save target ligand (PubChem CID: 2978)
# cpd_2978 = pcp.Compound.from_cid(TARGET_CID)
target_sdf_path = f'{SDF_DIR}/{TARGET_CID}.sdf'
pcp.download('SDF', target_sdf_path, TARGET_CID, overwrite=True)

# Align target with 1MFA ligand by substructure match
target_2978 = PandasTools.LoadSDF(
    target_sdf_path, smilesName='SMILES', molColName='Mol')

molREFRC = AllChem.MolFromPDBFile(f'{PDB_DIR}/{SEED_PDB}.lig.pdb')
# The probe gets explicit hydrogens, 3D coordinates and a UFF
# optimisation before it is compared with the reference ligand.
molPROBE = Chem.AddHs(target_2978.Mol[0])
AllChem.EmbedMolecule(molPROBE)
AllChem.UFFOptimizeMolecule(molPROBE)

mols = [molREFRC, molPROBE]
mcs = rdFMCS.FindMCS(mols,
                     threshold=0.8,
                     completeRingsOnly=True,
                     ringMatchesRingOnly=True)
mcsPattern = Chem.MolFromSmarts(mcs.smartsString)