class TestChebiEntity(unittest.TestCase): '''COMMENT''' def setUp(self): '''COMMENT''' self.__existing = ChebiEntity('4167') self.__secondary = ChebiEntity('CHEBI:5585') def test_get_non_existing(self): '''COMMENT''' self.assertRaises(ChebiException, ChebiEntity, '-1') def test_get_id_existing(self): '''COMMENT''' self.assertTrue(self.__existing.get_id() == 'CHEBI:4167') def test_get_id_secondary(self): '''COMMENT''' self.assertTrue(self.__secondary.get_id() == 'CHEBI:5585') def test_get_formulae_existing(self): '''COMMENT''' this_formula = Formula('C6H12O6', 'KEGG COMPOUND') self.assertTrue(this_formula in self.__existing.get_formulae()) def test_get_formulae_secondary(self): '''COMMENT''' this_formula = Formula('H2O', 'ChEBI') self.assertTrue(this_formula in self.__secondary.get_formulae()) def test_get_formula_existing(self): '''COMMENT''' self.assertTrue(self.__existing.get_formula() == 'C6H12O6') def test_get_formula_secondary(self): '''COMMENT''' self.assertTrue(self.__secondary.get_formula() == 'H2O') def test_get_mass_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_mass(), 180.15588) def test_get_mass_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_mass(), 18.01530) def test_get_charge_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_charge(), 0) def test_get_charge_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_charge(), 0) def test_get_charge_secondary2(self): '''COMMENT''' self.assertEquals(-2, ChebiEntity('43474').get_charge()) def test_get_comments_existing(self): '''COMMENT''' this_chebi_entity = ChebiEntity('29044') this_comment = Comment('General', 'General', 'The substituent name \'3-oxoprop-2-enyl\' is ' 'incorrect but is used in various databases.', datetime.datetime.strptime('2005-03-18', '%Y-%M-%d')) self.assertTrue(this_comment in this_chebi_entity.get_comments()) def test_get_comments_secondary(self): '''COMMENT''' this_chebi_entity = ChebiEntity('11505') 
this_comment = Comment('General', 'General', 'The substituent name \'3-oxoprop-2-enyl\' is ' 'incorrect but is used in various databases.', datetime.datetime.strptime('2005-03-18', '%Y-%M-%d')) self.assertTrue(this_comment in this_chebi_entity.get_comments()) def test_get_source_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_source(), 'KEGG COMPOUND') def test_get_source_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_source(), 'KEGG COMPOUND') def test_get_prnt_id_existing(self): '''COMMENT''' self.assertIsNone(self.__existing.get_parent_id()) def test_get_prnt_id_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_parent_id(), 'CHEBI:15377') def test_get_name_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_name(), 'D-glucopyranose') def test_get_name_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_name(), 'water') def test_get_definition_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_definition(), 'A glucopyranose having D-configuration.') def test_get_definition_secondary(self): '''COMMENT''' this_chebi_entity = ChebiEntity('41140') self.assertEqual(this_chebi_entity.get_definition(), 'D-Glucopyranose with beta configuration at the ' 'anomeric centre.') def test_get_mod_on_existing(self): '''COMMENT''' self.assertTrue(self.__existing.get_modified_on() > datetime.datetime.strptime('2014-01-01', '%Y-%M-%d')) def test_get_mod_on_secondary(self): '''COMMENT''' self.assertIsNotNone(self.__secondary.get_modified_on()) def test_get_created_by_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_created_by(), 'CHEBI') def test_get_created_by_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_created_by(), 'ops$mennis') def test_get_star_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_star(), 3) def test_get_star_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_star(), 3) def 
test_get_db_acc_existing(self): '''COMMENT''' dat_acc = DatabaseAccession('MetaCyc accession', 'D-Glucose', 'MetaCyc') self.assertTrue(dat_acc in self.__existing.get_database_accessions()) def test_get_db_acc_secondary(self): '''COMMENT''' dat_acc = DatabaseAccession('MetaCyc accession', 'WATER', 'MetaCyc') self.assertTrue(dat_acc in self.__secondary.get_database_accessions()) def test_get_inchi_existing(self): '''COMMENT''' inchi = 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/' + \ 'h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1' self.assertEqual(self.__existing.get_inchi(), inchi) def test_get_inchi_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_inchi(), 'InChI=1S/H2O/h1H2') def test_get_inchi_key_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_inchi_key(), 'WQZGKKKJIJFFOK-GASJEMHNSA-N') def test_get_inchi_key_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_inchi_key(), 'XLYOFNOQVPJJNP-UHFFFAOYSA-N') def test_get_smiles_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_smiles(), 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O') def test_get_smiles_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_smiles(), '[H]O[H]') def test_get_mol_existing(self): '''COMMENT''' chebi_id = 73938 this_chebi_entity = ChebiEntity(str(chebi_id)) self.assertEqual(this_chebi_entity.get_mol(), _read_mol_file(chebi_id)) def test_get_mol_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_mol(), _read_mol_file(15377)) def test_get_mol_file_existing(self): '''COMMENT''' chebi_id = 73938 self.__get_mol_file(chebi_id, chebi_id) def test_get_mol_file_secondary(self): '''COMMENT''' read_id = 15377 retrieved_id = 42857 self.__get_mol_file(read_id, retrieved_id) def test_get_names_existing(self): '''COMMENT''' this_name = Name('Grape sugar', 'SYNONYM', 'KEGG COMPOUND', False, 'en') self.assertTrue(this_name in self.__existing.get_names()) def test_get_names_secondary(self): '''COMMENT''' this_name = 
Name('eau', 'SYNONYM', 'ChEBI', False, 'fr') self.assertTrue(this_name in self.__secondary.get_names()) def test_get_references_existing(self): '''COMMENT''' this_chebi_entity = ChebiEntity('15347') this_reference = Reference('WO2006008754', 'Patent', '', 'NOVEL INTERMEDIATES FOR LINEZOLID ' 'AND RELATED COMPOUNDS') self.assertTrue(this_reference in this_chebi_entity.get_references()) def test_get_references_secondary(self): '''COMMENT''' this_chebi_entity = ChebiEntity('22182') this_reference = Reference('WO2006008754', 'Patent', '', 'NOVEL INTERMEDIATES FOR LINEZOLID ' 'AND RELATED COMPOUNDS') self.assertTrue(this_reference in this_chebi_entity.get_references()) def test_get_cmp_orig_existing(self): '''COMMENT''' this_compound_origin = CompoundOrigin('H**o sapiens', 'NCBI:9606', None, None, None, None, 'DOI', '10.1038/nbt.2488', None) self.assertTrue(this_compound_origin in self.__existing.get_compound_origins()) def test_get_cmp_orig_secondary(self): '''COMMENT''' this_compound_origin = CompoundOrigin('H**o sapiens', 'NCBI:9606', None, None, None, None, 'DOI', '10.1038/nbt.2488', None) self.assertTrue(this_compound_origin in self.__secondary.get_compound_origins()) def test_get_out_existing(self): '''COMMENT''' this_relation = Relation('is_a', '17634', 'C') self.assertTrue(this_relation in self.__existing.get_outgoings()) def test_get_out_secondary(self): '''COMMENT''' this_relation = Relation('has_role', 'CHEBI:48360', 'C') self.assertTrue(this_relation in self.__secondary.get_outgoings()) def test_get_in_existing(self): '''COMMENT''' this_relation = Relation('has_functional_parent', 'CHEBI:15866', 'C') self.assertTrue(this_relation in self.__existing.get_incomings()) def test_get_in_secondary(self): '''COMMENT''' this_relation = Relation('is_conjugate_acid_of', '29412', 'C') self.assertTrue(this_relation in self.__secondary.get_incomings()) def __get_mol_file(self, read_id, retrieved_id): '''COMMENT''' mol_read = _read_mol_file(read_id) this_chebi_entity = 
ChebiEntity(str(retrieved_id)) textfile_retrieved = open(this_chebi_entity.get_mol_filename(), 'r') mol_retrieved = textfile_retrieved.read() textfile_retrieved.close() self.assertEquals(mol_read, mol_retrieved)
def get_cmpd_information(molec):
    """Get information from CHEBI Database of a compound from CHEBI ID.

    Online using libChEBIpy (https://github.com/libChEBI/libChEBIpy)

    When ``molec.chebiID`` is None, one more lookup is attempted through
    ``get_chebiID`` using the molecule's name and IUPAC name. The candidate
    IDs in ``molec.chebiID`` are then tried in order and the search stops
    at the first ID for which a SMILES string is recorded and RDKit can
    parse it.

    On success the following attributes of ``molec`` are set: ``SMILES``,
    ``mol`` (None when the SMILES contains '*'), ``chebiID`` and ``DB_ID``
    (both the ID returned by ``convert_entity_to_parent``), ``InChiKey``
    and ``InChi`` when available, and ``name`` when ``molec.change_name``
    is True. If no ID succeeds, ``molec.chebiID`` is left as the list.

    Arguments:
        molec - object with ``chebiID`` (None or an iterable of ID strings),
            ``name`` and ``iupac_name`` attributes.

    Returns:
        None in every case; results are stored on ``molec``.

    """
    if molec.chebiID is None:
        if molec.iupac_name is None:
            # Nothing to search with. Previously this fell through to the
            # loop below and failed with a TypeError on iterating None.
            print('cannot get structure from chebi')
            return None
        # try one more time for chebi ID
        chebiID = get_chebiID(mol_name=molec.name,
                              iupac_name=molec.iupac_name)
        if chebiID is None:
            print('cannot get structure from chebi')
            return None
        molec.chebiID = [chebiID]

    # at this point, molec.chebiID will be a list - iterate over it
    # the iteration stops if any CHEBI ID produces a structure
    for CID in molec.chebiID:
        if CID == '' or ' ' in CID or 'null' in CID:
            print(CID, '- not a real CHEBI ID')
            continue
        # get entity with chebiID
        entity = ChebiEntity(CID)
        # check for parent ID
        entity, CID = convert_entity_to_parent(entity, ID=CID, CID=CID)
        # attempt to get structure as SMILES
        smile = entity.get_smiles()
        print('libchebipy result:', smile)
        if smile is None:
            print('molecule does not have recorded structure in CHEBI DB')
            print('probably a generic structure - skipping.')
            molec.SMILES = smile
            molec.mol = None
            continue
        rdkitmol = Chem.MolFromSmiles(smile)
        if rdkitmol is None:
            print('structure could not be deciphered')
            molec.SMILES = smile
            molec.mol = None
            continue
        rdkitmol.Compute2DCoords()
        molec.SMILES = smile
        # remove molecules with generalised atoms
        molec.mol = None if '*' in smile else rdkitmol

        # Reaching this point means this CHEBI ID produced a structure.
        # If no ID gets here, molec.chebiID remains a list.
        molec.chebiID = CID
        molec.DB_ID = CID
        # save InChiKey
        iKEY = entity.get_inchi_key()
        if iKEY is not None:
            molec.InChiKey = iKEY
        # save inchi
        inchi = entity.get_inchi()
        if inchi is not None:
            molec.InChi = inchi
        # set name if name is only a code at this point
        try:
            if molec.change_name is True:
                molec.name = entity.get_name()
                molec.change_name = False
        except AttributeError:
            # molec has no change_name attribute yet; record it as False.
            molec.change_name = False
        return None
    return None
def _store_rdkit_structure(molec, rdkitmol, smile=None):
    """Record a successfully parsed RDKit molecule on ``molec``.

    ``smile`` is the SMILES string to store; when omitted it is generated
    from ``rdkitmol`` after the 2D coordinates have been computed.
    """
    rdkitmol.Compute2DCoords()
    if smile is None:
        smile = Chem.MolToSmiles(rdkitmol)
    molec.SMILES = smile
    # remove molecules with generalised atoms
    molec.mol = None if '*' in smile else rdkitmol


def _store_unparsed_structure(molec, smile):
    """Record on ``molec`` that RDKit could not parse its structure."""
    print('structure could not be deciphered')
    molec.SMILES = smile
    molec.mol = None


def get_cmpd_information_offline(molec):
    """Get information from CHEBI Database of a compound from CHEBI ID.

    Done Offline unless necessary.

    molec must have attribute 'chebiID' as integer.

    The compound is looked up in the local compounds file, replaced by its
    parent compound when the record has a parent ID other than 'null', and
    its structure is then read from the local structures file. Only when
    that file has no structure is libChEBIpy queried.

    Sets ``molec.name``, ``molec.change_name`` (False), possibly
    ``molec.chebiID``, ``molec.SMILES`` and ``molec.mol``; ``molec.InChiKey``
    is set only on the libChEBIpy path. Calls ``sys.exit()`` when the ID or
    its parent cannot be found in the compounds file.

    """
    import sys

    DB_prop = DB_functions.get_DB_prop('CHEBI')
    compounds_file = DB_prop[0] + DB_prop[1]['cmpds_file']
    structures_file = DB_prop[0] + DB_prop[1]['strct_file']

    # set name by searching compound file
    res = search_for_compound_by_id(compounds_file, molec.chebiID)
    if res is None:
        print('chebiID not found:', molec.chebiID)
        print('no match in DB - '
              'this should not happen for CHEBI ID search')
        print('check this!')
        print('Exitting....')
        sys.exit()
    ID, parent_id, name, star = res
    molec.name = name
    molec.change_name = False

    # make sure is parent compound
    if parent_id != 'null':
        res = convert_nameID_to_parent(compounds_file, nameID=ID)
        if res is None:
            print("this should not happen - error with cross reference")
            print('check this!')
            print('Exitting....')
            sys.exit()
        ID, parent_id, name, star = res
        molec.name = name
        molec.change_name = False
        molec.chebiID = int(ID)

    # get structure using CHEBI ID from the structures file
    structure, s_type = get_structure(structures_file, molec.chebiID)
    print(structure, s_type)
    if structure is not None:
        # dispatch on how the structure is encoded
        if s_type == 'mol':
            rdkitmol = Chem.MolFromMolBlock(structure)
            if rdkitmol is None:
                _store_unparsed_structure(molec, None)
                print('probably a polymeric structure - skipping.')
            else:
                _store_rdkit_structure(molec, rdkitmol)
        elif s_type == 'SMILES':
            rdkitmol = Chem.MolFromSmiles(structure)
            if rdkitmol is None:
                _store_unparsed_structure(molec, structure)
            else:
                _store_rdkit_structure(molec, rdkitmol, smile=structure)
        elif s_type == 'InChI':
            rdkitmol = Chem.MolFromInchi(structure)
            if rdkitmol is None:
                # Same None check as the 'mol' and 'SMILES' branches;
                # without it Compute2DCoords() raised AttributeError.
                _store_unparsed_structure(molec, None)
            else:
                _store_rdkit_structure(molec, rdkitmol)
        elif s_type == 'InChIKey':
            # A key cannot be parsed as an InChI, so no RDKit call is made.
            # The former MolFromInchi/Compute2DCoords calls failed before
            # these messages could be printed.
            molec.SMILES = None
            molec.mol = None
            print('molecule given as InChIKey - ambiguous')
            print('probably a generic structure - skipping.')
    else:
        # try using the CHEBI API
        # libChEBIpy (https://github.com/libChEBI/libChEBIpy)
        print('testing libchebipy...')
        # str() is a no-op for string IDs; molec.chebiID is an int here.
        # NOTE(review): confirm whether ChebiEntity accepts ints directly.
        entity = ChebiEntity(str(molec.chebiID))
        smile = entity.get_smiles()
        print('libchebipy result:', smile)
        if smile is not None:
            rdkitmol = Chem.MolFromSmiles(smile)
            if rdkitmol is None:
                _store_unparsed_structure(molec, smile)
            else:
                _store_rdkit_structure(molec, rdkitmol, smile=smile)
        else:
            molec.SMILES = smile
            molec.mol = None
            print('molecule does not have recorded structure in CHEBI DB')
            print('probably a generic structure - skipping.')
        # save InChiKey
        iKEY = entity.get_inchi_key()
        if iKEY is not None:
            molec.InChiKey = iKEY
class TestChebiEntity(unittest.TestCase): '''COMMENT''' def setUp(self): '''COMMENT''' self.__existing = ChebiEntity('4167') self.__secondary = ChebiEntity('CHEBI:5585') def test_get_non_existing(self): '''COMMENT''' self.assertRaises(ChebiException, ChebiEntity, '-1') def test_get_id_existing(self): '''COMMENT''' self.assertTrue(self.__existing.get_id() == 'CHEBI:4167') def test_get_id_secondary(self): '''COMMENT''' self.assertTrue(self.__secondary.get_id() == 'CHEBI:5585') def test_get_formulae_existing(self): '''COMMENT''' this_formula = Formula('C6H12O6', 'KEGG COMPOUND') self.assertTrue(this_formula in self.__existing.get_formulae()) def test_get_formulae_secondary(self): '''COMMENT''' this_formula = Formula('H2O', 'ChEBI') self.assertTrue(this_formula in self.__secondary.get_formulae()) def test_get_formula_existing(self): '''COMMENT''' self.assertTrue(self.__existing.get_formula() == 'C6H12O6') def test_get_formula_secondary(self): '''COMMENT''' self.assertTrue(self.__secondary.get_formula() == 'H2O') def test_get_mass_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_mass(), 180.15588) def test_get_mass_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_mass(), 18.01530) def test_get_charge_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_charge(), 0) def test_get_charge_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_charge(), 0) def test_get_charge_secondary2(self): '''COMMENT''' self.assertEquals(-2, ChebiEntity('43474').get_charge()) def test_get_comments_existing(self): '''COMMENT''' this_chebi_entity = ChebiEntity('29044') this_comment = Comment( 'General', 'General', 'The substituent name \'3-oxoprop-2-enyl\' is ' 'incorrect but is used in various databases.', datetime.datetime.strptime('2005-03-18', '%Y-%M-%d')) self.assertTrue(this_comment in this_chebi_entity.get_comments()) def test_get_comments_secondary(self): '''COMMENT''' this_chebi_entity = ChebiEntity('11505') 
this_comment = Comment( 'General', 'General', 'The substituent name \'3-oxoprop-2-enyl\' is ' 'incorrect but is used in various databases.', datetime.datetime.strptime('2005-03-18', '%Y-%M-%d')) self.assertTrue(this_comment in this_chebi_entity.get_comments()) def test_get_source_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_source(), 'KEGG COMPOUND') def test_get_source_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_source(), 'KEGG COMPOUND') def test_get_prnt_id_existing(self): '''COMMENT''' self.assertIsNone(self.__existing.get_parent_id()) def test_get_prnt_id_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_parent_id(), 'CHEBI:15377') def test_get_name_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_name(), 'D-glucopyranose') def test_get_name_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_name(), 'water') def test_get_definition_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_definition(), 'A glucopyranose having D-configuration.') def test_get_definition_secondary(self): '''COMMENT''' this_chebi_entity = ChebiEntity('41140') self.assertEqual( this_chebi_entity.get_definition(), 'D-Glucopyranose with beta configuration at the ' 'anomeric centre.') def test_get_mod_on_existing(self): '''COMMENT''' self.assertTrue(self.__existing.get_modified_on() > datetime.datetime.strptime('2014-01-01', '%Y-%M-%d')) def test_get_mod_on_secondary(self): '''COMMENT''' self.assertIsNotNone(self.__secondary.get_modified_on()) def test_get_created_by_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_created_by(), 'CHEBI') def test_get_created_by_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_created_by(), 'ops$mennis') def test_get_star_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_star(), 3) def test_get_star_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_star(), 3) def 
test_get_db_acc_existing(self): '''COMMENT''' dat_acc = DatabaseAccession('MetaCyc accession', 'D-Glucose', 'MetaCyc') self.assertTrue(dat_acc in self.__existing.get_database_accessions()) def test_get_db_acc_secondary(self): '''COMMENT''' dat_acc = DatabaseAccession('MetaCyc accession', 'WATER', 'MetaCyc') self.assertTrue(dat_acc in self.__secondary.get_database_accessions()) def test_get_inchi_existing(self): '''COMMENT''' inchi = 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/' + \ 'h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1' self.assertEqual(self.__existing.get_inchi(), inchi) def test_get_inchi_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_inchi(), 'InChI=1S/H2O/h1H2') def test_get_inchi_key_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_inchi_key(), 'WQZGKKKJIJFFOK-GASJEMHNSA-N') def test_get_inchi_key_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_inchi_key(), 'XLYOFNOQVPJJNP-UHFFFAOYSA-N') def test_get_smiles_existing(self): '''COMMENT''' self.assertEqual(self.__existing.get_smiles(), 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O') def test_get_smiles_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_smiles(), '[H]O[H]') def test_get_mol_existing(self): '''COMMENT''' chebi_id = 73938 this_chebi_entity = ChebiEntity(str(chebi_id)) self.assertEqual(this_chebi_entity.get_mol(), _read_mol_file(chebi_id)) def test_get_mol_secondary(self): '''COMMENT''' self.assertEqual(self.__secondary.get_mol(), _read_mol_file(15377)) def test_get_mol_file_existing(self): '''COMMENT''' chebi_id = 73938 self.__get_mol_file(chebi_id, chebi_id) def test_get_mol_file_secondary(self): '''COMMENT''' read_id = 15377 retrieved_id = 42857 self.__get_mol_file(read_id, retrieved_id) def test_get_names_existing(self): '''COMMENT''' this_name = Name('Grape sugar', 'SYNONYM', 'KEGG COMPOUND', False, 'en') self.assertTrue(this_name in self.__existing.get_names()) def test_get_names_secondary(self): '''COMMENT''' this_name = 
Name('eau', 'SYNONYM', 'ChEBI', False, 'fr') self.assertTrue(this_name in self.__secondary.get_names()) def test_get_references_existing(self): '''COMMENT''' this_chebi_entity = ChebiEntity('15347') this_reference = Reference( 'WO2006008754', 'Patent', '', 'NOVEL INTERMEDIATES FOR LINEZOLID ' 'AND RELATED COMPOUNDS') self.assertTrue(this_reference in this_chebi_entity.get_references()) def test_get_references_secondary(self): '''COMMENT''' this_chebi_entity = ChebiEntity('22182') this_reference = Reference( 'WO2006008754', 'Patent', '', 'NOVEL INTERMEDIATES FOR LINEZOLID ' 'AND RELATED COMPOUNDS') self.assertTrue(this_reference in this_chebi_entity.get_references()) def test_get_cmp_orig_existing(self): '''COMMENT''' this_compound_origin = CompoundOrigin('H**o sapiens', 'NCBI:9606', None, None, None, None, 'DOI', '10.1038/nbt.2488', None) self.assertTrue( this_compound_origin in self.__existing.get_compound_origins()) def test_get_cmp_orig_secondary(self): '''COMMENT''' this_compound_origin = CompoundOrigin('H**o sapiens', 'NCBI:9606', None, None, None, None, 'DOI', '10.1038/nbt.2488', None) self.assertTrue( this_compound_origin in self.__secondary.get_compound_origins()) def test_get_out_existing(self): '''COMMENT''' this_relation = Relation('is_a', '17634', 'C') self.assertTrue(this_relation in self.__existing.get_outgoings()) def test_get_out_secondary(self): '''COMMENT''' this_relation = Relation('has_role', 'CHEBI:48360', 'C') self.assertTrue(this_relation in self.__secondary.get_outgoings()) def test_get_in_existing(self): '''COMMENT''' this_relation = Relation('has_functional_parent', 'CHEBI:15866', 'C') self.assertTrue(this_relation in self.__existing.get_incomings()) def test_get_in_secondary(self): '''COMMENT''' this_relation = Relation('is_conjugate_acid_of', '29412', 'C') self.assertTrue(this_relation in self.__secondary.get_incomings()) def __get_mol_file(self, read_id, retrieved_id): '''COMMENT''' mol_read = _read_mol_file(read_id) this_chebi_entity = 
ChebiEntity(str(retrieved_id)) textfile_retrieved = open(this_chebi_entity.get_mol_filename(), 'r') mol_retrieved = textfile_retrieved.read() textfile_retrieved.close() self.assertEquals(mol_read, mol_retrieved)
def hier_name_search(molecule, property, option=False):
    """Search for molecule property in PUBCHEM using a hierarchy of name
    spaces

    Each identifier found on ``molecule`` is tried in turn through
    ``run_request`` and the first request that yields a result wins.
    A missing attribute (AttributeError) or a ValueError in a step is
    reported and the next step is tried, except in the final step.

    Order:
        1 - pubchem ID
        2 - KEGG ID
        3 - chebiID
        4 - chebiID to InChIKey
        5 - InChIKey
        6 - IUPAC name
        7 - name

    Properties:
        CanonicalSMILES
        IUPACName
        XLogP
        complexity
        InChiKey

    Tutorial:
        https://pubchemdocs.ncbi.nlm.nih.gov/
        pug-rest-tutorial$_Toc458584413

    Arguments:
        molecule - object carrying the identifier attributes read below.
        property - PubChem property name (string) inserted into the URL.
        option - forwarded to ``run_request`` in the IUPAC-name and name
            steps when ``property`` is not 'CanonicalSMILES'.

    Returns:
        The ``run_request`` result of the first successful step; for
        'CanonicalSMILES' in the IUPAC-name/name steps possibly a tuple
        ``(smiles, index)`` of the first SMILES for which
        ``check_charge_on_SMILES`` is falsy; None when nothing matched.

    Calls ``sys.exit()`` when the final name step raises AttributeError or
    ValueError.

    """
    # Two distinct endpoints. A single variable used to be rebound to the
    # InChIKey endpoint in steps 4/5, which then leaked into the final
    # name search whenever the IUPAC step did not reset it.
    name_url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/'
    inchikey_url = (
        'https://pubchem.ncbi.nlm.nih.gov/rest/'
        'pug/compound/inchikey/'
    )

    def build(base, ident):
        # Assemble the property request URL for one identifier.
        return base + ident + '/property/' + property + '/TXT'

    def query_by_name(ident, label):
        # Run a name-namespace request; returns None when nothing usable
        # came back so that the caller moves on to the next step.
        url = build(name_url, ident)
        if property != 'CanonicalSMILES':
            result = run_request(query=url, option=option)
            if result is not None:
                print('passed ' + label)
            return result
        result = run_request(query=url, smiles=True)
        if isinstance(result, tuple):
            # handle new line errors in SMILES
            text, multiple = result
            if multiple is True:
                # pick the uncharged SMILES
                for index, smi in enumerate(text.split('\n')):
                    print('smiles1:', smi)
                    if check_charge_on_SMILES(smi):
                        # charged
                        continue
                    return smi, index
        elif isinstance(result, str):
            print('passed ' + label)
            return result
        return None

    # Steps 1-3: identifiers that are sent to the name endpoint, optionally
    # with a prefix.
    # NOTE(review): the PubChem ID is also queried through 'compound/name/';
    # confirm whether 'compound/cid/' was intended.
    simple_steps = (
        ('PubChemID', '', 'pubchemID'),
        ('KEGG_ID', '', 'KEGG ID'),
        ('chebiID', 'chebi:', 'chebiID'),
    )
    for attr, prefix, label in simple_steps:
        try:
            ident = getattr(molecule, attr)
            if ident is not None:
                result = run_request(query=build(name_url, prefix + ident))
                if result is not None:
                    print('passed ' + label)
                    return result
        except (AttributeError, ValueError):
            print('failed ' + label)

    # Step 4: resolve the chebiID to an InChIKey when none is stored.
    try:
        if molecule.chebiID is not None:
            if molecule.InChiKey is None:
                # try using the CHEBI API
                # libChEBIpy (https://github.com/libChEBI/libChEBIpy)
                print('using libchebipy to get InChiKey...')
                from libchebipy import ChebiEntity
                entity = ChebiEntity(molecule.chebiID)
                iKEY = entity.get_inchi_key()
                print(iKEY)
            else:
                iKEY = molecule.InChiKey
            # get_inchi_key() may yield None; concatenating it into the URL
            # used to raise an uncaught TypeError.
            if iKEY is not None:
                result = run_request(query=build(inchikey_url, iKEY))
                if result is not None:
                    print('passed chebiID/inchiKey')
                    return result
    except (AttributeError, ValueError):
        print('failed chebiID/inchiKey')

    # Step 5: stored InChIKey.
    try:
        if molecule.InChiKey is not None:
            result = run_request(
                query=build(inchikey_url, molecule.InChiKey)
            )
            if result is not None:
                print('passed inchiKey')
                return result
    except (AttributeError, ValueError):
        print('failed inchiKey')

    # Step 6: IUPAC name.
    try:
        if molecule.iupac_name is not None:
            found = query_by_name(molecule.iupac_name, 'IUPAC name')
            if found is not None:
                return found
    except (AttributeError, ValueError):
        print('failed IUPAC name')

    # Step 7: plain name; a failure here terminates the program.
    try:
        if molecule.name is not None:
            found = query_by_name(molecule.name, 'name')
            if found is not None:
                return found
    except (AttributeError, ValueError):
        print('failed name')
        import sys
        sys.exit()
    return None
def hier_name_search_pcp(molecule, property, option=False):
    """Search for molecule property in PUBCHEM using a hierarchy of name
    spaces using pubchempy.

    Property can now be a list.

    Order:
        1 - pubchem ID
        2 - KEGG ID
        3 - chebiID
        4 - chebiID to InChIKey
        5 - InChIKey
        6 - IUPAC name
        7 - name

    Properties:
        CanonicalSMILES
        IUPACName
        XLogP
        complexity
        InChiKey
        PubChemID
        synonyms

    if option is not False we want to use the 'name' search only to
    recreate conditions of original search that gave option.

    Returns a list of extracted values when more than one property was
    requested, otherwise the single extracted value; a ``(smiles, index)``
    tuple may be returned by the name step for 'CanonicalSMILES'; None when
    nothing matched.

    """
    if type(property) is not list:
        property = [property]

    def collect(hit):
        # One extracted value per requested property; unwrap a single one.
        values = [extract_property(item, hit) for item in property]
        return values if len(property) > 1 else values[0]

    def attempt(attr, namespace, label, prefix=None, **extra):
        # Try one identifier attribute; returns (found, value). Missing
        # attributes and ValueErrors just mean "not found here".
        try:
            raw = getattr(molecule, attr)
            if raw is not None and option is False:
                ident = raw if prefix is None else prefix + raw
                hit = run_request_pcp(ident=ident, namespace=namespace,
                                      **extra)
                if hit is not None:
                    print(label)
                    return True, collect(hit)
        except (AttributeError, ValueError):
            pass
        return False, None

    # Steps 1-3: direct identifier look-ups.
    for spec in (
        ('PubChemID', 'cid', '> passed pubchemID'),
        ('KEGG_ID', 'name', '> passed KEGG ID'),
        ('chebiID', 'name', '> passed chebiID', 'chebi:'),
    ):
        found, value = attempt(*spec)
        if found:
            return value

    # Step 4: chebiID resolved to an InChIKey.
    try:
        if molecule.chebiID is not None and option is False:
            if molecule.InChiKey is not None:
                iKEY = molecule.InChiKey
            else:
                # try using the CHEBI API
                # libChEBIpy (https://github.com/libChEBI/libChEBIpy)
                print('> attempting libchebipy to get InChiKey...')
                from libchebipy import ChebiEntity
                iKEY = ChebiEntity(molecule.chebiID).get_inchi_key()
                print(iKEY)
            hit = run_request_pcp(ident=iKEY, namespace='inchikey')
            if hit is not None:
                print('> passed chebiID/inchiKey')
                return collect(hit)
    except (AttributeError, ValueError):
        pass

    # Step 5: stored InChIKey.
    found, value = attempt('InChiKey', 'inchikey', '> passed inchiKey')
    if found:
        return value

    # Step 6: IUPAC name.
    found, value = attempt('iupac_name', 'name', '> passed IUPAC name',
                           option=option)
    if found:
        return value

    # Step 7: plain name - the only step used when option is not False.
    try:
        print('> trying name... for:', property)
        if molecule.name is not None:
            if property[0] != 'CanonicalSMILES':
                hit = run_request_pcp(ident=molecule.name,
                                      namespace='name',
                                      option=option)
                if hit is not None:
                    print('> passed name')
                    return collect(hit)
            else:
                hit = run_request_pcp(ident=molecule.name,
                                      namespace='name',
                                      smiles=True)
                if type(hit) is tuple:
                    # handle new line errors in SMILES
                    compounds, multiple = hit
                    if multiple is True:
                        # pick the uncharged SMILES
                        for index, candidate in enumerate(compounds):
                            synonyms = [
                                s.lower() for s in candidate.synonyms
                            ]
                            if molecule.name.lower() not in synonyms:
                                continue
                            smi = candidate.canonical_smiles
                            # ignore charged species
                            if check_charge_on_SMILES(smi):
                                continue
                            return smi, index
                    return None
                elif type(hit) is str:
                    print('> passed name')
                    print('I am interested in what this result is:')
                    print(hit)
                    import sys
                    sys.exit()
                    return hit
    except (AttributeError, ValueError):
        print('> failed all searches...')
    return None