def run_n2s():
    """Resolve compound names to SMILES and cache the results in ``n2s.pickle``.

    Loads an existing name -> results mapping from ``n2s.pickle`` if the file
    exists, then walks every ``.json`` file in ``../examples/mp/results``.
    For each compound record that has both ``names`` and ``melting_points``,
    every name not already cached is passed to ``cirpy.query`` (SMILES
    representation, ``name_by_opsin`` and ``name_by_cir`` resolvers) and
    stored as a list of ``(value, resolver)`` tuples.

    The cache is written back after each JSON file has been processed.
    """
    cache_path = 'n2s.pickle'
    result_dir = '../examples/mp/results'

    n2s = {}
    if os.path.isfile(cache_path):
        # Pickle data is bytes: the file must be opened in binary mode.
        # NOTE(review): pickle.load can execute arbitrary code; only load a
        # cache file that this script produced itself.
        with open(cache_path, 'rb') as fin:
            n2s = pickle.load(fin)
    print('Starting with %s names' % len(n2s))

    for filename in os.listdir(result_dir):
        if not filename.endswith('.json'):
            continue
        print(filename)
        # Read raw bytes and decode explicitly so the file is always
        # interpreted as UTF-8, regardless of the platform default.
        with open('%s/%s' % (result_dir, filename), 'rb') as fin:
            compounds = json.loads(fin.read().decode('utf8'))
        for compound in compounds:
            if 'names' not in compound or 'melting_points' not in compound:
                continue
            for name in compound['names']:
                if name in n2s:
                    continue
                hits = cirpy.query(name.encode('utf8'), 'smiles', ['name_by_opsin', 'name_by_cir'])
                resolved = [(r.value, r.resolver) for r in hits]
                print(name)
                print(resolved)
                n2s[name] = resolved
        # Checkpoint the cache after each file.
        with open(cache_path, 'wb') as fout:
            pickle.dump(n2s, fout)
def process_cas(request):
    """Look up a compound by CAS number and return its details as JSON.

    Reads ``cas_number`` from the request's GET parameters.

    * If an ``Odorant`` with that CAS number already exists, the response
      contains ``object_exists`` (its absolute URL) and
      ``object_exists_name``.
    * Otherwise the CAS number is resolved to SMILES with ``cirpy.query``
      and the SMILES is looked up with ``pcp.get_compounds``; on success the
      response contains the chemical name, IUPAC name, structure image URL,
      CID and SMILES.
    * If no CAS number was supplied, or nothing could be resolved, the
      response contains a single ``error`` key.

    The view always returns a ``JsonResponse``.
    """
    not_found = {'error': 'No compound found for this CAS number'}

    cas_no = request.GET.get('cas_number')
    if not cas_no:
        # Nothing to look up; avoid passing None to the ORM and to cirpy.
        return JsonResponse(not_found)

    try:
        obj = Odorant.objects.get(cas_number__exact=cas_no)
    except ObjectDoesNotExist:
        pass
    else:
        return JsonResponse({
            'object_exists': obj.get_absolute_url(),
            'object_exists_name': str(obj),
        })

    try:
        smiles = cirpy.query(cas_no, 'smiles')[0].value
        pcp_query = pcp.get_compounds(smiles, 'smiles')[0]
    except (IndexError, pcp.BadRequestError):
        # IndexError: one of the services returned an empty result list.
        # BadRequestError: PubChem rejected the SMILES string.
        return JsonResponse(not_found)

    cid_no = pcp_query.cid
    if not (smiles and cid_no):
        # Previously this case fell through and the view returned None.
        return JsonResponse(not_found)

    return JsonResponse({
        'chemical_name': Odorant.scrape_compound_name(cid_no),
        'iupac_name': pcp_query.iupac_name,
        'structure_url': 'https://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?cid={}&t=l'.format(cid_no),
        'hidden_cid': cid_no,
        'smiles': smiles,
    })
def test_query_dict(self):
    """Result attributes should be readable with dict-style indexing."""
    expected_inchi = 'InChI=1/C17H19NO3/c1-18-7-6-17-10-3-5-13(20)16(17)21-15-12(19)4-2-9(14(15)17)8-11(10)18/h2-5,10-11,13,16,19-20H,6-8H2,1H3/t10-,11+,13?,16-,17-/m0/s1'

    hits = query('Morphine', 'inchi')
    self.assertEqual(len(hits), 2)

    # All checks below are made against the second result.
    second = hits[1]
    self.assertEqual(second['value'], expected_inchi)
    self.assertEqual(second['notation'], 'Morphine')
    self.assertEqual(second['resolver'], 'name_by_cir')
def test_query_dict(self):
    """Verify that items of a query result can be accessed by key."""
    expected_items = (
        ('value', 'InChI=1/C17H19NO3/c1-18-7-6-17-10-3-5-13(20)16(17)21-15-12(19)4-2-9(14(15)17)8-11(10)18/h2-5,10-11,13,16,19-20H,6-8H2,1H3/t10-,11+,13?,16-,17-/m0/s1'),
        ('notation', 'Morphine'),
        ('resolver', 'name_by_cir'),
    )

    results = query('Morphine', 'inchi')
    self.assertEqual(len(results), 2)

    # Check each key of the second result in turn; the first mismatch fails the test.
    for key, expected in expected_items:
        self.assertEqual(results[1][key], expected)
def test_morphine_inchi(self):
    """An InChI query for morphine should return the expected second result."""
    expected_inchi = 'InChI=1/C17H19NO3/c1-18-7-6-17-10-3-5-13(20)16(17)21-15-12(19)4-2-9(14(15)17)8-11(10)18/h2-5,10-11,13,16,19-20H,6-8H2,1H3/t10-,11+,13?,16-,17-/m0/s1'

    hits = query('morphine', 'inchi')
    self.assertEqual(len(hits), 2)

    # Every attribute check is made against the second result.
    second = hits[1]
    self.assertEqual(second.input, 'morphine')
    self.assertEqual(second.representation, 'inchi')
    self.assertEqual(second.resolver, 'name_by_cir')
    self.assertEqual(second.input_format, 'chemical name (CIR)')
    self.assertEqual(second.notation, 'Morphine')
    self.assertEqual(second.value, expected_inchi)
def test_morphine_inchi(self):
    """Check every attribute of the second result of a morphine InChI query."""
    expected_attrs = (
        ('input', 'morphine'),
        ('representation', 'inchi'),
        ('resolver', 'name_by_cir'),
        ('input_format', 'chemical name (CIR)'),
        ('notation', 'Morphine'),
        ('value', 'InChI=1/C17H19NO3/c1-18-7-6-17-10-3-5-13(20)16(17)21-15-12(19)4-2-9(14(15)17)8-11(10)18/h2-5,10-11,13,16,19-20H,6-8H2,1H3/t10-,11+,13?,16-,17-/m0/s1'),
    )

    results = query('morphine', 'inchi')
    self.assertEqual(len(results), 2)

    # Attributes are checked in the listed order; the first mismatch fails the test.
    for attr, expected in expected_attrs:
        self.assertEqual(getattr(results[1], attr), expected)
def process_bioactive_identifier(request):
    """Look up a compound by CAS number or InChIKey and return it as JSON.

    Reads ``cas_number`` and ``inchikey`` from the request's GET parameters;
    the CAS number takes precedence when both are given.

    * If a matching ``Bioactive`` already exists, the response contains
      ``object_exists`` (its absolute URL) and ``object_exists_name``.
    * Otherwise the compound is looked up with ``pcp.get_compounds`` (via a
      ``cirpy`` SMILES lookup for CAS numbers). When PubChem has no IUPAC
      name, ``cirpy.resolve`` is used as a fallback, and ``'n/a'`` if that
      also yields nothing.
    * If no identifier was supplied, or no compound with a CID was found,
      the response contains a single ``error`` key.
    """
    cas_no = request.GET.get('cas_number')
    inchikey = request.GET.get('inchikey', '').strip()

    if not cas_no and not inchikey:
        # Avoid querying PubChem with an empty identifier.
        return JsonResponse({'error': 'No CAS number or InChIKey provided'})

    if cas_no:
        id_label = 'CAS number'
        obj = Bioactive.objects.filter(
            chemical_properties__synonyms__icontains=cas_no).first()
    else:
        id_label = 'InChIKey'
        obj = Bioactive.objects.filter(inchikey__exact=inchikey).first()

    if obj:
        return JsonResponse({
            'object_exists': obj.get_absolute_url(),
            'object_exists_name': str(obj),
        })

    iupac_name = None
    try:
        if cas_no:
            smiles = cirpy.query(cas_no, 'smiles')[0].value
            if '.' in smiles:
                # Multi-component SMILES: keep the first fragment longer
                # than five characters (IndexError if there is none).
                smiles = [i for i in smiles.split('.') if len(i) > 5][0]
            pcp_query = pcp.get_compounds(smiles, 'smiles')[0]
            if not pcp_query.iupac_name:
                iupac_name = cirpy.resolve(smiles, 'iupac_name', ['smiles'])
        else:
            pcp_query = pcp.get_compounds(inchikey, 'inchikey')[0]
            if not pcp_query.iupac_name:
                iupac_name = cirpy.resolve(inchikey, 'iupac_name', ['stdinchikey'])
        if not pcp_query.cid:
            # Treat a compound without a CID like an empty result.
            raise IndexError
    except (IndexError, pcp.BadRequestError):
        # Name the identifier that was actually used for the lookup.
        return JsonResponse(
            {'error': 'No compound found for this {}'.format(id_label)})

    return JsonResponse({
        'chemical_name': Bioactive.scrape_compound_name(pcp_query.cid),
        'iupac_name': pcp_query.iupac_name or iupac_name or 'n/a',
        'inchikey': pcp_query.inchikey,
        'structure_url': 'https://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?cid={}&t=l'.format(pcp_query.cid),
        'hidden_cid': pcp_query.cid,
        'smiles': pcp_query.isomeric_smiles or pcp_query.canonical_smiles or '',
    })
def call_compound_data(self):
    """Fetch compound details for up to four unique CAS numbers.

    For each of the first four values from ``self.get_unique_cas_numbers()``
    the CAS number is resolved to SMILES with ``cirpy.query``, an IUPAC name
    is read from ``cirpy.Molecule``, and the SMILES is looked up with
    ``pcp.get_compounds``. One dict per compound is appended to
    ``self.compound_data``. CAS numbers for which any step yields nothing
    are skipped rather than aborting the whole run.
    """
    for cas_no in list(self.get_unique_cas_numbers())[:4]:
        cirpy_query = cirpy.query(cas_no, 'smiles')
        if not cirpy_query:
            continue
        smiles = cirpy_query[0].value

        name = cirpy.Molecule(smiles).iupac_name
        # usually a string but sometimes a list of strings
        if isinstance(name, list):
            name = name[0] if name else None
        if not name:
            continue

        pcp_data = pcp.get_compounds(smiles, 'smiles')
        if pcp_data:
            compound = pcp_data[0]
            cid_no = compound.cid
            chem_properties = {
                'xlogp': compound.xlogp,
                'hac': compound.heavy_atom_count,
                'rbc': compound.rotatable_bond_count,
                # Go through float() so a decimal string such as '180.16'
                # is accepted as well as a number.
                'mw': int(float(compound.molecular_weight)),
                'synonyms': ' '.join(compound.synonyms[:5]),
                # Number of 'O', 'N' and 'S' characters in the SMILES string.
                'hetac': sum(1 for ch in smiles if ch in 'ONS'),
            }
            self.compound_data.append({
                'cas_number': cas_no,
                'smiles': smiles,
                'chemical_name': CompoundMixin.scrape_compound_name(cid_no),
                'iupac_name': name.lower(),
                'cid_number': cid_no,
                'chemical_properties': chem_properties,
            })
        # may help avoid timeout with pcp.get_compounds
        sleep(5)
def test_custom_resolvers(self):
    """Test expected results are returned when using custom name resolvers."""
    # Pass the resolvers explicitly so the test exercises what its name says;
    # the expected results below are listed in the same order.
    results = query('2,4,6-trinitrotoluene', 'smiles', ['name_by_opsin', 'name_by_cir'])
    self.assertEqual(len(results), 2)
    self.assertEqual(
        results[0],
        Result(
            input='2,4,6-trinitrotoluene',
            representation='smiles',
            resolver='name_by_opsin',
            input_format='IUPAC name (OPSIN)',
            notation='2,4,6-trinitrotoluene',
            value='Cc1c(cc(cc1[N+]([O-])=O)[N+]([O-])=O)[N+]([O-])=O',
        ),
    )
    self.assertEqual(
        results[1],
        Result(
            input='2,4,6-trinitrotoluene',
            representation='smiles',
            resolver='name_by_cir',
            input_format='chemical name (CIR)',
            notation='2,4,6-Trinitrotoluene',
            value='Cc1c(cc(cc1[N+]([O-])=O)[N+]([O-])=O)[N+]([O-])=O',
        ),
    )
def test_invalid_representation_query(self):
    """Querying with an unknown representation should raise HTTPError."""
    # Callable form of assertRaises: query is invoked with the given arguments.
    self.assertRaises(HTTPError, query, 'Morphine', 'ogiuewrgpw')
def test_no_result_query(self):
    """A query that matches nothing should return an empty list."""
    results = query('sjkvhaldfu', 'smiles')
    self.assertEqual(results, [])
def test_custom_resolvers(self):
    """Test expected results are returned when using custom name resolvers."""
    # The resolvers are passed explicitly so the test matches its name; the
    # expected results are listed in the same order as the resolvers.
    resolvers = ['name_by_opsin', 'name_by_cir']
    expected = [
        Result(input='2,4,6-trinitrotoluene', representation='smiles', resolver='name_by_opsin', input_format='IUPAC name (OPSIN)', notation='2,4,6-trinitrotoluene', value='Cc1c(cc(cc1[N+]([O-])=O)[N+]([O-])=O)[N+]([O-])=O'),
        Result(input='2,4,6-trinitrotoluene', representation='smiles', resolver='name_by_cir', input_format='chemical name (CIR)', notation='2,4,6-Trinitrotoluene', value='Cc1c(cc(cc1[N+]([O-])=O)[N+]([O-])=O)[N+]([O-])=O'),
    ]

    results = query('2,4,6-trinitrotoluene', 'smiles', resolvers)
    self.assertEqual(len(results), 2)
    self.assertEqual(results[0], expected[0])
    self.assertEqual(results[1], expected[1])