def fg_structure_id():
    """Assign a numeric id to every functional group read from 'groups.sdf'
    and persist the id -> group mapping to 'group_dict.pickle'.
    """
    # NOTE(review): iterating a set makes the numbering depend on hash order,
    # so ids may differ between runs — confirm that is acceptable.
    unique_groups = set(SDFread('groups.sdf'))
    mapping = dict(enumerate(unique_groups))
    with open('group_dict.pickle', 'wb') as dump_file:
        pickle.dump(mapping, dump_file)
def fill_the_reagents_base():
    """Fill the 'Molecule' table of the 'new_reagents' database with reagents,
    skipping structures that are already stored (no duplicates).
    """
    for molecule in SDFread('/home/tansu/laba_proj/BB_ZINC/acbbb_p0.0.sdf').read():
        # check whether this structure is already present in the database
        if Reagents.structure_exists(molecule):
            continue
        Reagents(molecule, db.User[1])
def test_combo(self, db, rdf, expected):
    """Combined scenario: attach new structures to two molecules, then merge
    one molecule into the other, and verify the resulting index and table
    counts.

    NOTE(review): ``nitrobrombenzene`` and ``bromnitrobenzene`` are not
    defined in this block — presumably module-level test structures; verify
    against the rest of the test module.
    """
    # load the reference reactions into the freshly created test database
    self.populate(db, rdf)
    m2, m3 = db.Molecule[2], db.Molecule[3]
    nbt = SDFread('CGRdb/tests/data/nbt.sdf', remap=False).read()[0]
    m3.new_structure(nitrobrombenzene)
    m2.new_structure(nbt)
    # merging is expected to remove molecule 2 and fold its data into molecule 3
    m3.merge_molecules(bromnitrobenzene)
    assert count(m3._structures.reaction_indexes) == expected
    assert not db.Molecule.select(lambda m: m.id == 2)  # molecule 2 must be gone
    assert db.Molecule.select().count() == 6
    assert m3._structures.count() == 4
    assert db.Reaction.select().count() == 2
    assert {r.id for r in db.Reaction.select()} == {2, 3}
def similarity_search_molecules_core(**kwargs):
    """Query the molecule tree index for structures similar to each input
    molecule and write every hit to the output SDF file.

    Expected kwargs: 'input', 'output', 'number', 'rebuild'.
    """
    reader = SDFread(kwargs['input'])
    writer = SDFwrite(kwargs['output'])
    top_n = kwargs['number']
    with db_session():
        index = TreeIndex(Molecules, reindex=kwargs['rebuild'])
        for query in reader:
            info, matches = TreeIndex.get_similar(index, query, top_n)
            print(info)
            print(matches)
            for match in matches:
                # rebuild a MoleculeContainer from the stored node-link graph
                container = json_graph.node_link_graph(match.data)
                container.__class__ = MoleculeContainer
                writer.write(container)
def similarity_search_molecules_core(**kwargs):
    """Similarity search against a selectable database.

    NOTE(review): this re-definition shadows the earlier function of the same
    name in this module — probably unintentional; confirm which of the two is
    meant to survive.
    """
    init()
    # FIXME: kwargs[''] uses an empty key and will raise KeyError at runtime.
    # The author's original comment (translated from Russian) reads "come up
    # with a command-line argument" — the key name for selecting the database
    # is still TODO.
    Molecule, Reaction, Conditions = load_databases()[
        kwargs['']]
    molecules = SDFread(kwargs['input'])
    outputdata = SDFwrite(kwargs['output'])
    num = kwargs['number']
    rebuild = kwargs['rebuild']
    with db_session():
        # NOTE(review): indexes the global `Molecules`, not the `Molecule`
        # entity unpacked above — verify this is intended.
        x = TreeIndex(Molecules, reindex=rebuild)
        for molecule_container in molecules:
            a, b = TreeIndex.get_similar(x, molecule_container, num)
            print(a)  # debug output
            print(b)
            for i in b:
                # rebuild a MoleculeContainer from the stored node-link JSON graph
                mol_cont = json_graph.node_link_graph(i.data)
                mol_cont.__class__ = MoleculeContainer
                outputdata.write(mol_cont)
def structure_molecule_search_core(**kwargs):
    """Find all reactions involving each input molecule and write them as RDF.

    Expected kwargs: 'input' (SDF path), 'output' (RDF path), and boolean
    flags 'product' / 'reagent' restricting the molecule's role in the
    reaction: neither flag -> any role (None); 'product' -> True;
    'reagent' -> False. Setting both flags is contradictory and only prints
    a warning, keeping the original 'product' value.
    """
    molecules = SDFread(kwargs['input'])
    outputdata = RDFwrite(kwargs['output'])
    want_product = kwargs['product']
    want_reagent = kwargs['reagent']
    # Use truthiness instead of the original `== True` / `== False` chain;
    # all four flag combinations map to the same `product` value as before.
    if not want_product and not want_reagent:
        product = None  # no restriction on the molecule's role
    elif want_product and want_reagent:
        print('No,No,No')  # contradictory flags
        product = want_product
    else:
        product = bool(want_product)
    with db_session():
        for molecule in molecules:
            required_reacts = Reactions.get_reactions_by_molecule(
                molecule, product)
            print(required_reacts)  # debug output
            for reaction in required_reacts:
                react_cont = reaction.structure
                print(react_cont)
                outputdata.write(react_cont)
def get_results(self, structures):
    """Run every registered model on the input structures and build a
    consensus prediction report with a trust score per structure.

    Parameters
    ----------
    structures : list of dict
        Each dict carries 'data' (raw structure text), 'pressure',
        'temperature' and 'additives' (a list of dicts with
        'name'/'amount') — TODO confirm against callers.

    Returns
    -------
    list of dict (one ``results`` entry per input structure, sorted by
    structure key) on success; ``False`` when conversion or format export
    fails, or when the number of collected results does not match the
    number of inputs.
    """
    # prepare input file
    if len(structures) == 1:
        # single structure: convert through the chemaxon web service
        chemaxed = chemax_post(
            'calculate/molExport',
            dict(structure=structures[0]['data'], parameters=self.__format))
        if not chemaxed:
            return False
        additions = dict(pressure=structures[0]['pressure'],
                         temperature=structures[0]['temperature'])
        for n, a in enumerate(structures[0]['additives'], start=1):
            additions['additive.%d' % n] = a['name']
            additions['amount.%d' % n] = a['amount']
        data_str = chemaxed['structure']
    else:
        # batch: convert via the external molconvert process
        with Popen([MOLCONVERT, self.__format], stdin=PIPE, stdout=PIPE,
                   stderr=STDOUT, cwd=self.__workpath) as convert_mol:
            data_str = convert_mol.communicate(input=''.join(
                s['data'] for s in structures).encode())[0].decode()
            if convert_mol.returncode != 0:
                return False
        # per-structure conditions keyed by position in the batch
        additions = dict(pressure=[], temperature=[])
        for m, s in enumerate(structures):
            additions['pressure'].append(s['pressure'])
            additions['temperature'].append(s['temperature'])
            for n, a in enumerate(s['additives']):
                additions.setdefault('additive.%d' % n, {})[m] = a['name']
                additions.setdefault('amount.%d' % n, {})[m] = a['amount']
    res = []
    with StringIO(data_str) as f:
        # reactions are parsed as RDF, plain molecules as SDF
        data = (RDFread(f) if self.get_type() == ModelType.REACTION_MODELING
                else SDFread(f)).read()
        for m in self.__models:
            res.append(m.predict(data, **additions))
    # all_y_domains = reduce(merge_wrap, (x['y_domain'] for x in res))
    # merge per-model outputs into single frames (columns = models)
    all_domains = reduce(self.__merge_wrap,
                         (x['domain'] for x in res)).fillna(False)
    all_predictions = reduce(self.__merge_wrap,
                             (x['prediction'] for x in res))
    # keep only predictions whose model reported the point inside its domain
    in_predictions = all_predictions.mask(all_domains ^ True)
    # trust starts at 5 and is decremented by the penalty checks below
    trust = Series(5, index=all_predictions.index)
    report = Series('', index=all_predictions.index)
    # mean predicted property
    avg_all = all_predictions.mean(axis=1)
    sigma_all = all_predictions.var(axis=1)
    avg_in = in_predictions.mean(axis=1)
    sigma_in = in_predictions.var(axis=1)
    avg_diff = (avg_in - avg_all).abs(
    )  # difference bt in AD and all predictions. NaN for empty in predictions.
    # penalty 1: in-domain consensus deviates from the overall consensus
    avg_diff_tol = avg_diff > self.TOL  # ignore NaN
    trust.loc[avg_diff_tol] -= 1
    report.loc[avg_diff_tol] += [
        self.errors['diff'] % x for x in avg_diff.loc[avg_diff_tol]
    ]
    # penalty 2: no model had the point inside its applicability domain
    avg_in_nul = avg_in.isnull()
    trust.loc[avg_in_nul] -= 2  # totally not in domain
    report.loc[avg_in_nul] += [self.errors['zad']] * len(
        avg_in_nul.loc[avg_in_nul].index)
    # penalty 3: too few models in domain (XOR skips the totally-out case,
    # which was already penalised above)
    avg_domain = all_domains.mean(axis=1)
    avg_domain_bad = (avg_domain < self.Nlim
                      ) ^ avg_in_nul  # ignore totally not in domain
    trust.loc[avg_domain_bad] -= 1
    report.loc[avg_domain_bad] += [
        self.errors['lad'] % ceil(100 * x)
        for x in avg_domain.loc[avg_domain_bad]
    ]
    # update avg and sigma based on consensus
    good = avg_domain >= self.Nlim
    avg_all.loc[good] = avg_in.loc[good]
    sigma_all.loc[good] = sigma_in.loc[good]
    # penalty 4: prediction variance exceeds the tolerance
    proportion = sigma_all / self.TOL
    proportion_bad = proportion > 1
    trust.loc[proportion_bad] -= 1
    report.loc[proportion_bad] += [
        self.errors['stp'] % (x * 100 - 100)
        for x in proportion.loc[proportion_bad]
    ]
    # group the per-index results back by structure key; a tuple index
    # presumably carries (structure, *atoms) for atom-level models — verify
    collector = defaultdict(list)
    for r, d in trust.items():
        s, *n = r if isinstance(r, tuple) else (r, )
        atoms = self.__report_atoms(n)
        collector[s].extend([
            dict(key='Predicted ± sigma%s%s' %
                 ((self.__units and ' (%s)' % self.__units or ''), atoms),
                 value='%.2f ± %.2f' % (avg_all.loc[r], sigma_all.loc[r]),
                 type=ResultType.TEXT),
            dict(key='Trust of prediction%s' % atoms,
                 value=self.trust_desc[d],
                 type=ResultType.TEXT),
            dict(key='Distrust reason%s' % atoms,
                 value=report.loc[r],
                 type=ResultType.TEXT)
        ])
    # every input structure must have produced a result block
    if len(structures) == len(collector):
        out = []
        for s in sorted(collector):
            out.append(dict(results=collector[s]))
        return out
    return False