示例#1
0
def fg_structure_id():
    """Assign a numeric id to every unique functional group and persist the mapping.

    Reads all group structures from 'groups.sdf', deduplicates them via a set,
    numbers them, and stores the resulting {id: group} dict in
    'group_dict.pickle'.

    NOTE(review): iteration order over a set is arbitrary, so the assigned ids
    are not reproducible between runs — sort the groups first if stable ids
    matter. TODO confirm whether callers rely on id stability.
    """
    # Deduplicate structures read from the SDF file.
    groups = set(SDFread('groups.sdf'))
    # dict(enumerate(...)) replaces the manual numbering loop.
    group_dict = dict(enumerate(groups))
    with open('group_dict.pickle', 'wb') as f:
        pickle.dump(group_dict, f)
示例#2
0
def fill_the_reagents_base():
    """
    Fill the 'Molecule' table of 'new_reagents' database with Reagents without duplicates.
    """
    # Load all reagent structures from the building-block SDF file at once.
    for molecule in SDFread('/home/tansu/laba_proj/BB_ZINC/acbbb_p0.0.sdf').read():
        # Skip structures already stored in the database
        # (original note, translated: "check whether this is in the base or not").
        if Reagents.structure_exists(molecule):
            continue
        Reagents(molecule, db.User[1])
示例#3
0
 def test_combo(self, db, rdf, expected):
     """End-to-end check of new_structure + merge_molecules.

     Populates the test database from *rdf*, attaches extra structures to
     molecules 2 and 3, merges molecule 2 into molecule 3, then verifies
     record counts and reaction-index bookkeeping.

     NOTE(review): `nitrobrombenzene` and `bromnitrobenzene` are presumably
     module-level fixture structures — not visible in this chunk; confirm.
     """
     self.populate(db, rdf)
     m2, m3 = db.Molecule[2], db.Molecule[3]
     # First record of a test SDF file, used as an extra structure for m2.
     nbt = SDFread('CGRdb/tests/data/nbt.sdf', remap=False).read()[0]
     m3.new_structure(nitrobrombenzene)
     m2.new_structure(nbt)
     # Merging is expected to fold molecule 2 into molecule 3.
     m3.merge_molecules(bromnitrobenzene)
     # Reaction indexes of the merged molecule match the parametrized value.
     assert count(m3._structures.reaction_indexes) == expected
     # Molecule 2 must be gone after the merge.
     assert not db.Molecule.select(lambda m: m.id == 2)
     assert db.Molecule.select().count() == 6
     assert m3._structures.count() == 4
     assert db.Reaction.select().count() == 2
     assert {r.id for r in db.Reaction.select()} == {2, 3}
def similarity_search_molecules_core(**kwargs):
    """Similarity search over the molecule tree index.

    Reads molecules from kwargs['input'], finds the kwargs['number'] most
    similar database entries for each one, and writes the matching molecule
    containers to kwargs['output'] as SDF.
    """
    reader = SDFread(kwargs['input'])
    writer = SDFwrite(kwargs['output'])
    top_n = kwargs['number']
    rebuild = kwargs['rebuild']
    with db_session():
        # Build (or reuse, unless rebuild is requested) the similarity index.
        tree = TreeIndex(Molecules, reindex=rebuild)
        for query in reader:
            scores, hits = TreeIndex.get_similar(tree, query, top_n)
            print(scores)
            print(hits)
            for hit in hits:
                # Restore a MoleculeContainer from stored node-link JSON data.
                container = json_graph.node_link_graph(hit.data)
                container.__class__ = MoleculeContainer
                writer.write(container)
示例#5
0
def similarity_search_molecules_core(**kwargs):
    """Similarity search over the molecule tree index (database-loading variant).

    NOTE(review): `kwargs['']` uses an empty key — the database-selection
    command-line argument was never decided (original Russian comment,
    translated: "come up with a command-line argument"). Must be fixed
    before this function can work.
    NOTE(review): the index is built over `Molecules` while `Molecule` is
    what load_databases() returns — looks inconsistent; verify.
    """
    init()
    # Select the target database; the lookup key is still a placeholder.
    Molecule, Reaction, Conditions = load_databases()[
        kwargs['']]  # TODO: decide on the command-line argument name
    molecules = SDFread(kwargs['input'])
    outputdata = SDFwrite(kwargs['output'])
    num = kwargs['number']
    rebuild = kwargs['rebuild']
    with db_session():
        # Build (or rebuild) the similarity tree index.
        x = TreeIndex(Molecules, reindex=rebuild)
        for molecule_container in molecules:
            # a: similarity scores, b: matching DB records — TODO confirm.
            a, b = TreeIndex.get_similar(x, molecule_container, num)
            print(a)
            print(b)
            for i in b:
                # Restore a MoleculeContainer from stored node-link JSON data.
                mol_cont = json_graph.node_link_graph(i.data)
                mol_cont.__class__ = MoleculeContainer
                outputdata.write(mol_cont)
def structure_molecule_search_core(**kwargs):
    """Find reactions containing the input molecules in a given role.

    Reads molecules from kwargs['input'] and writes every reaction that
    contains them to kwargs['output'] as RDF. kwargs['product'] and
    kwargs['reagent'] (booleans) select the role to search:

      * neither flag -> product=None (match either side)
      * product only -> product=True (match products)
      * reagent only -> product=False (match reagents)
      * both flags   -> contradictory: a warning is printed and the search
        proceeds product-side (preserves original behavior).
    """
    molecules = SDFread(kwargs['input'])
    outputdata = RDFwrite(kwargs['output'])
    # Replaced `== True` / `== False` comparisons with plain boolean tests;
    # flags are assumed to be bools from the CLI layer — TODO confirm.
    want_product = kwargs['product']
    want_reagent = kwargs['reagent']
    if not want_product and not want_reagent:
        product = None  # no role requested: search both sides
    elif want_product and want_reagent:
        print('No,No,No')  # contradictory flags; message preserved verbatim
        product = want_product  # falls through product-side, as before
    elif want_product:
        product = True
    else:
        product = False
    with db_session():
        for molecule in molecules:
            required_reacts = Reactions.get_reactions_by_molecule(
                molecule, product)
            print(required_reacts)
            for reaction in required_reacts:
                react_cont = reaction.structure
                print(react_cont)
                outputdata.write(react_cont)
示例#7
0
    def get_results(self, structures):
        """Predict properties for *structures* and assemble consensus results.

        Converts the input to the models' format (ChemAxon web service for a
        single structure, MOLCONVERT subprocess for a batch), runs every
        model, merges per-model predictions and applicability domains,
        derives a trust score (starts at 5, decremented per detected problem)
        with human-readable distrust reasons, and returns a list of result
        dicts — or False on conversion failure or a structure/result count
        mismatch.

        NOTE(review): each structure is assumed to be a dict with keys
        'data', 'pressure', 'temperature', 'additives' — confirm with callers.
        """
        # prepare input file
        if len(structures) == 1:
            # Single structure: convert via the ChemAxon molExport service.
            chemaxed = chemax_post(
                'calculate/molExport',
                dict(structure=structures[0]['data'],
                     parameters=self.__format))
            if not chemaxed:
                return False
            additions = dict(pressure=structures[0]['pressure'],
                             temperature=structures[0]['temperature'])
            # Flatten additives into numbered keys: additive.1, amount.1, ...
            for n, a in enumerate(structures[0]['additives'], start=1):
                additions['additive.%d' % n] = a['name']
                additions['amount.%d' % n] = a['amount']

            data_str = chemaxed['structure']
        else:
            # Batch: pipe all structures through the external MOLCONVERT tool.
            with Popen([MOLCONVERT, self.__format],
                       stdin=PIPE,
                       stdout=PIPE,
                       stderr=STDOUT,
                       cwd=self.__workpath) as convert_mol:
                data_str = convert_mol.communicate(input=''.join(
                    s['data'] for s in structures).encode())[0].decode()
                if convert_mol.returncode != 0:
                    return False

            # Per-structure conditions; additive entries are keyed by the
            # structure's position m within the batch.
            additions = dict(pressure=[], temperature=[])
            for m, s in enumerate(structures):
                additions['pressure'].append(s['pressure'])
                additions['temperature'].append(s['temperature'])
                for n, a in enumerate(s['additives']):
                    additions.setdefault('additive.%d' % n, {})[m] = a['name']
                    additions.setdefault('amount.%d' % n, {})[m] = a['amount']

        # Parse the converted text back into containers: reaction models read
        # RDF, molecule models read SDF.
        res = []
        with StringIO(data_str) as f:
            data = (RDFread(f) if self.get_type()
                    == ModelType.REACTION_MODELING else SDFread(f)).read()

        for m in self.__models:
            res.append(m.predict(data, **additions))

        # all_y_domains = reduce(merge_wrap, (x['y_domain'] for x in res))
        # Merge per-model applicability domains; NaN -> False (out of domain).
        all_domains = reduce(self.__merge_wrap,
                             (x['domain'] for x in res)).fillna(False)

        all_predictions = reduce(self.__merge_wrap,
                                 (x['prediction'] for x in res))
        # Keep only predictions made inside the applicability domain.
        in_predictions = all_predictions.mask(all_domains ^ True)

        # Trust starts at the maximum (5); each detected issue lowers it.
        trust = Series(5, index=all_predictions.index)
        report = Series('', index=all_predictions.index)

        # mean predicted property
        avg_all = all_predictions.mean(axis=1)
        sigma_all = all_predictions.var(axis=1)

        avg_in = in_predictions.mean(axis=1)
        sigma_in = in_predictions.var(axis=1)

        avg_diff = (avg_in - avg_all).abs(
        )  # difference bt in AD and all predictions. NaN for empty in predictions.
        avg_diff_tol = avg_diff > self.TOL  # ignore NaN
        trust.loc[avg_diff_tol] -= 1
        report.loc[avg_diff_tol] += [
            self.errors['diff'] % x for x in avg_diff.loc[avg_diff_tol]
        ]

        # Rows with no in-domain prediction at all get the heaviest penalty.
        avg_in_nul = avg_in.isnull()
        trust.loc[avg_in_nul] -= 2  # totally not in domain
        report.loc[avg_in_nul] += [self.errors['zad']] * len(
            avg_in_nul.loc[avg_in_nul].index)

        # Penalize rows covered by too few model domains; XOR with avg_in_nul
        # excludes the rows already penalized as totally out of domain.
        avg_domain = all_domains.mean(axis=1)
        avg_domain_bad = (avg_domain < self.Nlim
                          ) ^ avg_in_nul  # ignore totally not in domain
        trust.loc[avg_domain_bad] -= 1
        report.loc[avg_domain_bad] += [
            self.errors['lad'] % ceil(100 * x)
            for x in avg_domain.loc[avg_domain_bad]
        ]

        # update avg and sigma based on consensus
        good = avg_domain >= self.Nlim
        avg_all.loc[good] = avg_in.loc[good]
        sigma_all.loc[good] = sigma_in.loc[good]

        # Penalize high inter-model variance relative to the tolerance.
        proportion = sigma_all / self.TOL
        proportion_bad = proportion > 1
        trust.loc[proportion_bad] -= 1
        report.loc[proportion_bad] += [
            self.errors['stp'] % (x * 100 - 100)
            for x in proportion.loc[proportion_bad]
        ]

        # Group per-row results by input structure; the index element may be
        # a (structure, *atoms) tuple for atom-resolved predictions.
        collector = defaultdict(list)
        for r, d in trust.items():
            s, *n = r if isinstance(r, tuple) else (r, )
            atoms = self.__report_atoms(n)
            collector[s].extend([
                dict(key='Predicted ± sigma%s%s' %
                     ((self.__units and ' (%s)' % self.__units or ''), atoms),
                     value='%.2f ± %.2f' % (avg_all.loc[r], sigma_all.loc[r]),
                     type=ResultType.TEXT),
                dict(key='Trust of prediction%s' % atoms,
                     value=self.trust_desc[d],
                     type=ResultType.TEXT),
                dict(key='Distrust reason%s' % atoms,
                     value=report.loc[r],
                     type=ResultType.TEXT)
            ])

        # Every input structure must have produced results; otherwise fail.
        if len(structures) == len(collector):
            out = []
            for s in sorted(collector):
                out.append(dict(results=collector[s]))
            return out

        return False