def import_smiles(mine_db: MINE, target: str) -> None:
    """Import the molecules of a SMILES file into a MINE database.

    Parameters
    ----------
    mine_db : MINE
        The database the compounds are inserted into.
    target : str
        Path of the tab-delimited SMILES (.smi) file to read.
    """
    # SmilesMolSupplier (rdkit) generates Mol objects from a smiles file
    mols = AllChem.SmilesMolSupplier(target, delimiter="\t", nameColumn=0)

    # Insert every molecule that was generated successfully; its properties
    # (GetPropsAsDict() from the rdkit Mol class) become the compound dict
    for mol in mols:
        if not mol:
            continue
        mine_db.insert_compound(
            mol,
            compound_dict=mol.GetPropsAsDict(),
            pubchem_db=None,
            kegg_db=None,
            modelseed_db=None,
        )

    # Record the import in the metadata log. The action used to read
    # "SDF Imported", which mislabelled SMILES imports.
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "SMILES Imported",
        "Filepath": target,
    })
def import_sdf(mine_db: MINE, target: str) -> None:
    """Import the molecules of an SDF file into a MINE database.

    Parameters
    ----------
    mine_db : MINE
        The database the compounds are inserted into.
    target : str
        Path of the SDF file to read.
    """
    # SDMolSupplier (rdkit) takes entries from sdf file and returns Mol objects
    sdf_gen = AllChem.SDMolSupplier(target)

    for mol in sdf_gen:
        # Skip entries for which no Mol object was produced, as import_smiles
        # does; calling GetPropsAsDict() on such an entry would raise.
        if not mol:
            continue
        mine_db.insert_compound(
            mol,
            compound_dict=mol.GetPropsAsDict(),
            pubchem_db=None,
            kegg_db=None,
            modelseed_db=None,
        )

    # Record the import in the metadata log
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "SDF Imported",
        "Filepath": target,
    })
def import_mol_dir(mine_db: MINE, target: str, name_field: str = "Name",
                   overwrite: bool = False) -> None:
    """Import a directory of molfiles into a MINE database.

    Parameters
    ----------
    mine_db : MINE
        The database the compounds are inserted into.
    target : str
        Directory containing the .mol files to read.
    name_field : str, optional
        Field for the compound name, by default "Name". The name is the
        file name without its ".mol" extension.
    overwrite : bool, optional
        Replace old compounds with new ones if a collision happens, by
        default False.
    """
    extension = ".mol"
    for file in os.listdir(target):
        # Only consider files that actually end in ".mol"
        if not file.endswith(extension):
            continue
        # MolFromMolFile (rdkit) generates Mol objects from .mol files
        mol = AllChem.MolFromMolFile(os.path.join(target, file))
        # Drop the extension by slicing: str.rstrip(".mol") would strip any
        # trailing 'm', 'o', 'l' or '.' characters ("methanol.mol" -> "methan")
        name = file[:-len(extension)]
        # Skip files for which no Mol object was generated
        if not mol:
            continue
        # Create hashkey for the compound
        cpdhash = utils.get_compound_hash(mol)
        if not overwrite and mine_db.compounds.count({"_id": cpdhash}):
            # The compound already exists and must not be overwritten:
            # just add this name to the existing document
            mine_db.compounds.update(
                {"_id": cpdhash},
                {"$addToSet": {name_field: name}},
            )
        else:
            # Otherwise insert the compound into the database
            mine_db.insert_compound(
                mol,
                compound_dict={name_field: [name], "Generation": 0},
                pubchem_db=None,
                kegg_db=None,
                modelseed_db=None,
            )

    # Record the import in the metadata log
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "MolFiles Imported",
        "Filepath": target,
    })
def export_sdf(mine_db: MINE, dir_path: str, max_compounds: int = None) -> None:
    """Export compounds from the database as one or more MDL SDF files.

    Files are named "<database name>_<n>.sdf" (passed through
    utils.prevent_overwrite) and are placed in dir_path.

    Parameters
    ----------
    mine_db : MINE
        MINE object that contains the database.
    dir_path : str
        Directory for files.
    max_compounds : int, optional
        Maximum number of compounds per file, by default None (no limit).
    """
    # Make sure that all compounds point to all their reactants
    if not mine_db.compounds.find_one({"Product_of": {"$exists": 1}}):
        mine_db.add_rxn_pointers()

    print(
        f"Exporting {mine_db.compounds.count()} compounds from {mine_db.name}"
        " as an SDF file")
    base_path = os.path.join(dir_path, mine_db.name)
    target = utils.prevent_overwrite(base_path + "_1.sdf")
    # SDWriter (rdkit) writes Mol objects to SD files
    writer = AllChem.SDWriter(target)
    writer.SetKekulize(True)
    n_files = 1
    try:
        for compound in mine_db.compounds.find():
            # Convert SMILES string to Mol object, replacing 'CoA' and 'R'
            # by '*'
            mol = AllChem.MolFromSmiles(compound["SMILES"], True,
                                        {"CoA": "*", "R": "*"})
            # Only molecules that were generated successfully are written
            if not mol:
                continue
            mol.SetProp("_id", compound["_id"])
            mol.SetProp("Generation", str(compound["Generation"]))
            if "Reactant_in" in compound:
                mol.SetProp("Reactant_in", str(compound["Reactant_in"]))
            if "Product_of" in compound:
                mol.SetProp("Product_of", str(compound["Product_of"]))
            writer.write(mol)
            # Start a new sdf file once the current one holds the maximum
            # number of molecules set by the user
            if max_compounds and writer.NumMols() >= max_compounds:
                # Finish the full file before switching to the next one
                writer.close()
                n_files += 1
                target = utils.prevent_overwrite(
                    base_path + f"_{n_files}.sdf")
                # The continuation file must also be an SD file with the
                # same kekulization setting
                writer = AllChem.SDWriter(target)
                writer.SetKekulize(True)
    finally:
        writer.close()
def export_smiles(mine_db: MINE, dir_path: str, max_compounds: int = None) -> None:
    """Export compounds from the database as tab-separated SMILES files.

    Each row holds the SMILES, _id, Generation, Reactant_in and Product_of
    fields of one compound; no header row is written. Files are named
    "<database name>_<n>.smiles" (passed through utils.prevent_overwrite).

    Parameters
    ----------
    mine_db : MINE
        MINE object that contains the database.
    dir_path : str
        Directory for files.
    max_compounds : int, optional
        Maximum number of compounds per file, by default None (no limit).
    """
    header = ["SMILES", "_id", "Generation", "Reactant_in", "Product_of"]
    # Make sure that all compounds point to all their reactants
    if not mine_db.compounds.find_one({"Product_of": {"$exists": 1}}):
        mine_db.add_rxn_pointers()

    # mine_db.name is used as a plain string below, so it is not called here
    print(
        f"Exporting {mine_db.compounds.count()} compounds from {mine_db.name}"
        " as SMILES file")
    base_path = os.path.join(dir_path, mine_db.name)
    projection = {field: 1 for field in header}

    # newline="" lets the csv module control line endings itself
    target = open(utils.prevent_overwrite(base_path + "_1.smiles"), "w",
                  newline="")
    try:
        # DictWriter writes one row per compound dictionary (writerow)
        writer = csv.DictWriter(target, fieldnames=header,
                                dialect="excel-tab")
        n_files = 1
        i = 0
        for compound in mine_db.compounds.find({}, projection):
            writer.writerow(compound)
            i += 1
            # If a maximum per file was set and the number of compounds
            # written so far is divisible by it, start a new file
            if max_compounds and not i % max_compounds:
                n_files += 1
                # Close the finished file before opening its successor
                target.close()
                target = open(
                    utils.prevent_overwrite(
                        base_path + f"_{n_files}.smiles"),
                    "w",
                    newline="",
                )
                writer = csv.DictWriter(target, fieldnames=header,
                                        dialect="excel-tab")
    finally:
        target.close()
def test_save_target_mine(default_rule, smiles_dict, coreactant_dict):
    """Check that a target-pruned Pickaxe run is written to a MINE.

    Expands FADH for two generations with rule 2.7.1.a, prunes the network
    to the targets in test_targets.csv, saves it to "MINE_test" and checks
    the stored document counts and the first target compound.
    """
    db_name = "MINE_test"
    delete_database(db_name)

    pk = pickaxe.Pickaxe(database=db_name, explicit_h=True)
    pk.operators["2.7.1.a"] = default_rule
    for coreactant_key in ("ATP", "ADP"):
        pk._load_coreactant(coreactant_dict[coreactant_key])
    pk._add_compound("FADH", smiles_dict["FADH"],
                     cpd_type="Starting Compound")
    pk.load_targets(file_dir / "../data/test_targets.csv")
    pk.transform_all(generations=2)
    pk.prune_network_to_targets()
    pk.save_to_mine()

    mine_db = MINE(db_name)
    try:
        n_compounds = mine_db.compounds.estimated_document_count()
        assert n_compounds == 6
        n_reactions = mine_db.reactions.estimated_document_count()
        assert n_reactions == 4
        n_operators = mine_db.operators.estimated_document_count()
        assert n_operators == 1
        operator = mine_db.operators.find_one()
        assert operator["Reactions_predicted"] == 4

        start_comp = mine_db.target_compounds.find_one()
        assert start_comp["InChI_key"] == "RYNUDNWPSBJQQY-UHFFFAOYSA-N"
        required_keys = ("_id", "SMILES", "InChI_key")
        assert all(key in start_comp for key in required_keys)
    finally:
        # Always drop the test database, even when an assertion failed
        delete_database(db_name)
def test_db():
    """Yield a "mongotest" MINE loaded from data/testing_db.json.

    The JSON file holds three document lists which are stored, in order,
    in the compounds, reactions and operators collections. The database is
    dropped again once the consumer of the generator is done.
    """
    datafile_path = file_dir / "data/testing_db.json"
    collection_names = ("compounds", "reactions", "operators")
    try:
        testdb = MINE("mongotest")
        with open(datafile_path) as infile:
            jsondb = json.load(infile)
        for position, collection_name in enumerate(collection_names):
            for doc in jsondb[position]:
                collection = getattr(testdb, collection_name)
                # Replace a document with the same _id, otherwise insert
                if collection.find_one({"_id": doc["_id"]}):
                    collection.replace_one({"_id": doc["_id"]}, doc)
                else:
                    collection.insert_one(doc)
    except ServerSelectionTimeoutError:
        # NOTE(review): if MINE() itself raised, testdb is unbound below
        print("No Mongo DB server detected")

    yield testdb

    delete_database("mongotest")
def test_db():
    """Yield a freshly populated "mongotest" MINE database.

    Any existing "mongotest" database is dropped first. The three document
    lists in data/testing_db.json are then stored in the compounds,
    reactions and operators collections. Nothing is dropped after the yield.
    """
    module_dir = os.path.dirname(__file__)
    print(module_dir)
    datafile_path = os.path.join(module_dir, 'data/testing_db.json')
    delete_database("mongotest")

    def _store(collection, doc):
        # Replace the document with the same _id if present, else insert it
        id_filter = {'_id': doc['_id']}
        if collection.find_one(id_filter):
            collection.replace_one({'_id': doc['_id']}, doc)
        else:
            collection.insert_one(doc)

    try:
        testdb = MINE("mongotest")
        with open(datafile_path) as infile:
            jsondb = json.load(infile)
        for doc in jsondb[0]:
            _store(testdb.compounds, doc)
        for doc in jsondb[1]:
            _store(testdb.reactions, doc)
        for doc in jsondb[2]:
            _store(testdb.operators, doc)
    except ServerSelectionTimeoutError:
        # NOTE(review): if MINE() itself raised, testdb is unbound below
        print('No Mongo DB server detected')

    yield testdb
def test_save_as_mine_multiprocess(default_rule, smiles_dict, coreactant_dict):
    """Check a multiprocess Pickaxe expansion saved to a MINE.

    GIVEN a Pickaxe expansion produced through the multiprocess helper
    WHEN that expansion is saved as a MINE DB with two workers
    THEN the stored compounds, reactions, operators and image are as expected
    """
    db_name = 'MINE_test'
    delete_database(db_name)

    pk = pickaxe.Pickaxe(database=db_name, image_dir=DATA_DIR)
    pk.operators['2.7.1.a'] = default_rule
    pk = multiprocess(pk, smiles_dict, coreactant_dict)
    pk.save_to_mine(num_workers=2)

    mine_db = MINE(db_name)
    try:
        assert mine_db.compounds.estimated_document_count() == 31
        assert mine_db.reactions.estimated_document_count() == 49
        assert mine_db.operators.estimated_document_count() == 1

        image_path = DATA_DIR + '/X9c29f84930a190d9086a46c344020283c85fb917.svg'
        assert os.path.exists(image_path)

        start_comp = mine_db.compounds.find_one({'Type': 'Starting Compound'})
        assert len(start_comp['Reactant_in']) > 0

        # Don't track sources of coreactants
        coreactant = mine_db.compounds.find_one({'Type': 'Coreactant'})
        for pointer_field in ('Product_of', 'Reactant_in'):
            assert pointer_field not in coreactant

        product = mine_db.compounds.find_one({'Generation': 2})
        assert len(product['Product_of']) > 0
        assert product['Type'] == 'Predicted'
    finally:
        # Clean up the database and the generated images in every case
        delete_database(db_name)
        purge(DATA_DIR, r".*\.svg$")
def test_db():
    """Yield a MINE object for the "mongotest" database.

    Prints a notice and re-raises when constructing the MINE raises
    ServerSelectionTimeoutError. No data is loaded and nothing is dropped
    afterwards.
    """
    try:
        testdb = MINE("mongotest")
    except ServerSelectionTimeoutError:
        print('No Mongo DB server detected')
        # Without re-raising, testdb would be unbound at the yield below and
        # the real cause would be hidden behind an UnboundLocalError
        raise
    yield testdb
def save_to_MINE(self, db_id):
    """Save the reactions, compounds and reaction rules to a MINE database.

    Reactions and compounds are queued on unordered bulk operations and
    executed in one go; a metadata entry is written after each execution.
    The method also mutates self.compounds (reaction links and sources) and
    self.rxn_rules (prediction counts) while preparing the data.

    :param db_id: The name of the target database
    :type db_id: basestring
    """
    db = MINE(db_id)
    bulk_c = db.compounds.initialize_unordered_bulk_op()
    bulk_r = db.reactions.initialize_unordered_bulk_op()

    # For every reaction this loop:
    # 1. Adds reaction links (Reactant_in / Product_of) to its compounds
    # 2. Adds source information to its product compounds
    # 3. Increments the reactions predicted for each relevant reaction rule
    # 4. Queues the reaction on the reactions bulk operation
    for rxn in self.reactions.values():
        for x in rxn['Reactants']:
            self.compounds[x.c_id]['Reactant_in'].append(rxn['_id'])
        for x in rxn['Products']:
            self.compounds[x.c_id]['Product_of'].append(rxn['_id'])
            # Don't track sources of coreactants (ids starting with 'X')
            if x.c_id[0] == 'X':
                continue
            self.compounds[x.c_id]['Sources'].append({
                "Compounds": [x.c_id for x in rxn['Reactants']],
                "Operators": list(rxn["Operators"])
            })
        # Iterate the number of reactions predicted
        for op in rxn['Reaction_rules']:
            self.rxn_rules[op][1]['Reactions_predicted'] += 1
        db.insert_reaction(rxn, bulk=bulk_r)
    # Only execute the reactions bulk when something was queued on it
    if self.reactions:
        bulk_r.execute()
        db.meta_data.insert({
            "Timestamp": datetime.datetime.now(),
            "Action": "Reactions Inserted"
        })

    for comp_dict in self.compounds.values():
        db.insert_compound(AllChem.MolFromSmiles(comp_dict['SMILES']),
                           comp_dict, bulk=bulk_c)
    # NOTE(review): unlike the reactions bulk, this one is executed
    # unconditionally - confirm the behaviour when there are no compounds
    bulk_c.execute()
    db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "Compounds Inserted"
    })

    for x in self.rxn_rules.values():
        # There are fewer reaction rules so bulk operations are not
        # really faster.
        db.operators.save(x[1])
    db.build_indexes()
def load_compound_set(self, compound_file=None, structure_field=None,
                      id_field='id'):
    """Load compounds into the internal dictionary and return their SMILES.

    If a compound file is provided, its compounds are added as starting
    compounds. Otherwise the compounds of the associated MINE database
    (self.mine) are loaded.

    :param compound_file: Path to a file containing compounds as tsv
    :type compound_file: basestring
    :param structure_field: the name of the column containing the
        structure incarnation as Inchi or SMILES; passed through to
        self._mol_from_dict (Default: None)
    :type structure_field: str
    :param id_field: the name of the column containing the desired
        compound ID (Default: 'id')
    :type id_field: str
    :return: compound SMILES
    :rtype: list
    :raises ValueError: if neither a compound file nor a database is set
    """
    compound_smiles = []
    if compound_file:
        for line in utils.file_to_dict_list(compound_file):
            mol = self._mol_from_dict(line, structure_field)
            if not mol:
                continue
            # Add compound to internal dictionary as a starting
            # compound and store SMILES string to be returned
            smi = AllChem.MolToSmiles(mol, True)
            _id = line[id_field]
            # Do not operate on inorganic compounds
            if "C" in smi or "c" in smi:
                AllChem.SanitizeMol(mol)
                self._add_compound(_id, smi, mol=mol,
                                   type='Starting Compound')
                compound_smiles.append(smi)
    # If a MINE database is being used instead, load its compounds and
    # return their SMILES strings
    elif self.mine:
        db = MINE(self.mine)
        for compound in db.compounds.find():
            _id = compound['_id']
            smi = compound['SMILES']
            # Assume unannotated compounds are starting compounds. The key
            # is 'Type'; the previous check for 'type' made every stored
            # annotation get overwritten.
            compound_type = compound.get('Type', 'Starting Compound')
            self._add_compound(_id, smi, type=compound_type)
            compound_smiles.append(smi)
    else:
        raise ValueError('No input file or database specified for '
                         'starting compounds')

    print("%s compounds loaded" % len(compound_smiles))
    return compound_smiles
def test_mongo_cli():
    """Run pickaxe.py from the command line and check what lands in mongo."""
    db_name = "tests"
    mine = MINE(db_name)
    # The command below uses paths relative to this directory
    os.chdir(file_dir / "../data/../..")
    command = (
        "python minedatabase/pickaxe.py -d tests "
        "-r tests/data/test_cd_rxn_rule.tsv"
    )
    return_code = subprocess.call(command, shell=True)
    assert not return_code

    try:
        n_compounds = mine.compounds.estimated_document_count()
        assert n_compounds == 51
    finally:
        # Remove the database and generated images whatever the outcome
        mine.client.drop_database(db_name)
        purge(file_dir / "..", r".*\.svg$")
def test_save_no_rxn_mine():
    """Check saving a Pickaxe object that was never expanded.

    GIVEN a Pickaxe object holding only the compounds of test_compounds.tsv
    WHEN that Pickaxe object is saved into a MINE DB in the MongoDB
    THEN the starting compounds are present and no reactions exist
    """
    db_name = 'MINE_test'
    delete_database(db_name)

    pk = pickaxe.Pickaxe(database=db_name)
    compound_path = DATA_DIR + '/test_compounds.tsv'
    pk.load_compound_set(compound_file=compound_path)
    pk.save_to_mine(num_workers=1)

    mine_db = MINE(db_name)
    try:
        n_compounds = mine_db.compounds.estimated_document_count()
        assert n_compounds == 14
        n_reactions = mine_db.reactions.estimated_document_count()
        assert n_reactions == 0
    finally:
        # Drop the test database even if an assertion failed
        delete_database(db_name)
def make_box_plots(db_list, prop_list=('Mass', 'logP', 'NP_likeness')):
    """Draw one box plot per property comparing several MINE databases.

    Only compounds that have every property in prop_list are used. The
    figure is saved as "MINE property comparison.png".

    :param db_list: names of the MINE databases to compare
    :param prop_list: compound fields to plot, one subplot each
    """
    query = {prop: {'$exists': 1} for prop in prop_list}
    projection = {'_id': 0}
    projection.update((prop, 1) for prop in prop_list)

    # Collect all rows first and build the frame once; DataFrame.append is
    # no longer available in current pandas
    records = []
    for db_name in db_list:
        db = MINE(db_name)
        new_name = str(db_name.replace('exp2', 'MINE').split('-')[0])
        for record in db.compounds.find(query, projection):
            record['DB'] = new_name
            records.append(record)
    df = pandas.DataFrame(records)

    # squeeze=False keeps the axes array two-dimensional, so indexing also
    # works when only a single property is plotted
    _, axes = plt.subplots(1, len(prop_list), squeeze=False)
    for i, prop in enumerate(prop_list):
        seaborn.boxplot(x='DB', y=prop, data=df, ax=axes[0, i],
                        showfliers=False)
    plt.tight_layout()
    plt.savefig("MINE property comparison.png")
def make_fp_heatmap(db_name, fp_type='MACCS', n_rows=25):
    """Plot how often each fingerprint bit occurs in each generation.

    Bit counts are accumulated per generation (generations below 0 are
    ignored) and divided by the highest count of that generation. The
    figure is saved as "<db_name>_fp_heatmap.png".

    :param db_name: name of the MINE database to read
    :param fp_type: compound field holding the fingerprint bits
    :param n_rows: number of bits to show, chosen by the largest spread
        between generations; a falsy value shows all bits
    """
    db = MINE(db_name)
    data = defaultdict(Counter)
    projection = {"_id": 0, "Generation": 1, fp_type: 1}
    for comp in db.compounds.find({}, projection):
        if fp_type in comp and int(comp['Generation']) > -1:
            data[int(comp['Generation'])].update(comp[fp_type])

    df = pandas.DataFrame(data)
    df_norm = df.div(df.max(axis=0), axis=1)
    if not n_rows:
        df_top = df_norm
    else:
        # Rank the bits by the spread of their prevalence. Selecting rows by
        # label avoids the removed `.ix` indexer and the temporary 'range'
        # column that had to be sliced off again.
        spread = df_norm.max(axis=1) - df_norm.min(axis=1)
        top_bits = spread.sort_values(ascending=False).head(int(n_rows)).index
        df_top = df_norm.loc[top_bits]

    hm = seaborn.heatmap(df_top)
    hm.collections[0].colorbar.set_label("Prevalence")
    plt.xlabel('Generation')
    plt.ylabel(fp_type + " bit")
    plt.yticks(rotation=0)
    plt.savefig(db_name + '_fp_heatmap.png')
def test_save_as_mine(default_rule, smiles_dict, coreactant_dict):
    """Check that a Pickaxe expansion is stored in a MINE as expected.

    GIVEN a two-generation Pickaxe expansion of FADH with rule 2.7.1.a
    WHEN that expansion is saved as a MINE DB in the MongoDB
    THEN the stored compounds, reactions, operators and image are as expected
    """
    DATA_DIR = (file_dir / "../data").resolve()
    db_name = "MINE_test"
    delete_database(db_name)

    pk = pickaxe.Pickaxe(database=db_name, image_dir=DATA_DIR,
                         explicit_h=True)
    pk.operators["2.7.1.a"] = default_rule
    for coreactant_key in ("ATP", "ADP"):
        pk._load_coreactant(coreactant_dict[coreactant_key])
    pk._add_compound("FADH", smiles_dict["FADH"],
                     cpd_type="Starting Compound")
    pk.transform_all(generations=2)
    pk.save_to_mine(processes=1)

    mine_db = MINE(db_name)
    try:
        assert mine_db.compounds.estimated_document_count() == 31
        assert mine_db.reactions.estimated_document_count() == 49
        assert mine_db.operators.estimated_document_count() == 1
        operator = mine_db.operators.find_one()
        assert operator["Reactions_predicted"] == 49

        image_path = DATA_DIR / "X9c29f84930a190d9086a46c344020283c85fb917.svg"
        assert os.path.exists(image_path)

        start_comp = mine_db.compounds.find_one({"Type": "Starting Compound"})
        assert len(start_comp["Reactant_in"]) > 0

        # Don't track sources of coreactants
        coreactant = mine_db.compounds.find_one({"Type": "Coreactant"})
        for pointer_field in ("Product_of", "Reactant_in"):
            assert pointer_field not in coreactant

        product = mine_db.compounds.find_one({"Generation": 2})
        assert len(product["Product_of"]) > 0
        assert product["Type"] == "Predicted"
    finally:
        # Clean up the database and the generated images in every case
        delete_database(db_name)
        purge(DATA_DIR, r".*\.svg$")
def make_violin_plots(db_list, prop_list=('Mass', 'logP', 'NP_likeness')):
    """Draw one split violin plot per property comparing MINE databases.

    Coreactants are excluded and the violins are split by compound Type.
    The figure is saved as "MINE property comparison.png".

    :param db_list: names of the MINE databases to compare
    :param prop_list: compound fields to plot, one subplot each
    """
    query = {"Type": {'$ne': 'Coreactant'}}
    projection = {'_id': 0, 'Type': 1}
    projection.update((prop, 1) for prop in prop_list)

    # Collect all rows first and build the frame once; DataFrame.append is
    # no longer available in current pandas
    records = []
    for db_name in db_list:
        db = MINE(db_name)
        label = str(_strip_affix(db_name, 'exp2'))
        for record in db.compounds.find(query, projection):
            record['DB'] = label
            records.append(record)
    df = pandas.DataFrame(records)

    # squeeze=False keeps the axes array two-dimensional, so indexing also
    # works when only a single property is plotted
    _, axes = plt.subplots(1, len(prop_list), squeeze=False)
    for i, prop in enumerate(prop_list):
        axis = axes[0, i]
        seaborn.violinplot(split=True, hue='Type', x='DB', y=prop, data=df,
                           ax=axis)
        # Keep only the legend of the first subplot
        if i > 0 and axis.legend_ is not None:
            axis.legend_.remove()
    plt.tight_layout()
    plt.savefig("MINE property comparison.png")


def _strip_affix(name, affix):
    """Remove affix once from the start and once from the end of name.

    str.strip(affix) is not suitable here: it removes any of the affix's
    characters, so a name like "Pepexp2" would be reduced to "P".
    """
    if affix and name.startswith(affix):
        name = name[len(affix):]
    if affix and name.endswith(affix):
        name = name[:-len(affix)]
    return name
def __init__(self, rule_list=None, coreactant_list=None, explicit_h=True,
             kekulize=True, neutralise=True, errors=True, racemize=False,
             database=None, image_dir=None):
    """Set up an object that generates compounds from reaction rules.

    New compounds are generated from user-specified starting compounds
    using a set of SMARTS-based reaction rules. Rules and coreactants may
    be given as text files here or loaded later on an ad hoc basis.

    :param rule_list: Path to a list of reaction rules in TSV form
    :type rule_list: str
    :param coreactant_list: Path to list of coreactants in TSV form
    :type coreactant_list: str
    :param explicit_h: Explicitly represent bound hydrogen atoms
    :type explicit_h: bool
    :param kekulize: Kekulize structures before applying reaction rules
    :type kekulize: bool
    :param neutralise: Remove charges on structure before applying
        reaction rules
    :type neutralise: bool
    :param errors: Print underlying RDKit warnings and halt on error
    :type errors: bool
    :param racemize: Enumerate all possible chiral forms of a molecule
        if unspecified stereocenters exist
    :type racemize: bool
    :param database: Name of desired Mongo Database
    :type database: str
    :param image_dir: Path to desired image folder
    :type image_dir: str
    """
    # Containers filled while rules and compounds are loaded and expanded
    self.rxn_rules = {}
    self.coreactants = {}
    self._raw_compounds = {}
    self.compounds = {}
    self.reactions = {}
    self.generation = 0

    # Options taken from the arguments
    self.explicit_h = explicit_h
    self.kekulize = kekulize
    self.racemize = racemize
    self.neutralise = neutralise
    self.image_dir = image_dir
    self.errors = errors

    # Internal settings with fixed initial values
    self.fragmented_mols = False
    self.radical_check = False
    self.structure_field = None

    # Remember the database name (None when no database is used) and warn
    # if that database already contains compounds
    self.mine = database or None
    if database:
        db = MINE(database)
        if db.compounds.count():
            print("Warning: expansion will overwrite existing compounds and"
                  " operators!")

    # When errors is False the RDKit logger level is set to 4.
    # NOTE(review): presumably this silences everything below CRITICAL -
    # confirm against the RDLogger level constants.
    from rdkit import RDLogger
    lg = RDLogger.logger()
    if not errors:
        lg.setLevel(4)

    # Load coreactants (if any), one per line of the file
    if coreactant_list:
        with open(coreactant_list) as infile:
            for coreactant in infile:
                self._load_coreactant(coreactant)

    # Load rules (if any)
    if rule_list:
        self.load_rxn_rules(rule_list)
mine_db.meta_data.insert({ "Timestamp": datetime.datetime.now(), "Action": "MolFiles Imported", "Filepath": target }) if __name__ == '__main__': # User inputs task as first argument (export-sdf, export-smi, export-mol, # import-sdf, import-smi, or import-mol) task = sys.argv[1] # User inputs database name as second argument db_name = sys.argv[2] # User inputs file path as third argument path = sys.argv[3] database = MINE(db_name) if task == 'export-sdf': # If a maximum molecules per file is specified (fourth argument # entered by user), then pass that to the export function. if len(sys.argv) == 5: export_sdf(database, path, int(sys.argv[4])) # Otherwise, assume an unlimited number of molecules per file else: export_sdf(database, path) elif task == 'export-smi': # If a maximum molecules per file is specified (fourth argument # entered by user), then pass that to the export function. if len(sys.argv) == 5: export_smiles(database, path, int(sys.argv[4])) # Otherwise, assume an unlimited number of molecules per file else:
abrv[row['Abbreviation'].strip()] = c_id if pic_dir: rc = subprocess.call("/Applications/ChemAxon/JChem/bin/molconvert -o %s/temp.png png:-a,w500 -s " "'%s'" % (pic_dir, row['SMILES'].strip()), shell=True) if not rc: os.rename(pic_dir + "temp.png", pic_dir + c_id + ".png") else: print("Failed to parse %s" % row['SMILES']) else: print('SMILES missing from %s' % row.name) reactions['Type of Reaction'].fillna('ffill', inplace=True) for i, row in reactions.iterrows(): if row['Equation (Abbreviations)']: rxn = row[['Metabolite', 'Equation (full names)']].to_dict() if isinstance(row['PMID or doi'], str): rxn['References'] = row['PMID or doi'].strip().split('; ') else: rxn['References'] = [str(row['PMID or doi'])] rxn['Type'] = str(row['Type of Reaction']).strip() rxn['Notes'] = str(row['Comments']).strip() rxn['Reactants'], rxn['Products'] = utils.parse_text_rxn(row['Equation (Abbreviations)'], ' = ', ' + ', abrv) rxn['InChI_hash'] = utils._calculate_rxn_hash(mine_db, rxn['Reactants'], rxn['Products']) mine_db.insert_reaction(rxn) else: print('RXN missing from %s' % row.name) if __name__ == '__main__': mine = MINE(sys.argv[1]) load_cdmine_rxns(mine, sys.argv[2])
def delete_database(name):
    """Drop the database called name and close the client used for it."""
    client = MINE(name).client
    client.drop_database(name)
    client.close()
import pandas
import seaborn
import matplotlib.pyplot as plt
import numpy
from minedatabase.databases import MINE
import sys

# The database to analyse is named by the first command line argument
db = MINE(sys.argv[1])
fields = ['Compounds', 'Compound_ids', 'Reactions', 'Operators']


def pw_jaccard(series, reduce=numpy.median):
    """Summarise, for each set, its Jaccard similarity to all other sets.

    For every set in series the ratio |x & y| / |x | y| to every other set
    is computed, and those ratios are condensed with reduce.

    :param series: iterable of sets
    :param reduce: function turning a list of ratios into one value
    :return: one reduced value per set, in input order
    """
    summary = []
    for i, left in enumerate(series):
        ratios = [
            len(left & right) / float(len(left | right))
            for j, right in enumerate(series)
            if i != j
        ]
        summary.append(reduce(ratios))
    return summary


def _as_set(values):
    # Lists of lists contribute only the first item of each inner list
    if isinstance(values[0], list):
        return set([entry[0] for entry in values])
    return set(values)


keys = {}
results = []
# One row per model: its _id followed by one set for each of the fields
for model in db.models.find():
    results.append([model['_id']] + [_as_set(model[x]) for x in fields])
print(r_atoms, p_atoms) raise ValueError('Unbalanced Reaction: %s' % rxn['MetaCyc ID']) if sorted(rxn['Reactants']) == sorted(rxn['Products']): raise ValueError('No Change: %s' % rxn['MetaCyc ID']) except ValueError as e: print(e) continue mine_db.insert_reaction(rxn) """reactions = pd.read_csv(csv_path, sep='\t', error_bad_lines=False).fillna("") for i, row in reactions.iterrows(): rxn = row[['MetaCyc ID']].to_dict() rxn['Metabolite'], rxn['Type'] = "", "" """ def add_metacyc_comps(metacyc_db, mine_db): c_ids = set(mine_db.reactions.distinct("Reactants.c_id")) c_ids |= set(mine_db.reactions.distinct("Products.c_id")) for _id in c_ids: if not mine_db.compounds.count({"_id": _id}): comp = metacyc_db.compounds.find_one({"_id": _id}) mine_db.compounds.insert(comp) if __name__ == '__main__': AllChem.WrapLogs() db = MINE(sys.argv[1]) hash_dict = dict_from_sdf(sys.argv[2]) add_metacyc_rxns(db, sys.argv[3], hash_dict) add_metacyc_comps(db, MINE(sys.argv[1]))
mine_db.meta_data.insert({ "Timestamp": datetime.datetime.now(), "Action": "MolFiles Imported", "Filepath": target }) if __name__ == '__main__': # User inputs task as first argument (export-sdf, export-smi, export-mol, # import-sdf, import-smi, or import-mol) TASK = sys.argv[1] # User inputs database name as second argument DB_NAME = sys.argv[2] # User inputs file path as third argument PATH = sys.argv[3] database = MINE(DB_NAME) # pylint: disable=invalid-name if TASK == 'export-sdf': # If a maximum molecules per file is specified (fourth argument # entered by user), then pass that to the export function. if len(sys.argv) == 5: export_sdf(database, PATH, int(sys.argv[4])) # Otherwise, assume an unlimited number of molecules per file else: export_sdf(database, PATH) elif TASK == 'export-smi': # If a maximum molecules per file is specified (fourth argument # entered by user), then pass that to the export function. if len(sys.argv) == 5: export_smiles(database, PATH, int(sys.argv[4])) # Otherwise, assume an unlimited number of molecules per file else:
def delete_database(name):
    """Drop the Mongo database *name* and close the connection to it."""
    mongo_client = MINE(name).client
    # Drop first, then release the client that was opened for the drop
    mongo_client.drop_database(name)
    mongo_client.close()
'cpd02857', 'cpd00031', 'cpd00038', 'cpd00126', 'cpd00241', 'cpd00295', 'cpd02552', 'cpd00338', 'cpd00683', 'cpd00171', 'cpd00198', 'cpd00238', 'cpd01977', 'cpd00051', 'cpd02069', ] db = MINE('plant_spontanious') rxn_ids = set() for cpd_id in top_30: cpd = db.compounds.find_one({"DB_links": { 'Model_SEED': cpd_id }}, {'Reactant_in': 1}) if cpd: print(cpd_id) rxn_ids.update(cpd.get('Reactant_in')) else: print("Can't find: {}".format(cpd_id)) print("Printing {} rxns".format(len(rxn_ids))) export_inchi_rxns(db, "./", list(rxn_ids))
if cleanup: os.remove(os.path.join(result_dir, spec_file)) if __name__ == "__main__": # pylint: disable=invalid-name # collect user input if sys.argv[1] == "calculate": db_name = sys.argv[2] file_dir = sys.argv[3] job_comp_number = int(sys.argv[4]) spec_type = sys.argv[5] if len(sys.argv) == 7: job_template = sys.argv[6] else: job_template = None db = MINE(db_name) start_cfm_jobs(file_dir, db, spec_type, job_template=job_template, job_comp_number=job_comp_number) if sys.argv[1] == 'load': result_dir = sys.argv[2] spec_type = sys.argv[3] dbs = [MINE(x) for x in sys.argv[4:]] load_cfm_results(result_dir, dbs, spec_type=spec_type)