def test_predict_compound_names(self):
    """Check that prediction runs on a test file that carries compound names.

    Points ``self.args.test_path`` at ``delaney_toy_smiles_names.csv`` located
    next to this module, enables ``use_compound_names`` and runs
    ``modify_predict_args`` followed by ``make_predictions``. Any ordinary
    exception raised along the way is reported as a test failure.
    """
    try:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        self.args.test_path = os.path.join(
            module_dir, 'delaney_toy_smiles_names.csv')
        self.args.use_compound_names = True
        modify_predict_args(self.args)
        make_predictions(self.args)
    except Exception as exc:
        # Catch Exception instead of using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate, and include the
        # cause so the failure can be diagnosed from the test report.
        self.fail(f'predict_compound_names: {exc!r}')
def predict_outside(args_dict):
    """
    Used for calling this script from another python script.

    The arguments are turned into a command line with ``create_args`` and
    temporarily installed as ``sys.argv`` while the arguments are parsed and
    the predictions are made. The caller's ``sys.argv`` is restored
    afterwards, even if parsing or prediction raises.

    :dict args_dict: dict of args to use
    :return: the value returned by ``make_predictions`` for the parsed args
    """
    saved_argv = sys.argv
    # parse_args() is called without arguments, so it presumably reads the
    # command line from sys.argv; swap it in only for the duration of the call.
    sys.argv = create_args(args_dict, 'predict.py')
    try:
        args = PredictArgs().parse_args()
        return make_predictions(args)
    finally:
        # Do not leak the synthetic command line into the calling script.
        sys.argv = saved_argv
def mlCharges(mols):
    """
    Calc ML charges

    Runs ``make_predictions`` with the packaged ``QM_137k.pt`` checkpoint on
    the SMILES of every molecule and maps the predicted per-atom charges back
    onto the atom order of the input molecule. When that mapping raises
    ``ValueError`` for a molecule, Gasteiger charges are used for it instead.

    :param mols: iterable of RDKit molecules. MUST use mols with hydrogens!
    :return: list with one list of per-atom charges for each input molecule;
        an empty list when no molecules are given
    """
    # Materialise once: the molecules are iterated twice below.
    mols = list(mols)
    if not mols:
        # Nothing to predict; unpacking the per-molecule counts of an empty
        # input would otherwise raise.
        return []

    smiles = [Chem.MolToSmiles(mol) for mol in mols]
    path = pkg_resources.resource_filename(__name__, 'QM_137k.pt')
    args = Namespace(batch_size=50,
                     checkpoint_dir=None,
                     checkpoint_path=path,
                     checkpoint_paths=[path],
                     cuda=False,
                     features_generator=None,
                     features_path=None,
                     gpu=None,
                     max_data_size=None,
                     no_features_scaling=False,
                     preds_path=None,
                     test_path=None,
                     use_compound_names=False)

    # Silence anything the prediction code prints to stdout.
    with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
        test_preds, _ = make_predictions(args, smiles=smiles)

    # num_atoms_bonds returns (atom count, bond count); only atoms are needed.
    n_atoms = [num_atoms_bonds(x)[0] for x in smiles]

    # The first prediction entry holds the charges of all molecules; split the
    # flattened values at the cumulative atom counts and drop the trailing
    # chunk that np.split produces after the last boundary.
    partial_charge = np.split(test_preds[0].flatten(),
                              np.cumsum(np.array(n_atoms)))[:-1]

    charges = []
    for i, mol in enumerate(mols):
        try:
            reorder_list = get_reorder_list(mol)
            charges.append([
                partial_charge[i][reorder_list[x]]
                for x in range(mol.GetNumAtoms())
            ])
        except ValueError:
            # Could not get prediction, default to Gasteiger
            print(
                "Warning: could not obtain prediction, defaulting to Gasteiger charges for one molecule"
            )
            AllChem.ComputeGasteigerCharges(mol)
            charges.append([
                a.GetDoubleProp('_GasteigerCharge') for a in mol.GetAtoms()
            ])
    return charges
def predict():
    """Renders the predict page and makes predictions if the method is POST.

    SMILES are taken from the text field, else from the drawn molecule, else
    from an uploaded data file. Predictions are made with every model stored
    for the selected checkpoint and rendered with ``render_predict``.
    """
    if request.method == 'GET':
        return render_predict()

    # Get arguments
    ckpt_id = request.form['checkpointName']

    if request.form['textSmiles'] != '':
        smiles = request.form['textSmiles'].split()
    elif request.form['drawSmiles'] != '':
        smiles = [request.form['drawSmiles']]
    else:
        # Upload data file with SMILES
        data = request.files['data']
        data_name = secure_filename(data.filename)
        data_path = os.path.join(app.config['TEMP_FOLDER'], data_name)
        data.save(data_path)

        # Check if header is smiles
        possible_smiles = get_header(data_path)[0]
        if Chem.MolFromSmiles(possible_smiles) is not None:
            smiles = [possible_smiles]
        else:
            smiles = []

        # Get remaining smiles
        smiles.extend(get_smiles(data_path))

    # Report empty input explicitly. Previously this case fell through to the
    # "all invalid" check (all() of an empty list is True) and produced a
    # misleading error, leaving the dedicated message unreachable.
    if not smiles:
        return render_predict(errors=['No SMILES strings given'])

    models = db.get_models(ckpt_id)
    model_paths = [
        os.path.join(app.config['CHECKPOINT_FOLDER'], f'{model["id"]}.pt')
        for model in models
    ]

    task_names = load_task_names(model_paths[0])
    num_tasks = len(task_names)
    gpu = request.form.get('gpu')
    train_args = load_args(model_paths[0])

    # Build arguments
    arguments = [
        '--test_path', 'None',
        '--preds_path', os.path.join(app.config['TEMP_FOLDER'],
                                     app.config['PREDICTIONS_FILENAME']),
        '--checkpoint_paths', *model_paths
    ]

    if gpu is not None:
        if gpu == 'None':
            arguments.append('--no_cuda')
        else:
            arguments += ['--gpu', gpu]

    # Handle additional features
    if train_args.features_path is not None:
        # TODO: make it possible to specify the features generator if trained using features_path
        arguments += [
            '--features_generator', 'rdkit_2d_normalized',
            '--no_features_scaling'
        ]
    elif train_args.features_generator is not None:
        arguments += ['--features_generator', *train_args.features_generator]

        if not train_args.features_scaling:
            arguments.append('--no_features_scaling')

    # Parse arguments
    args = PredictArgs().parse_args(arguments)

    # Run predictions
    preds = make_predictions(args=args, smiles=smiles)

    # Record which predictions are missing BEFORE they are replaced below;
    # testing for None after the replacement can never succeed.
    is_invalid = [pred is None for pred in preds]

    if all(is_invalid):
        return render_predict(errors=['All SMILES are invalid'])

    # Replace invalid smiles with message
    invalid_smiles_warning = 'Invalid SMILES String'
    preds = [
        [invalid_smiles_warning] * num_tasks if invalid else pred
        for pred, invalid in zip(preds, is_invalid)
    ]

    warnings = (["List contains invalid SMILES strings"]
                if any(is_invalid) else None)

    return render_predict(predicted=True,
                          smiles=smiles,
                          num_smiles=min(10, len(smiles)),
                          show_more=max(0, len(smiles) - 10),
                          task_names=task_names,
                          num_tasks=num_tasks,
                          preds=preds,
                          warnings=warnings,
                          errors=None)
def predict_desc(args):
    """Predict descriptors for every reactant of the reactions in ``args.data_path``.

    The reactant SMILES are collected from the ``rxn_smiles`` column, passed
    to ``make_predictions`` with the ``QM_137k.pt`` checkpoint, and the six
    returned descriptor arrays are split per molecule into a DataFrame. The
    raw frame is written to ``reactants_descriptors.pickle`` and the min-max
    normalised frame to ``reactants_descriptors_norm.pickle``, both in
    ``args.output_dir``.

    When ``args.predict`` is false the scalers are fitted and stored in
    ``args.model_dir``; otherwise the stored scalers are loaded and reused.

    :param args: argument namespace; reads ``data_path``, ``output_dir``,
        ``model_dir`` and ``predict`` and overwrites ``test_path``,
        ``preds_path`` and ``checkpoint_path``
    :return: the normalised descriptor DataFrame
    """
    import chemprop

    chemprop_root = os.path.dirname(os.path.dirname(chemprop.__file__))

    # trick chemprop
    args.test_path = 'foo'
    args.preds_path = 'foo'
    args.checkpoint_path = os.path.join(chemprop_root, 'trained_model',
                                        'QM_137k.pt')
    modify_predict_args(args)

    def num_atoms_bonds(smiles):
        """Return (atom count, bond count) of the molecule with explicit Hs."""
        m = Chem.MolFromSmiles(smiles)
        m = Chem.AddHs(m)
        return len(m.GetAtoms()), len(m.GetBonds())

    def split_per_molecule(values, boundaries):
        """Split flattened values at ``boundaries``, dropping the trailing chunk."""
        return np.split(values.flatten(), boundaries)[:-1]

    # predict descriptors for reactants in the reactions
    reactivity_data = pd.read_csv(args.data_path, index_col=0)
    reactants = set()
    for rxn_smiles in reactivity_data['rxn_smiles']:
        # rxn_smiles has three '>'-separated parts; only the first is used.
        rs, _, _ = rxn_smiles.split('>')
        reactants.update(rs.split('.'))
    reactants = list(reactants)

    print('Predicting descriptors for reactants...')
    test_preds, test_smiles = make_predictions(args, smiles=reactants)

    n_atoms, n_bonds = zip(*[num_atoms_bonds(x) for x in reactants])
    atom_bounds = np.cumsum(np.array(n_atoms))
    bond_bounds = np.cumsum(np.array(n_bonds))

    # Entries 0-3 are split by atom counts, entries 4-5 by bond counts.
    df = pd.DataFrame({
        'smiles': reactants,
        'partial_charge': split_per_molecule(test_preds[0], atom_bounds),
        'fukui_neu': split_per_molecule(test_preds[1], atom_bounds),
        'fukui_elec': split_per_molecule(test_preds[2], atom_bounds),
        'NMR': split_per_molecule(test_preds[3], atom_bounds),
        'bond_order': split_per_molecule(test_preds[4], bond_bounds),
        'bond_length': split_per_molecule(test_preds[5], bond_bounds)
    })

    invalid = check_chemprop_out(df)
    # FIXME remove invalid molecules from reaction dataset
    print(invalid)

    # makedirs with exist_ok avoids the check-then-create race and also
    # creates missing parent directories.
    os.makedirs(args.output_dir, exist_ok=True)

    df.to_pickle(os.path.join(args.output_dir, 'reactants_descriptors.pickle'))

    save_dir = args.model_dir
    scalers_path = os.path.join(save_dir, 'scalers.pickle')
    if not args.predict:
        df, scalers = min_max_normalize(df)
        # Context manager guarantees the scalers are flushed and the handle
        # is closed.
        with open(scalers_path, 'wb') as scalers_file:
            pickle.dump(scalers, scalers_file)
    else:
        # NOTE: unpickling executes arbitrary code; model_dir must be trusted.
        with open(scalers_path, 'rb') as scalers_file:
            scalers = pickle.load(scalers_file)
        df, _ = min_max_normalize(df, scalers)

    df.to_pickle(
        os.path.join(args.output_dir, 'reactants_descriptors_norm.pickle'))

    return df
def num_atoms_bonds(smiles): m = Chem.MolFromSmiles(smiles) m = Chem.AddHs(m) return len(m.GetAtoms()), len(m.GetBonds()) if __name__ == '__main__': args = parse_predict_args() test_df = pd.read_csv(args.test_path, index_col=0) smiles = test_df.smiles.tolist() start = time.time() test_preds, test_smiles = make_predictions(args, smiles=smiles) end = time.time() print('time:{}s'.format(end - start)) partial_charge = test_preds[0] partial_neu = test_preds[1] partial_elec = test_preds[2] NMR = test_preds[3] bond_order = test_preds[4] bond_distance = test_preds[5] n_atoms, n_bonds = zip(*[num_atoms_bonds(x) for x in smiles]) partial_charge = np.split(partial_charge.flatten(),
def test_predict(self):
    """Check that prediction runs with the arguments stored on ``self.args``.

    Runs ``modify_predict_args`` followed by ``make_predictions`` and reports
    any ordinary exception as a test failure.
    """
    try:
        modify_predict_args(self.args)
        make_predictions(self.args)
    except Exception as exc:
        # Catch Exception instead of using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate, and include the
        # cause so the failure can be diagnosed from the test report.
        self.fail(f'predict: {exc!r}')
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 4 19:42:36 2019

@author: SY

Runs make_predictions on the ``smiles`` column of the CSV file given by
``args.data_path`` and writes the SMILES together with one ``pred_<i>``
column per predicted value to ``./predict.csv``.
"""
import warnings

# Installed before the remaining imports, presumably so that warnings emitted
# while importing them are suppressed as well.
warnings.filterwarnings('ignore')

import pandas as pd
from chemprop.parsing import parse_train_args, modify_train_args
from chemprop.train import make_predictions

if __name__ == '__main__':
    args = parse_train_args()
    # args.checkpoint_dir = './ckpt'
    modify_train_args(args)

    df = pd.read_csv(args.data_path)
    pred, smiles = make_predictions(args, df.smiles.tolist())

    df = pd.DataFrame({'smiles': smiles})
    # The number of prediction columns is taken from the first row; guard
    # against an empty result so that pred[0] cannot raise IndexError.
    num_columns = len(pred[0]) if len(pred) > 0 else 0
    for i in range(num_columns):
        df[f'pred_{i}'] = [item[i] for item in pred]

    df.to_csv('./predict.csv', index=False)
"""Loads a trained model checkpoint and makes predictions on a dataset."""

from chemprop.parsing import parse_predict_args
from chemprop.train import make_predictions


def _main():
    """Parse the prediction arguments and run the predictions with them."""
    make_predictions(parse_predict_args())


if __name__ == '__main__':
    _main()