import os

# parse_model_config is assumed to live in metstab_shap.config alongside the
# other parsers imported in the scripts below
from metstab_shap.config import (parse_data_config, parse_representation_config,
                                 parse_task_config, parse_model_config)


def get_configs_and_model(folder_path):
    """Go through a folder with results and retrieve the configs and the pickled model."""
    configs = [os.path.join(folder_path, cfg) for cfg in os.listdir(folder_path) if 'cfg' in cfg]
    data_cfg = parse_data_config([dc for dc in configs if 'rat' in dc or 'human' in dc][0])
    repr_cfg = parse_representation_config([rc for rc in configs if 'maccs' in rc or 'morgan' in rc or 'krfp' in rc][0])
    task_cfg = parse_task_config([tc for tc in configs if 'regression' in tc or 'classification' in tc][0])
    model_cfg = parse_model_config([mc for mc in configs if 'nb.cfg' in mc or 'svm.cfg' in mc or 'trees.cfg' in mc][0])
    model_pickle = [os.path.join(folder_path, pkl) for pkl in os.listdir(folder_path) if 'model.pickle' in pkl][0]
    return data_cfg, repr_cfg, task_cfg, model_cfg, model_pickle
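# A minimal usage sketch (not part of the original file): load the configs and
# unpickle the trained model from a results folder laid out the way the function
# expects. The folder path below is hypothetical.
if __name__ == '__main__':
    import pickle

    folder = 'results/human-krfp-classification-svm'  # hypothetical results folder
    data_cfg, repr_cfg, task_cfg, model_cfg, model_pickle = get_configs_and_model(folder)
    with open(model_pickle, 'rb') as f:
        model = pickle.load(f)
    print(type(model))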
import numpy as np
import pandas as pd

from metstab_shap.grid import SVC_rbf
from metstab_shap.config import utils_section, csv_section
from metstab_shap.config import parse_data_config, parse_representation_config, parse_task_config
from metstab_shap.data import load_data

# load data (and change to classification if needed)
data_cfg = parse_data_config('configs/data/human.cfg')
repr_cfg = parse_representation_config('configs/repr/krfp.cfg')
task_cfg = parse_task_config('configs/task/classification.cfg')
x, y, _, test_x, test_y, smiles, test_smiles = load_data(data_cfg, **repr_cfg[utils_section])

# change y in case of classification
if 'classification' == task_cfg[utils_section]['task']:
    log_scale = 'log' == data_cfg[csv_section]['scale'].lower().strip()
    y = task_cfg[utils_section]['cutoffs'](y, log_scale)
    test_y = task_cfg[utils_section]['cutoffs'](test_y, log_scale)

training_features = x
training_target = y
testing_features = test_x

# Average CV score on the training set was: 0.8592512301458379
exported_pipeline = SVC_rbf(C=5.0, cache_size=100, gamma=0.01, kernel="rbf", max_iter=2000,
import pandas as pd
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import Binarizer, MinMaxScaler
from sklearn.tree import DecisionTreeRegressor
from tpot.builtins import StackingEstimator, ZeroCount
from tpot.export_utils import set_param_recursive

from metstab_shap.config import utils_section, csv_section
from metstab_shap.config import parse_data_config, parse_representation_config, parse_task_config
from metstab_shap.data import load_data

# load data (no classification cutoffs needed: this is a regression task)
data_cfg = parse_data_config('configs/data/rat.cfg')
repr_cfg = parse_representation_config('configs/repr/maccs.cfg')
task_cfg = parse_task_config('configs/task/regression.cfg')
x, y, _, test_x, test_y, smiles, test_smiles = load_data(data_cfg, **repr_cfg[utils_section])

training_features = x
training_target = y
testing_features = test_x

# Average CV score on the training set was: -0.15289999993179348
exported_pipeline = make_pipeline(
    ZeroCount(),
    MinMaxScaler(),
    StackingEstimator(estimator=DecisionTreeRegressor(max_depth=5, max_features=0.25,
                                                      min_samples_leaf=3, min_samples_split=14,
                                                      splitter="best")),
import logging
import os
import signal
import sys

import neptune

from metstab_shap.config import (parse_model_config, parse_data_config,
                                 parse_representation_config, parse_task_config,
                                 parse_tpot_config)
# LoggerWrapper and save_configs are assumed to come from metstab_shap.utils
from metstab_shap.utils import LoggerWrapper, save_configs

saving_dir = sys.argv[6]
try:
    os.makedirs(saving_dir)
except FileExistsError:
    pass

# setup logger (everything that goes through the logger or stderr
# will be saved in a file and sent to stdout)
logger_wrapper = LoggerWrapper(saving_dir)
sys.stderr.write = logger_wrapper.log_errors
logger_wrapper.logger.info(f'Running {sys.argv[1:-1]}')

# Load configs
model_cfg = parse_model_config(sys.argv[1])
data_cfg = parse_data_config(sys.argv[2])
repr_cfg = parse_representation_config(sys.argv[3])
task_cfg = parse_task_config(sys.argv[4])
tpot_cfg = parse_tpot_config(sys.argv[5])
save_configs(sys.argv[1:-1], saving_dir)


# nicely handle interruptions coming from the Neptune UI
def neptune_aborter():
    # closes TPOT from the UI; the best pipeline found so far will be saved
    logging.getLogger('').info("neptune_aborter: sending Ctrl + C.")
    os.kill(os.getpid(), signal.SIGINT)


# nexp = None  # uncomment if you don't want to use Neptune
# leave the code below uncommented if you want to use Neptune
nexp = neptune.create_experiment(
    name=saving_dir,
    params={
        'dataset':