                # Resolver global, by value, by id.
                ('log', {'key': None}),
                ('log', {'key_id': None}),
                # Resolver conf, by value, by id.
                ('log2', {'x_id': None}),
                ('log2', {'x': None}),
            ],
            'priority': 2,
        }
    },
}

if __name__ == '__main__':
    # Execute configuration(s).
    objects = pycnfg.run(CNFG, resolve_none=True)
    # => 42
    # => 42
    # => 42
    # => 42
    # => 42
    # => 42

    # Storage for produced object(s).
    print(objects)
    # => {'logger__default': <Logger default (INFO)>,
    #     'path__default': 'pycnfg/examples/complex',
    #     'x__1': 'a',
    #     'x__2': 'c',
    #     'y__conf': {'b': 2, 'c': 42, 'print': 252}}
        }
    },
    'key': {
        'conf': {
            'init': 'b',
        },
    },
    'val': {
        'conf': {
            'init': 24,
        },
        'conf2': {
            'init': '42',
        }
    },
}

if __name__ == '__main__':
    for cnfg in [CNFG_1, CNFG_2, CNFG_3, CNFG_4]:
        # Execute configuration(s).
        objects = pycnfg.run(cnfg, dcnfg={}, update_expl=True,
                             resolve_none=True)
        # => 42

        # Storage for produced object(s).
        print(objects['section_id__configuration_id'])
        # => {'a': 7, 'b': 42}
        print('=' * 79)
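
# The listings above show only the tail of each configuration, so the section
# heads and the producer definitions are cut off. Below is a minimal,
# self-contained sketch of the same pattern; the section name 'section_id',
# the configuration name 'configuration_id', the DemoProducer class and its
# methods are illustrative assumptions, not the original CNFG_1..CNFG_4.
import pycnfg


class DemoProducer(pycnfg.Producer):
    """Produce a dict object via configured steps."""

    def set(self, obj, key, val=42):
        # Each step method receives the current object first, then the step
        # kwargs, and returns the (possibly updated) object.
        obj[key] = val
        return obj

    def log(self, obj, key='a'):
        print(obj[key])
        return obj


CNFG_DEMO = {
    'section_id': {
        'configuration_id': {
            'init': {'a': 7},
            'producer': DemoProducer,
            'steps': [
                ('set', {'key': 'b', 'val': 42}),
                ('log', {'key': 'b'}),
            ],
        },
    },
}

if __name__ == '__main__':
    objects = pycnfg.run(CNFG_DEMO, dcnfg={})
    # => 42
    print(objects['section_id__configuration_id'])
    # => {'a': 7, 'b': 42}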
                        'return_train_score': True,
                    },
                }),
                # Validate 'sgd' pipeline on 'train' and 'test' subsets of the
                # 'train' dataset with the 'r2' and 'mse' scorers (after
                # optimization).
                ('validate', {
                    'pipeline_id': 'pipeline__sgd',
                    'dataset_id': 'dataset__train',
                    'subset_id': ['train', 'test'],
                    'metric_id': ['metric__r2', 'metric__mse'],
                }),
                # Predict with 'sgd' pipeline on the whole 'test' dataset.
                ('predict', {
                    'pipeline_id': 'pipeline__sgd',
                    'dataset_id': 'dataset__test',
                    'subset_id': '',
                }),
                # Dump 'sgd' pipeline to disk.
                ('dump', {
                    'pipeline_id': 'pipeline__sgd',
                    'dirpath': None,
                }),
            ],
        },
    },
}

if __name__ == '__main__':
    objects = pycnfg.run(CNFG, dcnfg={})
        },
    },
    # Separate section for 'gs_params' kwarg.
    'gs_params': {
        'conf': {
            'priority': 3,
            'init': {
                'n_iter': None,
                'n_jobs': 1,
                'refit': 'metric__r2',
                'cv': sklearn.model_selection.KFold(
                    n_splits=3, shuffle=True, random_state=42),
                'verbose': 1,
                'pre_dispatch': 'n_jobs',
                'return_train_score': True,
            },
        },
    },
}

if __name__ == '__main__':
    objects = pycnfg.run(CNFG, dcnfg=mlshell.CNFG)
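
# The 'gs_params' keys mirror the keyword arguments of
# sklearn.model_selection.RandomizedSearchCV, so presumably the dict is
# forwarded to the optimizer during the 'optimize' step. A rough sketch under
# that assumption; the estimator, parameter grid and scoring dict below are
# placeholders, and 'n_iter' must already be resolved to an integer (None in
# the section above is resolved elsewhere).
import sklearn.linear_model
import sklearn.model_selection

gs_params = {
    'n_iter': 3,
    'n_jobs': 1,
    'refit': 'metric__r2',
    'cv': sklearn.model_selection.KFold(
        n_splits=3, shuffle=True, random_state=42),
    'verbose': 1,
    'pre_dispatch': 'n_jobs',
    'return_train_score': True,
}
optimizer = sklearn.model_selection.RandomizedSearchCV(
    sklearn.linear_model.SGDRegressor(),
    param_distributions={'alpha': [1e-4, 1e-3, 1e-2]},
    scoring={'metric__r2': 'r2',
             'metric__mse': 'neg_mean_squared_error'},
    **gs_params)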
    'y': {
        'conf': {
            'init': {'b': 2, 'c': 42},
            'producer': CustomProducer,
            'steps': [
                ('__init__', {'path_id': 'path__default',
                              'logger_id': 'logger__default'}),
                ('set', {'key': 'x__1', 'val': 7}),
                ('log', {'key': 'x__2'}),
                ('log', {'key_id': 'x__2'}),
            ],
            'priority': 2,
        }
    },
}

if __name__ == '__main__':
    # Execute configuration(s).
    objects = pycnfg.run(CNFG)
    # => 42
    # => 42

    # Storage for produced object(s).
    print(objects)
    # => {'logger__default': <Logger default (INFO)>,
    #     'path__default': 'pycnfg/examples/complex',
    #     'x__1': 'a',
    #     'x__2': 'c',
    #     'y__conf': {'b': 2, 'c': 42, 'a': 7}}
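
# CustomProducer itself is defined earlier in the example file and is not part
# of this excerpt. A minimal sketch of such a producer is shown below; the
# method names 'set' and 'log' come from the steps above, but the bodies, the
# defaults and the omitted '__init__' wiring of 'path_id'/'logger_id' are
# assumptions. The id-to-value substitution visible in the output (e.g.
# 'x__1' resolved to 'a') appears to be handled by pycnfg's kwargs resolver
# and is not reproduced here.
import pycnfg


class CustomProducerSketch(pycnfg.Producer):
    """Toy producer: mutate and inspect a dict object step by step."""

    def set(self, obj, key, val=42):
        # Called as ('set', {'key': ..., 'val': ...}); the current object is
        # passed first and the (possibly updated) object is returned.
        obj[key] = val
        return obj

    def log(self, obj, key=None, key_id=None):
        # The example passes the lookup key either directly ('key') or as the
        # id of another produced object ('key_id'); here both are simply
        # treated as plain dict keys.
        print(obj.get(key if key is not None else key_id))
        return obj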
def test_run(id_, args, kwargs, expected):
    """
    - Delete previous test output if it exists.
    - Start mlshell.run.py.
    - Compare current output with the original.
    """
    # Remove results from the previous test run.
    results_path = expected['results_path']
    shutil.rmtree(results_path, ignore_errors=True)

    # Start code.
    # [future] attempts to run classification with n_jobs > 1.
    # global cnfg_default
    # sys.modules['cnfg_default'] = get_params(args[0], 'cnfg_default')
    # import cnfg_default
    # # from cnfg_default import custom_score_metric
    objects = pycnfg.run(oid='default', *args, **kwargs)
    tmp = {k: type(v).__name__ for k, v in objects.items()}
    print('OBJECTS:')
    print(tmp)

    # Compare results:
    # * Compare objects (keys and type names of values).
    objects_ = expected['objects']
    objects = {k: type(v).__name__ for k, v in objects.items()}
    if objects != objects_:
        print(set(objects.items()) ^ set(objects_.items()))
    assert objects == objects_
    # for k, v in objects.items():
    #     assert k in objects_
    #     assert type(v).__name__ == objects_[k]

    # * Compare prediction csv files (all available).
    pred_path = glob.glob(f"{results_path}/models/*_pred.csv")
    pred_path_ = glob.glob(expected['pred_path'])
    assert len(pred_path) == len(pred_path_)
    for act, exp in zip(sorted(pred_path), sorted(pred_path_)):
        file_diff(act, exp)
        assert filecmp.cmp(act, exp)

    # * Compare test logs.
    logs_path = glob.glob(f"{results_path}/logs*/*_test.log")[0]
    logs_path_ = expected['logs_path']
    file_diff(logs_path, logs_path_)
    assert filecmp.cmp(logs_path, logs_path_)

    # * Compare runs dataframe, non-universal columns.
    runs_path = f"{results_path}/runs"
    runs_path_ = expected['runs_path']
    df = runs_loader(runs_path)
    df_ = runs_loader(runs_path_)
    # First False/True for each element, then check all by columns:
    #   col1     True
    #   col2    False
    #   dtype: bool
    df_diff = df.eq(df_).all()
    # Column names that are not equal.
    columns = sorted(list(df_diff[df_diff == False].dropna().index))
    # columns_eq = sorted(list(df_diff[df_diff == True].dropna().index))
    columns_ = expected['columns_diff']
    print('DIFF:\n', columns)
    time.sleep(1)
    # assert columns == columns_

    # * Compare model.
    model_path = glob.glob(f"{results_path}/models/*.model")
    model_path_ = glob.glob(expected['model_path'])
    assert len(model_path) == len(model_path_)
    for act, exp in zip(sorted(model_path), sorted(model_path_)):
        assert filecmp.cmp(act, exp)
    return

    # Otherwise can't pickle when n_jobs > 1, need import to address scope.
    # custom_score_metric = get_params('classification/conf.py',
    #                                  'custom_score_metric')
    # sys.modules['custom_score_metric'] = custom_score_metric
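
# 'file_diff' and 'runs_loader' are helpers defined elsewhere in the test
# suite; their bodies are not part of this excerpt. A plausible sketch is
# given below, assuming runs are persisted as CSV files and 'file_diff' is a
# pure debugging aid; the real helpers may differ.
import difflib
import glob

import pandas as pd


def file_diff(actual_path, expected_path):
    """Print a unified diff between two text files (debugging aid)."""
    with open(actual_path) as f_act, open(expected_path) as f_exp:
        diff = difflib.unified_diff(f_act.readlines(), f_exp.readlines(),
                                    fromfile=actual_path, tofile=expected_path)
        print(''.join(diff))


def runs_loader(dirpath):
    """Concatenate all run CSV files from a directory into one dataframe."""
    files = sorted(glob.glob(f"{dirpath}/*.csv"))
    return pd.concat((pd.read_csv(f) for f in files), ignore_index=True)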
def test_run(id_, args, kwargs, expected):
    objects = pycnfg.run(*args, **kwargs)
    assert objects == expected, f"test={id_}"
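
# Both 'test_run' variants take (id_, args, kwargs, expected), which suggests
# they are driven by pytest parametrization. A minimal sketch of such wiring;
# the configuration, test id and expected storage below are illustrative and
# assume pycnfg's 'section__conf' object-id convention, not the project's
# actual test data.
import pycnfg
import pytest


@pytest.mark.parametrize('id_, args, kwargs, expected', [
    (
        'init_only',                            # test id.
        [{'section': {'conf': {'init': 42}}}],  # positional args for run().
        {'dcnfg': {}},                          # keyword args for run().
        {'section__conf': 42},                  # expected object storage.
    ),
])
def test_run(id_, args, kwargs, expected):
    objects = pycnfg.run(*args, **kwargs)
    assert objects == expected, f"test={id_}"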