import logging

# ModelService / DatasetService are project-internal services; their import
# paths are not shown in the original source.


def main():
    """Audit models matching the query: verify that each parameter search was
    run using only features picked by the 'importances_shap' selection."""
    models = ModelService()
    datasets = DatasetService()
    query = {
        "dataset": "merged_new",
        "target": "class"
    }
    all_models = models.query_models(query=query)
    for m in all_models:
        ds = datasets.get_dataset(name=m.dataset, symbol=m.symbol)
        fs = DatasetService.get_feature_selection(ds=ds, method='importances_shap', target=m.target)
        if not fs:
            logging.error(f"Dataset {m.dataset}{m.symbol} -> {m.target} does not have feature selection")
            continue
        if not m.parameters:
            logging.error(f"Model {m.pipeline}({m.dataset}{m.symbol}) -> {m.target} does not have parameters")
            continue
        for mp in m.parameters:
            count = 0
            for f in mp.features:
                if f not in fs.features:
                    logging.error(f"Model {m.pipeline}({m.dataset}{m.symbol}) -> {m.target} parameter search done without fixing features!")
                else:
                    count += 1
            logging.info(f"Model {m.pipeline}({m.dataset}{m.symbol}) -> {m.target} GRIDSEARCH {mp.parameter_search_method} done with {count} features")
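# Hypothetical entry point for the audit script above; the logging setup is an
# assumption, chosen to surface the INFO/ERROR messages main() emits.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main()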
import json
import logging
from typing import Optional


def main(queryfile: str, features: Optional[str] = None,
         halving: Optional[bool] = False, save: Optional[bool] = True):
    """Run a randomized hyperparameter search for every model matching the query."""
    service = GridSearchService()
    models = ModelService()
    with open(queryfile, 'r') as f:
        query = json.load(f)
    search_models = models.query_models(query)
    logging.info("[i] {} models to train".format(len(search_models)))
    for i, m in enumerate(search_models):
        # Uncomment to skip models whose search has already been performed:
        # if m.parameters:
        #     logging.info("==[{}/{}]== MODEL: {} {} {} {} ==> SKIP".format(
        #         i + 1, len(search_models), m.symbol, m.dataset, m.target, m.pipeline))
        #     continue
        logging.info("==[{}/{}]== MODEL: {} {} {} {} =====".format(
            i + 1, len(search_models), m.symbol, m.dataset, m.target, m.pipeline))
        mp = service.create_parameters_search(m, split=0.7, features=features)
        logging.info("[{}] Start random search".format(get_timestamp()))
        mp = service.random_search(m, mp, sync=True, verbose=1, n_jobs=8,
                                   halving=halving, save=save)
        logging.info("[{}] End random search".format(get_timestamp()))
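# A minimal sketch of a query file for main() above. The filter shape follows
# the query dict used in the audit script (plain field/value filters handed to
# ModelService.query_models); the file name is a hypothetical example.
import json

with open('query-class.json', 'w') as f:
    json.dump({"dataset": "merged_new", "target": "class"}, f)

# main(queryfile='query-class.json', halving=True, save=False)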
from typing import Optional

from fastapi import Body, Depends, HTTPException


def grid_search_batch(batch: Optional[str] = None,
                      task_key: Optional[str] = None,
                      split: Optional[float] = 0.7,
                      query: dict = Body(...),
                      model_service: ModelService = Depends(ModelService),
                      service: GridSearchService = Depends(GridSearchService),
                      tasks: TaskService = Depends(TaskService)):
    """Queue a grid-search task for every model matching the query body."""
    try:
        models = model_service.query_models(query)
        tests = [(model, service.create_parameters_search(model, split, task_key=task_key))
                 for model in models]
        return [
            tasks.send(task_name='gridsearch',
                       task_args={
                           'model': model.dict(),
                           'search_parameters': search_parameters.dict()
                       },
                       name='grid_search-{}-{}-{}-{}'.format(
                           model.symbol, model.pipeline, model.dataset, model.target),
                       batch=batch,
                       countdown=30)
            for model, search_parameters in tests
        ]
    except MessageException as e:
        raise HTTPException(status_code=400, detail=e.message)
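# Hypothetical client call for the endpoint above: the host and route path are
# assumptions (the route decorator is not shown), but the query parameters and
# JSON body mirror the endpoint's signature.
import requests

resp = requests.post(
    'http://localhost:8000/grid-search/batch',     # assumed mount point
    params={'batch': 'gs-batch-1', 'split': 0.7},  # hypothetical batch label
    json={'dataset': 'merged_new', 'target': 'class'},
)
print(resp.json())  # list of queued task descriptors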
from typing import Optional

from fastapi import Body, Depends, HTTPException


def feature_selection_batch(method: str,
                            batch: Optional[str] = None,
                            task_key: Optional[str] = None,
                            split: Optional[float] = 0.7,
                            query: dict = Body(...),
                            model_service: ModelService = Depends(ModelService),
                            service: FeatureSelectionService = Depends(FeatureSelectionService),
                            tasks: TaskService = Depends(TaskService)):
    """Queue a feature-selection task for every model matching the query body."""
    try:
        models = model_service.query_models(query)
        # This will only keep one copy for each (symbol, dataset, target)
        # tuple, deduplicating across pipelines.
        d_models = {
            '{}-{}-{}'.format(m.symbol, m.dataset, m.target): m
            for m in models
        }
        models = list(d_models.values())

        def get_name_from_model(_model):
            return 'feature_selection-{}-{}-{}-{}'.format(
                _model.symbol, _model.pipeline, _model.dataset, _model.target)

        tests = [(model, service.create_features_search(model, split, method, task_key=task_key))
                 for model in models]
        return [
            tasks.send(task_name='featureselection',
                       task_args={
                           'model': model.dict(),
                           'search_parameters': search_parameters.dict()
                       },
                       name=get_name_from_model(model),
                       batch=batch,
                       countdown=30)
            for model, search_parameters in tests
        ]
    except MessageException as e:
        raise HTTPException(status_code=400, detail=e.message)
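# Note on the dedup above: the dict comprehension keeps the *last* model seen
# for each (symbol, dataset, target) key, so which pipeline's copy survives
# depends on the order query_models returns. A toy illustration:
d = {'{}-{}-{}'.format(s, ds, t): p for s, ds, t, p in [
    ('BTC', 'merged_new', 'class', 'xgboost'),
    ('BTC', 'merged_new', 'class', 'mlp'),
]}
assert list(d.values()) == ['mlp']  # last occurrence wins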
import json
import logging
from typing import Optional


def main(queryfile: str, features: Optional[str] = None,
         parameters: Optional[str] = None, save: Optional[bool] = True):
    """Run walk-forward tests (90/180/240-day windows) for every model matching the query."""
    models = ModelService()
    with open(queryfile, 'r') as f:
        query = json.load(f)
    if save:
        models.clear_tests(query)
    test_models = models.query_models(query)
    logging.info("[i] {} models to test".format(len(test_models)))
    failed = []
    for i, m in enumerate(test_models):
        logging.info("==[{}/{}]== MODEL: {} {} {} {} =====".format(
            i + 1, len(test_models), m.symbol, m.dataset, m.target, m.pipeline))
        # T1 (60-day window) is currently disabled:
        # t1 = models.create_model_test(model=m, split=0.7, step={'days': 1},
        #                               window={'days': 60}, parameters=parameters, features=features)
        t2 = models.create_model_test(model=m, split=0.7, step={'days': 1},
                                      window={'days': 90}, parameters=parameters, features=features)
        t3 = models.create_model_test(model=m, split=0.7, step={'days': 1},
                                      window={'days': 180}, parameters=parameters, features=features)
        t4 = models.create_model_test(model=m, split=0.7, step={'days': 1},
                                      window={'days': 240}, parameters=parameters, features=features)
        try:
            # Test T1 (disabled)
            # logging.info("[{}] {} Start T1".format(get_timestamp(), m.symbol))
            # models.test_model(m, t1, sync=True)
            # Test T2
            logging.info("[{}] {} Start T2".format(get_timestamp(), m.symbol))
            models.test_model(m, t2, sync=True)
            # Test T3
            logging.info("[{}] {} Start T3".format(get_timestamp(), m.symbol))
            models.test_model(m, t3, sync=True)
            # Test T4
            logging.info("[{}] {} Start T4".format(get_timestamp(), m.symbol))
            models.test_model(m, t4, sync=True)
        except MessageException as e:
            logging.error("[!] " + e.message)
            failed.append((m.dict(), t2.dict(), t3.dict(), t4.dict()))
        except Exception as e:
            logging.exception("[!] " + str(e))
            failed.append((m.dict(), t2.dict(), t3.dict(), t4.dict()))
        logging.info("[{}] Done".format(m.symbol))
    with open('test-failed.json', 'w') as f:
        json.dump(failed, f)
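# A minimal sketch for inspecting the failure dump written above. Each record
# is the (model, t2, t3, t4) tuple appended in main(), serialized as a JSON list.
import json

with open('test-failed.json', 'r') as f:
    failed = json.load(f)
for model, t2, t3, t4 in failed:
    print(model['symbol'], model['pipeline'], model['dataset'], model['target'])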