Example #1
import logging

# Project-local imports; module paths are assumed.
from services import DatasetService, ModelService


def main():
    models = ModelService()
    datasets = DatasetService()
    query = {
        "dataset": "merged_new",
        "target": "class"
    }
    all_models = models.query_models(query=query)
    for m in all_models:
        ds = datasets.get_dataset(name=m.dataset, symbol=m.symbol)
        # Call the lookup on the service instance rather than the class.
        fs = datasets.get_feature_selection(ds=ds, method='importances_shap', target=m.target)
        if not fs:
            logging.error(f"Dataset {m.dataset}{m.symbol} -> {m.target} does not have feature selection")
            continue

        if not m.parameters:
            logging.error(f"Model {m.pipeline}({m.dataset}{m.symbol}) -> {m.target} does not have parameters")
            continue

        for mp in m.parameters:
            count = 0
            for f in mp.features:
                if f not in fs.features:
                    logging.error(f"Model {m.pipeline}({m.dataset}{m.symbol}) -> {m.target} parameter search done without fixing features!")
                else:
                    count += 1
            logging.info(f"Model {m.pipeline}({m.dataset}{m.symbol}) -> {m.target} GRIDSEARCH {mp.parameter_search_method} done with {count} features")
Example #2
import json
import logging
from typing import Optional

# Project-local imports; module paths are assumed.
from services import GridSearchService, ModelService
from util import get_timestamp


def main(queryfile: str,
         features: Optional[str] = None,
         halving: Optional[bool] = False,
         save: Optional[bool] = True):
    service = GridSearchService()
    models = ModelService()
    with open(queryfile, 'r') as f:
        query = json.load(f)

    search_models = models.query_models(query)
    logging.info("[i] {} models to train".format(len(search_models)))
    for i, m in enumerate(search_models):
        # Optional guard (disabled): skip models whose search already ran.
        # if m.parameters:
        #     logging.info("==[{}/{}]== MODEL: {} {} {} {} ==> SKIP".format(i+1, len(search_models), m.symbol, m.dataset, m.target, m.pipeline))
        #     continue
        logging.info("==[{}/{}]== MODEL: {} {} {} {} =====".format(
            i + 1, len(search_models), m.symbol, m.dataset, m.target,
            m.pipeline))
        mp = service.create_parameters_search(m, split=0.7, features=features)
        logging.info("[{}] Start random search".format(get_timestamp()))
        mp = service.random_search(m,
                                   mp,
                                   sync=True,
                                   verbose=1,
                                   n_jobs=8,
                                   halving=halving,
                                   save=save)
        logging.info("[{}] End random search".format(get_timestamp()))
Example #3
from typing import Optional

from fastapi import Body, Depends, HTTPException

# Project-local imports; module paths are assumed.
from services import GridSearchService, ModelService, TaskService
from errors import MessageException


def grid_search_batch(batch: Optional[str] = None,
                      task_key: Optional[str] = None,
                      split: Optional[float] = 0.7,
                      query: dict = Body(...),
                      model_service: ModelService = Depends(ModelService),
                      service: GridSearchService = Depends(GridSearchService),
                      tasks: TaskService = Depends(TaskService)):
    try:
        models = model_service.query_models(query)
        # Pair each matching model with a newly created parameter-search spec.
        tests = [(model,
                  service.create_parameters_search(model,
                                                   split,
                                                   task_key=task_key))
                 for model in models]
        # Queue one grid-search task per model; each starts after a 30s countdown.
        return [
            tasks.send(task_name='gridsearch',
                       task_args={
                           'model': model.dict(),
                           'search_parameters': search_parameters.dict()
                       },
                       name='grid_search-{}-{}-{}-{}'.format(
                           model.symbol, model.pipeline, model.dataset,
                           model.target),
                       batch=batch,
                       countdown=30) for model, search_parameters in tests
        ]
    except MessageException as e:
        raise HTTPException(status_code=400, detail=e.message)
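
This handler (and the one in Example #4) is presumably mounted on a FastAPI router; a minimal sketch, with the router and path as assumptions:

from fastapi import APIRouter

router = APIRouter()
router.add_api_route('/grid-search/batch', grid_search_batch, methods=['POST'])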
Example #4
from typing import Optional

from fastapi import Body, Depends, HTTPException

# Project-local imports; module paths are assumed.
from services import FeatureSelectionService, ModelService, TaskService
from errors import MessageException


def feature_selection_batch(
    method: str,
    batch: Optional[str] = None,
    task_key: Optional[str] = None,
    split: Optional[float] = 0.7,
    query: dict = Body(...),
    model_service: ModelService = Depends(ModelService),
    service: FeatureSelectionService = Depends(FeatureSelectionService),
    tasks: TaskService = Depends(TaskService)):
    try:
        models = model_service.query_models(query)
        # Keep a single model per (symbol, dataset, target) tuple; when
        # duplicates exist, the last model returned by the query wins.
        d_models = {
            '{}-{}-{}'.format(m.symbol, m.dataset, m.target): m
            for m in models
        }
        models = list(d_models.values())

        def get_name_from_model(_model):
            return 'feature_selection-{}-{}-{}-{}'.format(
                _model.symbol, _model.pipeline, _model.dataset, _model.target)

        tests = [(model,
                  service.create_features_search(model,
                                                 split,
                                                 method,
                                                 task_key=task_key))
                 for model in models]
        return [
            tasks.send(task_name='featureselection',
                       task_args={
                           'model': model.dict(),
                           'search_parameters': search_parameters.dict()
                       },
                       name=get_name_from_model(model),
                       batch=batch,
                       countdown=30)
            for model, search_parameters in tests
        ]
    except MessageException as e:
        raise HTTPException(status_code=400, detail=e.message)
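
A hypothetical client call against this endpoint; host, port, and path are assumptions, while the method value matches the one used in Example #1:

import requests

response = requests.post(
    'http://localhost:8000/feature-selection/batch',  # path is an assumption
    params={'method': 'importances_shap'},
    json={'dataset': 'merged_new', 'target': 'class'},
)
print(response.json())  # task descriptors returned by TaskService.send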
Example #5
import json
import logging
from typing import Optional

# Project-local imports; module paths are assumed.
from services import ModelService
from errors import MessageException
from util import get_timestamp


def main(queryfile: str,
         features: Optional[str] = None,
         parameters: Optional[str] = None,
         save: Optional[bool] = True):
    models = ModelService()
    with open(queryfile, 'r') as f:
        query = json.load(f)
    if save:
        models.clear_tests(query)
    test_models = models.query_models(query)
    logging.info("[i] {} models to test".format(len(test_models)))
    failed = []
    for i, m in enumerate(test_models):
        logging.info("==[{}/{}]== MODEL: {} {} {} {} =====".format(
            i + 1, len(test_models), m.symbol, m.dataset, m.target,
            m.pipeline))
        # A 60-day window test (t1) is disabled; kept for reference:
        # t1 = models.create_model_test(model=m, split=0.7, step={'days': 1},
        #                               window={'days': 60},
        #                               parameters=parameters, features=features)
        t2 = models.create_model_test(model=m,
                                      split=0.7,
                                      step={'days': 1},
                                      window={'days': 90},
                                      parameters=parameters,
                                      features=features)
        t3 = models.create_model_test(model=m,
                                      split=0.7,
                                      step={'days': 1},
                                      window={'days': 180},
                                      parameters=parameters,
                                      features=features)
        t4 = models.create_model_test(model=m,
                                      split=0.7,
                                      step={'days': 1},
                                      window={'days': 240},
                                      parameters=parameters,
                                      features=features)
        try:
            # Test T1
            # logging.info("[{}] {} Start T1".format(get_timestamp(), m.symbol))
            # models.test_model(m, t1, sync=True)
            # Test T2
            logging.info("[{}] {} Start T2".format(get_timestamp(), m.symbol))
            models.test_model(m, t2, sync=True)
            # Test T3
            logging.info("[{}] {} Start T3".format(get_timestamp(), m.symbol))
            models.test_model(m, t3, sync=True)
            logging.info("[{}] {} Start T4".format(get_timestamp(), m.symbol))
            models.test_model(m, t4, sync=True)
        except MessageException as e:
            logging.error("[!] " + e.message)
            # failed.append((m.dict(), t1.dict(), t2.dict(), t3.dict()))
            failed.append((m.dict(), t2.dict(), t3.dict(), t4.dict()))
        except Exception as e:
            logging.exception("[!] " + str(e))
            # failed.append((m.dict(), t1.dict(), t2.dict(), t3.dict()))
            failed.append((m.dict(), t2.dict(), t3.dict(), t4.dict()))

        logging.info("[{}] Done".format(m.symbol))
    # Persist any failed test definitions for later inspection.
    with open('test-failed.json', 'w') as f:
        json.dump(failed, f)
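
The Optional keyword defaults in this main (and in Example #2's) suggest a CLI runner such as typer; a minimal, assumed invocation:

if __name__ == '__main__':
    import typer  # assumption: the project may use a different runner
    typer.run(main)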