示例#1
0
def test_json_all(arg):
    """Run ``ml_models --do fit`` once per top-level key of every JSON config found.

    JSON files are collected under ``<package root>/dataset/json/`` with the two
    patterns ``/*/.json`` and ``/*/*.json``; each top-level key of a file is
    passed as ``--config_mode``.

    :param arg: parsed CLI arguments, only forwarded to ``log_info_repo``.
    """
    log_info_repo(arg)
    root = os_package_root_path()
    root = root.replace("\\", "//")
    log(root)
    path = str(os.path.join(root, "dataset/json/"))
    log(path)

    log("############ List of files ################################")
    model_list = (get_recursive_files2(path, r'/*/.json')
                  + get_recursive_files2(path, r'/*/*.json'))
    print("List of JSON Files", model_list)

    for js_file in model_list:
        log("\n\n\n", "************", "JSON File", js_file)
        # Context manager so the handle is closed even if parsing fails.
        with open(js_file, mode='r') as fp:
            cfg = json.load(fp)

        # Only the keys (config modes) are needed; values are ignored.
        for kmode, _ in cfg.items():
            cmd = f"ml_models --do fit --config_file {js_file}  --config_mode {kmode} "
            log_separator()
            log(cmd)
            os.system(cmd)
示例#2
0
def test_import(arg=None):
    """Attempt to import every file returned by ``os_get_file`` as a ``dsa2.*`` module.

    Each file path is turned into a dotted module name and imported; a failing
    import is logged and the scan continues with the next file.
    tensorflow / torch are intentionally not imported here (the original code
    notes that importing them internally creates issues).

    :param arg: forwarded to ``log_info_repo`` only.
    """
    from importlib import import_module

    log_info_repo(arg)

    # NOTE(review): assigned but never used; os_get_file below gets an empty block list.
    block_list = ["raw"]
    log_separator()
    log("test_import")

    file_list = os_get_file(folder=None, block_list=[], pattern=r"/*.py")
    print(file_list)

    for fname in file_list:
        # Holds the raw path until the dotted name is built, so the error log
        # shows whichever form was current when the failure happened.
        target = fname
        try:
            dotted = fname.replace("\\", ".").replace(".py", "").replace("/", ".")
            target = "dsa2." + dotted
            import_module(target)
            print(target)
        except Exception as err:
            log("Error", target, err)
示例#3
0
def register(run_name,
             params,
             metrics,
             signature,
             model_class,
             tracking_uri="sqlite:///local.db"):
    """
    Log a trained model, its params and its metrics to MLflow and register it.

    :run_name: Name of the run; also used as artifact path and in the registered name
    :params: dict with model params; must contain 'path_train_model'
    :metrics: table with 'metric_name' and 'metric_val' per row
              (presumably a pandas DataFrame, it is used via .apply(axis=1) -- confirm)
    :signature: signature that describes the model input and output schema
    :model_class: string appended to the registered model name
    :tracking_uri: MLflow tracking URI
    :return: None
    """
    def _log_one_metric(row):
        # One MLflow metric per row of the metrics table.
        return mlflow.log_metric(row.metric_name, row.metric_val)

    mlflow.set_tracking_uri(tracking_uri)
    with mlflow.start_run(run_name=run_name) as active_run:
        run_info = active_run.info
        run_id = run_info.run_uuid
        experiment_id = run_info.experiment_id

        model_file = params['path_train_model'] + "/model.pkl"
        sk_model = load(model_file)
        mlflow.log_params(params)

        metrics.apply(_log_one_metric, axis=1)

        registered_name = "sklearn_" + run_name + "_" + model_class
        mlflow.sklearn.log_model(sk_model,
                                 run_name,
                                 signature=signature,
                                 registered_model_name=registered_name)

        log("MLFLOW identifiers", run_id, experiment_id)

    mlflow.end_run()
示例#4
0
def log_remote_start(arg=None):
    """Clone the remote ``logs`` repository into ``/home/runner/work/dsa2/``.

    The shell command is logged and then executed with ``os.system``.

    :param arg: unused; kept so the function can be dispatched like the other commands.
    """
    # The clone URL was stored as the e-mail-obfuscation placeholder
    # "[email protected]:arita37/logs.git", which the shell cannot run.
    # Restored to the scp-style SSH form.
    # NOTE(review): host assumed to be github.com -- confirm.
    s = """ cd /home/runner/work/dsa2/  && git clone git@github.com:arita37/logs.git  &&  ls && pwd
       """

    # Each line of the script becomes one ';'-separated shell statement.
    cmd = " ; ".join(s.split("\n"))
    log(cmd)
    os.system(cmd)
示例#5
0
def test_list(mlist):
    """Run each entry of ``mlist`` as ``python <dsa2 package path>/<entry>``.

    :param mlist: iterable of script paths relative to the dsa2 package directory.
    """
    path = dsa2.__path__[0]

    # All commands are built up front, then executed one by one.
    commands = []
    for model in mlist:
        commands.append(f"python {path}/{model}")

    for shell_cmd in commands:
        log_separator()
        log(shell_cmd)
        os.system(shell_cmd)
示例#6
0
def main():
    """CLI entry point: dispatch on ``--do``.

    If the ``do`` value contains ".py" it is treated as a comma-separated list
    of scripts and handed to ``test_list``; otherwise it is looked up by name
    in this module's globals and called with the parsed arguments.
    """
    arg = cli_load_arguments()
    log(arg.do, arg.config_file, arg)

    if ".py" not in arg.do:
        # Named command: resolve the function from module globals.
        log("ml_test --do " + arg.do)
        handler = globals()[arg.do]
        handler(arg)
        return

    #### Input is a comma-separated string of script names
    scripts = arg.do.split(",")
    test_list(scripts)
示例#7
0
文件: core_run.py 项目: Ruhul964/dsa2
    def objective_fun(mdict):
        """Train with ``mdict`` and return the first test metric value.

        The value is also stored under ``stats[metric_name]`` of the dict
        returned by ``run_train``. ``debug``, ``n_sample`` and ``metric_name``
        come from the enclosing scope.
        """
        if debug:
            log(mdict)

        train_out = run_train(config_name="", config_path="", n_sample=n_sample,
                              mode="run_preprocess", model_dict=mdict,
                              return_mode='dict')

        stats = train_out['stats']
        first_record = stats['metrics_test'].to_dict('records')[0]
        stats[metric_name] = first_record['metric_val']

        if debug:
            print(train_out)
        return stats[metric_name]
示例#8
0
    def objective_fun(mdict):
        """Train with ``mdict`` and return minus the mean of the selected test metric.

        Rows of ``stats['metrics_test']`` whose 'metric_name' equals the
        enclosing ``metric_name`` are averaged over 'metric_val'.
        ``debug`` and ``n_sample`` also come from the enclosing scope.
        """
        if debug:
            log(mdict)

        train_out = run_train(config_name="", config_path="", n_sample=n_sample,
                              mode="run_preprocess", model_dict=mdict,
                              return_mode='dict')

        metrics_df = train_out['stats']['metrics_test']
        selected = metrics_df[metrics_df['metric_name'] == metric_name]

        #### Beware of the sign: the mean is negated before being returned
        return -np.mean(selected['metric_val'].values)
示例#9
0
文件: core_run.py 项目: Ruhul964/dsa2
def get_global_pars(config_uri=""):
    """Load the object designated by ``config_uri`` and return the model dict.

    ``load_function_uri`` may return either a callable that builds the dict or
    the dict itself. A callable is invoked with no arguments; anything else is
    returned as is.

    :param config_uri: URI understood by ``load_function_uri``.
    :return: result of calling the loaded object, or the loaded object itself
             when it is not callable.
    """
    log("#### Model Params Dynamic loading  ##########################################")
    from source.util_feature import load_function_uri
    print("config_uri", config_uri)
    model_dict_fun = load_function_uri(uri_name=config_uri)

    # Explicit callable check instead of a bare ``except``: an error raised
    # inside the config function now propagates instead of being swallowed
    # and replaced by the function object.
    if callable(model_dict_fun):
        return model_dict_fun()
    return model_dict_fun
示例#10
0
文件: run.py 项目: HammadTufail/dsa2
def train(config='', nsample=None):
    """Resolve ``config`` and launch ``source.run_train.run_train``.

    :param config: config designator passed to ``get_config_path``.
    :param nsample: number of samples; when None, ``global_pars['n_sample']`` is used.
    """
    config_uri, config_name = get_config_path(config)

    mdict = get_global_pars(config_uri)
    m = mdict['global_pars']
    log(mdict)

    from source import run_train

    call_args = dict(
        config_name=config_name,
        config_path=m['config_path'],
        n_sample=m['n_sample'] if nsample is None else nsample,
    )
    run_train.run_train(**call_args)
示例#11
0
def test_all(arg=None):
    """Run every non-blocked model script and push the logs after each run.

    The list from ``model_get_list`` is filtered by the ``model_blocked`` entry
    of the ``test_all`` section of ``arg.config_file``. Each remaining dotted
    name is turned into a script path under the dsa2 package and run with
    ``python``.

    :param arg: parsed CLI arguments; ``arg.config_file`` is read here and
                ``arg`` is forwarded to ``log_remote_push``.
    """
    log_info_repo(arg)
    from time import sleep

    path = dsa2.__path__[0]
    log("############Check model ################################")
    model_list = model_get_list(folder=None, block_list=[])
    log(model_list)

    ## Block list. The context manager closes the config file handle.
    with open(path_norm(arg.config_file), mode='r') as fp:
        cfg = json.load(fp)['test_all']
    block_list = cfg['model_blocked']
    model_list = [t for t in model_list if t not in block_list]
    log("Used", model_list)

    path = path.replace("\\", "//")
    # Named ``commands`` so the sibling function ``test_list`` is not shadowed.
    commands = [f"python {path}/" + t.replace(".", "//") + ".py" for t in model_list]

    for cmd in commands:
        log_separator()
        log(cmd)
        os.system(cmd)
        # Forward ``arg``: log_remote_push reads ``arg.name`` and fails on None.
        log_remote_push(arg)
        sleep(5)
示例#12
0
文件: core_run.py 项目: Ruhul964/dsa2
def data_profile2(config=''):
    """Run the feature profiler on the training data of ``config``.

    The output goes to ``<path_model>/profile/`` and the sample size is fixed
    at 5000.

    :param config: config designator passed to ``get_config_path``.
    :return: None
    """
    config_uri, config_name = get_config_path(config)
    from source.run_feature_profile import run_profile

    mdict = get_global_pars(config_uri)
    m = mdict['global_pars']
    log(mdict)

    profile_args = dict(
        path_data=m['path_data_train'],
        path_output=m['path_model'] + "/profile/",
        n_sample=5000,
    )
    run_profile(**profile_args)
示例#13
0
def train_sampler(config='', nsample=None):
    """Train through ``source.run_sampler.run_train`` for the given config.

    :param config: config designator passed to ``get_config_path``.
    :param nsample: number of samples; when None, ``global_pars['n_sample']`` is used.
    :return: None
    """
    config_uri, config_name = get_config_path(config)

    mdict = get_global_pars(config_uri)
    m = mdict['global_pars']
    log(mdict)

    from source import run_sampler

    call_args = dict(
        config_name=config_name,
        config_path=m['config_path'],
        n_sample=m['n_sample'] if nsample is None else nsample,
    )
    run_sampler.run_train(**call_args)
示例#14
0
def log_remote_push(arg=None):
    """Commit and force-push the content of the ``dsa2_store`` checkout.

    The commit message is ``m_<arg.name>``. The shell script is logged and then
    run with ``os.system``.

    :param arg: parsed CLI arguments; only ``arg.name`` is read. May be None,
                in which case the tag becomes ``m_None``.
    """
    ### Pushing to dsa2_store with --force
    # ``arg`` defaults to None and some callers pass nothing, so read ``name``
    # defensively instead of dereferencing ``arg.name`` directly.
    tag = "m_" + str(getattr(arg, "name", None))

    # NOTE(review): the user.email value below is a masked placeholder in this
    # copy of the source -- confirm the real address.
    s = f""" cd /home/runner/work/dsa2/dsa2_store/
           pip3 freeze > deps.txt
           ls
           git config --local user.email "*****@*****.**" && git config --local user.name "arita37"        
           git add --all &&  git commit -m "{tag}" 
           git pull --all     
           git push --all -f
           cd /home/runner/work/dsa2/dsa2/
       """

    # Each line of the script becomes one ';'-separated shell statement.
    cmd = " ; ".join(s.split("\n"))
    log(cmd)
    os.system(cmd)
示例#15
0
文件: run.py 项目: HammadTufail/dsa2
def preprocess(config='', nsample=None):
    """Resolve ``config`` and launch ``source.run_preprocess.run_preprocess``.

    :param config: config designator passed to ``get_config_path``.
    :param nsample: number of samples; when None, ``global_pars['n_sample']`` is used.
    """
    config_uri, config_name = get_config_path(config)
    mdict = get_global_pars(config_uri)
    m = mdict['global_pars']
    log(mdict)

    from source import run_preprocess

    call_args = dict(
        config_name=config_name,
        config_path=m['config_path'],
        n_sample=m['n_sample'] if nsample is None else nsample,
        ### Optional
        mode='run_preprocess',
    )
    run_preprocess.run_preprocess(**call_args)
示例#16
0
文件: run.py 项目: HammadTufail/dsa2
def predict(config='', nsample=None):
    """Resolve ``config`` and launch ``source.run_inference.run_predict``.

    Input and output locations come from ``global_pars['path_pred_data']`` and
    ``global_pars['path_pred_output']``.

    :param config: config designator passed to ``get_config_path``.
    :param nsample: number of samples; when None, ``global_pars['n_sample']`` is used.
    """
    config_uri, config_name = get_config_path(config)

    mdict = get_global_pars(config_uri)
    m = mdict['global_pars']
    log(mdict)

    from source import run_inference

    call_args = dict(
        config_name=config_name,
        config_path=m['config_path'],
        n_sample=m['n_sample'] if nsample is None else nsample,
        #### Optional
        path_data=m['path_pred_data'],
        path_output=m['path_pred_output'],
        model_dict=None,
    )
    run_inference.run_predict(**call_args)
示例#17
0
def test_dataloader(arg=None):
    """Run the ``--do test`` entry of ``dataloader.py`` and ``preprocess/generic.py``.

    :param arg: parsed CLI arguments; ``arg.config_file`` is loaded, although
                the loaded content is not used afterwards.
    """
    log_info_repo(arg)
    path = dsa2.__path__[0]
    # Result unused; the call is kept so a missing or invalid config still fails here.
    cfg = json_load(path_norm(arg.config_file))

    log("############Check model ################################")
    path = path.replace("\\", "//")
    commands = (
        f"python {path}/dataloader.py --do test ",
        f"python {path}/preprocess/generic.py --do test ",
    )

    for shell_cmd in commands:
        log_separator()
        log(shell_cmd)
        os.system(shell_cmd)
示例#18
0
def test_benchmark(arg=None):
    """Run ``benchmark.py`` once for each of the five hard-coded ``--do`` tasks.

    :param arg: forwarded to ``log_info_repo`` only.
    """
    log_info_repo(arg)

    path = dsa2.__path__[0]
    log("############Check model ################################")
    path = path.replace("\\", "//")
    commands = (
        f"python {path}/benchmark.py --do timeseries ",
        f"python {path}/benchmark.py --do vision_mnist ",
        f"python {path}/benchmark.py --do fashion_vision_mnist ",
        f"python {path}/benchmark.py --do text_classification ",
        f"python {path}/benchmark.py --do nlp_reuters ",
    )

    for shell_cmd in commands:
        log_separator()
        print(shell_cmd)
        os.system(shell_cmd)
示例#19
0
def transform(config='', nsample=None):
    """Resolve ``config`` and launch ``source.run_sampler.run_transform``.

    Input and output locations come from ``global_pars['path_pred_data']`` and
    ``global_pars['path_pred_output']``.

    :param config: config designator passed to ``get_config_path``.
    :param nsample: number of samples; when None, ``global_pars['n_sample']`` is used.
    :return: None
    """
    config_uri, config_name = get_config_path(config)

    mdict = get_global_pars(config_uri)
    m = mdict['global_pars']
    log(mdict)

    from source import run_sampler

    call_args = dict(
        config_name=config_name,
        config_path=m['config_path'],
        n_sample=m['n_sample'] if nsample is None else nsample,
        #### Optional
        path_data=m['path_pred_data'],
        path_output=m['path_pred_output'],
        model_dict=None,
    )
    run_sampler.run_transform(**call_args)
示例#20
0
def test_model_structure(arg=None):
    """Log the working directory and the dsa2 module, then run ``ztest_structure.py``.

    :param arg: unused. Accepted so the function can be dispatched as
                ``fn(arg)`` like the other ``test_*`` commands; calling it with
                no argument still works.
    """
    log("os.getcwd", os.getcwd())
    log(dsa2)

    log("############Check structure ############################")
    # NOTE(review): the script is invoked bare (no interpreter, no directory),
    # so it must be resolvable by the shell as is -- confirm this is intended.
    cmd = "ztest_structure.py"
    os.system(cmd)
示例#21
0
def test_cli(arg=None):
    """Execute the CLI commands listed in ``README_usage_CLI.md``.

    Every line of the README (located one level above the dsa2 package) that is
    longer than 15 characters after stripping and starts with ``ml_models ``,
    ``ml_benchmark `` or ``ml_optim `` is run through ``os.system``, with the
    suffix returned by ``to_logfile`` appended.

    :param arg: forwarded to ``log_info_repo`` only.
    """
    log("# Testing Command Line System  ")
    log_info_repo(arg)

    import dsa2, os
    path = dsa2.__path__[0]   ### Root Path
    fileconfig = path_norm( f"{path}/../README_usage_CLI.md" )
    print("Using :", fileconfig)

    allowed_prefixes = ("ml_models ", "ml_benchmark ", "ml_optim ")

    def is_valid_cmd(cmd):
        # Long enough and starting with one of the known CLI entry points.
        cmd = cmd.strip()
        return len(cmd) > 15 and cmd.startswith(allowed_prefixes)

    with open(fileconfig, mode="r") as f:
        cmd_list = f.readlines()
    print(cmd_list[:3])

    #### Parse the commands from the .md file and execute them
    for raw_line in cmd_list:
        cmd = raw_line.strip()
        if not is_valid_cmd(cmd):
            continue
        cmd = cmd + to_logfile("cli", '+%Y-%m-%d_%H')
        log_separator()
        print(cmd, flush=True)
        os.system(cmd)
示例#22
0
def hyperparam_wrapper(config_full="",
                       ntrials=2, n_sample=5000, debug=1,
                       path_output         = "data/output/titanic1/",
                       path_optuna_storage = 'data/output/optuna_hyper/optunadb.db',
                       metric_name='accuracy_score', mdict_range=None):
    """Run an Optuna hyper-parameter search and write the best params to disk.

    :param config_full: URI given to ``load_function_uri``; the part after the
                        last ``::`` is used as the study name.
    :param ntrials: number of trials forwarded to ``run_hyper_optuna``.
    :param n_sample: sample size forwarded to ``run_train`` in each trial.
    :param debug: when truthy, the model dict of each trial is logged.
    :param path_output: directory receiving ``hyper_params_best.json``
                        (created if missing).
    :param path_optuna_storage: currently unused; the study storage is the
                                in-memory sqlite URI.
    :param metric_name: value matched against the 'metric_name' column of the
                        test metrics table.
    :param mdict_range: search ranges forwarded to ``run_hyper_optuna``.
    :return: None
    """
    from source.util_feature import load_function_uri
    from source.run_train import  run_train
    from source.run_hyperopt import run_hyper_optuna
    import json

    ##############################################################################
    ####### model_dict initial dict of params  ###################################
    config_name = config_full.split("::")[-1]
    mdict       = load_function_uri(config_full)
    mdict       = mdict()

    ####### Objective   ##########################################################
    def objective_fun(mdict):
        """Train with ``mdict`` and return minus the mean of the selected test metric."""
        if debug:
            log(mdict)
        ddict = run_train(config_name="", config_path="", n_sample=n_sample,
                          mode="run_preprocess", model_dict=mdict,
                          return_mode='dict')

        df = ddict['stats']['metrics_test']

        #### Beware of the sign: the mean is negated before being returned
        res = -np.mean(df[df['metric_name'] == metric_name]['metric_val'].values)
        return res

    ##### Optuna Params   ####################################################
    engine_pars = {'metric_target' :'loss',
                   'study_name'    : config_name  ,
                   'storage'       : "sqlite:///:memory:" }

    ##### Running the optim
    best_dict = run_hyper_optuna(objective_fun, mdict, mdict_range, engine_pars, ntrials=ntrials)

    ##### Export
    os.makedirs(path_output, exist_ok=True)
    # Context manager so the output handle is flushed and closed.
    # NOTE(review): append mode is kept from the original; a second run writes a
    # second JSON document into the same file -- confirm whether 'w' is intended.
    with open(path_output + "/hyper_params_best.json", mode='a') as fp:
        json.dump(best_dict, fp)

    log(engine_pars['storage'])
    log(best_dict)
    log(path_output)
0
def test_json(arg):
    """Run every script listed under ``model_list`` in ``arg.config_file``.

    Each entry is executed as ``python <dsa2 package path>/<entry>``.

    :param arg: parsed CLI arguments; ``arg.config_file`` is the JSON file to read.
    """
    log_info_repo(arg)
    log("os.getcwd", os.getcwd())

    path = dsa2.__path__[0]
    # Context manager so the config file handle is closed after parsing.
    with open(arg.config_file, mode='r') as fp:
        cfg = json.load(fp)

    mlist = cfg['model_list']
    log(mlist)
    # Named ``commands`` so the sibling function ``test_list`` is not shadowed.
    commands = [f"python {path}/{model}" for model in mlist]

    for cmd in commands:
        log_separator()
        log(cmd)
        os.system(cmd)
示例#24
0
文件: run.py 项目: HammadTufail/dsa2
def check(config='titanic_classifier.py::titanic_lightgbm'):
    """Load the model dict for ``config``, verify it has 'global_pars', and log it.

    :param config: config URI passed to ``get_global_pars``.
    """
    loaded = get_global_pars(config)
    # Lookup kept for its effect only: it raises if 'global_pars' is missing.
    _ = loaded['global_pars']
    log(loaded)
示例#25
0
def test_functions(arg=None):
    """Call every function described in ``dataset/test_json/test_functions.json``.

    Each entry of the file's ``test`` list provides a ``uri`` (resolved with
    ``load_function_uri``) and optional ``args`` / ``kw_args``. The result of
    each call is logged; a failure is logged with the entry and does not stop
    the loop.

    :param arg: unused.
    """
    from dsa2.util import load_function_uri

    path = path_norm("dataset/test_json/test_functions.json")
    # Context manager so the file handle is closed after parsing.
    with open(path) as fp:
        dd = json.load(fp)['test']

    for p in dd:
        try:
            log("\n\n", "#" * 20, p)

            myfun = load_function_uri(p['uri'])
            log(myfun)

            # Missing, null or empty values all mean "no arguments".
            w = p.get('args') or []
            kw = p.get('kw_args') or {}

            # Unpacking empty containers equals calling with nothing, so one
            # call covers all four args/kwargs combinations.
            log(myfun(*w, **kw))

        except Exception as e:
            log(e, p)
示例#26
0
文件: core_run.py 项目: Ruhul964/dsa2
def check(config='outlier_predict.py::titanic_lightgbm'):
    """Load the model dict for ``config``, verify it has 'global_pars', and log it.

    :param config: config URI passed to ``get_global_pars``.
    """
    loaded = get_global_pars(config)
    # Lookup kept for its effect only: it raises if 'global_pars' is missing.
    _ = loaded['global_pars']
    log(loaded)
示例#27
0
def test_pullrequest(arg=None):
    """
      Scan files in /pullrequest/ and run test on it.

      Steps: list the ``*.py`` files under ``<parent of dsa2 package>/pullrequest/``,
      run ``test_import``, run the ``ml_optim`` and ``ml_dsa2`` commands, run each
      listed file with ``python``, then read ``log_.txt`` and raise if any line
      contains "Error".

      :param arg: forwarded to ``log_info_repo`` only.
      :raises Exception: when a line of ``log_.txt`` contains "Error".
    """
    log_info_repo(arg)

    from pathlib import Path
    # Imported locally, as test_all does, so the block does not rely on a
    # module-level ``sleep`` being in scope.
    from time import sleep

    path = str(os.path.join(Path(dsa2.__path__[0]).parent, "pullrequest/"))
    log(path)

    log("############Check model ################################")
    file_list = get_recursive_files(path, r"*.py")
    log(file_list)

    ## Block list (currently empty, so every file is kept)
    block_list = []
    test_list = [t for t in file_list if t not in block_list]
    log("Used", test_list)

    log("########### Run Check ##############################")
    test_import(arg=None)
    sleep(20)
    os.system("ml_optim")
    os.system("ml_dsa2")

    for script in test_list:
        # to_logfile returns a suffix appended to the command line.
        script = script + to_logfile(prefix="", dateformat='')
        cmd = f"python {script}"
        log_separator()
        log(cmd)
        os.system(cmd)
        sleep(5)

    #### Check the logs   ###################################
    with open("log_.txt", mode="r") as f:
        lines = f.readlines()

    for x in lines:
        if "Error" in x:
            # Message text kept as in the original; the offending line is the second arg.
            raise Exception("Unknown dataset type", x)