Exemplo n.º 1
0
def main():
    """Command-line entry point: train or predict with ``AutoML``.

    An ``AutoML`` object is created for ``--model-dir``. When ``--train-csv``
    is supplied the model is trained (using ``--mode``) and saved; otherwise,
    when ``--test-csv`` is supplied, the model is loaded and predictions are
    written via ``--prediction-csv``. With neither option the process exits
    with status 1.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--mode', choices=['classification', 'regression'])
    # The remaining options are plain optional strings, registered in order.
    for flag in ('--model-dir', '--train-csv', '--test-csv', '--prediction-csv'):
        cli.add_argument(flag)
    args = cli.parse_args()

    automl = AutoML(args.model_dir)

    if args.train_csv is not None:
        automl.train(args.train_csv, args.mode)
        automl.save()
        return

    if args.test_csv is not None:
        automl.load()
        automl.predict(args.test_csv, args.prediction_csv)
        return

    # Neither a training nor a test file was given.
    exit(1)
Exemplo n.º 2
0
def validate_dataset(alias: str, mode: str, train_limit: int) -> np.float64:
    """Train ``AutoML`` on the ``check_<alias>`` dataset and score its test set.

    Args:
        alias: Dataset suffix substituted into the ``models/``, ``data/`` and
            ``predictions/`` paths.
        mode: Forwarded to ``AutoML.train``.
        train_limit: Value stored in ``config["time_limit"]`` before training.

    Returns:
        The second element of the pair returned by ``AutoML.predict``.
    """
    Log.print(alias)

    model_dir = "models/check_{}".format(alias)
    train_path = "data/check_{}/train.csv".format(alias)
    test_path = "data/check_{}/test.csv".format(alias)
    prediction_path = "predictions/check_{}.csv".format(alias)

    automl = AutoML(model_dir)

    # Training runs under the caller-supplied limit.
    automl.config["time_limit"] = train_limit
    automl.train(train_path, mode)

    # Prediction gets a fixed limit of 300 and a fresh start timestamp.
    automl.config["time_limit"] = 300
    automl.config["start_time"] = time.time()
    prediction_result = automl.predict(test_path, prediction_path)
    _, score = prediction_result

    return score
Exemplo n.º 3
0
def main():
    """Command-line entry point: train or predict with ``AutoML``.

    An ``AutoML`` object is created for ``--model-dir``. When ``--train-csv``
    is supplied the model is trained (using ``--mode``) and saved; otherwise,
    when ``--test-csv`` is supplied, the model is loaded and predictions are
    written via ``--prediction-csv``. With neither option the process exits
    with status 1.

    After a successful run, the ``TIME_LIMIT`` environment variable is logged
    if it is set.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices=['classification', 'regression'])
    parser.add_argument('--model-dir')
    parser.add_argument('--train-csv')
    parser.add_argument('--test-csv')
    parser.add_argument('--prediction-csv')
    args = parser.parse_args()

    automl = AutoML(args.model_dir)

    if args.train_csv is not None:
        automl.train(args.train_csv, args.mode)
        automl.save()
    elif args.test_csv is not None:
        automl.load()
        automl.predict(args.test_csv, args.prediction_csv)
    else:
        # The failure exit belongs to the train/test dispatch. Previously this
        # ``else`` hung off the TIME_LIMIT check below, so a successful run
        # exited with status 1 whenever TIME_LIMIT was unset, and a run with
        # no input file returned normally whenever it was set.
        exit(1)

    # Purely informational; must not influence the exit status.
    if "TIME_LIMIT" in os.environ:
        u.log("Time limit is {}".format(os.environ["TIME_LIMIT"]))
Exemplo n.º 4
0
def validate_dataset(alias: str, mode: str, train_limit: int) -> tuple:
    """Train and evaluate ``AutoML`` on the ``check_<alias>`` dataset.

    The training log and the prediction log (``automl.config['log']``) are
    written to ``models/check_<alias>/log_train.csv`` and
    ``models/check_<alias>/log_test.csv`` and printed.

    Args:
        alias: Dataset suffix substituted into the ``models/``, ``data/`` and
            ``predictions/`` paths.
        mode: Forwarded to ``AutoML.train``.
        train_limit: Value stored in ``config["time_limit"]`` before training.

    Returns:
        A 4-tuple ``(best_oof, score_test_val, train_time, test_time)``:

        * ``best_oof`` - the minimum (when ``config['mode']`` is
          ``'regression'``) or maximum (otherwise) ``score_oof`` over
          ``config["lgb_cv_models"]``; ``None`` when the config contains a
          ``'leak'`` key.
        * ``score_test_val`` - second element of the pair returned by
          ``AutoML.predict``.
        * ``train_time`` / ``test_time`` - wall-clock seconds spent in the
          training phase and the prediction phase respectively.
    """
    start_time = time.time()

    log(alias)

    automl = AutoML("models/check_{}".format(alias))

    automl.config["time_limit"] = train_limit
    automl.train("data/check_{}/train.csv".format(alias), mode)

    # Must be bound on every path: previously the final ``return`` raised
    # UnboundLocalError whenever 'leak' was present in the config.
    best_oof = None
    if 'leak' not in automl.config:
        config = automl.config
        oof_scores = [i['score_oof'] for i in config["lgb_cv_models"]]
        # Lower is better for regression, higher is better otherwise
        # -- presumably an error metric vs. a ranking metric; TODO confirm.
        if config['mode'] == 'regression':
            best_oof = np.min(oof_scores)
        else:
            best_oof = np.max(oof_scores)

    out_log = pd.DataFrame(automl.config['log'])
    out_log.to_csv(f'models/check_{alias}/log_train.csv')
    print(out_log)

    end_time = time.time()
    train_time = end_time - start_time
    start_time = end_time

    # Prediction runs under a fixed limit of 300.
    automl.config["time_limit"] = 300
    _, score_test_val = automl.predict(
        "data/check_{}/test.csv".format(alias),
        "predictions/check_{}.csv".format(alias),
    )

    out_log = pd.DataFrame(automl.config['log'])
    out_log.to_csv(f'models/check_{alias}/log_test.csv')
    print(out_log)

    end_time = time.time()
    test_time = end_time - start_time

    return best_oof, score_test_val, train_time, test_time
Exemplo n.º 5
0
def main():
    """Run AutoML training or prediction in a child process under a time budget.

    The budget is read from the ``TIME_LIMIT`` environment variable (which
    must be set) and stored in ``automl.config['time_limit']``. The selected
    task runs in a ``Process`` that is joined for at most
    ``time_limit - DELAY_TO_SIGKILL - BUFFER_BEFORE_SENDING_SIGTERM`` seconds;
    if it is still alive afterwards, it and all of its descendants are killed.

    Finally SIGTERM and, after ``DELAY_TO_SIGKILL`` seconds, SIGKILL are sent
    through ``send_signal_to_our_processes`` using a ``main\\.py`` filter.

    Exits with status 1 when neither ``--train-csv`` nor ``--test-csv`` is
    given.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--mode', choices=['classification', 'regression'])
    for flag in ('--model-dir', '--train-csv', '--test-csv', '--prediction-csv'):
        cli.add_argument(flag)
    args = cli.parse_args()

    automl = AutoML(args.model_dir)

    time_limit = int(os.environ['TIME_LIMIT'])
    automl.config['time_limit'] = time_limit
    log(f'{args.model_dir} - time_limit: {time_limit}')

    def run_training():
        # Train, then dump and print the training log.
        automl.train(args.train_csv, args.mode)

        train_log = pd.DataFrame(automl.config['log'])
        train_log.to_csv(f'{args.model_dir}/log_train.csv')
        print(pd.DataFrame(automl.config['log']))
        print('='*20)

    def run_prediction():
        # Load the stored model and write predictions.
        automl.load()
        automl.predict(args.test_csv, args.prediction_csv)

    # Reserve time for the SIGTERM/SIGKILL shutdown sequence below.
    time_left_for_task = time_limit - DELAY_TO_SIGKILL - BUFFER_BEFORE_SENDING_SIGTERM
    print('time_left_for_task :', time_left_for_task)

    if args.train_csv is not None:
        task = run_training
    elif args.test_csv is not None:
        task = run_prediction
    else:
        exit(1)

    worker = Process(target=task)
    worker.start()
    worker.join(time_left_for_task)
    worker_pid = worker.pid
    if worker.is_alive():
        # Budget exhausted: kill the descendants first, then the worker.
        worker_proc = psutil.Process(worker_pid)
        for descendant in worker_proc.children(recursive=True):
            descendant.kill()
        worker_proc.kill()

    log("Starting Shutdown!")

    matches_program = re.compile(r"main\.py").search

    def broadcast(sig):
        # Deliver `sig` to the matching processes and log who received it.
        contacts = send_signal_to_our_processes(sig=sig, filter=matches_program)
        log("Sending SIG=%d to %s" % (sig, str(contacts)))

    broadcast(SIGTERM)
    time.sleep(DELAY_TO_SIGKILL)
    broadcast(SIGKILL)
Exemplo n.º 6
0
def main():
    """Command-line entry point: train or predict with ``AutoML``, with timestamps.

    Logs the current time, stores ``--model-dir`` in
    ``automl.config['path_pred']`` and then either trains and saves
    (``--train-csv``) or loads and predicts (``--test-csv``). With neither
    option the process exits with status 1. On the two successful paths the
    current time is logged again at the end.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--mode', choices=['classification', 'regression'])
    for flag in ('--model-dir', '--train-csv', '--test-csv', '--prediction-csv'):
        cli.add_argument(flag)
    args = cli.parse_args()

    automl = AutoML(args.model_dir)

    def log_current_time():
        # Emit a marker line carrying the current local time.
        stamp = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
        log('####### cur time = ' + stamp)

    log_current_time()
    automl.config['path_pred'] = args.model_dir

    training_requested = args.train_csv is not None
    prediction_requested = args.test_csv is not None

    if training_requested:
        log('automl train...')
        automl.train(args.train_csv, args.mode)
        automl.save()
    elif prediction_requested:
        log('automl predict...')
        automl.load()
        automl.predict(args.test_csv, args.prediction_csv)
    else:
        exit(1)

    log_current_time()
Exemplo n.º 7
0
def main():
    """Command-line entry point: train or predict with a pipeline-configured ``AutoML``.

    With ``--train-csv`` an ``AutoML`` object is built from an explicit
    pipeline description, trained in ``--mode`` and saved. Otherwise, with
    ``--test-csv``, an ``AutoML`` object with empty params is loaded from
    ``--model-dir``, predictions are written to ``--prediction-csv`` and the
    returned score is printed when ``--verbose`` is non-zero. With neither
    option the process exits with status 1.
    """
    # Example invocations:
    #   python main.py --mode regression --train-csv '../sdsj2018_automl_check_datasets/check_1_r/train.csv' --model-dir '../model/'
    #   python main.py --test-csv '../sdsj2018_automl_check_datasets/check_1_r/test.csv' --prediction-csv '../sdsj2018_automl_check_datasets/check_1_r/prediction.csv' --model-dir '../model/'
    cli = argparse.ArgumentParser()
    cli.add_argument('--mode', choices=['classification', 'regression'])
    for flag in ('--model-dir', '--train-csv', '--test-csv', '--prediction-csv'):
        cli.add_argument(flag)
    cli.add_argument('--verbose', default=2)

    args = cli.parse_args()
    verbose = int(args.verbose)

    if args.train_csv is not None:
        # Each pipeline entry names a node and the entry it hangs off
        # ('parents'); keys containing '\n' are kept exactly as written.
        pipeline = {
            'rename\nid_columns': {
                'node': 'rename_id_columns',
                'parents': None,
            },
            'check\ncolumns\nexists_start': {
                'node': 'check_columns_exists',
                'parents': 'rename\nid_columns',
                'args': {
                    'key_stage': 'preprocess_begin',
                    'drop_columns_test': True,
                },
            },
            'drop_columns': {
                'node': 'drop_columns',
                'parents': 'check\ncolumns\nexists_start',
            },
            'fillna': {
                'node': 'fillna',
                'parents': 'drop_columns',
                'args': {
                    'args': {
                        "number_": {'agg': 'median'},
                        "datetime_": {'agg': 'median'},
                        "string_": {'value': ''},
                    },
                },
            },
            'to_int8': {
                'node': 'to_int8',
                'parents': 'fillna',
            },
            'non_negative\ntarget_detect': {
                'node': 'non_negative_target_detect',
                'parents': 'to_int8',
            },
            'subsample0': {
                'node': 'subsample',
                'parents': 'non_negative\ntarget_detect',
            },
            'transform\ndatetime': {
                'node': 'transform_datetime',
                'parents': 'subsample0',
            },
            'transform\ncategorical': {
                'node': 'transform_categorical',
                'parents': 'transform\ndatetime',
            },
            'subsample1': {
                'node': 'subsample',
                'parents': 'transform\ncategorical',
            },
            'columns_float64\nto_float32': {
                'node': 'columns_float64_to_32',
                'parents': 'subsample1',
            },
            'check\ncolumns\nexists_end': {
                'node': 'check_columns_exists',
                'parents': 'columns_float64\nto_float32',
                'args': {
                    'key_stage': 'preprocess_end',
                    'drop_columns_test': True,
                },
            },
            # The model node hangs off the float32 conversion, not off the
            # final column check.
            'lightgbm\nend': {
                'node': 'model',
                'parents': 'columns_float64\nto_float32',
                'args': {'models': ['lightgbm']},
            },
        }
        automl_params = {
            'memory': {
                'max_size_mb': 2 * 1024,
                'max_size_train_samples': 100000,
            },
            'field_target_name': 'target',
            'pipeline': pipeline,
        }

        automl = AutoML(args.model_dir, params=automl_params, verbose=verbose)
        automl.train(args.train_csv, args.mode)
        automl.save()
    elif args.test_csv is not None:
        automl = AutoML(args.model_dir, params={}, verbose=verbose)
        automl.load()
        _, score = automl.predict(args.test_csv, args.prediction_csv)
        if verbose:
            print('score', score)
    else:
        exit(1)
Exemplo n.º 8
0
def main():
    """Command-line entry point with a built-in local test mode.

    When ``--model-dir`` is given, the parsed arguments are used directly:
    train and save if ``--train-csv`` is set, then load and predict if
    ``--test-csv`` is set (both may run in one invocation).

    When ``--model-dir`` is absent, the same sequence is run on hard-coded
    local check datasets (currently only dataset 3) with arguments synthesised
    for a ``..\\check_<i>_<r|c>\\`` folder and the current directory as the
    model directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices=['classification', 'regression'])
    for flag in ('--model-dir', '--train-csv', '--test-csv',
                 '--prediction-csv', '--test-target-csv'):
        parser.add_argument(flag)

    def run(opts):
        # Train and/or predict according to which CSV options are present.
        automl = AutoML(opts.model_dir)

        if opts.train_csv is not None:
            automl.train(opts.train_csv, opts.mode)
            automl.save()
            log_trail('-', '\n')

        if opts.test_csv is not None:
            automl.load()
            automl.predict(opts.test_csv, opts.prediction_csv)
            log_trail('-', '\n')

    args = parser.parse_args()

    if args.model_dir is not None:
        run(args)
    else:
        # Dataset index -> task mode for the local check datasets.
        tests = {
            **dict.fromkeys((1, 2, 3), 'regression'),
            **dict.fromkeys((4, 5, 6, 7, 8), 'classification'),
        }

        for i in [3]:  # tests.keys():
            task_mode = tests[i]
            # Folder suffix is the first letter of the mode ('r' or 'c').
            folder = r'..\check_' + str(i) + '_' + task_mode[0] + '\\'
            argv = [
                '--train-csv', folder + 'train.csv',
                '--test-csv', folder + 'test.csv',
                '--prediction-csv', folder + 'prediction.csv',
                '--test-target-csv', folder + 'test-target.csv',
                '--model-dir', '.',
                '--mode', task_mode,
            ]
            args = parser.parse_args(argv)

            logf('processing', folder)

            run(args)

    log_trail('=', '\n\n')