def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices=['classification', 'regression'])
    parser.add_argument('--model-dir')
    parser.add_argument('--train-csv')
    parser.add_argument('--test-csv')
    parser.add_argument('--prediction-csv')
    args = parser.parse_args()

    automl = AutoML(args.model_dir)

    if args.train_csv is not None:
        automl.train(args.train_csv, args.mode)
        automl.save()
    elif args.test_csv is not None:
        automl.load()
        automl.predict(args.test_csv, args.prediction_csv)
    else:
        exit(1)
def validate_dataset(alias: str, mode: str, train_limit: int) -> np.float64:
    Log.print(alias)

    automl = AutoML("models/check_{}".format(alias))
    automl.config["time_limit"] = train_limit
    automl.train("data/check_{}/train.csv".format(alias), mode)

    automl.config["time_limit"] = 300
    automl.config["start_time"] = time.time()
    _, score = automl.predict("data/check_{}/test.csv".format(alias),
                              "predictions/check_{}.csv".format(alias))

    return score
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices=['classification', 'regression'])
    parser.add_argument('--model-dir')
    parser.add_argument('--train-csv')
    parser.add_argument('--test-csv')
    parser.add_argument('--prediction-csv')
    args = parser.parse_args()

    automl = AutoML(args.model_dir)

    if args.train_csv is not None:
        automl.train(args.train_csv, args.mode)
        automl.save()
    elif args.test_csv is not None:
        automl.load()
        automl.predict(args.test_csv, args.prediction_csv)
        if "TIME_LIMIT" in os.environ:
            u.log("Time limit is {}".format(os.environ["TIME_LIMIT"]))
    else:
        exit(1)
def validate_dataset(alias: str, mode: str, train_limit: int) -> tuple:
    start_time = time.time()
    log(alias)

    automl = AutoML("models/check_{}".format(alias))
    automl.config["time_limit"] = train_limit
    # automl.load()
    automl.train("data/check_{}/train.csv".format(alias), mode)

    score_train_val = None
    best_oof = None  # stays None if a leak was detected and no CV models were trained
    if 'leak' not in automl.config:
        config = automl.config
        # regression OOF scores are errors (lower is better), classification scores are higher-is-better
        if config['mode'] == 'regression':
            best_oof = np.min([i['score_oof'] for i in config["lgb_cv_models"]])
        else:
            best_oof = np.max([i['score_oof'] for i in config["lgb_cv_models"]])

    out_log = pd.DataFrame(automl.config['log'])
    out_log.to_csv(f'models/check_{alias}/log_train.csv')
    print(pd.DataFrame(automl.config['log']))

    end_time = time.time()
    train_time = end_time - start_time
    start_time = end_time

    automl.config["time_limit"] = 300
    _, score_test_val = automl.predict("data/check_{}/test.csv".format(alias),
                                       "predictions/check_{}.csv".format(alias))

    out_log = pd.DataFrame(automl.config['log'])
    out_log.to_csv(f'models/check_{alias}/log_test.csv')
    print(pd.DataFrame(automl.config['log']))

    end_time = time.time()
    test_time = end_time - start_time

    return best_oof, score_test_val, train_time, test_time
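# A minimal driver sketch for validate_dataset above, assuming the same
# check-dataset layout used elsewhere in this collection (aliases 1_r..3_r are
# regression, 4_c..8_c are classification). The 1200-second training limit is
# an illustrative value, not taken from the source.
def validate_all(train_limit: int = 1200) -> pd.DataFrame:
    checks = [('1_r', 'regression'), ('2_r', 'regression'), ('3_r', 'regression'),
              ('4_c', 'classification'), ('5_c', 'classification'),
              ('6_c', 'classification'), ('7_c', 'classification'),
              ('8_c', 'classification')]
    results = []
    for alias, mode in checks:
        best_oof, score_test, train_time, test_time = validate_dataset(alias, mode, train_limit)
        results.append({'alias': alias, 'mode': mode, 'best_oof': best_oof,
                        'score_test': score_test, 'train_time': train_time,
                        'test_time': test_time})
    # One row per check dataset: OOF score, test score, and wall-clock times.
    return pd.DataFrame(results)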
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices=['classification', 'regression'])
    parser.add_argument('--model-dir')
    parser.add_argument('--train-csv')
    parser.add_argument('--test-csv')
    parser.add_argument('--prediction-csv')
    args = parser.parse_args()

    automl = AutoML(args.model_dir)

    time_limit = int(os.environ['TIME_LIMIT'])
    automl.config['time_limit'] = time_limit
    log(f'{args.model_dir} - time_limit: {time_limit}')

    def automl_train():
        automl.train(args.train_csv, args.mode)
        # automl.save()
        out_log = pd.DataFrame(automl.config['log'])
        out_log.to_csv(f'{args.model_dir}/log_train.csv')
        print(pd.DataFrame(automl.config['log']))
        print('=' * 20)

    def automl_predict():
        automl.load()
        automl.predict(args.test_csv, args.prediction_csv)

    # Reserve time for the SIGTERM/SIGKILL shutdown sequence below.
    time_left_for_task = time_limit - DELAY_TO_SIGKILL - BUFFER_BEFORE_SENDING_SIGTERM
    print('time_left_for_task :', time_left_for_task)

    if args.train_csv is not None:
        target_proc = automl_train
    elif args.test_csv is not None:
        target_proc = automl_predict
    else:
        exit(1)

    # Run the task in a child process so it can be killed if it exceeds the budget.
    p = Process(target=target_proc, kwargs={})
    p.start()
    p.join(time_left_for_task)

    pid = p.pid
    if p.is_alive():
        parent = psutil.Process(pid)
        for child in parent.children(recursive=True):
            child.kill()
        parent.kill()

        log("Starting Shutdown!")
        program_exp = re.compile(r"main\.py").search
        contacts = send_signal_to_our_processes(sig=SIGTERM, filter=program_exp)
        log("Sending SIG=%d to %s" % (SIGTERM, str(contacts)))
        time.sleep(DELAY_TO_SIGKILL)
        contacts = send_signal_to_our_processes(sig=SIGKILL, filter=program_exp)
        log("Sending SIG=%d to %s" % (SIGKILL, str(contacts)))
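# send_signal_to_our_processes is referenced above but not defined in this
# snippet. A possible psutil-based sketch matching the call signature used
# above (sig=..., filter=...); the exact behaviour of the original helper is
# an assumption, including the exclusion of the current process.
def send_signal_to_our_processes(sig, filter):
    signalled = []
    for proc in psutil.process_iter(['pid', 'cmdline']):
        try:
            cmdline = ' '.join(proc.info['cmdline'] or [])
            # `filter` is a callable such as re.compile(r"main\.py").search
            if filter(cmdline) and proc.pid != os.getpid():
                proc.send_signal(sig)
                signalled.append(proc.pid)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
    return signalled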
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices=['classification', 'regression'])
    parser.add_argument('--model-dir')
    parser.add_argument('--train-csv')
    parser.add_argument('--test-csv')
    parser.add_argument('--prediction-csv')
    args = parser.parse_args()

    automl = AutoML(args.model_dir)

    log('####### cur time = ' + str(datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")))

    # path_pred
    automl.config['path_pred'] = args.model_dir

    if args.train_csv is not None:
        log('automl train...')
        automl.train(args.train_csv, args.mode)
        automl.save()
    elif args.test_csv is not None:
        log('automl predict...')
        automl.load()
        automl.predict(args.test_csv, args.prediction_csv)
    else:
        exit(1)

    # end
    log('####### cur time = ' + str(datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")))
def main():
    # python main.py --mode regression --train-csv '../sdsj2018_automl_check_datasets/check_1_r/train.csv' --model-dir '../model/'
    # python main.py --test-csv '../sdsj2018_automl_check_datasets/check_1_r/test.csv' --prediction-csv '../sdsj2018_automl_check_datasets/check_1_r/prediction.csv' --model-dir '../model/'
    parser = argparse.ArgumentParser()
    # parser.add_argument('--train-csv'
    #                     , default="../../sdsj2018_automl_check_datasets/check_3_r/train.csv"
    #                     )
    # parser.add_argument('--test-csv'
    #                     , default='../../sdsj2018_automl_check_datasets/check_3_r/test.csv'
    #                     )
    # parser.add_argument('--prediction-csv'
    #                     , default='../../sdsj2018_automl_check_datasets/check_3_r/prediction.csv'
    #                     )
    # parser.add_argument('--mode', choices=['classification', 'regression']
    #                     , default="classification"
    #                     )
    # parser.add_argument('--model-dir', default="../../model/")
    parser.add_argument('--mode', choices=['classification', 'regression'])
    parser.add_argument('--model-dir')
    parser.add_argument('--train-csv')
    parser.add_argument('--test-csv')
    parser.add_argument('--prediction-csv')
    parser.add_argument('--verbose', default=2)
    args = parser.parse_args()
    verbose = int(args.verbose)

    if args.train_csv is not None:
        params_autoML = {
            'memory': {'max_size_mb': 2 * 1024,
                       'max_size_train_samples': 100000},
            'field_target_name': 'target',
            'pipeline': {
                'rename\nid_columns': {'node': 'rename_id_columns',
                                       'parents': None,
                                       # 'args': {}
                                       },
                'check\ncolumns\nexists_start': {'node': 'check_columns_exists',
                                                 'parents': 'rename\nid_columns',
                                                 'args': {'key_stage': 'preprocess_begin',
                                                          'drop_columns_test': True}
                                                 },
                'drop_columns': {'node': 'drop_columns',
                                 'parents': 'check\ncolumns\nexists_start',
                                 # 'args': {}
                                 },
                'fillna': {'node': 'fillna',
                           'parents': 'drop_columns',
                           # 'args': {'args': {"number_": {'agg': 'median'},
                           #                   "datetime_": {'agg': 'median'},
                           #                   "string_": {'value': ''},
                           #                   }},
                           },
                'to_int8': {'node': 'to_int8',
                            'parents': 'fillna',
                            # 'args': {}
                            },
                'non_negative\ntarget_detect': {'node': 'non_negative_target_detect',
                                                'parents': 'to_int8',
                                                # 'args': {}
                                                },
                'subsample0': {'node': 'subsample',
                               'parents': 'non_negative\ntarget_detect',
                               # 'args': {}
                               },
                'transform\ndatetime': {'node': 'transform_datetime',
                                        'parents': 'subsample0',
                                        # 'args': {}
                                        },
                'transform\ncategorical': {'node': 'transform_categorical',
                                           'parents': 'transform\ndatetime',
                                           # 'args': {}
                                           },
                # 'scale': {'node': 'scale',
                #           'parents': 'transform\ncategorical',
                #           'args': {}
                #           },
                # 'feature_generation': {'node': 'feature_generation',
                #                        'parents': 'scale',
                #                        'args': {}
                #                        },
                'subsample1': {'node': 'subsample',
                               'parents': 'transform\ncategorical',  # 'scale',
                               # 'args': {}
                               },
                'columns_float64\nto_float32': {'node': 'columns_float64_to_32',
                                                'parents': 'subsample1',
                                                # 'args': {}
                                                },
                'check\ncolumns\nexists_end': {'node': 'check_columns_exists',
                                               'parents': 'columns_float64\nto_float32',
                                               'args': {'key_stage': 'preprocess_end',
                                                        'drop_columns_test': True}
                                               },
                # 'split_X_y': {'node': 'split_X_y',
                #               'parents': 'check\ncolumns\nexists_end',
                #               'args': {}
                #               },
                # 'bayes': {'node': 'model',
                #           'parents': 'check\ncolumns\nexists_end',
                #           'args': {'models': ['bayes']}
                #           },
                'lightgbm\nend': {'node': 'model',
                                  'parents': 'columns_float64\nto_float32',  # 'bayes',
                                  'args': {'models': ['lightgbm']}
                                  },
                # '': {'node': '',
                #      'parents': '',
                #      'args': {}
                #      },
            }
        }

        automl = AutoML(args.model_dir, params=params_autoML, verbose=verbose)
        # automl.pipeline_draw(view=True)
        automl.train(args.train_csv, args.mode)
        automl.save()
    elif args.test_csv is not None:
        automl = AutoML(args.model_dir, params={}, verbose=verbose)
        automl.load()
        _, score = automl.predict(args.test_csv, args.prediction_csv)
        if verbose:
            print('score', score)
    else:
        exit(1)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices=['classification', 'regression'])
    parser.add_argument('--model-dir')
    parser.add_argument('--train-csv')
    parser.add_argument('--test-csv')
    parser.add_argument('--prediction-csv')
    parser.add_argument('--test-target-csv')
    args = parser.parse_args()

    if args.model_dir is None:
        tests = {
            1: 'regression',
            2: 'regression',
            3: 'regression',
            4: 'classification',
            5: 'classification',
            6: 'classification',
            7: 'classification',
            8: 'classification',
        }
        for i in [3]:  # tests.keys():
            folder = r'..\check_' + str(i) + '_' + tests[i][0] + '\\'
            argv = [
                '--train-csv', folder + 'train.csv',
                '--test-csv', folder + 'test.csv',
                '--prediction-csv', folder + 'prediction.csv',
                '--test-target-csv', folder + 'test-target.csv',
                '--model-dir', '.',
                # '--nrows', '5000' if i in [3, 4, 5, 6, 7] else '500' if i in [8] else '-1',
                '--mode', tests[i]
            ]
            args = parser.parse_args(argv)
            logf('processing', folder)

            automl = AutoML(args.model_dir)
            if args.train_csv is not None:
                automl.train(args.train_csv, args.mode)
                automl.save()
                log_trail('-', '\n')
            if args.test_csv is not None:
                automl.load()
                automl.predict(args.test_csv, args.prediction_csv)
                log_trail('-', '\n')
    else:
        automl = AutoML(args.model_dir)
        if args.train_csv is not None:
            automl.train(args.train_csv, args.mode)
            automl.save()
            log_trail('-', '\n')
        if args.test_csv is not None:
            automl.load()
            automl.predict(args.test_csv, args.prediction_csv)
            log_trail('-', '\n')

    log_trail('=', '\n\n')
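# Each of the main() variants above is presumably invoked as a script through
# the standard entry-point guard (not shown in the excerpts):
if __name__ == '__main__':
    main()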