while time_spent <= time_budget - ( 2 * time_one_loop) - TIME_RESIDUAL and is_bo and True: vprint( verbose, "=========== " + basename.capitalize() + " Training cycle " + str(cycle) + " ================") M = MyAutoML(D.info, verbose=False, debug_mode=debug_mode) M.fit(X_train_train, Y_train_train) vprint( verbose, "[+] Fitting success, time spent so far %5.2f sec" % (time.time() - start)) # Make predictions # ----------------- pred_train_valid = M.predict(X_train_valid) if 'classification' in D.info['task']: performance = sklearn.metrics.roc_auc_score( Y_train_valid, pred_train_valid) performance = 2 * performance - 1 vprint(verbose, "[+] AUC for X_train_valid, %5.4f" % (performance)) else: preformance = 0.0 vprint(verbose, "[-] Performance cannot be measured") list_hyps_all.append(cur_hyps) list_measures_all.append([(1.0 - performance) * 10.0]) cur_hyps, _, _, _ = model_bo.optimize(np.array(list_hyps_all), np.array(list_measures_all), is_grid_optimized=False,
# NOTE(review): this excerpt opens on an `else:` whose matching `if` is not
# visible, and its original line breaks and indentation have been lost, so
# where the `else` body ends cannot be determined from here -- confirm against
# the original layout before reformatting this line.
# Visible steps, in order:
#   1. set M.model.n_estimators to int(np.exp2(cycle)), capped at
#      max_estimators, and remember the value in last_n_estimators;
#   2. fit M on D.data['X_train'] / D.data['Y_train'] and report the elapsed
#      time and data_io.total_size(M);
#   3. predict on D.data['X_valid'] and D.data['X_test'];
#   4. write both predictions under output_dir via data_io.write. When
#      overwrite_output is truthy the file names are fixed
#      (basename + '_valid.predict' / '_test.predict'); otherwise they carry
#      the cycle number zero-padded to three digits.
else: # Make a learning curve by exponentially increasing the number of estimators M.model.n_estimators = int(np.exp2(cycle)) M.model.n_estimators = min(max_estimators, M.model.n_estimators) vprint( verbose, "[+] Number of estimators: %d" % (M.model.n_estimators)) last_n_estimators = M.model.n_estimators # Fit base estimators # ------------------- M.fit(D.data['X_train'], D.data['Y_train']) vprint( verbose, "[+] Fitting success, time spent so far %5.2f sec" % (time.time() - start)) vprint( verbose, "[+] Size of trained model %5.2f bytes" % data_io.total_size(M)) # Make predictions # ----------------- Y_valid = M.predict(D.data['X_valid']) Y_test = M.predict(D.data['X_test']) vprint( verbose, "[+] Prediction success, time spent so far %5.2f sec" % (time.time() - start)) # Write results # ------------- if overwrite_output: filename_valid = basename + '_valid.predict' filename_test = basename + '_test.predict' else: filename_valid = basename + '_valid_' + str(cycle).zfill(3) + '.predict' filename_test = basename + '_test_' + str(cycle).zfill(3) + '.predict' data_io.write(os.path.join(output_dir,filename_valid), Y_valid) data_io.write(os.path.join(output_dir,filename_test), Y_test) vprint( verbose, "[+] Results saved, time spent so far %5.2f sec" % (time.time() - start))
# Create the model, train it through its own run_cycles() entry point, predict
# on the test split and write the predictions to output_dir.
# The model can also select its hyper-parameters based on other elements of info.

# Only instantiates class object
vprint(verbose, "======== Creating model ==========")
# I turned off verbose to avoid tons of junk...
M = MyAutoML(D.info, verbose=False, debug_mode=debug_mode)
# Parenthesised so the line parses under both Python 2 and Python 3; with a
# single argument the printed output is the same on either version.
print(M)

# Does cross validation for all models and picks the best classifier
# Probably need to pass in the time remaining since timekeeping needs to be done in here
# -------------------
M.run_cycles(D.data['X_train'], D.data['Y_train'])
vprint(
    verbose,
    "[+] Fitting success, time spent so far %5.2f sec"
    % (time.time() - start))
vprint(
    verbose,
    "[+] Size of trained model %5.2f bytes" % data_io.total_size(M))

# Make predictions
# -----------------
Y_test = M.predict(D.data['X_test'])
vprint(
    verbose,
    "[+] Prediction success, time spent so far %5.2f sec"
    % (time.time() - start))

# Write results
# -------------
# Only the test predictions are written here, always under the same file name.
filename_test = basename + '_test.predict'
data_io.write(os.path.join(output_dir, filename_test), Y_test)