Exemplo n.º 1
0
        while time_spent <= time_budget - (
                2 * time_one_loop) - TIME_RESIDUAL and is_bo and True:
            vprint(
                verbose, "=========== " + basename.capitalize() +
                " Training cycle " + str(cycle) + " ================")
            M = MyAutoML(D.info, verbose=False, debug_mode=debug_mode)

            M.fit(X_train_train, Y_train_train)

            vprint(
                verbose, "[+] Fitting success, time spent so far %5.2f sec" %
                (time.time() - start))
            # Make predictions
            # -----------------
            pred_train_valid = M.predict(X_train_valid)
            # Score the hold-out predictions. Only classification tasks are
            # scored; any other task type falls back to a score of 0.0.
            if 'classification' in D.info['task']:
                performance = sklearn.metrics.roc_auc_score(
                    Y_train_valid, pred_train_valid)
                # Rescale AUC from [0, 1] to [-1, 1] (2 * AUC - 1), so a
                # chance-level AUC of 0.5 maps to 0.
                performance = 2 * performance - 1
                vprint(verbose,
                       "[+] AUC for X_train_valid, %5.4f" % (performance))
            else:
                # Bind `performance` itself (this was misspelled as
                # `preformance`): the code right after this branch reads it
                # to build the measure handed to the optimizer, so leaving it
                # unbound raised NameError on the first cycle or silently
                # reused the previous cycle's score.
                performance = 0.0
                vprint(verbose, "[-] Performance cannot be measured")

            list_hyps_all.append(cur_hyps)
            list_measures_all.append([(1.0 - performance) * 10.0])
            cur_hyps, _, _, _ = model_bo.optimize(np.array(list_hyps_all),
                                                  np.array(list_measures_all),
                                                  is_grid_optimized=False,
Exemplo n.º 2
0
            else:
                # Make a learning curve by exponentially increasing the number of estimators
                M.model.n_estimators = int(np.exp2(cycle))
                
            # Never let the estimator count exceed max_estimators.
            M.model.n_estimators = min(max_estimators, M.model.n_estimators)
            vprint( verbose,  "[+] Number of estimators: %d" % (M.model.n_estimators))  
            # Remember the count used for this fit.
            # NOTE(review): presumably compared on a later cycle; the reader is not visible here.
            last_n_estimators =  M.model.n_estimators 
            # Fit base estimators on the full training set
            # -------------------
            M.fit(D.data['X_train'], D.data['Y_train']) 

            vprint( verbose,  "[+] Fitting success, time spent so far %5.2f sec" % (time.time() - start))
            vprint( verbose,  "[+] Size of trained model  %5.2f bytes" % data_io.total_size(M))
            # Make predictions on the validation and test sets
            # -----------------
            Y_valid = M.predict(D.data['X_valid'])
            Y_test = M.predict(D.data['X_test'])                         
            vprint( verbose,  "[+] Prediction success, time spent so far %5.2f sec" % (time.time() - start))
            # Write results
            # -------------
            # With overwrite_output every cycle reuses the same two file names;
            # otherwise the cycle number, zero-padded to 3 digits, is embedded in
            # the name so each cycle gets its own pair of files.
            if overwrite_output:
                filename_valid = basename + '_valid.predict'                
                filename_test = basename + '_test.predict'
            else:
                filename_valid = basename + '_valid_' + str(cycle).zfill(3) + '.predict'                
                filename_test = basename + '_test_' + str(cycle).zfill(3) + '.predict'                
            data_io.write(os.path.join(output_dir,filename_valid), Y_valid)
            data_io.write(os.path.join(output_dir,filename_test), Y_test)


            vprint( verbose,  "[+] Results saved, time spent so far %5.2f sec" % (time.time() - start))
Exemplo n.º 3
0
        # The model can also select its hyper-parameters based on other
        # elements of info. This step only instantiates the class object.
        vprint(verbose, "======== Creating model ==========")
        # The model's own verbose flag is switched off to keep the log readable.
        M = MyAutoML(D.info, verbose=False, debug_mode=debug_mode)
        # Call form of print: identical output to the Python 2 statement
        # `print M` for a single argument, and also valid on Python 3.
        print(M)

        # Per the original author's note, run_cycles cross-validates all
        # models and picks the best classifier.
        # TODO: probably needs the remaining time passed in, since
        # timekeeping has to happen inside run_cycles.
        # -------------------
        M.run_cycles(D.data['X_train'], D.data['Y_train'])
        vprint(verbose, "[+] Fitting success, time spent so far %5.2f sec" % (time.time() - start))
        vprint(verbose, "[+] Size of trained model  %5.2f bytes" % data_io.total_size(M))

        # Make predictions on the test set
        # -----------------
        Y_test = M.predict(D.data['X_test'])
        vprint(verbose, "[+] Prediction success, time spent so far %5.2f sec" % (time.time() - start))

        # Write results: a single prediction file named after the dataset.
        # -------------
        filename_test = basename + '_test.predict'

        data_io.write(os.path.join(output_dir, filename_test), Y_test)