Example #1
    def testFetchPinned(self):
        X = pin_in_object_store("hello")

        def train(config, reporter):
            get_pinned_object(X)
            reporter(timesteps_total=100, done=True)

        register_trainable("f1", train)
        [trial] = run_experiments({"foo": {
            "run": "f1",
        }})
        self.assertEqual(trial.status, Trial.TERMINATED)
        self.assertEqual(trial.last_result[TIMESTEPS_TOTAL], 100)
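The fragment above omits its imports; under the older Ray Tune API it relies on, they would likely look as follows (treat the exact module paths as assumptions for that Ray version):

import ray
from ray.tune import register_trainable, run_experiments
from ray.tune.result import TIMESTEPS_TOTAL
from ray.tune.trial import Trial
from ray.tune.utils import pin_in_object_store, get_pinned_object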
    "max_depth": max_depth_range,
    "max_features": max_features_range,
}

ray.init(memory=11000 * 1024 * 1024,
         object_store_memory=500 * 1024 * 1024,
         driver_object_store_memory=100 * 1024 * 1024,
         local_mode=False,
         num_gpus=1)

max_concurrent = cupy.cuda.runtime.getDeviceCount()

cdf = prepare_dataset()  # prepare the dataset

# for shared access across processes
data_id = pin_in_object_store(cdf)

search = build_search_alg(search_alg, hpo_ranges)

sched = select_sched_alg(sched_alg)

exp_name = None  # set this to a custom string to override the generated name

if exp_name is None:
    exp_name = "{}_{}_CV-{}_{}M_SAMP-{}".format("RF", compute, CV_folds,
                                                int(len(cdf) / 1000000),
                                                num_samples)
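The dataframe is pinned once in the driver so that every trial can fetch it instead of receiving its own serialized copy. A hedged sketch of how a training function might consume data_id and how the pieces above could be wired into tune.run (the function name train_rf, its body, and the reported metric are assumptions, not part of the original example):

from ray import tune
from ray.tune.utils import get_pinned_object

def train_rf(config):
    # every trial fetches the same pinned dataframe from the driver's object store
    cdf_local = get_pinned_object(data_id)
    # ... fit a model using config["max_depth"] and config["max_features"] ...
    tune.report(accuracy=0.0)  # placeholder metric

analysis = tune.run(train_rf,
                    name=exp_name,
                    search_alg=search,
                    scheduler=sched,
                    num_samples=num_samples,
                    resources_per_trial={"gpu": 1})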
Example #3
    def optimize(self, trial_amount, search_algo="bayesopt", max_workers=8, verbose=1):
        """
        Description:
            This method goes through all labeling methods in self.automatic_labeling_methods and optimizes their parameters for maximal specific profit. It is built with ray tune therefore it is highly scalable.
        Arguments:
            - trial_amount (int): the amount of parameter-combinations that are going to be tested
            - search_algo (string): the search algorithm with which it's going to get optimized [hyperopt, bayesopt]
            - max_workers (int): how many parallel tasks it should run
            - verbose (int): The Level of logging in the console (0, 1, 2)
        Return:
            - nothing
            - updates the alm_optimal dictionary if the new best config is better than the old one
            - for new optimizations to be saved, you need to save the object with the .save() method
        """

        for index, labeling_method in enumerate(self.auto_labeling_methods):

            # get the labeling method
            labeler = getattr(self, f"_{labeling_method}_labeling")

            # get the price data
            price_array = pd.DataFrame()
            for data in self.raw_data:
                df = pd.DataFrame()
                df["close"] = data["close"].copy()
                price_array = pd.concat([price_array, df], axis=0)
            price_array = price_array.to_numpy()

            # store the needed objects in the Ray Tune main process
            ray.init(ignore_reinit_error=True)  # Ray may already be running on later loop iterations
            labeler_id = pin_in_object_store(labeler)
            price_array_id = pin_in_object_store(price_array)
            calculate_profit_id = pin_in_object_store(calculate_profit)

            # objective function that gets passed to the optimizer
            def objective(config):

                # get objects from the central Tune storage
                labeler = get_pinned_object(labeler_id)
                price_array = get_pinned_object(price_array_id)
                calculate_profit = get_pinned_object(calculate_profit_id)

                # get the labels from the specific method
                label_list = labeler(config)

                # create the array that gets passed to the profit calculator
                label_array = pd.concat(label_list, axis=0).to_numpy()
                label_array = np.expand_dims(label_array, axis=1)
                array = np.concatenate([price_array, label_array], axis=1)

                specific_profit, _ = calculate_profit(array, self.trading_fee)

                tune.report(specific_profit=specific_profit)
                time.sleep(0.1)

            # set up the search algorithm
            if search_algo == "bayesopt":
                search_alg = BayesOptSearch(random_search_steps=trial_amount // 10)
                search_alg = ConcurrencyLimiter(search_alg, max_concurrent=max_workers)
            elif search_algo == "hyperopt":
                # warm-start with the current best params, if any
                best_config = None
                if labeling_method in self.alm_optimal:
                    best_config = [self.alm_optimal[labeling_method]["parameters"]]

                search_alg = HyperOptSearch(n_initial_points=trial_amount // 10,
                                            points_to_evaluate=best_config)
                search_alg = ConcurrencyLimiter(search_alg, max_concurrent=max_workers)
            else:
                raise ValueError("Unknown search algorithm; please choose one of: bayesopt, hyperopt")

            # run the optimization
            result = tune.run(objective,
                              config=self.alm_range[labeling_method],
                              metric="specific_profit",
                              mode="max",
                              search_alg=search_alg,
                              num_samples=trial_amount,
                              verbose=verbose)

            # save the best config in self.alm_optimal if it beats the previous best
            if (labeling_method not in self.alm_optimal
                    or self.alm_optimal[labeling_method]["specific_profit"] < result.best_result["specific_profit"]):
                self.alm_optimal[labeling_method] = {
                    "parameters": result.get_best_config(),
                    "specific_profit": result.best_result["specific_profit"],
                }

            print(result.get_best_config())
            print(result.best_result["specific_profit"])
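For reference, the same Tune building blocks used in this method can be exercised standalone. The sketch below (the toy search space and objective are assumptions) also shows the shape that self.alm_range[labeling_method] needs for BayesOptSearch, i.e. continuous numeric ranges such as tune.uniform:

import ray
from ray import tune
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.suggest.bayesopt import BayesOptSearch

def objective(config):
    # stand-in for the labeling + profit calculation above
    tune.report(specific_profit=-(config["window"] - 20.0) ** 2)

ray.init(ignore_reinit_error=True)
search_alg = ConcurrencyLimiter(BayesOptSearch(random_search_steps=5), max_concurrent=4)
result = tune.run(objective,
                  config={"window": tune.uniform(5.0, 50.0)},  # assumed example range
                  metric="specific_profit",
                  mode="max",
                  search_alg=search_alg,
                  num_samples=20)
print(result.get_best_config())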
    print("Preparing and caching data (this may take a while)…")
    ds = SparseEventDataset()
    ds.value_columns = ["wf_AE_loss", "wf_integral"]
    ds.prune(dtype)
    ds.limit_event_t_size(limit)
    ds.save(cache_dir, dtype)


# cache_data()  # uncomment to rebuild the cache before loading it below
ds = SparseEventDataset.load(cache_dir)

data_train, data_val, data_test = ds.get_train_val_test(CPU_WORKERS_PER_TRAIL)
ray.init(num_cpus=12, num_gpus=1, memory=29000000000)
#ray.init(local_mode=True)
data_train = pin_in_object_store(data_train)
data_val = pin_in_object_store(data_val)

config_space = CS.ConfigurationSpace()


# "_" is just a throwaway namespace of short aliases for building the ConfigSpace below
class _:
    add = config_space.add_hyperparameter
    extend = config_space.add_hyperparameters
    int = CS.UniformIntegerHyperparameter
    float = CS.UniformFloatHyperparameter
    cat = CS.CategoricalHyperparameter

    cond = config_space.add_condition
    eq = CS.EqualsCondition
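A hedged sketch of how these aliases might then be used to populate the search space (the hyperparameter names and ranges are illustrative assumptions, not the original configuration):

# illustrative only; hyperparameter names and ranges are assumptions
_.add(_.int("hidden_size", lower=32, upper=512))
_.add(_.float("learning_rate", lower=1e-5, upper=1e-1, log=True))
opt = _.add(_.cat("optimizer", ["adam", "sgd"]))
momentum = _.add(_.float("momentum", lower=0.0, upper=0.99))

# momentum only applies when the categorical "optimizer" equals "sgd"
_.cond(_.eq(momentum, opt, "sgd"))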
Example #5
                           n_classes=2)

X = pd.DataFrame(X.astype(data_type))

# cuML Random Forest Classifier requires the labels to be integers
y = pd.Series(y.astype(np.int32))

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=0)

from ray import tune
from ray.tune.utils import pin_in_object_store, get_pinned_object

data_id = pin_in_object_store([X_train, X_test, y_train, y_test])


class CUMLTrainable(tune.Trainable):
    def _setup(self, config):
        # fetch the dataset that was pinned in the object store by the driver
        [X_train, X_test, y_train, y_test] = get_pinned_object(data_id)

        self.cuml_model = curfc(n_estimators=config.get("estimators", 40),
                                max_depth=config.get("depth", 16),
                                max_features=1.0)
        self.X_cudf_train = cudf.DataFrame.from_pandas(X_train)
        self.X_cudf_test = cudf.DataFrame.from_pandas(X_test)
        self.y_cudf_train = cudf.Series(y_train.values)
        self.y_test = y_test

    def _train(self):