Example #1
File: A2C.py Project: elumixor/DRL
def train_epoch(rollouts):
    actor.train()
    critic.train()

    loss = 0
    total_len = 0

    for states, actions, rewards, next_states in rollouts:
        # Second step: use the advantage function estimated by the critic;
        # bootstrap the estimated next-state values with the rewards (TD(1))
        values = critic(states)

        last_state = next_states[-1].unsqueeze(0)
        last_value = critic(last_state).item()
        next_values = bootstrap(rewards, last_value, discounting)

        advantage = normalize(next_values - values).flatten()

        loss_critic = .5 * (advantage**2).sum()

        # Get probabilities, shape (episode_length * num_actions)
        # Then select only the probabilities corresponding to sampled actions
        probabilities = actor(states)
        probabilities = probabilities[range(states.shape[0]),
                                      actions.flatten()]
        loss_actor = (-torch.log(probabilities) * advantage).sum()

        # Accumulate; dividing by total_len below gives a length-weighted average (helps convergence)
        loss += loss_critic + loss_actor
        total_len += states.shape[0]

    loss = loss / total_len

    optim_actor.zero_grad()
    optim_critic.zero_grad()
    loss.backward()
    optim_actor.step()
    optim_critic.step()

    actor.eval()
    critic.eval()
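
The bootstrap helper called above is defined elsewhere in the project and not shown here. A minimal sketch, assuming it computes discounted returns seeded with the critic's estimate of the state that follows the last transition:

import torch

def bootstrap(rewards, last_value, discounting):
    # Hypothetical sketch: walk the rewards backwards, seeding the running
    # return with the critic's value estimate of the final next state.
    returns = torch.zeros_like(rewards)
    running = last_value
    for t in reversed(range(len(rewards))):
        running = rewards[t] + discounting * running
        returns[t] = running
    return returns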
Example #2
def random_forest_classifier(dataset, random_number=None, tree_number=10):

    rf = split_tree.RandomForest()
    end, feature = dataset.shape

    feature = range(feature - 1)

    # default to half of the dataset when no sample size is given
    dataset_number = random_number if random_number is not None else end // 2

    for i in range(tree_number):

        sample_set = bootstrap(dataset, dataset_number)

        sample_feature = random.sample(feature, len(feature) // 2)

        tree = choose_feature(sample_set, 0, end // 2, sample_feature)

        rf.Add_Tree(tree)

    return rf
Example #3
def main():
    bootstrap()
Example #4
    ('/skia-telemetry/skia_telemetry_info_page?',
     skia_telemetry.TelemetryInfoPage),
    ('/skia-telemetry/skia_try', skia_telemetry.SkiaTryPage),
    ('/skia-telemetry/update_admin_tasks?',
     skia_telemetry.UpdateAdminTasksPage),
    ('/skia-telemetry/update_chromium_build_tasks?',
     skia_telemetry.UpdateChromiumBuildTasksPage),
    ('/skia-telemetry/update_chromium_try_tasks?',
     skia_telemetry.UpdateChromiumTryTasksPage),
    ('/skia-telemetry/update_skia_try_tasks?',
     skia_telemetry.UpdateSkiaTryTasksPage),
    ('/skia-telemetry/update_telemetry_tasks?',
     skia_telemetry.UpdateTelemetryTasksPage),
    ('/skia-telemetry/update_lua_tasks?', skia_telemetry.UpdateLuaTasksPage),
    ('/skia-telemetry/update_telemetry_info?', skia_telemetry.UpdateInfoPage),
    ('/update_gpu_sheriffs_schedule', sheriff.UpdateGpuSheriffsSchedule),
    ('/update_robocops_schedule', sheriff.UpdateRobocopsSchedule),
    ('/update_sheriffs_schedule', sheriff.UpdateSheriffsSchedule),
    ('/update_troopers_schedule', sheriff.UpdateTroopersSchedule),
]
APPLICATION = webapp.WSGIApplication(URLS, debug=True)

# Do some one-time initializations.
base_page.bootstrap()
builder_status.bootstrap()
commit_queue.bootstrap()
status.bootstrap()
sheriff.bootstrap()
skia_telemetry.bootstrap()
utils.bootstrap()
Example #6
def main():
    bootstrap()


def cli():
    bootstrap()
Example #8
def test_bootstrap():
    with mock.patch("numpy.random.randint", return_value=np.arange(len(x))):
        X_subset, z_subset = bootstrap(X, z, 1)
        assert (np.allclose(X_subset, X) and np.allclose(z, z_subset))
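
The mock pins down the assumed interface: bootstrap(X, z, random_state) draws row indices through numpy.random.randint and returns the resampled pair in (X, z) order. A minimal sketch consistent with this test (the seeding detail is an assumption):

import numpy as np

def bootstrap(X, z, random_state):
    # Hypothetical sketch: sample row indices with replacement via
    # numpy.random.randint (which the test above mocks), then index both arrays.
    np.random.seed(random_state)
    indices = np.random.randint(0, len(X), size=len(X))
    return X[indices], z[indices]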
Example #9
def singleDriverTrainer2(file_to_classify,
                         training_files,
                         threshold=0.2,
                         in_model=RandomForestClassifier()):
    """
    Takes in the file path of the driver file we want to classify (the target),
    the paths of the files we will use as our 'noise' files,
    and the input model

    First, trains the input model on all of the files, with file_to_classify
    as class 1 and training_files as class 0

    Then, uses the model to make probabilistic predictions on file_to_classify

    Changes:
    1. Upsamples target data to balance classes for model training
    2. Uses probabilistic predictions relabels 1s to 0s based on threshhold
    """
    # first, grab the target data
    x_target, y_target, id_target = extractCSV(file_to_classify, target=1)

    # remove na's
    x_target = np.nan_to_num(x_target)
    y_target = np.nan_to_num(y_target)

    # copy target data
    x_target_upsampled = copy.copy(x_target)
    y_target_upsampled = copy.copy(y_target)

    #upsample target to balance classes
    if len(training_files) > 1:
        num_samples = len(x_target_upsampled) * len(training_files)
        x_target_upsampled, y_target_upsampled = bootstrap(
            x_target, y_target, num_samples)

    x_trains = None
    y_trains = None

    # loop through all of our training/noise files, keep separate from target
    for filepath in training_files:
        # open the file
        x_current, y_current, ids = extractCSV(filepath, target=0)
        # and add the contents to our training data
        if x_trains is None or y_trains is None:
            x_trains = x_current
            y_trains = y_current
        else:
            x_trains = np.concatenate((x_trains, x_current))
            y_trains = np.concatenate((y_trains, y_current))
    # repeat for every filepath in our training files list

    # remove NAs from train data
    x_trains = np.nan_to_num(x_trains)
    y_trains = np.nan_to_num(y_trains)

    # now combine with target data
    x_all = np.concatenate((x_target_upsampled, x_trains))
    y_all = np.concatenate((y_target_upsampled, y_trains))

    # with all of our data, now we can train our model
    in_model.fit(x_all, y_all)

    # now we are ready to provide class probabilities for our predictions
    predictions = in_model.predict_proba(x_target)

    # note that we must extract the index of the class 1 probability
    prob_idx = np.where(in_model.classes_ == 1)[0][0]
    class_probs = [pred[prob_idx] for pred in predictions]

    #get new data labels by comparing threshold to class probs
    new_labels = np.array([1 if p > threshold else 0 for p in class_probs])

    #redo upsampling
    if len(training_files) > 1:
        num_samples = len(x_target) * len(training_files)
        x_target_relabeled, y_target_relabeled = bootstrap(
            x_target, new_labels, num_samples)
    else:
        x_target_relabeled = copy.copy(x_target)
        y_target_relabeled = copy.copy(new_labels)

    #combine with non-target data from before
    x_all_new = np.concatenate((x_target_relabeled, x_trains))
    y_all_new = np.concatenate((y_target_relabeled, y_trains))

    # refit model
    in_model.fit(x_all_new, y_all_new)
    # provide class probabilities for our predictions
    predictions = in_model.predict_proba(x_target)
    # extract the index of the class 1 probability
    prob_idx = np.where(in_model.classes_ == 1)[0][0]
    class_probs = [pred[prob_idx] for pred in predictions]

    # and return a matrix of the ids and the corresponding probabilities
    return_mat = [[id_target[idx], class_probs[idx]]
                  for idx in range(len(class_probs))]
    # report
    print('completed driver %s' % file_to_classify)

    return np.asarray(return_mat)
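
The bootstrap upsampler assumed in this example takes a sample count as its third argument (unlike the random-state variant in Example #8). A minimal, purely illustrative sketch:

import numpy as np

def bootstrap(x, y, num_samples):
    # Hypothetical sketch: draw num_samples rows with replacement so the
    # target class can be upsampled to balance the training set.
    indices = np.random.randint(0, len(x), size=num_samples)
    return x[indices], y[indices]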
Example #10
def random_forest(N, M, F, table, attr_indexes, attr_domains, class_index,
                  strat_index):
    random.shuffle(table)
    test, remainder = test_remainder_stratified(table, strat_index)
    boot_samples = []
    attr_subsets = []
    trees = []
    # set up bootstrap samples
    for _ in range(N):
        attr_subsets.append(utils.rand_attributes(attr_indexes, F))
        boot = utils.bootstrap(remainder)
        valid = []
        #build validator set
        for item in remainder:
            if item not in boot:
                valid.append(item)
        boot_samples.append([boot, valid])

    #build trees
    for i in range(N):
        #returns predictions, tree
        pred, tree = train_test_tree(boot_samples[i][0], boot_samples[i][1],
                                     attr_subsets[i], attr_domains,
                                     class_index)
        correct = 0
        for j in range(len(boot_samples[i][1])):
            if boot_samples[i][1][j][class_index] == pred[j]:
                correct += 1
        trees.append([tree, utils.div(correct, len(boot_samples[i][1]))])

    trees.sort(key=lambda x: x[1])
    mtrees = trees[len(trees) - M:]

    #predict and determine accuracy
    print("     grouping test set")
    minutes, groups = utils.groupBy(test, 1)
    print("     running classifier")
    accuracies = []
    overall_correct = 0
    total_instance = len(test)

    for count in range(len(minutes)):
        correct = 0
        for item in groups[count]:
            votes = []
            for tree in mtrees:
                votes.append(classify_tdidt(tree[0], item))
            vote = utils.majority_vote(votes)
            if item[class_index] == vote:
                correct += 1
                overall_correct += 1
        accuracies.append([
            minutes[count], correct / len(groups[count]), correct,
            len(groups[count])
        ])

    print("Sorting accuracies")
    accuracies.sort(key=lambda x: x[0])
    count = 0
    for item in accuracies:
        print('Minute: ', item[0])
        print('     Accuracy: ', item[1])
        print('     Correct: ', item[2])
        print('     Instances: ', item[3])
        print()
        count += 1
    print("Overll Accurracy: ", overall_correct / total_instance)
    print("Instances: ", total_instance)
    print("Correct: ", overall_correct)

    return accuracies
Example #11
def resample(models, lmd, X, z, nboots, split_size=0.2):
    """ Dictionaires to keep track of the results  """
    z_test = {"ridge": [], "lasso": [], "ols": []}
    z_pred_test = {"ridge": [], "lasso": [], "ols": []}

    bias = {"ridge": [], "lasso": [], "ols": []}
    var = {"ridge": [], "lasso": [], "ols": []}
    beta = {"ridge": [], "lasso": [], "ols": []}

    mse_test = {"ridge": [], "lasso": [], "ols": []}
    # r2_test = {"ridge": [], "lasso": [], "ols": []}
    # ----------------------
    mse_train = {"ridge": [], "lasso": [], "ols": []}
    # r2_train = {"ridge": [], "lasso": [], "ols": []}

    np.random.seed(2018)

    # Split the data into train and test sets
    X_train, X_test, z_train, z_test_ = train_test_split(X,
                                                         z,
                                                         test_size=split_size)

    # # extract data from design matrix
    # x = X[:, 1]
    # y = X[:, 2]
    # x_test = X_test[:, 1]
    # y_test = X_test[:, 2]

    for name, model in models.items():
        # create a model with the previously known best lmd
        estimator = model(lmd[name])
        # Train a model for this pair of lambda and random state,
        # keeping information for the test set
        estimator.fit(X_train, z_train)
        z_pred_test_ = np.empty((z_test_.shape[0], nboots))
        z_pred_train_ = np.empty((z_train.shape[0], nboots))
        beta_ = np.empty((X.shape[1], nboots))
        for i in range(nboots):
            X_, z_ = bootstrap(
                X_train, z_train,
                i)  # i is now also the random state for the bootstrap

            estimator.fit(X_, z_)
            # Evaluate the new model on the same test data each time.
            z_pred_test_[:, i] = np.squeeze(estimator.predict(X_test))
            z_pred_train_[:, i] = np.squeeze(estimator.predict(X_train))
            beta_[:, i] = np.squeeze(estimator.coef_)

        beta[name] = beta_
        z_pred_test[name] = z_pred_test_

        z_test_ = z_test_.reshape((z_test_.shape[0], 1))
        z_test[name] = z_test_
        mse_test[name] = (np.mean(
            np.mean((z_test_ - z_pred_test_)**2, axis=1, keepdims=True)))
        bias[name] = np.mean(
            (z_test_ - np.mean(z_pred_test_, axis=1, keepdims=True))**2)
        var[name] = np.mean(np.var(z_pred_test_, axis=1, keepdims=True))

        z_train = z_train.reshape((z_train.shape[0], 1))
        mse_train[name] = np.mean(
            np.mean((z_train - z_pred_train_)**2, axis=1, keepdims=True))

        # print('Error:', mse_test)
        # print('Bias^2:', bias)
        # print('Var:', var)
        # print('{} >= {} + {} = {}'.format(mse_test, bias, variance, bias + variance))

        # plt.figure(1, figsize=(11, 7))
        # plt.subplot(121)
        # plt.plot(x, z, label='f')
        # plt.scatter(x_test, z_test, label='Data points')
        # plt.scatter(x_test, np.mean(z_pred, axis=1), label='Pred')
        # plt.legend()
        # plt.xlabel('x')
        # plt.ylabel('z')
        #
        # plt.subplot(122)
        # plt.plot(y, z, label='f')
        # plt.scatter(y_test, z_test, label='Data points')
        # plt.scatter(y_test, np.mean(z_pred, axis=1), label='Pred')
        # plt.legend()
        # plt.xlabel('y')
        # plt.ylabel('z')
        # plt.show()

        # Confidence intervals
        ci_beta = np.empty((2, beta_.shape[0]))
        poly = []
        for p in range(beta_.shape[0]):
            ci_beta[:, p] = np.array(ci(beta_[p, :])).T
            poly.append(p)

        # plt.plot(poly, ci_beta[0, :], label='Upper CI (95%)')  # --> Vise i tabell
        # plt.plot(poly, np.mean(beta, axis=1), label='Beta')
        # plt.plot(poly, ci_beta[1, :], label='Lower CI (95%)')
        # plt.legend()
        # plt.show()

    return z_test, z_pred_test, bias, var, beta, mse_test, mse_train, ci_beta
Example #12
File: boot.py Project: dafa321/ib_tools
import sys
sys.path.append('/home/tomek/ib_tools/')

import pickle
from functools import partial
from utils import breakout_strategy, bootstrap, m_proc  # noqa: E402
from datastore_pytables import Store  # noqa: E402

store = Store()

contract = store.read('/cont/min/NQ_20191220_GLOBEX_USD').sort_index()

table = bootstrap(contract, start='20180701', end='20181231', paths=100)

func = partial(
    breakout_strategy,
    time_int=30,
    periods=[
        5,
        10,
        20,
        40,
        80,
        160,
    ],
    ema_fast=10,
    ema_slow=120,
    atr_periods=80,
    sl_atr=1,
)

results = m_proc(table, func)