def fit(self, X):
    from numpy.random.mtrand import RandomState
    min_width = np.repeat(sys.float_info.max, len(X) - 1)
    randomizer = RandomState(111)
    if self.initial_ordering is not None:
        ordering = self.initial_ordering
        assert len(ordering) == len(X), \
            'initial_ordering has wrong size'
    else:
        ordering = np.arange(len(X), dtype=int)
    # try n_trials random orderings and keep the one whose sorted boundary
    # widths are lexicographically smallest
    for i in range(self.n_trials):
        final_ordering, bd, labels, prs = self._fit_once(X, ordering)
        width = np.sort(bd)[::-1]
        if lt_lex(width, min_width):
            best_order = final_ordering
            best_bd = bd
            best_labels = labels
            best_pinch_ratios = prs
            min_width = width
        randomizer.shuffle(ordering)
    self._ordering = best_order
    self._boundary = best_bd
    self.labels_ = best_labels
    self._pinch_ratios = best_pinch_ratios
    self._width = min_width
def rarefaction_keep(M, RID, reads=0, iters=0, myLambda=0.1):
    global curSamples, totSamples
    noccur = np.sum(M, axis=1)  # number of occurrences for each sample
    nvar = M.shape[1]  # number of variables
    nsamp = M.shape[0]  # number of samples
    Mrarefied = np.empty_like(M)
    for i in range(nsamp):
        # pseudocount-smoothed probabilities for this sample
        p = (M[i] + myLambda) / (float(noccur[i]) + nvar * myLambda)
        myArr = np.zeros(nvar)
        for n in range(iters):
            prng = RandomState()
            choice = prng.choice(nvar, size=reads, replace=True, p=p)
            binArr = np.bincount(choice, minlength=nvar)
            if n == 0:
                myArr = binArr
            else:
                myArr = np.vstack((myArr, binArr))
        if iters > 1:
            Mrarefied[i] = np.mean(myArr, axis=0)
        else:
            Mrarefied[i] = myArr
        curSamples[RID] += 1
        functions.setBase(RID, 'Step 2 of 6: Sub-sampling data...\nSub-sampling is complete for ' +
                          str(curSamples[RID]) + ' out of ' + str(totSamples[RID]) + ' samples')
    return Mrarefied
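# A minimal, self-contained sketch of the resampling step above, assuming only
# numpy: counts for one sample are smoothed with a pseudocount (my_lambda),
# turned into probabilities, and `reads` observations are redrawn with
# replacement. The names rarefy_one_sample, counts and my_lambda are
# illustrative, not taken from the original module.
import numpy as np
from numpy.random import RandomState

def rarefy_one_sample(counts, reads, my_lambda=0.1, seed=0):
    counts = np.asarray(counts, dtype=float)
    p = (counts + my_lambda) / (counts.sum() + len(counts) * my_lambda)
    prng = RandomState(seed)
    drawn = prng.choice(len(counts), size=reads, replace=True, p=p)
    return np.bincount(drawn, minlength=len(counts))

# e.g. rarefy_one_sample([40, 10, 0, 50], reads=20) returns a length-4 count
# vector that sums to 20.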
def __init__(self, h, cutoff=0.2, seed=dfseed):
    dummy = RandomState(seed)
    s1, s2, s3 = 0, 0, 0
    while s1 == s2 or s1 == s3 or s2 == s3:
        s1, s2, s3 = dummy.randint(low=4294967296, size=3)
    self.__dxy = randxy(s1, cutoff)
    self.__site = randsite(s2, h.dim)
    self.__acc = randgen(s3)
def create_nonce():
    """
    Create a random string.

    :return: a random string
    """
    # Note: RandomState is not a cryptographically secure generator, so this
    # nonce is only suitable where unpredictability is not a security concern.
    rand = RandomState()
    lo = 1000000000000000
    hi = 999999999999999999
    # tobytes() replaces the deprecated ndarray.tostring()
    return b2a_hex(rand.randint(lo, hi, 3).tobytes())[:32]
def job_pick_and_move(path_src, path_dst, filenames, labels):
    rs = RandomState(0)
    bools = [labels == l for l in range(5)]
    indices_lists = [x.nonzero()[0] for x in bools]
    permuted_indices = [rs.permutation(x) for x in indices_lists]
    permuted_indices_top700 = [x[:700] for x in permuted_indices]
    for i in range(5):
        for s in filenames[permuted_indices_top700[i]]:
            shutil.copy(os.path.join(path_src, s + ".png"),
                        os.path.join(path_dst, s + "_" + str(i) + ".png"))
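# Sketch of the per-class subsampling pattern used above (permute the indices
# of each class, then keep the first N), without the file copying. The
# function name pick_indices_per_class and its arguments are illustrative
# assumptions, not part of the original script.
import numpy as np
from numpy.random import RandomState

def pick_indices_per_class(labels, n_classes, n_per_class, seed=0):
    rs = RandomState(seed)
    picked = []
    for c in range(n_classes):
        class_indices = np.nonzero(labels == c)[0]
        picked.append(rs.permutation(class_indices)[:n_per_class])
    return picked

# e.g. pick_indices_per_class(np.array([0, 1, 0, 2, 1, 0]), n_classes=3, n_per_class=1)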
def get_indices(self):
    """
    Get data indices

    :rtype: numpy.array
    """
    if self._indices is None:
        rs = RandomState(seed=self._random_state)
        self._indices = rs.permutation(len(self))
    return self._indices
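# A quick illustration of the seeding behaviour relied on above: two
# RandomState instances built from the same seed produce the same
# permutation, so the cached indices are reproducible across runs.
import numpy as np
from numpy.random import RandomState

perm_a = RandomState(seed=42).permutation(10)
perm_b = RandomState(seed=42).permutation(10)
assert np.array_equal(perm_a, perm_b)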
def generate_binned_dataset(n_samples, n_bins):
    """useful function, generates dataset with bins, groups, random weights.
    This is used to test correlation functions."""
    random = RandomState()
    y = random.uniform(size=n_samples) > 0.5
    pred = random.uniform(size=(n_samples, 2))
    weights = random.exponential(size=(n_samples,))
    bins = random.randint(0, n_bins, n_samples)
    groups = bin_to_group_indices(bin_indices=bins, mask=(y == 1))
    return y, pred, weights, bins, groups
def test_state_setter_getter(fname):
    # make sure the presence of custom __setstate__, __getstate__ methods
    # is honored -- numpy's RNGs have it
    from numpy.random.mtrand import RandomState
    r = RandomState()
    h5save(fname, r)
    rl = h5load(fname)
    rl_state = rl.get_state()
    for i, v in enumerate(r.get_state()):
        assert_array_equal(v, rl_state[i])
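# The serialization being tested above relies on RandomState exposing its full
# generator state. A minimal sketch of the same round trip without the
# h5save/h5load helpers, using only numpy:
import numpy as np
from numpy.random import RandomState

r = RandomState(123)
saved_state = r.get_state()   # ('MT19937', keys, pos, has_gauss, cached_gaussian)
first_draw = r.uniform(size=3)

r2 = RandomState()
r2.set_state(saved_state)     # restore the saved state into a fresh generator
np.testing.assert_array_equal(first_draw, r2.uniform(size=3))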
def test_fast_cvm(n_samples=1000):
    random = RandomState()
    data1 = random.uniform(size=n_samples)
    weights1 = random.uniform(size=n_samples)
    mask = random.uniform(size=n_samples) > 0.5
    data2 = data1[mask]
    weights2 = weights1[mask]
    a = cvm_2samp(data1, data2, weights1, weights2)
    prepared_data1, prepared_weights1, F1 = prepare_distribution(data1, weights1)
    b = _cvm_2samp_fast(prepared_data1, data2, prepared_weights1, weights2, F1=F1)
    assert numpy.allclose(a, b)
def create_icon(filename):
    rand = RandomState()
    lo = 1000000000000000
    hi = 999999999999999999
    # tobytes() replaces the deprecated ndarray.tostring()
    random_string = binascii.b2a_hex(rand.randint(lo, hi, 4).tobytes())[:64]
    time_millis = int(round(time.time() * 1000))
    v = visicon.Visicon(random_string, str(time_millis), 128)
    i = v.draw_image()
    i.save(filename)
def __call__(self, particle, rand):
    """Get the next velocity from this particle given a particle that it
    should be moving toward"""
    # I'm not sure what "given a particle that it should be moving toward"
    # means.  We only take one argument, and that's "this particle".

    # In a Bypass mrs implementation (and possibly also with Serial),
    # Kalman motion should work just fine.  However, in parallel, the state
    # required for each particle's motion is not persistent.  We would need
    # to add a "motion state" field to the particle, or something similar,
    # so that each Slave task can access the state that is supposed to be
    # building up in the Kalman filter.

    # Note that care needs to be taken in speculative methods to not
    # clobber the state that is passed around, or superfluously add to the
    # state when other particles are evaluating the motion, or when
    # speculative children are being moved.  This needs to be rethought to
    # be compatible with speculative evaluation.
    raise NotImplementedError("Kalman motion requires state that is not "
                              "persistent in mrs")

    # Note: everything below is unreachable while the NotImplementedError
    # above is raised; it is kept as a reference implementation.
    kalman = self.getfilter(particle, rand)
    grel = particle.nbestpos - particle.pos
    if self.norandscale:
        newvel = 1.0 * grel
    else:
        newvel = rand.uniform(0, 2) * grel
    if self.restrictvel:
        self.cube.constrain_vec(newvel, True)
    newpos = particle.pos + newvel
    if not self.usepbest:
        kalman.add(newpos)
    else:
        kalman.add(array(list(newpos) + list(particle.pbestpos)))
    if self.predict:
        mean, var = kalman.predict()
    else:
        mean, var = kalman.filt()
    # Bad!  We should find a better way to initialize the random state
    # instead of just drawing a random number from the particle rand.
    # This does give reproducible results, it just makes the random numbers
    # from state less good.
    state = RandomState(rand.randint(0, sys.maxsize))
    newstate = state.multivariate_normal(mean, var)
    return array(newstate[:self.dims]), array(newstate[self.dims:])
def test_compute_cut():
    random = RandomState()
    predictions = random.permutation(100)
    labels = numpy.ones(100)
    for eff in [0.1, 0.5, 0.75, 0.99]:
        cut = compute_cut_for_efficiency(eff, labels, predictions)
        assert numpy.sum(predictions > cut) / len(predictions) == eff, 'the cut was set wrongly'

    weights = numpy.array(random.exponential(size=100))
    for eff in random.uniform(size=100):
        cut = compute_cut_for_efficiency(eff, labels, predictions, sample_weight=weights)
        lower = numpy.sum(weights[predictions > cut + 1]) / numpy.sum(weights)
        upper = numpy.sum(weights[predictions > cut - 1]) / numpy.sum(weights)
        assert lower < eff < upper
def test_msee_computations(size=1000, n_bins=10):
    random = RandomState()
    testY = random.uniform(size=size) > 0.5
    pred = random.uniform(size=(size, 2))
    weights = random.exponential(size=size)
    bins = random.randint(0, n_bins, size)
    target_efficiencies = [0.5, 0.6]
    groups = [numpy.where(testY & (bins == bin))[0] for bin in range(n_bins)]
    x1 = compute_msee_on_bins(pred[:, 1], testY, bin_indices=bins,
                              target_efficiencies=target_efficiencies, sample_weight=weights)
    x2 = compute_msee_on_groups(pred[:, 1], testY, groups=groups,
                                target_efficiencies=target_efficiencies, sample_weight=weights)
    assert abs(x1 - x2) < 1e-6, "MSE are different"
    print("MSE variation is ok")
def __init__(self, athlete, queue, ID, rate=20, noise=0.3, verbose=False, seed=None):
    """
    Sensor class which gets position measurements from athlete, adds noise
    and collects them in a queue.

    :param athlete: object yielding position data when called
    :param queue: queue to which the measurements are added
    :param ID: sensor ID
    :param rate (optional): sampling rate of sensor in Hz, default: 20
    :param noise (optional): standard deviation of noise on measurement in meters, default: 0.3
    :param verbose (optional): verbosity of sensor, default: False
    :param seed (optional): seed of noise generation, default: None
    """
    super(Sensor, self).__init__()
    self.queue = queue
    self.ID = ID
    self.athlete = athlete
    self.rate = rate
    self.deltat = 1. / self.rate
    self.noise = noise
    self.verbose = verbose
    self.rs = RandomState(seed)
    self.running = Event()
def test_binner():
    """This function tests binner class"""
    random = RandomState()
    binner = Binner(random.permutation(30), 3)
    assert numpy.all(binner.limits > [9, 19]), 'failed on the limits'
    assert numpy.all(binner.limits < [10, 20]), 'failed on the limits'
    bins = binner.get_bins([-1000, 1000, 0, 10, 20, 9.0, 10.1, 19.0, 20.1])
    assert numpy.all(bins == [0, 2, 0, 1, 2, 0, 1, 1, 2]), 'wrong binning'

    binner = Binner(random.permutation(20), 5)
    p = random.permutation(40)
    # checking whether binner preserves correspondence
    list1 = list(binner.split_into_bins(numpy.array(range(-10, 30))[p], numpy.array(range(0, 40))[p]))
    for a, b in list1:
        for x, y in zip(a, b):
            assert x + 10 == y, 'transpositions are wrong after binning'

    binner = Binner(random.permutation(30), 3)
    result2 = list(binner.split_into_bins(range(10, 20)))
    answer2 = [[], range(10, 20), []]
    for a, b in zip(result2, answer2):
        for x, y in zip(a[0], b):
            assert x == y, 'binning is wrong'

    result3 = list(binner.split_into_bins(random.permutation(45)))
    answer3 = list(binner.split_into_bins(range(45)))
    for x, y in zip(result3, answer3):
        assert set(x[0]) == set(y[0]), "binner doesn't work well with permutations"
    print('binner is ok')
def __init__(self, left_num_neurons, right_num_neurons, transfer_function):
    """
    Initializes a VectorNeuron with a weight matrix of size
    (left_num_neurons, right_num_neurons), a bias vector of size
    (right_num_neurons, 1), and a transfer function transfer_function.
    """
    print('>>> Creating VectorNeuron: (%s, %s) %s' %
          (left_num_neurons, right_num_neurons, transfer_function))
    self.__weight_matrix = Matrix(rand(right_num_neurons, left_num_neurons))
    self.__weight_matrix_backup = self.__weight_matrix.copy()
    self.__bias_vector = Matrix(rand(right_num_neurons, 1))
    self.__delta_w_matrix = Matrix(rand(right_num_neurons, left_num_neurons))
    self.__mersenne_twister = MersenneTwister()
    self.__mersenne_twister.seed(int(1000 * time.time()))
    self.__transfer_function = transfer_function
def check_weighted_percentile(size=100, q_size=20):
    random = RandomState()
    array = random.permutation(size)
    quantiles = random.uniform(size=q_size)
    q_permutation = random.permutation(q_size)
    result1 = weighted_percentile(array, quantiles)[q_permutation]
    result2 = weighted_percentile(array, quantiles[q_permutation])
    result3 = weighted_percentile(array[random.permutation(size)], quantiles[q_permutation])
    assert numpy.all(result1 == result2) and numpy.all(result1 == result3), 'breaks on permutations'

    # checks that order is kept
    quantiles = numpy.linspace(0, 1, size * 3)
    x = weighted_percentile(array, quantiles, sample_weight=random.exponential(size=size))
    assert numpy.all(x == numpy.sort(x)), "doesn't preserve order"

    array = numpy.array([0, 1, 2, 5])
    # comparing with simple percentiles
    for x in random.uniform(size=10):
        assert numpy.abs(numpy.percentile(array, x * 100) - weighted_percentile(array, x, old_style=True)) < 1e-7, \
            "doesn't coincide with numpy.percentile"
def __init__(self, subject_column, *args, **kwargs):
    super(_PereiraBenchmark.PereiraExtrapolationCeiling, self).__init__(
        subject_column, *args, **kwargs)
    self._num_subsamples = 10
    self.holdout_ceiling = _PereiraBenchmark.PereiraHoldoutSubjectCeiling(subject_column=subject_column)
    self._rng = RandomState(0)
def __init__(self, lam: Union[float, ndarray, Iterable[float]] = 1.0, seed=None):
    self.lam = lam
    self.rs = RandomState(seed=seed)
def __init__(self, alpha: list, seed=None):
    self.alpha = alpha
    self.rs = RandomState(seed=seed)
def __init__(self, config):
    super(OrganicUserEventCounterModel, self).__init__(config)
    if config.select_randomly:
        self.rng = RandomState(self.config.random_seed)
def __init__(self, config, model):
    super(TorchModel, self).__init__(config)
    self.model = model
    if self.config.select_randomly:
        self.rng = RandomState(self.config.random_seed)
def add_poisson_noise(imgs, seed=123):
    poisson_noise = RandomState(seed).poisson(lam=1, size=imgs.shape)
    return imgs + poisson_noise
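# Illustrative usage of add_poisson_noise, assuming numpy and the function
# above are available: with a fixed seed the noise pattern is reproducible,
# so calling it twice on the same array yields identical results.
import numpy as np

clean = np.zeros((2, 3, 3))
noisy_a = add_poisson_noise(clean, seed=123)
noisy_b = add_poisson_noise(clean, seed=123)
assert np.array_equal(noisy_a, noisy_b)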
# Define the inputs
# (w1 and w2 are assumed to be weight variables defined earlier)
x = tf.placeholder(tf.float32, shape=(None, 2), name="x-input")
y_ = tf.placeholder(tf.float32, shape=(None, 1), name="y-input")
a = tf.matmul(x, w1)
y = tf.matmul(a, w2)

# Define the loss function
cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
# Define the optimization algorithm
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

# Generate a synthetic dataset: the label is 1 when x1 + x2 < 1
rdm = RandomState(1)
dataset_size = 10000
X = rdm.rand(dataset_size, 2)
Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]
batch_size = 128

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print(sess.run(w1))
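# The snippet above assumes the weight variables w1 and w2 are defined before
# the graph is built. A minimal sketch of what they typically look like in
# this TF1 tutorial pattern; the shapes and seeds below are assumptions, not
# taken from the original code.
import tensorflow as tf

w1 = tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
w2 = tf.Variable(tf.random_normal([3, 1], stddev=1, seed=1))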
def __init__(self, *args, **kwargs):
    super(_Fedorenko2016.ElectrodeExtrapolation, self).__init__(*args, **kwargs)
    self._rng = RandomState(0)
    self._num_samples = 15  # number of samples per electrode selection
def main():
    config = Configuration()

    y_train = np.load(config.training_data_folder + 'train_labels.npy')  # labels of the training data
    x_train = np.load(config.training_data_folder + 'train_features.npy')  # features of the training data
    feature_names = np.load(config.training_data_folder + 'feature_names.npy')
    failure_times_train = np.load(config.training_data_folder + 'train_failure_times.npy')
    window_times_train = np.load(config.training_data_folder + 'train_window_times.npy')

    # get unique classes
    classes = np.unique(y_train)

    print('Number of examples in training data set:', x_train.shape[0])
    print('Reducing to', config.examples_per_class, 'examples per class with', len(classes), 'classes')

    # for each class get the indices of all examples with this class
    indices_of_classes = []
    for c in classes:
        indices_of_classes.append(np.where(y_train == c)[0])

    # reduce classes to equally many examples
    new_indices = []
    ran = RandomState(config.random_seed_index_selection)
    for i in range(len(indices_of_classes)):
        length = len(indices_of_classes[i])

        # if there are fewer examples than required per class, only those can be used
        epc = config.examples_per_class if config.examples_per_class < length else length

        temp = ran.choice(indices_of_classes[i], epc, replace=False)
        # print(len(indices_of_classes[i]), len(temp))
        new_indices.append(temp)

    casebase_features_list = []
    casebase_labels_list = []
    casebase_failures_list = []
    casebase_window_times_list = []

    # extract the values at the selected indices and add them to the lists
    for i in range(len(classes)):
        casebase_labels_list.extend(y_train[new_indices[i]])
        casebase_features_list.extend(x_train[new_indices[i]])
        casebase_failures_list.extend(failure_times_train[new_indices[i]])
        casebase_window_times_list.extend(window_times_train[new_indices[i]])

    # transform the lists of values back into arrays and save them to files
    casebase_labels = np.stack(casebase_labels_list, axis=0)
    casebase_features = np.stack(casebase_features_list, axis=0)
    casebase_failures = np.stack(casebase_failures_list, axis=0)
    casebase_window_times = np.stack(casebase_window_times_list, axis=0)

    print('Number of examples in case base:', casebase_features.shape[0])

    np.save(config.case_base_folder + 'train_features.npy', casebase_features.astype('float32'))
    np.save(config.case_base_folder + 'train_labels.npy', casebase_labels)
    np.save(config.case_base_folder + 'train_failure_times.npy', casebase_failures)
    np.save(config.case_base_folder + 'train_window_times.npy', casebase_window_times)

    files_to_copy = ['feature_names.npy', 'test_labels.npy', 'test_features.npy', 'test_window_times.npy',
                     'test_failure_times.npy', 'FailureMode_Sim_Matrix.csv', 'Lokalization_Sim_Matrix.csv',
                     'Condition_Sim_Matrix.csv']

    for file in files_to_copy:
        copyfile(config.training_data_folder + file, config.case_base_folder + file)
def __init__(self, seed=None):
    self.rs = RandomState(seed=seed)
def __init__(self, config=Configuration(random_args)):
    super(RandomAgent, self).__init__(config)
    self.rng = RandomState(config.random_seed)
def __init__(self, probabilities: numpy.array, seed=None):
    self.probabilities = probabilities
    self.a = numpy.arange(len(probabilities))
    self.lb = 0
    self.ub = len(probabilities) - 1
    self.rs = RandomState(seed=seed)
def __init__(self, a: Union[float, ndarray, Iterable[float]], seed=None):
    self.a = a
    self.rs = RandomState(seed=seed)
def __init__(self, lb: int, ub: int, seed=None):
    self.lb = lb
    self.ub = ub
    self.rs = RandomState(seed=seed)
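# The constructors above (lam, alpha, probabilities, lb/ub, ...) each keep a
# distribution parameter plus a seeded RandomState. A minimal sketch of the
# sampling calls such wrappers presumably make; the variable names and
# parameter values below are illustrative assumptions, not taken from the
# original classes.
from numpy.random import RandomState

rs = RandomState(seed=0)
poisson_draws = rs.poisson(lam=1.0, size=5)                  # Poisson(lam)
dirichlet_draw = rs.dirichlet(alpha=[1.0, 2.0, 3.0])         # Dirichlet(alpha)
categorical_draw = rs.choice([0, 1, 2], p=[0.2, 0.3, 0.5])   # discrete with given probabilities
uniform_int_draw = rs.randint(low=0, high=10 + 1)            # integer in [lb, ub]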
def train(config, model_dir, train_src, train_tgt, valid_src, valid_tgt, batch_max_words, batch_max_sentences, epochs, test_sentences, test_interval, valid_freq, keep_models, patience, max_words, learning_rate, max_seconds, exit_status_max_train, anneal_restarts, anneal_decay, override_learning_rate, valid_ref, lc_bleu, stop_on_cost): start = time.time() state = TrainingState() state.learning_rate = learning_rate log.info('hostname: %s', socket.gethostname()) x_vocab = Vocab(vocab_path=os.path.join(model_dir, 'x_vocab.txt')) y_vocab = Vocab(vocab_path=os.path.join(model_dir, 'y_vocab.txt')) cnn_mt = ConvolutionalMT(config, x_vocab, y_vocab) model_file = find_latest_model(model_dir) if model_file: compat.load_params(cnn_mt, model_file) state_path = state.path_for_model(model_file) if os.path.exists(state_path): state.load(state_path) if state.learning_rate != learning_rate: if override_learning_rate: log.info('overriding saved learning rate {} to {}'.format( state.learning_rate, learning_rate)) state.learning_rate = learning_rate else: log.warning('using saved learning rate {}'.format( state.learning_rate)) else: log.warning('no training state file found for model!') state.training_iteration = model_iter_from_path(model_file) log.info('TrainingState: {}'.format(state.format_for_log())) log.info('using {} for stopping criteria'.format('cost' if stop_on_cost else 'bleu')) next_test_cycle = test_interval early_stop = False train_seconds = state.total_train_seconds # Get a different random state to avoid seeing the same shuffled batches # on restart. We want to see different data, especially for large datasets. random_state = RandomState() log.info('preparing training batches...') train_dataset = XYDataset(train_src, train_tgt, x_vocab, y_vocab, max_words_per_sentence=max_words, max_words_per_batch=batch_max_words, max_sentences_per_batch=batch_max_sentences, random_state=random_state) log.info('preparing validation batches...') valid_xy_dataset = XYDataset(valid_src, valid_tgt, x_vocab, y_vocab, max_words_per_sentence=max_words, max_words_per_batch=batch_max_words, max_sentences_per_batch=batch_max_sentences, random_state=None) valid_x_dataset = XDataset(valid_src, x_vocab, config.num_positions, max_words_per_batch=batch_max_words, max_sentences_per_batch=batch_max_sentences) log.info('starting train loop...') log.info('process memory at start of train loop: {:.2f} GB'.format( used_memory_in_gigabytes())) while state.completed_epochs < epochs: epoch_cost = 0 for batch in train_dataset(): x, x_mask, y, y_mask = batch elapsed = time.time() - start if max_seconds and elapsed > max_seconds: log.info('%d seconds elapsed in train()', elapsed) log.info('exiting with status %d', exit_status_max_train) exit(exit_status_max_train) state.training_iteration += 1 cnn_mt.set_learning_rate(state.learning_rate) batch_cost = cnn_mt.train(x, x_mask, y, y_mask) epoch_cost += batch_cost next_test_cycle -= 1 if next_test_cycle == 0: test(cnn_mt, x_vocab, y_vocab, test_sentences, max_words) next_test_cycle = test_interval if state.training_iteration % valid_freq == 0: log.info('BEGIN Validating') valid_cost = dataset_cost(cnn_mt, valid_xy_dataset) state.validation_costs.append(float(valid_cost)) new_best = False bleu, bleu_s, max_bleu_s = -1.0, '?????', '?????' 
if valid_ref: bleu, bleu_line = compute_greedy_bleu(cnn_mt, valid_x_dataset, valid_ref, lc_bleu, max_words) log.info(bleu_line) state.validation_bleus.append(bleu) bleu_s = '{:05.2f}'.format(bleu) max_bleu_s = '{:05.2f}'.format(max(state.validation_bleus)) if stop_on_cost: if valid_cost <= min(state.validation_costs): state.bad_counter = 0 new_best = True else: if bleu >= max(state.validation_bleus): state.bad_counter = 0 new_best = True log.info('END Validating') ts = train_seconds + int(time.time() - start) log.info('bleu{} {:5s} max {:5s} cost {:f} min {:f} bad_counter {:d} lr {:f} ' 'iter {:d} completed_epochs: {:d} train_secs {:d}'.format( '-lc' if lc_bleu else '', bleu_s, max_bleu_s, valid_cost, min(state.validation_costs), state.bad_counter, state.learning_rate, state.training_iteration, state.completed_epochs, ts)) model_src = save_model(cnn_mt, model_dir, keep_models, state, train_seconds + int(time.time() - start)) if new_best: log.info('New best model; saving model') model_dst = os.path.join(model_dir, 'model') copy_checkpoint(model_src, model_dst) else: state.bad_counter += 1 if state.bad_counter > patience: if state.anneal_restarts_done < anneal_restarts: log.info('No progress on the validation set, annealing learning ' 'rate and resuming from best params.') state.learning_rate *= anneal_decay log.info('new learning rate: {:f}'.format(state.learning_rate)) state.anneal_restarts_done += 1 state.bad_counter = 0 best_model_path = os.path.join(model_dir, 'model') compat.load_params(cnn_mt, best_model_path) else: log.info('Early Stop!') early_stop = True break if early_stop: # Non-zero exit status to prevent dependent queue # jobs from executing. exit(1) state.completed_epochs += 1 log.info('epoch %d, epoch cost %f', state.completed_epochs, epoch_cost) log.info('process memory at end of epoch: {:.2f} GB'.format( used_memory_in_gigabytes())) log.info('process memory at end of training: {:.2f} GB'.format( used_memory_in_gigabytes())) log.info('training ends')
def __init__(self, seed, epsilon, num_action):
    self.name = "epsilon-Greedy Agent"
    self.np_random = RandomState(seed)
    self.epsilon = epsilon
    self.RVS = [0 for i in range(num_action)]
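# A sketch of how an epsilon-greedy agent with the attributes above would
# typically pick an action: explore uniformly with probability epsilon,
# otherwise exploit the current value estimates in RVS. The function name
# select_action is an assumption for illustration, not the original API.
import numpy as np

def select_action(agent):
    if agent.np_random.rand() < agent.epsilon:
        return agent.np_random.randint(len(agent.RVS))   # explore: random arm
    return int(np.argmax(agent.RVS))                     # exploit: best estimate so far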
def __init__(self, seed):
    RandomState.__init__(self, seed)
    # Note: this instance attribute shadows the inherited RandomState.seed()
    # method, so seed(...) can no longer be called on instances.
    self.seed = seed
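# A small sketch of the shadowing issue noted above and one way around it,
# assuming only numpy; the class name SeededState and the attribute _seed are
# illustrative, not from the original code.
from numpy.random import RandomState

class SeededState(RandomState):
    def __init__(self, seed):
        RandomState.__init__(self, seed)
        self._seed = seed          # keep the seed under a different name

    @property
    def initial_seed(self):
        return self._seed

s = SeededState(7)
s.seed(7)                          # the reseeding method is still accessible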
class LikelihoodAgent(Agent):
    def __init__(self, feature_provider, use_argmax=False, seed=43):
        self.feature_provider = feature_provider
        self.use_argmax = use_argmax
        self.random_state = RandomState(seed)
        self.model = None

    @property
    def num_products(self):
        return self.feature_provider.config.num_products

    def _create_features(self, user_state, action):
        """Create the features that are used to estimate the expected reward from the user state."""
        features = np.zeros(len(user_state) * self.num_products)
        features[action * len(user_state):(action + 1) * len(user_state)] = user_state
        return features

    def train(self, logs):
        user_states, actions, rewards, proba_actions = build_rectangular_data(
            logs, self.feature_provider)
        print(user_states)
        features = np.vstack([
            self._create_features(user_state, action)
            for user_state, action in zip(user_states, actions)  # should be the enumerate of action
        ])
        self.model = LogisticRegression()
        self.model.fit(features.astype(float), rewards.astype(int))

    def _score_products(self, user_state):
        all_action_features = np.array([
            self._create_features(user_state, action)
            for action in range(self.num_products)
        ])
        temp = self.model.predict_proba(all_action_features)[:, 0]
        return temp

    def act(self, observation, reward, done):
        """Act method returns an action based on current observation and past history"""
        self.feature_provider.observe(observation)
        user_state = self.feature_provider.features(observation)
        prob = self._score_products(user_state)
        try:
            action = self.random_state.choice(self.num_products, p=prob / sum(prob))
            ps = prob[action]
            all_ps = prob.copy()
        except:
            action = np.argmax(prob)
            ps = 1.0
            all_ps = np.zeros(self.num_products)
            all_ps[action] = 1.0
        # ## epsilon greedy is working better, change it after tests
        return {
            **super().act(observation, reward, done),
            **{
                'action': action,
                'ps': ps,
                'ps-a': all_ps,
            }
        }

    def reset(self):
        self.feature_provider.reset()
def __init__(self, feature_provider, use_argmax=False, seed=43):
    self.feature_provider = feature_provider
    self.use_argmax = use_argmax
    self.random_state = RandomState(seed)
    self.model = None
def __init__(self, seed, epsilon):
    self.name = "epsilon-Greedy Agent"
    self.np_random = RandomState(seed)
    self.epsilon = epsilon
def randxy(seed, cutoff=1.0):
    r = RandomState(seed)
    while True:
        yield array(r.uniform(-cutoff, cutoff, 2))
from numpy.random.mtrand import RandomState

from hep_ml.commonutils import generate_sample
from hep_ml.metrics_utils import prepare_distribution, _ks_2samp_fast, ks_2samp_weighted, _cvm_2samp_fast, \
    group_indices_to_groups_matrix
from hep_ml.metrics import KnnBasedSDE, KnnBasedTheil, KnnBasedCvM, \
    BinBasedSDE, BinBasedTheil, BinBasedCvM
from hep_ml.metrics_utils import bin_to_group_indices, compute_bin_indices
from tests._metrics_oldimplementation import compute_sde_on_bins, compute_sde_on_groups, compute_theil_on_bins, \
    compute_theil_on_groups, bin_based_ks, groups_based_ks, cvm_2samp, bin_based_cvm, group_based_cvm, sde, \
    cvm_flatness, theil_flatness

__author__ = 'Alex Rogozhnikov'

random = RandomState()


def generate_binned_dataset(n_samples, n_bins):
    """useful function, generates dataset with bins, groups, random weights.
    This is used to test correlation functions."""
    random = RandomState()
    y = random.uniform(size=n_samples) > 0.5
    pred = random.uniform(size=(n_samples, 2))
    weights = random.exponential(size=(n_samples,))
    bins = random.randint(0, n_bins, n_samples)
    groups = bin_to_group_indices(bin_indices=bins, mask=(y == 1))
    return y, pred, weights, bins, groups


def test_bin_to_group_indices(size=100, bins=10):
def randgen(seed):
    r = RandomState(seed)
    while True:
        yield r.rand()
def __init__(self, config, logreg):
    super(LogregMulticlassModel, self).__init__(config)
    self.logreg = logreg
    if config.select_randomly:
        self.rng = RandomState(self.config.random_seed)
def __init__(self, beta: Union[float, ndarray, Iterable[float]] = 1.0, seed=None):
    self.beta = beta
    self.rs = RandomState(seed=seed)
def randsite(seed, dim):
    r = RandomState(seed)
    while True:
        yield r.randint(dim)
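# How the three infinite generators above (randxy, randsite, randgen) are
# presumably consumed, as in the constructor that stores them: each call to
# next() yields one displacement, one site index, or one acceptance draw.
# The seeds, cutoff and dim below are arbitrary illustration values.
dxy_gen = randxy(seed=12345, cutoff=0.2)
site_gen = randsite(seed=54321, dim=16)
acc_gen = randgen(seed=99999)

step_xy = next(dxy_gen)      # length-2 array in [-cutoff, cutoff)
site_index = next(site_gen)  # integer in [0, dim)
acceptance = next(acc_gen)   # float in [0, 1)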
def __init__(self, *args, **kwargs):
    super(_PereiraBenchmark.PereiraHoldoutSubjectCeiling, self).__init__(*args, **kwargs)
    self._rng = RandomState(0)
    self._num_bootstraps = 5
class AbstractEnv(gym.Env, ABC): def __init__(self): gym.Env.__init__(self) ABC.__init__(self) self.first_step = True self.config = None self.state = None self.current_user_id = None self.current_time = None self.empty_sessions = OrganicSessions() def reset_random_seed(self, epoch=0): # Initialize Random State. assert (self.config.random_seed is not None) self.rng = RandomState(self.config.random_seed + epoch) if self.config.random_seed_for_user is not None: assert isinstance(self.config.random_seed_for_user, int) self.user_rng = RandomState(self.config.random_seed_for_user + epoch) def init_gym(self, args): self.config = Configuration(args) # Defining Action Space. self.action_space = Discrete(self.config.num_products) if 'time_generator' not in args: self.time_generator = DefaultTimeGenerator(self.config) else: self.time_generator = self.config.time_generator # Setting random seed for the first time. self.reset_random_seed() if 'agent' not in args: self.agent = None else: self.agent = self.config.agent # Setting any static parameters such as transition probabilities. self.set_static_params() # Set random seed for second time, ensures multiple epochs possible. self.reset_random_seed() def reset(self, user_id=0): # Current state. self.first_step = True self.state = organic # Manually set first state as Organic. self.time_generator.reset() if self.agent: self.agent.reset() self.current_time = self.time_generator.new_time() self.current_user_id = user_id # Record number of times each product seen for static policy calculation. self.organic_views = np.zeros(self.config.num_products) def generate_organic_sessions(self): # Initialize session. session = OrganicSessions() while self.state == organic: # Add next product view. self.update_product_view() session.next( DefaultContext(self.current_time, self.current_user_id), self.product_view) # Update markov state. self.update_state() return session def step(self, action_id): """ Parameters ---------- action_id : int between 1 and num_products indicating which product recommended (aka which ad shown) Returns ------- observation, reward, done, info : tuple observation (tuple) : a tuple of values (is_organic, product_view) is_organic - True if Markov state is `organic`, False if Markov state `bandit` or `stop`. product_view - if Markov state is `organic` then it is an int between 1 and P where P is the number of products otherwise it is None. reward (tuple) : a tuple of values (click, ctr), ctr is click-through-rate which means the probability of user clicking. if the previous state was `bandit` - then reward is (1, ctr) if the user clicked on the ad you recommended otherwise (0, ctr) `organic` - then reward is (None, None) done (bool) : whether it's time to reset the environment again. An episode is over at the end of a user's timeline (all of their organic and bandit sessions) info (dict) : this is unused, it's always an empty dict """ # No information to return. info = {} if self.first_step: assert (action_id is None) self.first_step = False sessions = self.generate_organic_sessions() return (Observation( DefaultContext(self.current_time, self.current_user_id), sessions), (None, None), self.state == stop, info) assert (action_id is not None) # Calculate reward from action. reward = self.draw_click(action_id) # (click ,ctr) self.update_state() # Markov state dependent logic. 
if self.state == organic: sessions = self.generate_organic_sessions() else: sessions = self.empty_sessions return (Observation( DefaultContext(self.current_time, self.current_user_id), sessions), reward, self.state == stop, info) def step_offline(self, observation, reward, done): """Call step function wih the policy implemented by a particular Agent.""" if self.first_step: action = None else: assert (hasattr(self, 'agent')) assert (observation is not None) if self.agent: action = self.agent.act(observation, reward, done) else: # Select a Product randomly. action = { 't': observation.context().time(), 'u': observation.context().user(), 'a': np.int16(self.rng.choice(self.config.num_products)), 'ps': 1.0 / self.config.num_products, 'ps-a': (np.ones(self.config.num_products) / self.config.num_products if self.config.with_ps_all else ()), } if done: reward = self.draw_click(action['a']) # (click ,ctr) return (action, Observation( DefaultContext(self.current_time, self.current_user_id), self.empty_sessions), reward, done, None) else: observation, reward, done, info = self.step( action['a'] if action is not None else None) return action, observation, reward, done, info def generate_logs(self, num_offline_users: int, agent: Agent = None, num_organic_offline_users: int = 0): """ Produce logs of applying an Agent in the Environment for the specified amount of Users. If the Agent is not provided, then the default Agent is used that randomly selects an Action. """ if agent: old_agent = self.agent self.agent = agent data = { 't': [], 'u': [], 'z': [], 'v': [], 'a': [], 'c': [], 'ctr': [], 'ps': [], 'ps-a': [], } def _store_organic(observation): assert (observation is not None) assert (observation.sessions() is not None) for session in observation.sessions(): data['t'].append(session['t']) data['u'].append(session['u']) data['z'].append('organic') data['v'].append(session['v']) data['a'].append(None) data['c'].append(None) data['ctr'].append(None) data['ps'].append(None) data['ps-a'].append(None) def _store_bandit(action, reward): if action: assert (reward is not None) data['t'].append(action['t']) data['u'].append(action['u']) data['z'].append('bandit') data['v'].append(None) data['a'].append(action['a']) data['c'].append(reward[0]) data['ctr'].append(reward[1]) data['ps'].append(action['ps']) data['ps-a'].append(action['ps-a'] if 'ps-a' in action else ()) unique_user_id = 0 for _ in trange(num_organic_offline_users, desc='Organic Users'): self.reset(unique_user_id) unique_user_id += 1 observation, _, _, _ = self.step(None) _store_organic(observation) for _ in trange(num_offline_users, desc='Users'): self.reset(unique_user_id) unique_user_id += 1 observation, reward, done, _ = self.step(None) while not done: _store_organic(observation) action, observation, reward, done, _ = self.step_offline( observation, reward, done) _store_bandit(action, reward) _store_organic(observation) data['t'] = np.array(data['t'], dtype=np.float32) data['u'] = pd.array(data['u'], dtype=pd.UInt32Dtype()) data['v'] = pd.array(data['v'], dtype=pd.UInt32Dtype()) data['a'] = pd.array(data['a'], dtype=pd.UInt32Dtype()) data['c'] = np.array(data['c'], dtype=np.float32) data['ctr'] = np.array(data['ctr'], dtype=np.float32) if agent: self.agent = old_agent return pd.DataFrame().from_dict(data) def generate_gt( self, num_offline_users: int, ): data = { 't': [], 'u': [], 'z': [], 'v': [], 'a': [], 'c': [], 'ctr': [], 'ps': [], 'ps-a': [], } def _store_organic(observation): assert (observation is not None) assert (observation.sessions() 
is not None) for session in observation.sessions(): data['t'].append(session['t']) data['u'].append(session['u']) data['z'].append('organic') data['v'].append(session['v']) data['a'].append(None) data['c'].append(None) data['ctr'].append(None) data['ps'].append(None) data['ps-a'].append(None) def _store_bandit(action, reward): if action: assert (reward is not None) data['t'].append(action['t']) data['u'].append(action['u']) data['z'].append('bandit') data['v'].append(None) data['a'].append(action['a']) data['c'].append(reward[0]) data['ctr'].append(reward[1]) data['ps'].append(action['ps']) data['ps-a'].append(action['ps-a'] if 'ps-a' in action else ()) unique_user_id = 0 all_actions = np.arange(self.config.num_products) for _ in trange(num_offline_users, desc='Users'): self.reset(unique_user_id) unique_user_id += 1 observation, reward, done, _ = self.step(None) while not done: _store_organic(observation) for action in all_actions: if action == 0: observation, reward, done, info = self.step(0) else: reward = self.draw_click(action) action = { 't': observation.context().time(), 'u': observation.context().user(), 'a': action, 'ps': 1.0, 'ps-a': (np.ones(self.config.num_products) / self.config.num_products if self.config.with_ps_all else ()), } _store_bandit(action, reward) _store_organic(observation) data['t'] = np.array(data['t'], dtype=np.float32) data['u'] = pd.array(data['u'], dtype=pd.UInt32Dtype()) data['v'] = pd.array(data['v'], dtype=pd.UInt32Dtype()) data['a'] = pd.array(data['a'], dtype=pd.UInt32Dtype()) data['c'] = np.array(data['c'], dtype=np.float32) data['ctr'] = np.array(data['ctr'], dtype=np.float32) return pd.DataFrame().from_dict(data)
def __init__(self, k: Union[int, ndarray, Iterable[int]], seed=None):
    self.k = k
    self.rs = RandomState(seed=seed)
class VectorNeuron(object):
    """
    The VectorNeuron class represents a single weight matrix and a
    corresponding transfer function. An input to a VectorNeuron is first
    multiplied by the weight matrix. The result is then fed through a
    transfer function to produce the VectorNeuron's output. The output is
    either the containing neural network's final output or the input to
    another VectorNeuron.
    """

    __weight_matrix = None
    __weight_matrix_backup = None
    __bias_vector = None
    __delta_w_matrix = None
    __result = None
    __transfer_function = ""
    __mersenne_twister = None

    def __init__(self, left_num_neurons, right_num_neurons, transfer_function):
        """
        Initializes a VectorNeuron with a weight matrix of size
        (left_num_neurons, right_num_neurons), a bias vector of size
        (right_num_neurons, 1), and a transfer function transfer_function.
        """
        print('>>> Creating VectorNeuron: (%s, %s) %s' %
              (left_num_neurons, right_num_neurons, transfer_function))
        self.__weight_matrix = Matrix(rand(right_num_neurons, left_num_neurons))
        self.__weight_matrix_backup = self.__weight_matrix.copy()
        self.__bias_vector = Matrix(rand(right_num_neurons, 1))
        self.__delta_w_matrix = Matrix(rand(right_num_neurons, left_num_neurons))
        self.__mersenne_twister = MersenneTwister()
        self.__mersenne_twister.seed(int(1000 * time.time()))
        self.__transfer_function = transfer_function

    def neuron_compute(self, input_matrix):
        """Computes the VectorNeuron output for input_matrix"""
        self.__result = self.__weight_matrix * input_matrix
        current_value = None
        transfer_function = self.__transfer_function
        row_dim = self.__weight_matrix.shape[0]
        input_col_dim = input_matrix.shape[1]
        for i in range(0, row_dim):
            for j in range(0, input_col_dim):
                current_value = self.__result[i, j]
                self.__result[i, j] = self.__bias_vector[i, 0] + current_value
                cmd = "self.%s(current_value)" % transfer_function
                self.__result[i, j] = eval(cmd)

    def compute_delta_w(self, m, lr):
        """Computes new delta_w matrix"""
        k = 0
        delta_w = None
        delta_w_row_dim = self.__delta_w_matrix.shape[0]
        delta_w_col_dim = self.__delta_w_matrix.shape[1]
        for i in range(0, delta_w_row_dim):
            for j in range(0, delta_w_col_dim):
                k = abs(self.__mersenne_twister.randint(0, math.pow(2, 32)) % m)
                if k == 0:
                    delta_w = lr
                elif k == 1:
                    delta_w = -1.0 * lr
                else:
                    delta_w = 0.0
                self.__delta_w_matrix[i, j] = delta_w

    def compute_delta_w_annealing(self, n, m, lr):
        """Computes new delta_w matrix (annealing style)"""
        k = 0
        delta_w = None
        delta_w_row_dim = self.__delta_w_matrix.shape[0]
        for i in range(0, delta_w_row_dim):
            delta_w_matrix_col = self.__delta_w_matrix.shape[1]
            for j in range(0, delta_w_matrix_col):
                k = abs(self.__mersenne_twister.randint(0, math.pow(2, 32)) % m)
                if k < n:
                    if k % 2 == 0:
                        if k == 0:
                            delta_w = lr
                        else:
                            delta_w = lr / k
                    elif k % 2 == 1:
                        delta_w = -1.0 * lr / k
                    else:
                        delta_w = 0.0
                else:
                    delta_w = 0.0
                self.__delta_w_matrix[i, j] = delta_w

    def logsig(self, x):
        """Returns logsig of a single variable x"""
        return 1.0 / (1.0 + exp(-1.0 * x))

    def purelin(self, x):
        """Returns purelin of a single variable x"""
        return x

    def tansig(self, x):
        """Returns tansig of a single variable x"""
        # standard tansig: 2 / (1 + exp(-2x)) - 1
        return 2.0 / (1.0 + exp(-2.0 * x)) - 1.0

    def linsig(self, x):
        """Returns linsig of a single variable x"""
        if x <= 1.0 and x >= -1.0:
            return x
        if x > 1:
            return 1.0
        else:
            return -1.0

    def change_weights(self):
        """Changes weight_matrix by adding delta_w_matrix"""
        # print 'weight_matrix orig'
        # print self.__weight_matrix
        self.__weight_matrix = self.__weight_matrix + self.__delta_w_matrix
        # print 'weight matrix new'
        # print self.__weight_matrix

    def rollback_weights(self):
        """Reset weight_matrix to weight_matrix_backup"""
        # print 'resetting weights'
        self.__weight_matrix = self.__weight_matrix_backup.copy()

    def weight_matrix_backup(self):
        """Copies the current weight_matrix to weight_matrix_backup"""
        self.__weight_matrix_backup = self.__weight_matrix.copy()

    def get_bias(self):
        """Returns the VectorNeuron's bias vector"""
        return self.__bias_vector

    def get_delta_w(self):
        """Returns the computed delta_w matrix used to alter the weights"""
        return self.__delta_w_matrix

    def get_result(self):
        """Returns the output of the VectorNeuron's neuron_compute function"""
        return self.__result

    def get_weight_matrix(self):
        """Returns the VectorNeuron's current weight_matrix"""
        return self.__weight_matrix

    def get_weight_matrix_backup(self):
        """Returns a backup of the VectorNeuron's previous weight_matrix"""
        return self.__weight_matrix_backup

    def get_transfer_function(self):
        """Returns the VectorNeuron's transfer function"""
        return self.__transfer_function

    def write_weight_to_file(self, filename):
        """Write the VectorNeuron's weight_matrix to filename"""
        savetxt(filename, self.__weight_matrix)
        return True

    def write_bias_to_file(self, filename):
        """Write the VectorNeuron's bias vector to filename"""
        savetxt(filename, self.__bias_vector)
        return True
class OrganicUserEventCounterModel(Model):
    """
    Organic Event Count Model (per User).
    """

    def __init__(self, config):
        super(OrganicUserEventCounterModel, self).__init__(config)
        self.rng = RandomState(self.config.random_seed)

    def act(self, observation, features):
        features = features.flatten()
        if self.config.exploit_explore:
            is_explore_case = self.rng.choice(
                [True, False],
                p=[self.config.epsilon, 1 - self.config.epsilon]
            )
            if is_explore_case:
                mask = features == 0
                features[mask] = 1
                features[~mask] = 0
            action_proba = features / np.sum(features)
        else:
            # adding epsilon where we don't explore to force it at some point
            features = self.config.epsilon + features
            action_proba = features / np.sum(features)
            if self.config.reverse_pop:
                action_proba = 1 - action_proba
                action_proba = action_proba / np.sum(action_proba)
        if self.config.select_randomly:
            action = self.rng.choice(self.config.num_products, p=action_proba)
            # if self.config.exploit_explore:
            #     ps = (
            #         (
            #             self.config.epsilon
            #             if is_explore_case else
            #             1 - self.config.epsilon
            #         ) * action_proba[action]
            #     )
            # else:
            ps = action_proba[action]
            if self.config.with_ps_all:
                ps_all = action_proba
            else:
                ps_all = ()
        else:
            action = np.argmax(action_proba)
            ps = 1.0
            if self.config.with_ps_all:
                ps_all = np.zeros(self.config.num_products)
                ps_all[action] = 1.0
            else:
                ps_all = ()
        return {
            **super().act(observation, features),
            **{
                'a': action,
                'ps': ps,
                'ps-a': ps_all,
            },
        }
class Sensor(Thread):
    def __init__(self, athlete, queue, ID, rate=20, noise=0.3, verbose=False, seed=None):
        """
        Sensor class which gets position measurements from athlete, adds noise
        and collects them in a queue.

        :param athlete: object yielding position data when called
        :param queue: queue to which the measurements are added
        :param ID: sensor ID
        :param rate (optional): sampling rate of sensor in Hz, default: 20
        :param noise (optional): standard deviation of noise on measurement in meters, default: 0.3
        :param verbose (optional): verbosity of sensor, default: False
        :param seed (optional): seed of noise generation, default: None
        """
        super(Sensor, self).__init__()
        self.queue = queue
        self.ID = ID
        self.athlete = athlete
        self.rate = rate
        self.deltat = 1. / self.rate
        self.noise = noise
        self.verbose = verbose
        self.rs = RandomState(seed)
        self.running = Event()

    def run(self):
        """
        Run sensor.
        """
        if self.verbose:
            print('Sensor %s started' % self.ID)
        # start time
        time = datetime.now()
        # number of measurements
        i = 0
        while not self.running.is_set():
            # get time of measurement
            t = datetime.now()
            # get data from athlete
            data = self.athlete(t)
            # add noise to position
            pos = data.pos + self.rs.randn(2) * self.noise
            # create MeasurementSpec instance containing ID, position,
            # and time of measurement
            measurement = MeasurementSpec(ID=self.ID, coords=pos, time=t)
            # add measurement to queue
            self.queue.put(measurement)
            # increment number of measurements
            i += 1
            # calculate time to wait to satisfy sampling rate
            timeout = i * self.deltat - (datetime.now() - time).total_seconds()
            self.running.wait(timeout)
        if self.verbose:
            print('Sensor %s stopped' % self.ID)

    def stop(self):
        """
        Stop sensor.
        """
        self.running.set()
def compute_amplitude_prediction_correlations_voltage(pred_fn, examples, n_iterations,
                                                      perturb_fn=None, batch_size=30,
                                                      seed=(2017, 7, 10)):
    """
    Changed function to calculate time-resolved voltage perturbations, and not
    frequency as original in compute_amplitude_prediction_correlations.

    Perturb input amplitudes and compute correlation between amplitude
    perturbations and prediction changes when pushing perturbed input through
    the prediction function.

    For more details, see [EEGDeepLearning]_.

    Parameters
    ----------
    pred_fn: function
        Function accepting a numpy input and returning prediction.
    examples: ndarray
        Numpy examples, first axis should be example axis.
    n_iterations: int
        Number of iterations to compute.
    perturb_fn: function, optional
        Function accepting amplitude array and random generator and returning
        perturbation. Default is Gaussian perturbation.
    batch_size: int, optional
        Batch size for computing predictions.
    seed: int, optional
        Random generator seed

    Returns
    -------
    amplitude_pred_corrs: ndarray
        Correlations between amplitude perturbations and prediction changes
        for all sensors and frequency bins.

    References
    ----------
    .. [EEGDeepLearning] Schirrmeister, R. T., Springenberg, J. T., Fiederer,
       L. D. J., Glasstetter, M., Eggensperger, K., Tangermann, M., ... &
       Ball, T. (2017). Deep learning with convolutional neural networks for
       EEG decoding and visualizations. arXiv preprint arXiv:1703.05051.
    """
    inds_per_batch = get_balanced_batches(n_trials=len(examples), rng=None, shuffle=False,
                                          batch_size=batch_size)
    log.info("Compute original predictions...")
    orig_preds = [pred_fn(examples[example_inds])
                  for example_inds in inds_per_batch]
    orig_preds_arr = np.concatenate(orig_preds)
    rng = RandomState(seed)
    fft_input = np.fft.rfft(examples, axis=2)
    amps = np.abs(fft_input)
    phases = np.angle(fft_input)

    amp_pred_corrs = []
    for i_iteration in range(n_iterations):
        log.info("Iteration {:d}...".format(i_iteration))
        log.info("Sample perturbation...")
        # modified part start: perturb the raw voltages directly
        perturbation = rng.randn(*examples.shape)
        new_in = examples + perturbation
        # modified part end
        log.info("Compute new predictions...")
        new_in = new_in.astype('float32')
        new_preds = [pred_fn(new_in[example_inds])
                     for example_inds in inds_per_batch]
        new_preds_arr = np.concatenate(new_preds)
        diff_preds = new_preds_arr - orig_preds_arr
        log.info("Compute correlation...")
        amp_pred_corr = wrap_reshape_apply_fn(corr, perturbation[:, :, :, 0], diff_preds,
                                              axis_a=(0,), axis_b=(0))
        amp_pred_corrs.append(amp_pred_corr)
    return amp_pred_corrs
def _initialize_random_state(self):
    # fix the seed here so that the same halves are produced for score and ceiling
    return RandomState(seed=0)
def __init__(self, model_identifier, model, file_path, vocab_size=None, block_size=512, max_features=4000):
    assert os.path.isfile(file_path), f"{file_path} is not a file"

    with open(file_path, encoding="utf-8") as f:
        text = f.read()

    # Tokens
    directory, filename = os.path.split(file_path)
    cached_tokens_file = os.path.join(directory, f'cached_lm_{model_identifier}_{block_size}_{filename}')
    if os.path.exists(cached_tokens_file) and os.getenv('NOSAVE', '0') != '1':
        logger.info("Loading tokens from cached file %s", cached_tokens_file)
        with open(cached_tokens_file, 'rb') as handle:
            self.examples = pickle.load(handle)
    else:
        logger.info("Creating tokens from dataset file %s", file_path)
        self.examples = []
        tokenized_text = model.tokenize(text, vocab_size=vocab_size)
        assert tokenized_text.max() < vocab_size
        # Truncate in blocks of block_size.
        # Especially with the small block sizes we end up using together with the
        # "feeding in context one word increments at a time", this is not ideal because the model doesn't see a lot
        # of context. But it's going to be even more compute if we maximize the context per block.
        for i in tqdm(range(0, len(tokenized_text) - block_size + 1, block_size), desc='truncate text into blocks'):
            self.examples.append(model.tokens_to_inputs(tokenized_text[i:i + block_size]))
        # Note that we are losing the last truncated example here for the sake of simplicity (no padding).
        # If your dataset is small, first you should look for a bigger one :-) and second you
        # can change this behavior by adding (model specific) padding.
        if os.getenv('NOSAVE', '0') != '1':
            logger.info("Saving tokens into cached file %s", cached_tokens_file)
            with open(cached_tokens_file, 'wb') as handle:
                pickle.dump(self.examples, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # Features
    cached_features_file = os.path.join(directory, f'cached_lm_features_{model_identifier}_{block_size}_{filename}')
    if os.path.exists(cached_features_file) and os.getenv('NOSAVE', '0') != '1':
        logger.info("Loading features from cached file %s", cached_features_file)
        with open(cached_features_file, 'rb') as handle:
            self.features = pickle.load(handle)
    else:
        self.features = []
        for block in tqdm(self.examples, desc="token blocks to features"):
            # pass tokens to model
            block_features = model(block)
            self.features.append(block_features)
        self.features = np.array(self.features)
        if os.getenv('NOSAVE', '0') != '1':
            logger.info("Saving features into cached file %s", cached_features_file)
            with open(cached_features_file, 'wb') as handle:
                pickle.dump(self.features, handle, protocol=pickle.HIGHEST_PROTOCOL)

    assert len(self.examples) == len(self.features)

    # optional subsampling
    if self.features[0].shape[-1] > max_features:
        indices = np.arange(self.features[0].shape[-1])
        rnd = RandomState(0)
        indices = rnd.choice(indices, size=max_features, replace=False)
        self.subsample = lambda features: features[:, :, indices]
    else:
        self.subsample = lambda features: features