def _generate_data():
    """Build a reproducible pair of coded / un-coded bit sequences.

    A fixed-seed generator draws 100000 un-coded bits and a 6-bit initial
    encoder state; both are handed to ``_convolutional_coder`` (described by
    the original author as a rate (2,3) convolutional coder).

    Returns a ``(coded_bits, un_coded_bits)`` tuple.
    """
    n_bits = 100000
    rng = RandomState(314159265)
    # Draw order fixes the data: message bits first, then the encoder state.
    message = rng.randint(2, size=n_bits)
    start_state = rng.randint(2, size=6)
    encoded = _convolutional_coder(message, start_state)
    return encoded, message
def test_MACD_window_length_generation(self, seed):
    """Check the MACD signal's window length for randomly drawn periods.

    The periods are drawn so that signal < fast < slow; the expected window
    length is ``slow_period + signal_period - 1``.
    """
    rng = RandomState(seed)
    signal = rng.randint(1, 90)
    fast = rng.randint(signal + 1, signal + 100)
    slow = rng.randint(fast + 1, fast + 100)

    macd = MovingAverageConvergenceDivergenceSignal(
        fast_period=fast,
        slow_period=slow,
        signal_period=signal,
    )
    expected_window = slow + signal - 1
    assert_equal(macd.window_length, expected_window)
def align_converge(y_LR, size=64):
    """Iterate until the offset between the two image halves converges.

    ``y_LR`` is a 2-D array holding a left and a right image side by side.
    Random ``size/2`` x ``size/2`` patches of the left half are matched
    against the right half with ``match_template``; the running median of
    the resulting (dy, dx) offsets is returned once two successive
    even-length medians agree, or after more than 100 samples.

    Floor division (``//``) is used for every index so the function works
    on Python 3 as well as Python 2.

    Returns
    -------
    (dy, dx) : pair of ints, the median row / column offset.
    """
    (h, w) = y_LR.shape
    # split image
    y_L = y_LR[:, :w // 2]
    y_R = y_LR[:, w // 2:]
    (h, w) = y_L.shape
    s = size // 2
    # fixed seed keeps the sampled patch locations reproducible
    rand = RandomState(0)
    prev_dx, prev_dy = 0, 0
    series = []
    while True:
        # at a random location in the central region of y_L
        y = rand.randint(h // 4, h * 3 // 4)
        x = rand.randint(w // 4, w * 3 // 4)
        it = y_L[y:y + s, x:x + s]       # take an s x s chunk there
        tm = match_template(y_R, it)     # match it against y_R
        ry, rx = maximum_position(tm)    # max value is location
        series.append(((y - ry), (x - rx)))  # accumulate
        print(series)                    # progress trace of all offsets so far
        n = len(series)
        if n % 2 == 0:
            # take the median
            dy, dx = np.median(np.asarray(series), axis=0).astype(int)
            # NOTE(review): the very first comparison is against the
            # initial (0, 0), so a zero median after two samples returns.
            if n > 100 or (dy == prev_dy and dx == prev_dx):
                return dy, dx
            prev_dy, prev_dx = dy, dx
def createGraph(self):
    """Return a random 4x4 symmetric weight (adjacency) matrix.

    For test cases this method generates a random weight matrix instead of
    a hand-written one. Sixteen integers are drawn from [0, 6) with an
    unseeded generator, arranged as a 4x4 matrix, averaged with their
    transpose to make the graph undirected, and the diagonal is zeroed
    (a zero weight stands for "no edge").
    """
    # Hand-written example kept from the original for reference (0 = no edge):
    #          a  b  c  d
    #    W = [[0, 1, 3, 4],   # a
    #         [1, 0, 2, 1],   # b
    #         [0, 1, 0, 1],   # c
    #         [5, 1, 2, 0]]   # d
    rng = RandomState()  # alternative to random.seed
    flat_weights = rng.randint(0, 6, size=16)
    # inflate the 1-D draw into a 2-D square matrix
    weights = np.array(flat_weights.reshape(4, 4))
    symmetric = (weights + weights.T) / 2
    np.fill_diagonal(symmetric, 0)
    return symmetric
def test_qtl_fast_binomial_scan():
    """Regression test: the fast binomial QTL scan reproduces pinned p-values."""
    # Every draw below comes from this single seeded stream, so the order of
    # the statements determines the data and therefore the pinned p-values.
    random = RandomState(9)
    N = 200

    # Background matrix: N x (N + 100), standardised and scaled by sqrt(#cols).
    G = random.randn(N, N + 100)
    G = stdnorm(G, 0)
    G /= sqrt(G.shape[1])

    # Candidate variants: N x p, same standardisation.
    p = 2
    X = random.randn(N, p)
    X = stdnorm(X, 0)
    X /= sqrt(X.shape[1])

    # One trial count per sample, drawn from [1, 50).
    ntrials = random.randint(1, 50, N)
    nsuccesses = binomial(
        ntrials,
        -0.1,
        G,
        causal_variants=X,
        causal_variance=0.1,
        random_state=random)

    qtl = scan(BinomialPhenotype(nsuccesses, ntrials), X, G=G,
               progress=False, fast=True)

    assert_allclose(
        qtl.pvalues(), [
            0.698565827403,
            0.443299805368
        ], rtol=1e-4)
class SubSampled(object):
    """Expose ``nb`` examples of ``dataset`` per ``load()`` call.

    Parameters
    ----------
    dataset : object with ``load()``, an indexable ``X`` and optionally
        ``y``, ``y_raw``, ``img_dim`` and ``output_dim`` attributes.
    nb : number of examples to select on each ``load()``.
    random_state : seed for the internal ``RandomState`` ('random' mode).
    mode : ``'random'`` draws ``nb`` indices uniformly with replacement;
        ``'batch'`` walks through ``iterate_minibatches`` and restarts it
        when it is exhausted.
    shuffle : forwarded to ``iterate_minibatches`` in 'batch' mode.
    """

    def __init__(self, dataset, nb, random_state=2, mode='random', shuffle=True):
        self.dataset = dataset
        self.nb = nb
        self.rng = RandomState(random_state)
        self.mode = mode
        self.shuffle = shuffle
        self.next_batch_iter = None

    def load(self):
        """Load the wrapped dataset and copy the selected examples onto self.

        Raises
        ------
        ValueError
            If ``self.mode`` is neither ``'random'`` nor ``'batch'``.
        """
        self.dataset.load()
        indices = self._next_indices()
        self.X = self.dataset.X[indices]
        if hasattr(self.dataset, "y"):
            self.y = [self.dataset.y[ind] for ind in indices]
        if hasattr(self.dataset, "img_dim"):
            self.img_dim = self.dataset.img_dim
        if hasattr(self.dataset, "output_dim"):
            self.output_dim = self.dataset.output_dim
        if hasattr(self.dataset, "y_raw"):
            self.y_raw = [self.dataset.y_raw[ind] for ind in indices]

    def _new_batch_iter(self):
        """Create a fresh minibatch index iterator over the dataset."""
        return iterate_minibatches(self.dataset.X.shape[0], self.nb,
                                   shuffle=self.shuffle)

    def _next_indices(self):
        """Return the indices to expose for the current ``load()`` call."""
        if self.mode == 'random':
            return self.rng.randint(0, len(self.dataset.X), size=self.nb)
        if self.mode == 'batch':
            if self.next_batch_iter is None:
                self.next_batch_iter = self._new_batch_iter()
            try:
                return next(self.next_batch_iter)
            except StopIteration:
                # One pass is finished: start over with a new iterator.
                self.next_batch_iter = self._new_batch_iter()
                return next(self.next_batch_iter)
        raise ValueError(
            "unknown mode {!r}; expected 'random' or 'batch'".format(self.mode))
class RandomGenerator(object):
    """Small adapter offering ``random``-module-style helpers on a NumPy RandomState.

    Attributes not defined here are forwarded to the wrapped ``RandomState``,
    so e.g. ``shuffle`` or ``normal`` can be called directly on the adapter.
    """

    def __init__(self, seed=None):
        self._random = RandomState(seed=seed)

    def random(self):
        """Return one float drawn from ``RandomState.rand()``."""
        return self._random.rand()

    def randint(self, a, b=None):
        """Return one integer from [a, b), or from [0, a) when ``b`` is omitted.

        The upper bound is exclusive because the draw is delegated to
        ``RandomState.randint``.
        """
        if b is None:
            a, b = 0, a
        return self._random.randint(a, high=b, size=1)[0]

    def sample(self, population, k):
        """Return ``k`` elements of ``population`` drawn without replacement.

        An empty list is returned for ``k == 0``; otherwise the result is
        whatever ``RandomState.choice`` returns (a NumPy array).
        """
        if k == 0:
            return []
        return self._random.choice(population, size=k, replace=False)

    def __getattr__(self, attr):
        # __getattr__ only runs when normal lookup fails. If '_random' itself
        # is missing (instance created without __init__, e.g. while being
        # copied or unpickled) forwarding would recurse forever, so fail
        # with the regular AttributeError instead.
        if attr == '_random':
            raise AttributeError(attr)
        return getattr(self._random, attr)

    def __getstate__(self):
        return {'_random': self._random}

    def __setstate__(self, d):
        self._random = d['_random']
def transpose_characters(token, index_to_char, n=1, char_pool=None, seed=17):
    """Return ``token`` after ``n`` random swaps of two adjacent characters.

    Parameters
    ----------
    token : text to perturb.
    index_to_char, char_pool : unused here; accepted so the function shares
        its signature with the sibling perturbation helpers.
    n : number of swaps to apply.
    seed : an int seed or an existing ``RandomState`` to draw from.

    A token consisting of a single distinct character is returned unchanged,
    since no swap could alter it.
    """
    if isinstance(seed, RandomState):
        rng = seed
    else:
        rng = RandomState(seed)

    if len(set(token)) == 1:
        return token

    new_token = token
    for _ in six.moves.range(n):
        # max(1, ...) means position 0 is never drawn as the pivot.
        idx = max(1, rng.randint(len(new_token)))
        if idx == 0:
            # Unreachable while the max(1, ...) clamp is in place; kept
            # correct (assignment, not comparison) in case the clamp goes.
            neighbor = 1
        elif idx == len(new_token) - 1:
            # Last position: the only neighbour is to the left.
            neighbor = len(new_token) - 2
        elif rng.uniform() > 0.5:
            neighbor = idx + 1
        else:
            neighbor = idx - 1

        left = min(idx, neighbor)
        right = max(idx, neighbor)
        # six.text_type is ``unicode`` on Python 2 and ``str`` on Python 3.
        new_token = six.text_type(
            new_token[0:left] + new_token[right] + new_token[left]
            + new_token[right + 1:])
    return new_token
def test_get_random_state():
    """get_prng accepts an int seed, a RandomState, nothing, None and np.random."""
    prng1 = RandomState(42)
    prng2 = get_prng(42)
    prng3 = get_prng(prng1)
    prng4 = get_prng(prng2)
    prng5 = get_prng()
    prng6 = get_prng(None)
    prng7 = get_prng(np.random)

    for candidate in (prng1, prng3, prng5, prng6, prng7):
        assert isinstance(candidate, RandomState)

    # Draw from the generators in their numbered order; some of them may
    # share state, so the order of the draws is part of the test.
    x1, x2, x3, x4, x5, x6, x7 = [
        prng.randint(5, size=10)
        for prng in (prng1, prng2, prng3, prng4, prng5, prng6, prng7)
    ]

    assert_equal(x1, x2)
    assert_equal(x3, x4)
    for sample in (x5, x6, x7):
        assert_equal(len(sample), 10)
def make_ratings(n_users, n_items, min_rating_per_user, max_rating_per_user,
                 rating_choices, seed=None, shuffle=True):
    """Randomly generate a (user_id, item_id, rating) array

    Parameters
    ----------
    n_users : number of users; user ids are ``0 .. n_users - 1``.
    n_items : number of items; item ids are drawn from ``0 .. n_items - 1``
        without repetition within one user.
    min_rating_per_user, max_rating_per_user : each user's rating count is
        drawn from ``[min_rating_per_user, max_rating_per_user)`` (upper
        bound exclusive, as in ``RandomState.randint``). When both are
        equal every user gets exactly that many ratings.
    rating_choices : list or tuple of rating values to sample from.
    seed : seed for the internal ``RandomState``.
    shuffle : shuffle the rows of the result when true.

    Return
    ------
    ndarray with shape (n_samples, 3)

    Raises
    ------
    ValueError
        For a non list/tuple ``rating_choices`` or inconsistent bounds.
    """
    if not isinstance(rating_choices, (list, tuple)):
        raise ValueError("'rating_choices' must be a list or tuple")
    if min_rating_per_user < 0 or min_rating_per_user >= n_items:
        raise ValueError("invalid 'min_rating_per_user' invalid")
    if (min_rating_per_user > max_rating_per_user) or \
            (max_rating_per_user >= n_items):
        raise ValueError("invalid 'max_rating_per_user' invalid")

    rs = RandomState(seed=seed)
    user_arrs = []
    for user_id in range(n_users):
        if min_rating_per_user == max_rating_per_user:
            # randint(low, high) rejects low == high, although the bounds
            # check above accepts it; the count is simply fixed then.
            item_count = min_rating_per_user
        else:
            item_count = rs.randint(min_rating_per_user, max_rating_per_user)
        item_ids = rs.choice(n_items, item_count, replace=False)
        ratings = rs.choice(rating_choices, item_count)
        arr = np.stack(
            [np.repeat(user_id, item_count), item_ids, ratings], axis=1)
        user_arrs.append(arr)

    ratings = np.vstack(user_arrs)
    ratings[:, 2] = ratings[:, 2].astype('float')
    if shuffle:
        rs.shuffle(ratings)
    return ratings
def test_qtl_binomial_scan_covariate_redundance():
    """Scanning constant (all-ones) candidates must give p-values of 1."""
    # All draws come from this one seeded stream; statement order fixes the data.
    random = RandomState(9)
    N = 200

    G = random.randn(N, N + 100)
    G = stdnorm(G, 0)
    G /= sqrt(G.shape[1])

    p = 2
    X = random.randn(N, p)
    X = stdnorm(X, 0)
    X /= sqrt(X.shape[1])

    ntrials = random.randint(1, 50, N)
    nsuccesses = binomial(
        ntrials,
        -0.1,
        G,
        causal_variants=X,
        causal_variance=0.1,
        random_state=random)

    # Overwrite the candidates with a constant column after the phenotype
    # has been simulated from the original X.
    X[:] = 1
    qtl = scan(BinomialPhenotype(nsuccesses, ntrials), X, G=G,
               progress=False, fast=False)
    assert_allclose(qtl.pvalues(), [1] * p, rtol=1e-4)
def randShots(seed):
    """Return a shot-count label ("0 Shots", "1 Shot" or "2 Shots") for ``seed``.

    The count is a single draw from [0, 3) of a ``RandomState`` seeded with
    ``seed``, so the same seed always yields the same label.
    """
    count = RandomState(seed).randint(0, 3)
    return "1 Shot" if count == 1 else "%d Shots" % count
def randShots(seed):
    """Return '0 Shots' or '1 Shot', chosen by a generator seeded with ``seed``.

    The count is one draw from [0, 2); a given seed always maps to the same
    label.
    """
    count = RandomState(seed).randint(0, 2)
    if count != 1:
        return '{} Shots'.format(count)
    return '1 Shot'
def test_corrcoef():
    """corrcoef gives identical results when called twice on the same input."""
    rng = RandomState(42)
    values = rng.rand(10)
    groups = rng.randint(3, size=10)

    # The series is compared with itself, as in the original (y = x).
    first = corrcoef(values, values, groups)
    second = corrcoef(values, values, groups)
    np.testing.assert_equal(first, second)
def test_sim_corr():
    """First element of sim_corr's result is the same with and without a seed."""
    rng = RandomState(42)
    values = rng.rand(10)
    groups = rng.randint(3, size=10)

    seeded = sim_corr(values, values, groups, seed=rng)
    unseeded = sim_corr(values, values, groups)
    np.testing.assert_equal(seeded[0], unseeded[0])
def initialize_random(cexinf, random_seed):
    """Send every target of ``cexinf`` its own random seed.

    A master seed (``random_seed``, or ``generate_seed()`` when it is None)
    seeds a ``RandomState`` that draws one value below 0xfffffff per entry
    of ``cexinf.get_size()``. Each value is wrapped in an
    'initialize_random' writing message and dispatched with
    ``map_all_async``; the replies are read with format 'x'.
    """
    master_seed = int(generate_seed() if random_seed is None else random_seed)
    msg('initializing random state with seed=0x%X', master_seed)

    rnd = RandomState(master_seed)
    per_target_seeds = rnd.randint(0xfffffff, size=cexinf.get_size())
    messages = (make_writing_message('initialize_random', 'u', seed)
                for seed in per_target_seeds)
    cexinf.map_all_async(messages).read_frmt('x')
def test_corr():
    """corr returns 5 values; leading ones agree with and without a prng."""
    rng = RandomState(42)
    x = rng.randint(5, size=10)

    # Case 1: a series against itself.
    with_prng = corr(x, x, prng=rng)
    default = corr(x, x)
    for result in (with_prng, default):
        np.testing.assert_equal(len(result), 5)
    # Only elements 0 and 1 are compared; elements 2 and 3 are not checked.
    for position in (0, 1):
        np.testing.assert_equal(with_prng[position], default[position])

    # Case 2: two independently drawn series.
    y = rng.randint(5, size=10)
    with_prng = corr(x, y, prng=rng)
    default = corr(x, y)
    for result in (with_prng, default):
        np.testing.assert_equal(len(result), 5)
    np.testing.assert_equal(with_prng[0], default[0])
def test_permute():
    """permute shuffles in place: pinned result with a prng, same values without."""
    rng = RandomState(42)
    data = rng.randint(10, size=20)

    permute(data, rng)
    pinned = np.array([3, 2, 7, 9, 6, 5, 1, 6, 4, 2,
                       7, 7, 7, 4, 4, 3, 1, 5, 7, 6])
    np.testing.assert_array_equal(data, pinned)

    # Without an explicit prng the order is arbitrary, but the extremes stay.
    permute(data)
    np.testing.assert_equal(data.max(), 9)
    np.testing.assert_equal(data.min(), 1)
def sample_half_counts(evt0, seed=0):
    """Return a copy of ``evt0`` keeping roughly half of its counts.

    The number of kept entries is drawn from Binomial(N, 0.5) with
    ``N = len(evt0.xc)``; that many indices are then drawn uniformly (with
    replacement) and used to select from ``xc``, ``yc`` and ``w``. The
    fields ``b``, ``d``, ``ims`` and ``ths`` are carried over unchanged and
    ``bkgr`` is halved.
    """
    total = len(evt0.xc)
    rnd = RandomState(seed)
    # The size is drawn before the indices, exactly as when it is evaluated
    # as an argument of the randint call.
    keep_count = rnd.binomial(total, 0.5)
    picks = rnd.randint(low=0, high=total, size=keep_count)

    half = sdict(b=evt0.b, d=evt0.d, ims=evt0.ims, ths=evt0.ths,
                 bkgr=evt0.bkgr / 2)
    half.xc = evt0.xc[picks]
    half.yc = evt0.yc[picks]
    half.w = evt0.w[picks]
    return half
def chromosome_init(N, h_dom=16, w_dom=16, h=100, w=100):
    """Create ``N`` random 21-character binary chromosomes.

    Each chromosome is 9 bits of x position drawn from [0, w - w_dom),
    9 bits of y position drawn from [0, h - h_dom) and 3 bits of flip code
    drawn from [0, 8), all as ASCII '0'/'1' characters.

    Works on Python 2 and 3: ``range`` replaces ``xrange`` and the bit
    string is encoded to bytes before it is stored in the ctypes buffer.

    Returns
    -------
    A ctypes array ``(c_char_p * N)`` holding the chromosomes.
    """
    # One unseeded generator is enough; the draws are independent anyway.
    rng = RandomState()
    chrom_pool = (ctypes.c_char_p * N)()
    for i in range(N):
        bits = ('{0:09b}'.format(rng.randint(0, w - w_dom))
                + '{0:09b}'.format(rng.randint(0, h - h_dom))
                + '{0:03b}'.format(rng.randint(0, 8)))
        # 9 bits x_dom, 9 bits y_dom and 3 bits flip. The fixed-size buffer
        # also rejects values that would need more than 9 bits.
        chromosome = ctypes.create_string_buffer(2 * 9 + 3)
        chromosome.value = bits.encode('ascii')
        chrom_pool[i] = ctypes.string_at(chromosome, 21)
    return chrom_pool
def test_permute_rows():
    """permute_rows shuffles each row in place; result pinned for seed 42."""
    rng = RandomState(42)
    table = rng.randint(10, size=20).reshape(2, 10)

    permute_rows(table, rng)
    pinned = np.array([[2, 7, 7, 6, 4, 9, 3, 4, 6, 6],
                       [7, 4, 5, 5, 3, 7, 1, 2, 7, 1]])
    np.testing.assert_array_equal(table, pinned)

    # Without an explicit prng only the value range can be checked.
    permute_rows(table)
    np.testing.assert_equal(table.max(), 9)
    np.testing.assert_equal(table.min(), 1)
def replace_characters(token, index_to_char, n=1,
                       char_pool=string.ascii_lowercase, seed=17):
    """Return ``token`` with ``n`` characters replaced by random pool characters.

    Parameters
    ----------
    token : text to perturb.
    index_to_char : unused here; kept for signature parity with the sibling
        perturbation helpers.
    n : number of replacements (the same position may be hit repeatedly).
    char_pool : iterable of candidate replacement characters.
    seed : an int seed or an existing ``RandomState`` to draw from.
    """
    if isinstance(seed, RandomState):
        rng = seed
    else:
        rng = RandomState(seed)

    new_token = token
    for _ in six.moves.range(n):
        # The character at idx - 1 is replaced. With the max(1, ...) clamp
        # idx lies in [1, len - 1], so the last character is never replaced
        # and position 0 is picked twice as often.
        # NOTE(review): possibly an off-by-one; left as is so seeded
        # outputs stay stable.
        idx = max(1, rng.randint(len(new_token)))
        ch = rng.choice(list(char_pool))
        # six.text_type is ``unicode`` on Python 2 and ``str`` on Python 3.
        new_token = six.text_type(new_token[0:idx - 1] + ch + new_token[idx:])
    return new_token
def pseudorandom(sequence, seed=None):
    '''
    Endlessly yield randomly selected elements of the sequence.

    This is a generator: every ``next()`` yields one element chosen
    uniformly from ``sequence``. The same ``seed`` gives the same stream.
    '''
    # A stand-alone generator is used so that it cannot be affected by other
    # parts of the code that may require random data (e.g. noise).
    from numpy.random import RandomState
    state = RandomState(seed)
    size = len(sequence)
    while True:
        yield sequence[state.randint(0, size)]
def test_naive_ova_asgd_wrong_labels():
    """Fit OVAASGD on labels drawn from a range larger than ``n_classes``.

    The function contains no assertion of its own.
    NOTE(review): presumably an expected-exception decorator lives outside
    this block - confirm.
    """
    rstate = RandomState(42)
    n_classes = 10
    Xtrn, ytrn = get_fake_multiclass_data(N_POINTS, N_FEATURES, n_classes,
                                          rstate)

    ctor_args = (n_classes,) + DEFAULT_ARGS
    clf = OVAASGD(*ctor_args, rstate=RandomState(999), **DEFAULT_KWARGS)

    # Labels from [0, n_classes + 42): many fall outside the valid classes.
    out_of_range_labels = rstate.randint(n_classes + 42, size=len(ytrn))
    clf.partial_fit(Xtrn, out_of_range_labels)
def test_sim_corr():
    """Relations between sim_corr results for the three alternatives."""
    rng = RandomState(42)
    values = rng.rand(10)
    groups = rng.randint(3, size=10)

    # All three calls share ``rng``, so they must stay in this order.
    default = sim_corr(values, values, groups, seed=rng, reps=100)
    less = sim_corr(values, values, groups, seed=rng,
                    alternative='less', reps=100)
    two_sided = sim_corr(values, values, groups, seed=rng,
                         alternative='two-sided', reps=100)

    assert_almost_equal(default[0], 1 - less[0])
    assert_equal(default[1], less[1])
    assert_equal(default[1], two_sided[1])
    assert_equal(default[0], two_sided[0])
def delete_characters(token, index_to_char, n=1, char_pool=None, seed=17):
    """Return ``token`` with ``n`` randomly chosen characters removed.

    Parameters
    ----------
    token : text to perturb.
    index_to_char, char_pool : unused here; kept for signature parity with
        the sibling perturbation helpers.
    n : number of deletions; when larger than the token it is reduced to
        ``len(token) - 1``.
    seed : an int seed or an existing ``RandomState`` to draw from.

    Returns the shortened token, like the sibling helpers do (this function
    previously fell off the end and returned ``None``).

    Raises
    ------
    ValueError
        Re-raised from the random draw after printing the offending token.
    """
    if isinstance(seed, RandomState):
        rng = seed
    else:
        rng = RandomState(seed)

    new_token = token
    if n > len(new_token):
        n = len(new_token) - 1

    for _ in six.moves.range(n):
        try:
            # The character at idx - 1 is removed; idx is clamped to >= 1.
            idx = max(1, rng.randint(len(new_token)))
            # six.text_type is ``unicode`` on Python 2, ``str`` on Python 3.
            new_token = six.text_type(new_token[0:idx - 1] + new_token[idx:])
        except ValueError:
            print('new_token', new_token, len(new_token))
            raise
    return new_token
def setupSeed(hoursBetweenTimestepInROMSFiles, startTime, endTime,
              startSpawningTime, endSpawningTime, releaseParticles):
    """Create the per-day particle release schedule for the spawning period.

    One spawning event is scheduled every 24 hours from
    ``startSpawningTime`` (``endSpawningTime`` exclusive). The number of
    particles per event is ``releaseParticles`` scaled by a draw from
    Normal(0.5, 0.1); the counts are then arranged so that the largest
    releases fall around the midpoint of the spawning period.

    Parameters
    ----------
    hoursBetweenTimestepInROMSFiles : hours per simulation time step (only
        used for the printed summary).
    startTime, endTime : datetimes bounding the drift simulation.
    startSpawningTime, endSpawningTime : datetimes bounding the spawning.
    releaseParticles : scale for the number of particles per event.

    Returns
    -------
    (num, spawningTimes) : integer array of particles per event and the
        list of event datetimes.
    """
    ##################################################
    # Create seed variation as function of day
    ##################################################
    simulation_hours = divmod((endTime - startTime).total_seconds(), 3600)[0]
    spawning_hours = divmod(
        (endSpawningTime - startSpawningTime).total_seconds(), 3600)[0]
    timeStepsSimulation = int(
        int(simulation_hours) / hoursBetweenTimestepInROMSFiles)

    print("\nKINO TIME EVOLUTION:")
    print("=>SIMULATION: Drift simulation will run for %s simulation hours" % (timeStepsSimulation))
    # NOTE(review): this line reports the simulation step count for the
    # spawning period as well - confirm whether that is intended.
    print("=>SPAWNING: Simulated spawning will run for %s simulation hours\n initiated on %s and ending on %s" % (timeStepsSimulation, startSpawningTime, endSpawningTime))

    interval = timedelta(hours=24)
    hours_per_event = divmod(interval.total_seconds(), 3600)[0]
    # number of spawning events (whole days in the spawning period)
    timeStepsSpawning = int(spawning_hours) // int(hours_per_event)
    spawningTimes = [startSpawningTime + interval * n
                     for n in range(timeStepsSpawning)]

    # Normal distribution around 0.5
    mu, sigma = 0.5, 0.1  # mean and standard deviation
    prng = RandomState()
    s = prng.normal(mu, sigma, len(spawningTimes))
    num = (s * releaseParticles).astype(int)
    num = np.sort(num)  # sort particles in increasing order
    # release the highest number of particles at the midpoint of the
    # spawning period: ascending every-other value, then the rest descending
    num = np.concatenate((num[len(num) % 2::2], num[::-2]), axis=0)

    # NOTE(review): the line break inside this message is written as an
    # explicit escape; a raw break inside the literal is not valid Python.
    print("SPAWNING: \nSimulated spawning will release %s eggs" % (np.sum(num)))
    return num, spawningTimes
def test_permute_incidence_fixed_sums():
    """Permuting an incidence matrix keeps row/column sums and bounds the swaps."""
    rng = RandomState(42)
    x0 = rng.randint(2, size=80).reshape((8, 10))

    x1 = permute_incidence_fixed_sums(x0)

    K = 5
    changed_cells = []
    for _ in range(1000):
        x2 = permute_incidence_fixed_sums(x0, k=K)
        changed_cells.append(np.sum(x0 != x2))
    np.testing.assert_(max(changed_cells) <= K * 4, "Too many swaps occurred")

    # Marginal sums must survive; x2 is the last matrix produced by the loop.
    for axis in (0, 1):
        for permuted in (x1, x2):
            np.testing.assert_array_equal(x0.sum(axis=axis),
                                          permuted.sum(axis=axis))
def test_precision():
    """export_graphviz honours ``precision`` for impurity and threshold values."""
    # Separate seeded streams for the regression and classification data.
    rng_reg = RandomState(2)
    rng_clf = RandomState(8)
    # zip pairs up (features, targets, estimator) for a tiny regression
    # problem and a larger binary classification problem.
    for X, y, clf in zip(
            (rng_reg.random_sample((5, 2)),
             rng_clf.random_sample((1000, 4))),
            (rng_reg.random_sample((5, )),
             rng_clf.randint(2, size=(1000, ))),
            (DecisionTreeRegressor(criterion="friedman_mse", random_state=0,
                                   max_depth=1),
             DecisionTreeClassifier(max_depth=1, random_state=0))):

        clf.fit(X, y)
        for precision in (4, 3):
            dot_data = export_graphviz(clf, out_file=None, precision=precision,
                                       proportion=True)

            # With the current random state, the impurity and the threshold
            # will have the number of precision set in the export_graphviz
            # function. We will check the number of precision with a strict
            # equality. The value reported will have only 2 precision and
            # therefore, only a less equal comparison will be done.

            # check value
            # (the matched fraction includes the dot, hence precision + 1)
            for finding in finditer(r"value = \d+\.\d+", dot_data):
                assert_less_equal(
                    len(search(r"\.\d+", finding.group()).group()),
                    precision + 1)
            # check impurity
            if is_classifier(clf):
                pattern = r"gini = \d+\.\d+"
            else:
                pattern = r"friedman_mse = \d+\.\d+"

            # check impurity
            for finding in finditer(pattern, dot_data):
                assert_equal(len(search(r"\.\d+", finding.group()).group()),
                             precision + 1)
            # check threshold
            for finding in finditer(r"<= \d+\.\d+", dot_data):
                assert_equal(len(search(r"\.\d+", finding.group()).group()),
                             precision + 1)
def align(y_LR, size=64, n=12):
    """Estimate the (dy, dx) offset between the two halves of ``y_LR``.

    ``y_LR`` is a 2-D array holding a left and a right image side by side.
    ``n`` random ``size/2`` x ``size/2`` patches from the central region of
    the left half are matched against the right half with
    ``match_template``; the median of the ``n`` offsets is returned.

    Floor division (``//``) is used for every index so the function works
    on Python 3 as well as Python 2.

    Returns
    -------
    (dy, dx) : pair of ints, the median row / column offset.
    """
    (h, w) = y_LR.shape
    # split image
    y_L = y_LR[:, :w // 2]
    y_R = y_LR[:, w // 2:]
    (h, w) = y_L.shape
    s = size // 2
    # now find n offsets
    R = np.zeros((n, 2))
    rand = RandomState(0)  # fixed seed: reproducible patch locations
    for i in range(n):
        # at a random location in y_L
        y = rand.randint(h // 4, h * 3 // 4)
        x = rand.randint(w // 4, w * 3 // 4)
        it = y_L[y:y + s, x:x + s]     # take an s x s chunk there
        tm = match_template(y_R, it)   # match it against y_R
        ry, rx = maximum_position(tm)  # max value is location
        R[i, :] = ((y - ry), (x - rx))  # accumulate
    # take the median
    dy, dx = np.median(R, axis=0).astype(int)
    return dy, dx
def test_he_otp_lr_ft1():
    """Host-side gradients from the HE_OTP_LR_FT1 protocol match a local computation."""
    federal_info = fed_conf_host

    sec_param = {"he_algo": 'paillier', "he_key_length": 1024}

    # Guest data is drawn before host data from the same seeded stream;
    # NOTE(review): presumably the guest-side test draws in the same order
    # so that both parties hold identical data - confirm.
    prng = RandomState(0)
    guest_theta = prng.uniform(-1, 1, (6, ))
    guest_features = prng.uniform(-1, 1, (32, 6))
    guest_labels = prng.randint(0, 2, (32, ))
    host_theta = prng.uniform(-1, 1, (6, ))
    host_features = prng.uniform(-1, 1, (32, 6))

    def calu_grad(host_theta, host_features, guest_theta, guest_features,
                  guest_labels):
        # Plain logistic-regression gradient w.r.t. the host parameters,
        # using the sum of both parties' linear scores.
        u2 = host_theta.dot(host_features.T)
        u1 = guest_theta.dot(guest_features.T)
        u = u1 + u2
        h_x = 1 / (1 + np.exp(-u))
        diff_y = guest_labels - h_x

        batch_size = host_features.shape[0]
        grads = (-1 / batch_size) * (diff_y.dot(host_features))
        return grads

    trainer = make_protocol(HE_OTP_LR_FT1, federal_info, sec_param,
                            algo_param=None)
    # federated computation result
    fed_grads = trainer.exchange(host_theta, host_features)
    # local computation result
    local_grads = calu_grad(host_theta, host_features, guest_theta,
                            guest_features, guest_labels)

    assert almost_equal(fed_grads, local_grads)
def test_ggp_expfam_tobi():
    """Regression test: ExpFamGP log marginal likelihood before and after fit."""
    # Single seeded stream; the order of the draws fixes the pinned values.
    random = RandomState(2)

    n = 30

    ntrials = random.randint(30, size=n)
    # K is not passed to anything below, but drawing it advances the stream
    # before the sampler uses it.
    K = random.randn(n, n)
    K = matmul(K, K.T)

    lik = BinomialProdLik(ntrials=ntrials, link=LogitLink())

    mean = OffsetMean(n)

    cov2 = EyeCov(n)

    y = GGPSampler(lik, mean, cov2).sample(random)

    ggp = ExpFamGP(y, ("binomial", ntrials), mean, cov2)
    assert_allclose(ggp.lml(), -67.84095700542488)

    ggp.fit(verbose=False)
    assert_allclose(ggp.lml(), -64.26701904994792)
def add_cspy_edge_attributes(G, seed=None):
    """
    Set edge attributes required for cspy.

    Every edge gets ``weight = 0`` and a ``res_cost`` array
    ``[0, sights, shift, travel_time, delivery_time]``. ``seed`` may be
    None (unseeded generator), an int, or an existing ``RandomState``;
    anything else raises ``Exception``. Returns ``G``.
    """
    if isinstance(seed, RandomState):
        random_state = seed
    elif seed is None:
        random_state = RandomState()
    elif isinstance(seed, int):
        random_state = RandomState(seed)
    else:
        raise Exception(
            '{} cannot be used to seed numpy.random.RandomState'.format(seed))

    # Initialise edge attributes
    set_edge_attributes(G, 0, 'weight')
    set_edge_attributes(G, 0, 'res_cost')

    # Fill in 'weight' and 'res_cost' edge by edge
    for _, _, attrs in G.edges(data=True):
        # existing 'length' attribute converted from m to km
        dist_km = attrs['length'] * 0.001

        # 'sights': random integer in [1, 5)
        sights = random_state.randint(1, 5)
        # 'travel-time': distance over speed (not necessary)
        travel_time = dist_km / float(WALKING_SPEED)
        # 'delivery time': random number between the travel time for the
        # edge and 10 times the travel time; in reality this would depend
        # on the buildings present
        delivery_time = random_state.uniform(travel_time, 10 * travel_time)
        # 'shift' is not required
        shift = 0

        attrs['res_cost'] = array(
            [0, sights, shift, travel_time, delivery_time])
        attrs['weight'] = 0  # -dist
    return G
class AxelrodTraitFactory(object):
    """
    Trait initialiser for the original Axelrod model, in which agents have F
    loci and T possible traits per locus. Each individual starts with F
    random integers, each chosen from 0 to T-1, stored as that individual's
    initial trait set.

    The factory is loaded dynamically from its fully qualified name in a
    configuration file and receives the simulation configuration object in
    its constructor. The instantiating code then calls
    initialize_population(graph) with a previously constructed NetworkX
    graph of nodes.
    """

    def __init__(self, simconfig):
        self.simconfig = simconfig
        # no explicit seed: the library picks one via an OS specific mechanism
        self.prng = RandomState()

    def initialize_population(self, graph):
        """Store a fresh random trait vector under 'traits' on every node."""
        config = self.simconfig
        n_loci, n_traits = config.num_features, config.num_traits
        for name in graph.nodes():
            traits = self.prng.randint(0, n_traits, size=n_loci)
            graph.node[name]['traits'] = traits
class Dataset(udata.Dataset):
    """Dataset of random aligned patches from (input, ground-truth) image pairs.

    ``name`` is the path of a text file with one ``"<input> <ground truth>"``
    pair of relative image paths per line. Each pair is served 100 times per
    epoch, every time with a new random crop of ``patchsize`` x ``patchsize``.
    """

    def __init__(self, name, patchsize):
        super().__init__()
        self.dataset = name
        self.patch_size = patchsize
        # Read the listing inside a context manager so the handle is closed.
        with open(self.dataset, 'r') as listing:
            self.mat_files = listing.readlines()
        self.file_num = len(self.mat_files)
        # fixed seed: the sequence of crop positions is reproducible
        self.rand_state = RandomState(66)

    def __len__(self):
        return self.file_num * 100

    def __getitem__(self, idx):
        """Return ``{'O': input_patch, 'B': gt_patch}`` as float32 CHW arrays."""
        file_name = self.mat_files[idx % self.file_num]
        # Strip the line terminator first; slicing off the last character
        # would damage a final line that has no trailing newline.
        img_file, gt_file = file_name.rstrip('\r\n').split(' ')[:2]

        # NOTE(review): self.args is not assigned in this class; presumably
        # the base class provides it - confirm.
        O = cv2.imread(self.args.dir_data + img_file)
        b, g, r = cv2.split(O)
        input_img = cv2.merge([r, g, b])  # BGR -> RGB

        B = cv2.imread(self.args.dir_data + gt_file)
        b, g, r = cv2.split(B)
        gt = cv2.merge([r, g, b])  # BGR -> RGB

        # ground truth on the left, input on the right; crop() splits again
        im_pair = np.hstack((gt, input_img))
        O, B = self.crop(im_pair, self.patch_size)
        O, B = O.astype(np.float32), B.astype(np.float32)
        O = np.transpose(O, (2, 0, 1))
        B = np.transpose(B, (2, 0, 1))
        sample = {'O': O, 'B': B}

        return sample

    def _random_offset(self, slack):
        """Return a crop offset in [0, slack); 0 when the patch fits exactly."""
        if slack == 0:
            # randint(0, 0) would raise although offset 0 is a valid crop.
            return 0
        return self.rand_state.randint(0, slack)

    def crop(self, img_pair, patchsize):
        """Cut the same random window out of both halves of ``img_pair``.

        Returns ``(O, B)``: the patch from the right half and the patch from
        the left half.
        """
        h, ww, _ = img_pair.shape
        w = int(ww / 2)
        p_h, p_w = patchsize, patchsize

        r = self._random_offset(h - p_h)
        c = self._random_offset(w - p_w)
        O = img_pair[r:r + p_h, c + w:c + p_w + w]
        B = img_pair[r:r + p_h, c:c + p_w]

        return O, B
def test_glmmexpfam_poisson(): random = RandomState(1) # sample size n = 30 # covariates offset = ones(n) * random.randn() age = random.randint(16, 75, n) M = stack((offset, age), axis=1) # genetic variants G = random.randn(n, 4) # sampling the phenotype alpha = random.randn(2) beta = random.randn(4) eps = random.randn(n) y = M @ alpha + G @ beta + eps # Whole genotype of each sample. X = random.randn(n, 50) # Estimate a kinship relationship between samples. X_ = (X - X.mean(0)) / X.std(0) / sqrt(X.shape[1]) K = X_ @ X_.T + eye(n) * 0.1 # Update the phenotype y += random.multivariate_normal(zeros(n), K) y = (y - y.mean()) / y.std() z = y.copy() y = random.poisson(exp(z)) M = M - M.mean(0) QS = economic_qs(K) glmm = GLMMExpFam(y, "poisson", M, QS) assert_allclose(glmm.lml(), -52.479557279193585) glmm.fit(verbose=False) assert_allclose(glmm.lml(), -34.09720756737648)
def make_ratings(n_users, n_items, min_rating_per_user, max_rating_per_user,
                 rating_choices, seed=None, shuffle=True):
    """Randomly generate a (user_id, item_id, rating) array

    Parameters
    ----------
    n_users : number of users; user ids are ``0 .. n_users - 1``.
    n_items : number of items; item ids are drawn from ``0 .. n_items - 1``
        without repetition within one user.
    min_rating_per_user, max_rating_per_user : each user's rating count is
        drawn from ``[min_rating_per_user, max_rating_per_user)`` (upper
        bound exclusive, as in ``RandomState.randint``). When both are
        equal every user gets exactly that many ratings.
    rating_choices : list or tuple of rating values to sample from.
    seed : seed for the internal ``RandomState``.
    shuffle : shuffle the rows of the result when true.

    Return
    ------
    ndarray with shape (n_samples, 3)

    Raises
    ------
    ValueError
        For a non list/tuple ``rating_choices`` or inconsistent bounds.
    """
    if not isinstance(rating_choices, (list, tuple)):
        raise ValueError("'rating_choices' must be a list or tuple")
    if min_rating_per_user < 0 or min_rating_per_user >= n_items:
        raise ValueError("invalid 'min_rating_per_user' invalid")
    if (min_rating_per_user > max_rating_per_user) or \
            (max_rating_per_user >= n_items):
        raise ValueError("invalid 'max_rating_per_user' invalid")

    rs = RandomState(seed=seed)
    user_arrs = []
    for user_id in range(n_users):
        if min_rating_per_user == max_rating_per_user:
            # randint(low, high) rejects low == high, although the bounds
            # check above accepts it; the count is simply fixed then.
            item_count = min_rating_per_user
        else:
            item_count = rs.randint(min_rating_per_user, max_rating_per_user)
        item_ids = rs.choice(n_items, item_count, replace=False)
        ratings = rs.choice(rating_choices, item_count)
        arr = np.stack(
            [np.repeat(user_id, item_count), item_ids, ratings], axis=1)
        user_arrs.append(arr)

    ratings = np.vstack(user_arrs)
    ratings[:, 2] = ratings[:, 2].astype('float')
    if shuffle:
        rs.shuffle(ratings)
    return ratings
def test_stochastic_environment_model():
    """Sampling from a trained StochasticEnvironmentModel reproduces the stored fixture."""
    # One seeded stream drives both the training transitions and the samples,
    # so the statement order below must match the one used for the fixture.
    random_state = RandomState(12345)

    model = StochasticEnvironmentModel()

    actions = [
        Action(i)
        for i in range(5)
    ]

    states = [
        State(i, actions)
        for i in range(5)
    ]

    # Train the model on 1000 random (state, action, next state, reward)
    # transitions.
    for t in range(1000):
        state = sample_list_item(states, None, random_state)
        action = sample_list_item(state.AA, None, random_state)
        next_state = sample_list_item(states, None, random_state)
        reward = Reward(None, random_state.randint(10))
        model.update(state, action, next_state, reward)

    # Sample 1000 transitions back out of the model.
    environment_sequence = []
    for i in range(1000):
        state = model.sample_state(random_state)
        action = model.sample_action(state, random_state)
        next_state, reward = model.sample_next_state_and_reward(state, action, random_state)
        environment_sequence.append((next_state, reward))

    # uncomment the following line and run test to update fixture
    # with open(f'{os.path.dirname(__file__)}/fixtures/test_stochastic_environment_model.pickle', 'wb') as file:
    #     pickle.dump(environment_sequence, file)

    with open(f'{os.path.dirname(__file__)}/fixtures/test_stochastic_environment_model.pickle', 'rb') as file:
        environment_sequence_fixture = pickle.load(file)

    assert environment_sequence == environment_sequence_fixture
def spsa_step_one(self, lambdas, spsa_params, count):
    """Build the +/- perturbed kernel parameters for one SPSA step.

    The perturbation direction is a random vector of -1/+1 entries drawn
    from a generator seeded with ``count``; its size is
    ``spsa_params[1] / (count + 1) ** spsa_params[3]``.

    Args:
        lambdas (numpy.ndarray): kernel parameters at step 'count' in SPSA optimization loop
        spsa_params (numpy.ndarray): SPSA parameters
        count (int): the current step in the SPSA optimization loop

    Returns:
        numpy.ndarray: kernel parameters in + direction
        numpy.ndarray: kernel parameters in - direction
        numpy.ndarray: random vector with elements {-1,1}
    """
    rng = RandomState(count)
    step_size = float(spsa_params[1]) / np.power(count + 1, spsa_params[3])

    n_params = np.shape(lambdas)[0]
    # map draws from {0, 1} onto {-1, +1}
    delta = 2 * rng.randint(0, 2, size=n_params) - 1

    shift = step_size * delta
    return lambdas + shift, lambdas - shift, delta
def test_classification_kmeans_relevance(self):
    """ClassifierAfterKMeans scores above 0.95 on a 5x4 grid of labelled blobs."""
    state = RandomState(seed=0)
    blobs = []
    labels = []
    n = 20
    # One blob of n points per grid cell, each with one random class in [0, 4).
    for i in range(0, 5):
        for j in range(0, 4):
            x1 = state.rand(n) + i * 1.1
            x2 = state.rand(n) + j * 1.1
            blobs.append(numpy.vstack([x1, x2]).T)
            labels.extend([state.randint(0, 4)] * n)
    X = numpy.vstack(blobs)
    Y = numpy.array(labels)

    clk = ClassifierAfterKMeans(c_n_clusters=6, c_random_state=state)
    try:
        clk.fit(X, Y)
    except AttributeError as e:
        # tolerated only when the compared version is below 0.24
        if compare_module_version(sklver, "0.24") < 0:
            return
        raise e

    self.assertGreater(clk.score(X, Y), 0.95)
class TestStickExpectation(unittest.TestCase):
    """Test stick_expectation"""

    def setUp(self):
        self.rand = RandomState(0)
        self.num_stick = 100
        remaining = self.num_stick - 1
        # first row all ones, second row counting down from num_stick - 1 to 1
        self.uniform_stick = np.array([
            np.ones(remaining),
            np.arange(remaining, 0, -1),
        ])

    def test_stick_expectation_shape(self):
        """Output has one entry per stick and sums to one."""
        stick_count = self.rand.randint(100, 200)
        unit_params = np.ones((2, stick_count - 1))

        result = stick_expectation(unit_params)

        assert_equal(result.shape, (stick_count, ))
        assert_almost_equal(np.sum(result), 1.0)

    def test_uniform_stick_expectation(self):
        """The uniform parameters give equal weights that sum to one."""
        result = stick_expectation(self.uniform_stick)

        constant = np.ones(result.shape) * result[0]
        assert_almost_equal(result, constant)
        assert_almost_equal(np.sum(result), 1.0)
def seeds(self, value: Union[int, Mapping[str, int]]) -> None:
    """Set the per-component seeds ('map', 'objects', 'quest', 'surface').

    ``value`` is either a single int, from which all four seeds are derived
    with a ``RandomState``, or a mapping that supplies some or all of them;
    seeds missing from the mapping are drawn from ``g_rng.next()``.
    Afterwards the quest and surface generation options are updated.
    """
    keys = ['map', 'objects', 'quest', 'surface']

    provided = value
    if type(value) is int:
        rng = RandomState(value)
        provided = {}
    elif not set(value.keys()).issuperset(keys):
        # at least one seed is missing and must be generated
        rng = g_rng.next()

    self._seeds = {
        key: provided[key] if key in provided else rng.randint(65635)
        for key in keys
    }

    self.quest_gen_options.quest_rng = self.rngs['quest']
    self.surface_gen_options.seed = self._seeds['surface']
def test_binomial_optimize_refit():
    """Regression test: a copied ExpFamEP can be given new covariates and refit."""
    # Single seeded stream; the order of the draws fixes the pinned values.
    random = RandomState(139)
    nsamples = 30
    nfeatures = 31

    G = random.randn(nsamples, nfeatures) / sqrt(nfeatures)

    u = random.randn(nfeatures)

    # latent score per sample
    z = 0.1 + 2 * dot(G, u) + random.randn(nsamples)

    ntrials = random.randint(10, 500, size=nsamples)

    # successes = number of trials whose logistic-noise score is positive
    y = zeros(nsamples)
    for i in range(len(ntrials)):
        y[i] = sum(
            z[i] + random.logistic(scale=pi / sqrt(3), size=ntrials[i]) > 0)
    (Q, S0) = economic_qs_linear(G)

    M = ones((nsamples, 1))
    lik = BinomialProdLik(ntrials, LogitLink())
    lik.nsuccesses = y
    ep = ExpFamEP(lik, M, Q[0], Q[1], S0)
    ep.learn(progress=False)

    assert_allclose(ep.lml(), -144.2381842202486, rtol=1e-3)

    # The copy starts from the same value and leaves the original untouched.
    nep = ep.copy()

    assert_allclose(ep.lml(), -144.2381842202486, rtol=1e-3)
    assert_allclose(nep.lml(), -144.2381842202486, rtol=1e-3)

    # Add a random covariate column to the copy only, then refit it.
    nep.M = c_[M, random.randn(nsamples)]

    assert_allclose(nep.lml(), -145.7076758124364, rtol=1e-3)
    nep.learn(progress=False)
    assert_allclose(nep.lml(), -143.98475638974728, rtol=1e-3)
def generator(self):
    """
    Yield one training example per (head, relation, gold tail) triple.

    The triples come from ``triples_train_subset_idx`` when it is set and
    from ``triples_train_idx`` otherwise. Negative tail candidates are drawn
    in blocks of 4096 rows from a ``RandomState`` seeded with the index of
    the block's first triple.

    :return: generator of ``((head, relation), (tails, gold_tail, mask))``
        tuples of ``tf.constant`` values, where ``tails`` starts with the
        gold tail and ``mask`` marks that first position with 1.
    """
    data = self._data
    if data.triples_train_subset_idx is None:
        triples = data.triples_train_idx
    else:
        triples = data.triples_train_subset_idx

    def with_gold_first(head, gold_tail, candidates):
        # Any candidate equal to the gold tail is swapped for a substitute
        # vertex, then the gold tail is put in front.
        midpoint = int((head + gold_tail) / 2)
        if midpoint < (self._gp.num_vertices - 1):
            substitute = midpoint + 1
        else:
            substitute = midpoint
        candidates = np.where(candidates == gold_tail, substitute, candidates)
        return np.insert(candidates, 0, gold_tail)

    for idx, (head, relation, gold_tail) in enumerate(triples):
        # Hint: a dedicated RandomState is used because 'np.random.randint'
        # is not thread-safe
        row = idx % 4096
        if row == 0:
            block_rng = RandomState(idx)
            negatives = block_rng.randint(
                self._gp.num_vertices,
                size=(4096, self._model_params.num_negative))

        pos_neg_tails = with_gold_first(head, gold_tail, negatives[row])
        mask = np.array([1.] + [0.] * (len(pos_neg_tails) - 1),
                        dtype=self.np_precision)

        device = '/cpu:0' if self._deterministic is True else '/gpu:0'
        with tf.device(device):
            a = tf.constant(np.array([head]))
            b = tf.constant(np.array([relation]))
            c = tf.constant(np.array(pos_neg_tails))
            d = tf.constant(np.array([gold_tail]))
            e = tf.constant(np.array([mask]))
            yield (a, b), (c, d, e)
def test_binomial_get_normal_likelihood_trick():
    """Regression test: fast_scan values obtained via the normal-likelihood trick."""
    # Single seeded stream; the order of the draws fixes the pinned values.
    random = RandomState(139)
    nsamples = 30
    nfeatures = 31

    G = random.randn(nsamples, nfeatures) / sqrt(nfeatures)

    u = random.randn(nfeatures)

    # latent score per sample
    z = 0.1 + 2 * dot(G, u) + random.randn(nsamples)

    ntrials = random.randint(10, 500, size=nsamples)

    # successes = number of trials whose logistic-noise score is positive
    y = zeros(nsamples)
    for i in range(len(ntrials)):
        y[i] = sum(
            z[i] + random.logistic(scale=pi / sqrt(3), size=ntrials[i]) > 0)
    (Q, S0) = economic_qs_linear(G)

    M = ones((nsamples, 1))
    lik = BinomialProdLik(ntrials, LogitLink())
    lik.nsuccesses = y
    ep = ExpFamEP(lik, M, Q[0], Q[1], S0)
    ep.learn(progress=False)

    nlt = ep.get_normal_likelihood_trick()
    # One pinned value per column of G (31 features).
    assert_allclose(nlt.fast_scan(G)[0], [
        -143.48903288, -144.32031587, -144.03889888, -144.31806561,
        -143.90248659, -144.303103, -144.47854112, -144.44469341,
        -144.285027, -144.31240175, -143.11590263, -142.81623878,
        -141.67554141, -144.4780024, -144.47780285, -144.10317082,
        -142.10043322, -143.0813298, -143.99841663, -143.345783,
        -144.45458683, -144.37877612, -142.56846859, -144.32923028,
        -144.44116855, -144.45082936, -144.40932741, -143.0212886,
        -144.47902176, -143.94188634, -143.72765373
    ], rtol=1e-5)
def generateDegradation(args, seed):
    """Draw the random parameters of one point-set degradation.

    ``args`` supplies the ranges: ``D`` (dimension, 2 or 3), ``N`` (number
    of points) and the pairs ``rotate``, ``translate``, ``scale``, ``drop``,
    ``duplicate`` and ``noise``. All draws use a ``RandomState`` seeded with
    ``seed``, so the same arguments always give the same degradation.

    Returns
    -------
    (rotation, translation, scale, drops, duplications, noise)
        ``rotation`` is ``(angle,)`` in 2-D and ``(angle, unit_axis)`` in
        3-D; ``drops`` are the indices of removed points; ``duplications``
        holds one repeat count per remaining point; ``noise`` has one row
        per duplicated point.

    Raises
    ------
    ValueError
        If ``args.D`` is neither 2 nor 3 (previously this surfaced as an
        unbound ``rotation`` at the return statement).
    """
    from numpy.random import RandomState
    from numpy.linalg import norm

    if args.D not in (2, 3):
        raise ValueError(
            'args.D must be 2 or 3, got {!r}'.format(args.D))

    rs = RandomState(seed)
    if args.D == 2:
        rotation = (rs.uniform(*args.rotate),)
    else:
        angle = rs.uniform(*args.rotate)
        axis = rs.uniform(size=3)
        axis = axis / norm(axis)
        rotation = angle, axis
    translation = rs.uniform(*args.translate, size=args.D)
    scale = rs.uniform(*args.scale)
    if args.drop[0] == args.drop[1]:
        # randint rejects an empty range, so a fixed count is used directly
        ndrops = args.drop[0]
    else:
        ndrops = rs.randint(*sorted(args.drop))
    drops = rs.choice(range(args.N), size=ndrops, replace=False)
    duplications = rs.choice(
        range(args.duplicate[0], args.duplicate[1] + 1),
        size=args.N - ndrops)
    noise = rs.uniform(*args.noise) * rs.randn(sum(duplications), args.D)
    return rotation, translation, scale, drops, duplications, noise
def get_data_index2(data):
    """Map every entry of ``batch_dict(data)`` to a drug-code index.

    For each (name, SMILES) pair the first drug whose SMILES is equal is
    looked up and its position in the drug code list is stored. When no
    drug matches, an index in [0, 3882) is drawn from a generator seeded
    with the length of the SMILES string, so equally long unknown strings
    get the same index.
    """
    # list of all drug codes
    drug_code_list = get_drugs_code()
    # drugs and their SMILES
    drug_smiles_dict = get_drug_json()

    result_dict = dict()
    for name, smiles in batch_dict(data).items():
        for code, known_smiles in drug_smiles_dict.items():
            if smiles == known_smiles:
                result_dict[name] = drug_code_list.index(code)
                break
        else:
            # no known drug has this SMILES
            fallback_rng = RandomState(len(smiles))
            result_dict[name] = fallback_rng.randint(0, 3882)
    return result_dict
class RandomPolicy(LayerPolicy):
    """Layer policy that draws every layer id at random.

    Ids are drawn from ``1 .. num_layers`` (both inclusive) using a private
    ``RandomState`` seeded with ``policy_seed``.
    """

    def __init__(self, num_layers, policy_seed=None, **params):
        # ``params`` is accepted but not used by this policy.
        self._count = 0
        self._seed = policy_seed
        self._state = RandomState(policy_seed)
        super(RandomPolicy, self).__init__(num_layers)

    def _random_layer(self):
        """Return a random layer id in ``[1, self._num_layers]``."""
        # ``_num_layers`` is presumably set by LayerPolicy.__init__ -- confirm.
        upper_exclusive = self._num_layers + 1
        return self._state.randint(1, upper_exclusive)

    def initial_layer_id(self):
        """Return the first layer id (a fresh random draw)."""
        return self._random_layer()

    def next_layer_id(self):
        """Advance the cycle counter and return a newly drawn layer id."""
        self._count = (self._count + 1) % self._num_layers
        drawn = self._random_layer()
        self._layer_id = drawn
        return drawn

    def cycle_ended(self):
        """Return True once the counter has wrapped around to zero."""
        return self._count == 0

    def name(self):
        """Return the identifier of this policy."""
        return 'random'
def test_glmmexpfam_copy():
    """Check that ``GLMMExpFam.copy`` yields an independent model.

    After copying a fitted model, changing ``scale`` on the copy must leave
    the original's log marginal likelihood untouched, and refitting both
    models must bring them to the same value.
    """
    n_samples = 10
    rng = RandomState(0)

    # NOTE: the reference values depend on the exact order of these draws.
    X = rng.randn(n_samples, 5)
    K = linear_eye_cov().value()
    latent = rng.multivariate_normal(0.2 * ones(n_samples), K)
    QS = economic_qs(K)

    ntri = rng.randint(1, 30, n_samples)
    nsuc = zeros(n_samples, dtype=int)
    for idx in range(n_samples):
        noisy = latent[idx] + 0.2 * rng.randn(ntri[idx])
        nsuc[idx] += sum(noisy > 0)

    ntri = ascontiguousarray(ntri)
    glmm0 = GLMMExpFam(nsuc, ("binomial", ntri), X, QS)

    assert_allclose(glmm0.lml(), -29.10216812909928, atol=ATOL, rtol=RTOL)
    glmm0.fit(verbose=False)

    fitted_lml = -19.575736562427252
    assert_allclose(glmm0.lml(), fitted_lml)

    glmm1 = glmm0.copy()
    assert_allclose(glmm1.lml(), fitted_lml)

    # Mutating the copy must not leak into the original.
    glmm1.scale = 0.92
    assert_allclose(glmm0.lml(), fitted_lml, atol=ATOL, rtol=RTOL)
    assert_allclose(glmm1.lml(), -30.832831740038056, atol=ATOL, rtol=RTOL)

    glmm0.fit(verbose=False)
    glmm1.fit(verbose=False)

    refitted_lml = -19.575736562378573
    assert_allclose(glmm0.lml(), refitted_lml)
    assert_allclose(glmm1.lml(), refitted_lml)
class RandomWrapper:
    """Small convenience wrapper around ``numpy.random.RandomState``."""

    # Class-level placeholder; every instance replaces it in __init__.
    rand = None

    def __init__(self, seed=None):
        """Create the generator.

        When ``seed`` is None a seed is derived from the current clock and
        printed so that the run can be reproduced later.
        """
        if seed is None:
            seed = int(datetime.utcnow().timestamp())
            print("************* Seed is: {0}\n".format(seed))
        self.rand = RandomState(seed)

    def randint(self, low: int, high: int) -> int:
        """Return a random integer in ``[low, high)`` (high is exclusive)."""
        return self.rand.randint(low, high)

    def uniform(self, low: float, high: float) -> float:
        """Return a uniform draw between ``low`` and ``high``."""
        return self.rand.uniform(low, high)

    def random(self) -> float:
        """Return a uniform draw from the unit interval."""
        return self.rand.uniform(0.0, 1.0)

    def sample(self, elements: List, size: int) -> List:
        """Return ``size`` distinct elements drawn without replacement."""
        return self.rand.choice(elements, size=size, replace=False)
def validate(self) -> None:
    """
    Check arguments correctness and consistency.

    * a model name and a compiled model executable must be set
    * chain ids, when given, must all be >= 1
    * ``output_dir``, when given, is normalised, created if missing and
      must be a writeable directory
    * if no seed specified, set random seed
    * a seed is either one int in ``[0, 2**32 - 1]`` or a list with one
      such value per chain
    * input files (``data``, ``inits``) must exist
    * length of per-chain lists equals specified # of chains

    Side effects: may create ``output_dir``, briefly creates and removes a
    probe file inside it, rewrites ``self.output_dir`` to its real path and
    fills in ``self.seed`` when it was None.

    :raises ValueError: on the first violated constraint.
    """
    if self.model_name is None:
        raise ValueError('no stan model specified')
    if self.model_exe is None:
        raise ValueError('model not compiled')

    if self.chain_ids is not None:
        for chain_id in self.chain_ids:
            if chain_id < 1:
                raise ValueError('invalid chain_id {}'.format(chain_id))

    if self.output_dir is not None:
        self.output_dir = os.path.realpath(
            os.path.expanduser(self.output_dir))
        if not os.path.exists(self.output_dir):
            try:
                os.makedirs(self.output_dir)
                self._logger.info('created output directory: %s',
                                  self.output_dir)
            except (RuntimeError, PermissionError) as exc:
                raise ValueError(
                    'invalid path for output files, no such dir: {}'.
                    format(self.output_dir)) from exc
        if not os.path.isdir(self.output_dir):
            raise ValueError(
                'specified output_dir not a directory: {}'.format(
                    self.output_dir))
        # Probe writeability by creating and removing a scratch file.
        try:
            testpath = os.path.join(self.output_dir, str(time()))
            with open(testpath, 'w+'):
                pass
            os.remove(testpath)  # cleanup
        except Exception as exc:
            raise ValueError('invalid path for output files,'
                             ' cannot write to dir: {}'.format(
                                 self.output_dir)) from exc

    if self.seed is None:
        rng = RandomState()
        self.seed = rng.randint(1, 99999 + 1)
    else:
        if not isinstance(self.seed, (int, list)):
            raise ValueError(
                'seed must be an integer between 0 and 2**32-1,'
                ' found {}'.format(self.seed))
        if isinstance(self.seed, int):
            if self.seed < 0 or self.seed > 2**32 - 1:
                raise ValueError(
                    'seed must be an integer between 0 and 2**32-1,'
                    ' found {}'.format(self.seed))
        else:
            # Per-chain seeds: one entry for every chain.
            if self.chain_ids is None:
                raise ValueError(
                    'seed must not be a list when no chains used')
            if len(self.seed) != len(self.chain_ids):
                raise ValueError(
                    'number of seeds must match number of chains,'
                    ' found {} seed for {} chains '.format(
                        len(self.seed), len(self.chain_ids)))
            for chain_seed in self.seed:
                if chain_seed < 0 or chain_seed > 2**32 - 1:
                    raise ValueError('seed must be an integer value'
                                     ' between 0 and 2**32-1,'
                                     ' found {}'.format(chain_seed))

    if isinstance(self.data, str):
        if not os.path.exists(self.data):
            raise ValueError('no such file {}'.format(self.data))
    elif self.data is None:
        if isinstance(self.method_args, OptimizeArgs):
            raise ValueError('data must be set when optimizing')
    elif not isinstance(self.data, (str, dict)):
        raise ValueError('data must be string or dict')

    if self.inits is not None:
        if isinstance(self.inits, (Integral, Real)):
            # Zero is accepted, so the message states ">= 0" (it used to
            # claim "> 0", contradicting the check).
            if self.inits < 0:
                raise ValueError('inits must be >= 0, found {}'.format(
                    self.inits))
        elif isinstance(self.inits, str):
            if not os.path.exists(self.inits):
                raise ValueError('no such file {}'.format(self.inits))
        elif isinstance(self.inits, list):
            if self.chain_ids is None:
                raise ValueError(
                    'inits must not be a list when no chains are used')
            if len(self.inits) != len(self.chain_ids):
                raise ValueError(
                    'number of inits files must match number of chains,'
                    ' found {} inits files for {} chains '.format(
                        len(self.inits), len(self.chain_ids)))
            names_set = set(self.inits)
            if len(names_set) != len(self.inits):
                raise ValueError('each chain must have its own init file,'
                                 ' found duplicates in inits files list.')
            for inits_file in self.inits:
                if not os.path.exists(inits_file):
                    raise ValueError('no such file {}'.format(inits_file))
def build_program( self, random_state: RandomState) -> Union[List[Optional], None]: """ 在没有指定program的情况下,随机创建一个naive的program Parameters ---------- random_state: numpy.random.RandomState 随机种子状态 Returns ------- program: List[Optional] 树的内容,元素为函数(_Function)或常数或变量 """ # 初始方法有full, grow, half and half三种方法 if self.init_method == 'half and half': method = ('full' if random_state.randint(2) else 'grow') else: method = self.init_method # 最大深度 max_depth = random_state.randint(*self.init_depth) # 初始化,随机从function_set中选出一个函数 # 将挑选出的函数添加到program中 # 将挑选出的函数的参数个数添加到terminal_stack中 function = random_state.randint(len(self.function_set)) # 随机挑选一个函数结点 function = self.function_set[function] # 随机挑选一个函数 program = [function] # 向program中添加函数 terminal_stack = [function.arity] # 向terminal_stack中添加函数参数个数 # 开始生成树 while terminal_stack: # 树的深度(函数个数=树的深度(指有多少层子结点),因为这里是按照深度优先搜索) depth: int = len(terminal_stack) # choice等于特征数+函数数,表示选择数 choice: int = self.n_features + len(self.function_set) # 随机选择生成一个整数 choice: int = random_state.randint(choice) # 如果树的深度小于最大深度且选择full的方法生成树且choice小于等于可选函数个数 # 在full的情况下优先叠加函数 if (depth < max_depth) and (method == 'full' or choice <= len(self.function_set)): # 随机挑选一个函数 function = random_state.randint(len(self.function_set)) function = self.function_set[function] # 向program中添加函数 program.append(function) # 向terminal_stack中添加函数参数数目 terminal_stack.append(function.arity) else: # 如果包括常数, cost_range代表常数数值范围 if self.const_range is not None: terminal = random_state.randint(self.n_features + 1) # 如果不包含常数 else: terminal = random_state.randint(self.n_features) # 只有randint(self.n_features+1)的情况下才有可能满足条件,此时添加常数 if terminal == self.n_features: # 在cost_range所规定的范围内生成常数 terminal = random_state.uniform(*self.const_range) if self.const_range is None: raise ValueError('A constant was produced with ' 'const_range=None.') # 如果是变量,则terminal为整数;如果是常数,则terminal为常数本身 program.append(terminal) # 所在结点一个子结点完成处理 terminal_stack[-1] -= 1 # 如果一个结点的所有子结点都处理完毕,则删除该结点,同时上一层结点也-1 # 
如果所有子结点都处理完毕,则返回program,结束处理 while terminal_stack[-1] == 0: terminal_stack.pop() if not terminal_stack: return program terminal_stack[-1] -= 1 # We should never get here return None
class HierarchicalLDA(object):
    """Gibbs sampler for hierarchical LDA over a nested-CRP tree of topics.

    Every document owns a path of ``num_levels`` nodes from the root to a
    leaf, and every token of a document is assigned to one level of that
    path. ``estimate`` alternates between resampling the paths
    (``sample_path``) and the token levels (``sample_topics``).
    """

    def __init__(self, corpus, vocab,
                 alpha=10.0, gamma=1.0, eta=0.1,
                 seed=0, verbose=True, num_levels=3):
        """Build the tree and randomly initialise all assignments.

        Parameters
        ----------
        corpus : sequence of sequences
            Documents; each document is a sequence of word ids that are
            used as keys into ``node.word_counts``.
        vocab : sequence
            Vocabulary; only its length is used here, the object itself is
            handed to ``NCRPNode``.
        alpha : float
            Smoothing on doc-topic (level) distributions.
        gamma : float
            "Imaginary" customers at the next, as yet unused table.
        eta : float
            Smoothing on topic-word distributions.
        seed : int
            Seed of the sampler's own ``RandomState``.
        verbose : bool
            Stored on the instance; not used in this class.
        num_levels : int
            Depth of every path (root included).

        Note: resets the class-level counters ``NCRPNode.total_nodes`` and
        ``NCRPNode.last_node_id``.
        """
        NCRPNode.total_nodes = 0
        NCRPNode.last_node_id = 0

        self.corpus = corpus
        self.vocab = vocab
        self.alpha = alpha  # smoothing on doc-topic distributions
        self.gamma = gamma  # "imaginary" customers at the next, as yet unused table
        self.eta = eta      # smoothing on topic-word distributions

        self.seed = seed
        self.random_state = RandomState(seed)
        self.verbose = verbose

        self.num_levels = num_levels
        self.num_documents = len(corpus)
        self.num_types = len(vocab)
        self.eta_sum = eta * self.num_types

        # initialise a single path
        # (builtin ``object``/``int`` replace the ``np.object``/``np.int``
        # aliases, which newer NumPy releases no longer provide)
        path = np.zeros(self.num_levels, dtype=object)

        # initialize and fill the topic pointer arrays for
        # every document. Set everything to the single path that
        # we added earlier.
        self.root_node = NCRPNode(self.num_levels, self.vocab)
        # currently selected path (ie leaf node) through the NCRP tree
        self.document_leaves = {}
        # indexed < doc, token >
        self.levels = np.zeros(self.num_documents, dtype=object)
        for d in range(len(self.corpus)):

            # populate nodes into the path of this document
            doc = self.corpus[d]
            doc_len = len(doc)
            path[0] = self.root_node
            self.root_node.customers += 1  # always add to the root node first
            for level in range(1, self.num_levels):
                # at each level, a node is selected by its parent node based on the CRP prior
                parent_node = path[level-1]
                level_node = parent_node.select(self.gamma)
                level_node.customers += 1
                path[level] = level_node

            # set the leaf node for this document
            leaf_node = path[self.num_levels-1]
            self.document_leaves[d] = leaf_node

            # randomly assign each word in the document to a level (node) along the path
            self.levels[d] = np.zeros(doc_len, dtype=int)
            for n in range(doc_len):
                w = doc[n]
                random_level = self.random_state.randint(self.num_levels)
                random_node = path[random_level]
                random_node.word_counts[w] += 1
                random_node.total_words += 1
                self.levels[d][n] = random_level

    def estimate(self, num_samples, display_topics=50, n_words=5, with_weights=True):
        """Run ``num_samples`` Gibbs sweeps.

        Each sweep resamples the path of every document and then the level
        of every token. A dot is written to stdout per sweep and the tree is
        printed every ``display_topics`` sweeps (never after the first one).
        """
        # Parenthesised single-argument prints behave the same as the former
        # print statements on Python 2 and are valid on Python 3.
        print('HierarchicalLDA sampling')
        for s in range(num_samples):

            sys.stdout.write('.')

            for d in range(len(self.corpus)):
                self.sample_path(d)

            for d in range(len(self.corpus)):
                self.sample_topics(d)

            if (s > 0) and ((s+1) % display_topics == 0):
                print('')
                self.print_nodes(n_words, with_weights)

    def sample_path(self, d):
        """Resample the root-to-leaf path of document ``d``."""

        # define a path starting from the leaf node of this doc
        path = np.zeros(self.num_levels, dtype=object)
        node = self.document_leaves[d]
        for level in range(self.num_levels-1, -1, -1):  # e.g. [3, 2, 1, 0] for num_levels = 4
            path[level] = node
            node = node.parent

        # remove this document from the path, deleting empty nodes if necessary
        self.document_leaves[d].drop_path()

        ############################################################
        # calculates the prior p(c_d | c_{-d}) in eq. (4)
        ############################################################

        node_weights = {}
        self.calculate_ncrp_prior(node_weights, self.root_node, 0.0)

        ############################################################
        # calculates the likelihood p(w_d | c, w_{-d}, z) in eq. (4)
        ############################################################

        level_word_counts = {}
        for level in range(self.num_levels):
            level_word_counts[level] = {}
        doc_levels = self.levels[d]
        doc = self.corpus[d]

        # remove doc from path
        for n in range(len(doc)):  # for each word in the doc

            # count the word at each level
            level = doc_levels[n]
            w = doc[n]
            if w not in level_word_counts[level]:
                level_word_counts[level][w] = 1
            else:
                level_word_counts[level][w] += 1

            # remove word count from the node at that level
            level_node = path[level]
            level_node.word_counts[w] -= 1
            level_node.total_words -= 1
            assert level_node.word_counts[w] >= 0
            assert level_node.total_words >= 0

        self.calculate_doc_likelihood(node_weights, level_word_counts)

        ############################################################
        # pick a new path
        ############################################################

        nodes = np.array(list(node_weights.keys()))
        weights = np.array([node_weights[node] for node in nodes])
        weights = np.exp(weights - np.max(weights))  # normalise so the largest weight is 1
        weights = weights / np.sum(weights)

        choice = self.random_state.multinomial(1, weights).argmax()
        node = nodes[choice]

        # if we picked an internal node, we need to add a new path to the leaf
        if not node.is_leaf():
            node = node.get_new_leaf()

        # add the doc back to the path
        node.add_path()                     # add a customer to the path
        self.document_leaves[d] = node      # store the leaf node for this doc

        # add the words
        for level in range(self.num_levels-1, -1, -1):  # e.g. [3, 2, 1, 0] for num_levels = 4
            word_counts = level_word_counts[level]
            for w in word_counts:
                node.word_counts[w] += word_counts[w]
                node.total_words += word_counts[w]
            node = node.parent

    def calculate_ncrp_prior(self, node_weights, node, weight):
        ''' Calculates the prior on the path according to the nested CRP

        Fills ``node_weights[node]`` for ``node`` and all its descendants.
        ``weight`` is the log-probability accumulated on the way down to
        ``node``; the stored value adds the log-probability of opening a
        new table (``gamma``) below ``node``.
        '''

        for child in node.children:
            child_weight = log( float(child.customers) / (node.customers + self.gamma) )
            self.calculate_ncrp_prior(node_weights, child, weight + child_weight)

        node_weights[node] = weight + log( self.gamma / (node.customers + self.gamma))

    def calculate_doc_likelihood(self, node_weights, level_word_counts):
        """Add the document's word log-likelihood to every ``node_weights`` entry.

        ``level_word_counts[level]`` maps word -> count of the document's
        tokens currently assigned to that level.
        """

        # calculate the weight for a new path at a given level
        new_topic_weights = np.zeros(self.num_levels)
        for level in range(1, self.num_levels):  # skip the root

            word_counts = level_word_counts[level]
            total_tokens = 0

            for w in word_counts:
                count = word_counts[w]
                # Tokens are added one at a time: the i-th copy of word w
                # sees i earlier copies in the numerator, and every token
                # added so far grows the denominator (empty, new topic).
                for i in range(count):
                    new_topic_weights[level] += log((self.eta + i) / (self.eta_sum + total_tokens))
                    total_tokens += 1

        self.calculate_word_likelihood(node_weights, self.root_node, 0.0, level_word_counts, new_topic_weights, 0)

    def calculate_word_likelihood(self, node_weights, node, weight, level_word_counts, new_topic_weights, level):
        """Recursively add word log-likelihoods to ``node_weights``.

        For ``node`` (sitting at ``level``) the likelihood of the document's
        tokens at that level is computed, passed down to the children, and,
        together with the new-topic weights of all deeper levels, added to
        ``node_weights[node]``.
        """

        # first calculate the likelihood of the words at this level, given this topic
        node_weight = 0.0
        word_counts = level_word_counts[level]
        total_words = 0

        for w in word_counts:
            count = word_counts[w]
            # Same token-by-token scheme as in calculate_doc_likelihood, but
            # starting from the counts already stored on this node.
            for i in range(count):
                node_weight += log( (self.eta + node.word_counts[w] + i) /
                                    (self.eta_sum + node.total_words + total_words) )
                total_words += 1

        # propagate that weight to the child nodes
        for child in node.children:
            self.calculate_word_likelihood(node_weights, child, weight + node_weight,
                                           level_word_counts, new_topic_weights, level+1)

        # finally if this is an internal node, add the weight of a new path
        level += 1
        while level < self.num_levels:
            node_weight += new_topic_weights[level]
            level += 1

        # NOTE(review): the ancestors' accumulated ``weight`` is passed down
        # but not added here -- confirm that this is intended.
        node_weights[node] += node_weight

    def sample_topics(self, d):
        """Resample the level assignment of every token of document ``d``."""

        doc = self.corpus[d]

        # initialise level counts
        doc_levels = self.levels[d]
        level_counts = np.zeros(self.num_levels, dtype=int)
        for c in doc_levels:
            level_counts[c] += 1

        # get the leaf node and populate the path
        path = np.zeros(self.num_levels, dtype=object)
        node = self.document_leaves[d]
        for level in range(self.num_levels-1, -1, -1):  # e.g. [3, 2, 1, 0] for num_levels = 4
            path[level] = node
            node = node.parent

        # sample a new level for each word
        level_weights = np.zeros(self.num_levels)
        for n in range(len(doc)):

            w = doc[n]
            word_level = doc_levels[n]

            # remove from model
            level_counts[word_level] -= 1
            node = path[word_level]
            node.word_counts[w] -= 1
            node.total_words -= 1

            # pick new level
            for level in range(self.num_levels):
                level_weights[level] = (self.alpha + level_counts[level]) *                     \
                    (self.eta + path[level].word_counts[w]) /                                   \
                    (self.eta_sum + path[level].total_words)
            level_weights = level_weights / np.sum(level_weights)
            level = self.random_state.multinomial(1, level_weights).argmax()

            # put the word back into the model
            doc_levels[n] = level
            level_counts[level] += 1
            node = path[level]
            node.word_counts[w] += 1
            node.total_words += 1

    def print_nodes(self, n_words, with_weights):
        """Print the whole topic tree, starting at the root."""
        self.print_node(self.root_node, 0, n_words, with_weights)

    def print_node(self, node, indent, n_words, with_weights):
        """Print ``node`` and, recursively, its children one level deeper."""
        out = ' ' * indent
        out += 'topic %d (level=%d, total_words=%d, documents=%d): ' % (node.node_id, node.level, node.total_words, node.customers)
        out += node.get_top_words(n_words, with_weights)
        print(out)
        for child in node.children:
            self.print_node(child, indent+1, n_words, with_weights)
def analyse_response_repeats_all_trials(repeats_data, anchor_model, neg_model,
                                        sess):
    """Embed repeated responses and compute all pairwise distances.

    Ten time indices are drawn (fixed seed 50) from the second axis of
    ``repeats_data['repeats']``. The responses of all trials at those times
    are embedded with ``anchor_model`` and squared distances between every
    pair of responses are computed, both in the embedded space and directly
    on the raw responses.

    Args:
      repeats_data: dict with 'repeats' (indexed here as
        [trial, time, cell]), 'map_cell_grid', 'ctype_1hot' and
        'mean_firing_rate'; the last three are fed to the model unchanged.
      anchor_model: object exposing the placeholders ``map_cell_grid_tf``,
        ``cell_types_tf``, ``mean_fr_tf``, ``responses_tf`` and the tensor
        ``responses_embed``.
      neg_model: unused; kept for interface compatibility.
      sess: session-like object with ``run(fetches, feed_dict=...)``.

    Returns:
      dict with keys 'distances', 'responses', 'stim_idx', 'resp_embed',
      'random_times' and 'distances_euclidean'. Rows of 'responses' are
      ordered time-major (all trials of the first time, then the next, ...)
      and 'stim_idx' gives the position of each row's time within
      'random_times'.
    """

    def make_feed(response_batch):
        # Feed dict for embedding one batch of responses (adds a trailing
        # singleton axis to the batch).
        return {
            anchor_model.map_cell_grid_tf: repeats_data['map_cell_grid'],
            anchor_model.cell_types_tf: repeats_data['ctype_1hot'],
            anchor_model.mean_fr_tf: repeats_data['mean_firing_rate'],
            anchor_model.responses_tf: np.expand_dims(response_batch, 2)
        }

    # generate positive examples of responses - responses at same time,
    # different repeats
    prng = RandomState(50)
    n_trials = repeats_data['repeats'].shape[0]
    n_random_times = 10
    random_times = prng.randint(0, repeats_data['repeats'].shape[1],
                                n_random_times)
    responses = repeats_data['repeats'][:, random_times, :].astype(np.float32)
    responses = np.transpose(responses, [1, 0, 2])
    responses = np.reshape(
        responses,
        [n_trials * n_random_times, responses.shape[2]]).astype(np.float32)
    stim_idx = np.repeat(np.arange(n_random_times), n_trials, 0)

    # embed a sample response to get dimensions
    resp_test = sess.run(anchor_model.responses_embed,
                         feed_dict=make_feed(responses[0:100, :]))
    resp_embed = np.zeros(
        (responses.shape[0], resp_test.shape[1], resp_test.shape[2], 1))

    # embed the responses
    # since we use batch norm in testing, we need to jumble the response to
    # get correct estimate of batch norm statistics
    # NOTE(review): the shuffle uses the global NumPy RNG rather than the
    # seeded ``prng``, so batch composition differs between runs.
    tms = np.arange(responses.shape[0])
    tms_jumble = np.random.permutation(tms)
    batch_sz = 100
    for itm in np.arange(0, tms_jumble.shape[0], batch_sz):
        print(itm)
        batch_rows = tms_jumble[itm:itm + batch_sz]
        resp_embed[batch_rows, :, :, :] = sess.run(
            anchor_model.responses_embed,
            feed_dict=make_feed(responses[batch_rows, :]))

    # compute distance between pairs of responses, block by block to bound
    # the size of the broadcast intermediate
    distances = np.zeros((responses.shape[0], responses.shape[0]))
    distances_euclidean = np.zeros((responses.shape[0], responses.shape[0]))
    # plain int: ``np.int`` is no longer available in newer NumPy releases
    batch_dist = 100
    for iresp in np.arange(0, distances.shape[0], batch_dist):
        print(iresp)
        for jresp in np.arange(0, distances.shape[1], batch_dist):
            r1 = np.expand_dims(resp_embed[iresp:iresp + batch_dist], 1)
            r2 = np.expand_dims(resp_embed[jresp:jresp + batch_dist], 0)
            distances[iresp:iresp + batch_dist,
                      jresp:jresp + batch_dist] = np.sum((r1 - r2)**2,
                                                         (2, 3, 4))

            rr1 = np.expand_dims(responses[iresp:iresp + batch_dist], 1)
            rr2 = np.expand_dims(responses[jresp:jresp + batch_dist], 0)
            distances_euclidean[iresp:iresp + batch_dist,
                                jresp:jresp + batch_dist] = np.sum(
                                    (rr1 - rr2)**2, 2)

    test_clustering = {
        'distances': distances,
        'responses': responses,
        'stim_idx': stim_idx,
        'resp_embed': resp_embed,
        'random_times': random_times,
        'distances_euclidean': distances_euclidean
    }
    return test_clustering
class MicrobeFactory:
    """Creates, tracks and removes the microbes of one game session.

    Microbes are generated in "epochs". The ``*_mobile`` constructor
    arguments are the values used instead of their desktop counterparts
    when the game type is ``Game.Type.mobile``.
    """

    def __init__(self,
                 user_id,
                 game_type,
                 store,
                 microbe_types,
                 n_in_epoch=6,
                 n_in_epoch_promo=10,
                 epoch_period=5000,
                 second_epoch_period=3000,
                 n_in_epoch_mobile=4,
                 n_in_epoch_promo_mobile=8,
                 epoch_period_mobile=1000,
                 second_epoch_period_mobile=1000,
                 ):
        """Store the configuration and derive the playing-field grid.

        ``microbe_types`` must be a non-empty sequence of mappings with
        positive 'type', 'width' and 'height' entries. The grid is derived
        from the 'top_bar_size', 'cell_width' and 'cell_height' entries of
        ``settings.config['game']``. The random generator is seeded with
        the current time.
        """
        self._user_id = user_id
        self._game_type = game_type
        self._store = store
        assert self._game_type is not None

        self._seed = int(time.time())
        self._rnd = RandomState(self._seed)
        self._microbes = []

        self._n_in_epoch = n_in_epoch
        self._n_in_epoch_promo = n_in_epoch_promo
        self._epoch_period = epoch_period
        self._second_epoch_period = second_epoch_period
        self._n_in_epoch_mobile = n_in_epoch_mobile
        self._n_in_epoch_promo_mobile = n_in_epoch_promo_mobile
        self._epoch_period_mobile = epoch_period_mobile
        self._second_epoch_period_mobile = second_epoch_period_mobile

        self.microbe_types = microbe_types
        self._epoch = 0
        self._last_epoch_time = None

        assert self.microbe_types is not None
        assert len(self.microbe_types) > 0
        for m in self.microbe_types:
            assert m['type'] > 0
            assert m['width'] > 0
            assert m['height'] > 0

        top_bar_size = self.game_cfg['top_bar_size']
        self.cell_width = self.game_cfg['cell_width']
        self.cell_height = self.game_cfg['cell_height']

        # Field extent: x spans [-1, 1]; y leaves room for the top bar and
        # half a cell at the bottom.
        self.x_max = 1
        self.x_min = -1
        self.y_max = 1 - top_bar_size
        self.y_min = -1 + self.cell_height / 2

        self.cells_x = math.floor((self.x_max - self.x_min) / self.cell_width)
        self.cells_y = math.floor((self.y_max - self.y_min) / self.cell_height)

    @property
    def n_in_epoch(self):
        """Microbes generated per epoch (mobile-aware)."""
        return self._n_in_epoch_mobile if \
            self.is_mobile() else self._n_in_epoch

    @property
    def n_in_epoch_promo(self):
        """Microbes generated per epoch while a promo is active."""
        return self._n_in_epoch_promo_mobile if \
            self.is_mobile() else self._n_in_epoch_promo

    @property
    def epoch_period(self):
        """Time between epochs once the game is running (see check_world)."""
        return self._epoch_period_mobile if \
            self.is_mobile() else self._epoch_period

    @property
    def second_epoch_period(self):
        """Time between epochs during the first epochs (see check_world)."""
        return self._second_epoch_period_mobile if \
            self.is_mobile() else self._second_epoch_period

    @property
    def store(self):
        return self._store

    @property
    def game_cfg(self):
        """The 'game' section of the global settings."""
        return settings.config['game']

    @property
    def user_id(self):
        return self._user_id

    @property
    def microbes(self):
        """The currently tracked microbes."""
        return self._microbes

    @property
    def epoch(self):
        """Number of epochs generated so far."""
        return self._epoch

    def dump_microbes(self, microbes=None):
        """Return ``to_dict()`` of the given microbes (default: all tracked)."""
        if microbes is None:
            microbes = self.microbes
        return [m.to_dict() for m in microbes]

    def gen_microbes(self, has_promo):
        """Start a new epoch and return the microbes created for it."""
        self._epoch += 1
        self._last_epoch_time = datetime.datetime.now()

        n = self.n_in_epoch_promo if has_promo else self.n_in_epoch

        new_microbes = []
        for _ in range(n):
            microbe_info_i = self._rnd.randint(0, len(self.microbe_types))
            m = Microbe(self, self._epoch,
                        self.microbe_types[microbe_info_i], self._rnd)
            # Positioned relative to the microbes tracked so far, then added.
            m.set_position(self.microbes)
            self.microbes.append(m)
            new_microbes.append(m)
        return new_microbes

    def get_alive(self):
        """Return the tracked microbes whose ``is_alive`` is truthy."""
        return [item for item in self.microbes if item.is_alive]

    def shoot(self, x, y, has_promo, radius=None):
        """Shoot at ``(x, y)`` and return ``(score, killed_ids)``.

        The closest living microbe must be hit for the shot to count. Then
        the closest one (the closest four with a promo) are damaged; those
        whose ``damage()`` returns <= 0 are reported as killed. Dead
        microbes are dropped from the tracked list afterwards.
        """
        killed = []
        # math.hypot replaces ``numpy.math.hypot``: ``numpy.math`` was only
        # an alias of the stdlib module and is gone in NumPy 2.
        closest = sorted(
            self.get_alive(),
            key=lambda item: math.hypot(x - item.x, y - item.y)
        )
        microbes = closest[:4] if has_promo else closest[:1]

        if len(microbes) > 0 and microbes[0].is_hit(x, y, radius):
            for m in microbes:
                if m.damage() <= 0:
                    killed.append(m.id)

        self._microbes = self.get_alive()
        score = len(killed)  # simple for just now
        return score, killed

    def check_world(self, game_started_at, current_time, has_promo):
        """Advance the epoch if enough time has passed.

        Returns ``(new_or_all_microbes_as_dicts, removed_ids)`` when a new
        epoch was generated, otherwise None.
        """
        if self._last_epoch_time is None:
            self._last_epoch_time = game_started_at

        delta = (current_time - self._last_epoch_time).total_seconds() * MS

        epoch_to_delete = self._epoch - 1
        if self.is_mobile():
            epoch_to_delete = self._epoch

        if self._epoch >= 2 or self.is_mobile():
            # remove & generate
            if delta >= self.epoch_period:
                # removing epoch
                removed_microbes = []
                for m in self.microbes:
                    if m.epoch == epoch_to_delete:
                        # remove previous epoch
                        m.kill()
                        removed_microbes.append(m.id)

                # creating new epoch
                self._microbes = self.get_alive()
                new_microbes = self.gen_microbes(has_promo)
                return self.dump_microbes(new_microbes), removed_microbes
        else:
            # just add new epoch
            if delta >= self.second_epoch_period:
                # creating new epoch
                self.gen_microbes(has_promo)
                return self.dump_microbes(), []

        return None

    def is_mobile(self):
        """Return True when the game type is ``Game.Type.mobile``."""
        return self._game_type == Game.Type.mobile
# Script-level setup: build a random DAG, sample data from it, fit a score
# and create an initial DAG state. All random choices go through ``rng``, so
# the order of the statements below determines the results.
n_variables = 15
seeds = list(range(101, 200))
rng = RandomState(19023)
variables = list(range(n_variables))
n_samples = 200

# Data generation parameters
gen_mean = np.zeros(n_variables)
gen_var = np.zeros(n_variables) + 0.2
gen_weight = 2

# Generate some data from a GN
graph = random_dag(variables, rng=rng)
# Transposed adjacency matrix scaled by the common edge weight.
beta = graph.A.T * gen_weight
# Seed for the sampler, itself drawn from ``rng``.
sample_seed = rng.randint(0, 2**32 - 1)
data_gn = sample_from_gn(graph, gen_mean, gen_var, beta, n_samples, sample_seed)

# Fit the score and create the parent set distributions
fan_in = 5
bge = BGe(data_gn)
ps_dist = get_parent_set_distributions(variables, fan_in, bge, rng=rng)

# Some random state to start
state = DAGState(random_dag(variables, fan_in, rng=rng))

# Check consistency of first state
check_consistency(state)
def cross_validate(seed_dir, language, n_samples, n_splits, evl_size,
                   max_epochs, batch_size, max_generalization_loss,
                   min_coverage, hyperparams, seed=None, verbose=False,
                   dashboard_port=6006):
    """Perform k-fold cross-validation of the language model.

    The process is designed to be able to continue with minimal additional
    effort after a crash. It can therefore be stopped and taken up again
    later: a fold whose ``report.json`` already exists is not retrained, its
    stored loss is reused, and the random draws it would have consumed are
    skipped so later folds receive the same seeds as in an uninterrupted run.

    Tensorboard is served during each training run and shut down afterwards,
    also when training or validation fails.

    :param str seed_dir: Where to store model data for this seed. If
        cross-validation is performed for multiple seeds, multiple seed
        directories are needed.
    :param yokome.language.Language language: The language to train on.
    :param int n_samples: The number of sample sentences to load.
    :param int n_splits: The number ``k`` of folds.
    :param float evl_size: The portion of evaluation samples w.r.t. the
        non-validation part of all samples.
    :param int max_epochs: The maximum number of epochs to train for. The
        actual number of epochs may be less if the training process stops
        early.
    :param int batch_size: The number of sentences to estimate the
        probability for in parallel.
    :param float max_generalization_loss: The maximum generalization loss at
        which the training process is still continued.
    :param min_coverage: The portion of the corpus that has to be covered by
        the minimal vocabulary of the most frequent words that is used to
        encode incoming data.
    :param hyperparams: The model parameters used in this pass of
        cross-validation.
    :param int seed: The seed used for the pseudo-random number generator
        that generates the seeds for the models to be trained.
    :param bool verbose: Whether to print progress indication.
    :param int dashboard_port: On which port to serve Tensorboard.

    :return: The average loss over all folds.
    """
    total_loss = 0
    r = RandomState(seed)
    for i, (trn, evl, vld) in enumerate(
            kfold(language, n_samples, n_splits, evl_size), start=1):
        fold_dir = seed_dir + ('/fold_%d' % (i, ))
        if verbose:
            print(' Fold %d...' % (i, ))

        # Reuse the result of a fold that was already completed.
        try:
            with open(fold_dir + '/report.json', 'r') as f:
                total_loss += json.load(f)['loss']
        except OSError:
            pass
        else:
            # Consume the two draws this fold would have used (model seed
            # and shuffle seed) to keep the generator in step.
            r.randint(0x100000000)
            r.randint(0x100000000)
            continue

        vocabulary = generate_vocabulary_from(language, trn + evl,
                                              min_coverage)
        model_seed = r.randint(0x100000000)
        os.makedirs(fold_dir, exist_ok=True)
        model = LanguageModel(fold_dir, params=hyperparams, seed=model_seed,
                              production_mode=False, language=language,
                              vocabulary=vocabulary)
        tensorboard = Popen([
            'tensorboard', '--logdir', model.training_dir(), '--port',
            str(dashboard_port)
        ], stdout=DEVNULL, stderr=DEVNULL)
        try:
            model.train(trn, evl, max_epochs, batch_size,
                        max_generalization_loss=max_generalization_loss,
                        shuffle=True, random_state=r.randint(0x100000000),
                        verbose=False)

            # Load the best model
            model = LanguageModel(fold_dir, params=hyperparams,
                                  seed=model_seed, production_mode=True,
                                  language=language, vocabulary=None)
            loss = float(model.validate(vld, batch_size)['loss'])

            # Write to a temporary file first and rename it, so that an
            # existing report.json is always complete.
            with open(fold_dir + '/.tmp.report.json', 'w') as f:
                json.dump({'loss': loss}, f)
            os.replace(fold_dir + '/.tmp.report.json',
                       fold_dir + '/report.json')
            total_loss += loss
        finally:
            # Never leave the dashboard process behind, not even on errors.
            tensorboard.terminate()
            tensorboard.wait()
    return total_loss / n_splits
# ``field``/``asdict``/``__post_init__`` below only work on a dataclass, so
# the decorator (and its import) is made explicit here.
from dataclasses import dataclass


@dataclass
class Simulation:
    """Simulation of a pair of agents scored by the entropy of their behavior.

    The dataclass fields are the configuration of a simulation (they are what
    ``save_to_file``/``load_from_file`` and ``evaluate`` copy around); all the
    other attributes are working state created in ``__post_init__`` and
    ``run_simulation``.
    """

    # N==0 -> (agents are evolved in pairs: a genotype contains a pair of agents)
    # N>0  -> each agent will go though a simulation with N other agents (randomly chosen)
    num_random_pairings: int = 0
    entropy_type: str = 'shannon-dd'  # 'shannon-1d', 'shannon-dd', 'transfer', 'sample'
    entropy_target_value: str = 'neural'  # 'neural', 'distance', 'angle'
    concatenate: bool = True  # whether to concatenate values in entropy_target_value
    isolation: bool = False  # whether to run simulation on a single agent (as if second agent does not exist)
    genotype_structure: Dict = field(default_factory=lambda:gen_structure.DEFAULT_GEN_STRUCTURE(2))
    num_brain_neurons: int = None  # initialized in __post_init__
    collision_type: str = 'overlapping'  # 'none', 'overlapping', 'edge'
    agent_body_radius: int = 4
    agents_pair_initial_distance: int = 20
    agent_sensors_divergence_angle: float = np.radians(45)  # angle between sensors and axes of symmetry
    brain_step_size: float = 0.1
    num_trials: int = 4  # hard coded
    trial_duration: int = 200
    num_cores: int = 1
    data_noise_level: float = 1e-8
    timeit: bool = False

    def __post_init__(self):
        """Derive the dependent values and build the two agents."""
        self.num_brain_neurons = gen_structure.get_num_brain_neurons(self.genotype_structure)
        self.num_data_points = int(self.trial_duration / self.brain_step_size)
        if self.isolation:
            # if we run agents in isolation we want to ignore collisions.
            # This has to happen before the bodies are created, because they
            # receive collision_type in their constructor.
            self.collision_type = 'none'
        self.init_agents_pair()
        self.set_initial_positions_angles()
        self.timing = Timing(self.timeit)
        self.__check_params__()

    def __check_params__(self):
        """Assert that the configured combination of options is supported."""
        assert self.num_random_pairings >= 0, \
            "Number of pairing must be >= 0 (0 if a genotype already contains a pair of agents)"

        assert_string_in_values(self.collision_type, 'collision_type', ['none', 'overlapping', 'edge'])
        assert_string_in_values(self.entropy_type, 'entropy_type', ['shannon-1d', 'shannon-dd', 'transfer', 'sample'])
        assert_string_in_values(self.entropy_target_value, 'entropy_target_value', ['neural', 'distance', 'angle'])

        if self.entropy_type in ['shannon-1d', 'shannon-dd']:
            accepted_entropy_target_values = ['neural', 'distance', 'angle']
            assert self.entropy_target_value in accepted_entropy_target_values, \
                "Shannon entropy currently works only when entropy_target_value in {}".format(accepted_entropy_target_values)

        if self.entropy_type == 'transfer':
            assert self.entropy_target_value == 'neural' and self.num_brain_neurons == 2, \
                'Transfer entropy currently works only on two dimensional data (i.e., 2 neural outputs per agent)'

        if self.entropy_target_value == 'angle':
            assert self.entropy_type in ['shannon-1d','sample'], \
                "entropy on angle works only for entropy_type in ['shannon-1d','sample']"

    def init_agents_pair(self):
        """Create the network and the body of each of the two agents."""
        self.agents_pair_net = []
        self.agents_pair_body = []
        for _ in range(2):
            self.agents_pair_net.append(
                AgentNetwork(
                    self.num_brain_neurons,
                    self.brain_step_size,
                    self.genotype_structure,
                )
            )
            self.agents_pair_body.append(
                AgentBody(
                    self.agent_body_radius,
                    self.agent_sensors_divergence_angle,
                    collision_type=self.collision_type,
                    timeit=self.timeit
                )
            )

    def set_initial_positions_angles(self, random_state=None):
        """Set the start position and start angle of both agents in every trial.

        :param random_state: optional random generator (an object with a
            ``uniform`` method); when omitted the fixed layout is used.
        """
        randomize = random_state is not None

        if randomize:
            self.agents_pair_start_angle_trials = pi * random_state.uniform(0, 2, (self.num_trials,2))
        else:
            # first agent always points right
            # second agent at points right, up, left, down in each trial respectively
            self.agents_pair_start_angle_trials = [
                [0., 0.],
                [0., pi/2],
                [0., pi],
                [0., 3*pi/2],
            ]

        # first agent positioned at (0,0)
        # second agent agents_pair_initial_distance units away from first,
        # along its facing direction
        # (right, up, left, down) if not random
        self.agents_pair_start_pos_trials = [
            [
                np.array([0.,0.]),
                self.agents_pair_initial_distance *
                np.array(
                    [
                        np.cos(self.agents_pair_start_angle_trials[i][1]),
                        np.sin(self.agents_pair_start_angle_trials[i][1])
                    ]
                )
            ]
            for i in range(self.num_trials)
        ]

        if randomize:
            # reinitialized the angle because it was used for positioning
            # we don't want the second agent to necessarily face outwards
            self.agents_pair_start_angle_trials = pi * random_state.uniform(0, 2, (self.num_trials,2))

    def save_to_file(self, file_path):
        """Write the dataclass fields of this simulation as json to file_path."""
        with open(file_path, 'w') as f_out:
            obj_dict = asdict(self)
            json.dump(obj_dict, f_out, indent=3, cls=NumpyListJsonEncoder)

    @staticmethod
    def load_from_file(file_path, **kwargs):
        """Build a Simulation from a json file written by save_to_file.

        :param kwargs: values overriding the ones stored in the file.
        """
        with open(file_path) as f_in:
            obj_dict = json.load(f_in)

        if kwargs:
            obj_dict.update(kwargs)

        sim = Simulation(**obj_dict)
        gen_structure.check_genotype_structure(sim.genotype_structure)
        return sim

    def set_agents_genotype_phenotype(self):
        '''
        Select the genotypes of the two agents and set their phenotypes.
        Without random pairings the genotype at genotype_index holds both
        agents (one after the other) and is split in two; otherwise the
        second agent is the randomly chosen partner of this simulation.
        '''
        phenotypes = [None,None]
        if self.num_random_pairings == 0:
            genotypes_pair = self.genotype_population[self.genotype_index]
            genotypes_split = np.array_split(genotypes_pair, 2)
        else:
            genotypes_split = [
                self.genotype_population[self.genotype_index],
                self.genotype_population[self.rand_agent_indexes[self.sim_index]],
            ]
        if self.data_record is not None:
            self.data_record['genotypes'] = genotypes_split
            phenotypes = [{},{}]
            self.data_record['phenotypes'] = phenotypes
        for a in range(2):
            self.agents_pair_net[a].genotype_to_phenotype(
                genotypes_split[a], phenotype_dict=phenotypes[a])

    def init_values_for_computing_entropy(self):
        """Allocate the zero-filled buffers the entropy is computed from."""
        if self.entropy_target_value == 'neural':
            # initialize agents brain output of all trial for computing entropy
            # list of list (num_trials x 2 agents) each containing array (num_data_points,num_brain_neurons)
            self.values_for_computing_entropy = [
                [
                    np.zeros((self.num_data_points, self.num_brain_neurons))
                    for _ in range(2)
                ] for _ in range(self.num_trials)
            ]
        elif self.entropy_target_value == 'distance':
            # distance (1-d data) per trial
            # entropy is computed based on distances
            # one array per trial with the agent distances
            self.values_for_computing_entropy = [
                np.zeros((self.num_data_points,1))
                for _ in range(self.num_trials)
            ]
        else:
            # angle: (1-d data) per trial per agent
            assert self.entropy_target_value == 'angle'
            self.values_for_computing_entropy = [
                [
                    np.zeros((self.num_data_points,1))
                    for _ in range(2)
                ] for _ in range(self.num_trials)
            ]

    def init_data_record(self):
        """Create the per-trial (and per-agent) slots of the data record."""
        if self.data_record is None:
            return
        self.data_record['position'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['distance'] = [None for _ in range(self.num_trials)]
        self.data_record['angle'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['collision'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['delta_xy'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['signal_strength'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['brain_input'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['brain_state'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['derivatives'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['brain_output'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['wheels'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['emitter'] = [[None,None] for _ in range(self.num_trials)]
        self.timing.add_time('SIM_init_data', self.tim)

    def init_data_record_trial(self, t):
        """Allocate the arrays of trial t in the data record."""
        if self.data_record is None:
            return
        self.data_record['distance'][t] = np.zeros(self.num_data_points)
        for a in range(2):
            if self.ghost_index == a:
                # copy all ghost agent's values from original_data_record
                # (done once, on the first trial, for every key of the record)
                if t == 0:
                    for k in self.data_record:
                        self.data_record[k] = deepcopy(self.original_data_record[k])
            else:
                self.data_record['position'][t][a] = np.zeros((self.num_data_points, 2))
                self.data_record['angle'][t][a] = np.zeros(self.num_data_points)
                self.data_record['collision'][t][a] = np.zeros(self.num_data_points)
                self.data_record['delta_xy'][t][a] = np.zeros((self.num_data_points, 2))
                self.data_record['signal_strength'][t][a] = np.zeros((self.num_data_points, 2))
                self.data_record['brain_input'][t][a] = np.zeros((self.num_data_points, self.num_brain_neurons))
                self.data_record['brain_state'][t][a] = np.zeros((self.num_data_points, self.num_brain_neurons))
                self.data_record['derivatives'][t][a] = np.zeros((self.num_data_points, self.num_brain_neurons))
                self.data_record['brain_output'][t][a] = np.zeros((self.num_data_points, self.num_brain_neurons))
                self.data_record['wheels'][t][a] = np.zeros((self.num_data_points, 2))
                self.data_record['emitter'][t][a] = np.zeros(self.num_data_points)
        self.timing.add_time('SIM_init_trial_data', self.tim)

    def save_data_record(self, t, i):
        """Store the current state of the agents at step i of trial t."""
        if self.data_record is None:
            return
        self.data_record['distance'][t][i] = self.get_agents_distance()
        for a in range(2):
            if self.ghost_index == a:
                continue  # do not save data for ghost: already saved in init_data_record_trial
            agent_net = self.agents_pair_net[a]
            agent_body = self.agents_pair_body[a]
            self.data_record['position'][t][a][i] = agent_body.position
            self.data_record['angle'][t][a][i] = agent_body.angle
            self.data_record['collision'][t][a][i] = 1 if agent_body.flag_collision else 0
            self.data_record['delta_xy'][t][a][i] = self.prev_delta_xy_agents[a]
            self.data_record['wheels'][t][a][i] = agent_body.wheels
            self.data_record['emitter'][t][a][i] = self.emitter_agents[a]
            self.data_record['signal_strength'][t][a][i] = self.signal_strength_agents[a]
            self.data_record['brain_input'][t][a][i] = agent_net.brain.input
            self.data_record['brain_state'][t][a][i] = agent_net.brain.states
            self.data_record['derivatives'][t][a][i] = agent_net.brain.dy_dt
            self.data_record['brain_output'][t][a][i] = agent_net.brain.output
        self.timing.add_time('SIM_save_data', self.tim)

    def compute_signal_strength_agents(self):
        """Update the signal each non-ghost agent senses from the other one."""
        for a in [x for x in range(2) if x != self.ghost_index]:
            if self.isolation and a==1:
                self.signal_strength_agents[a] = 0
            else:
                b = 1 - a
                # signal_strength = np.array([0.,0.]) # if we want to mimic zero signal strength
                self.signal_strength_agents[a] = self.agents_pair_body[a].get_signal_strength(
                    self.agents_pair_body[b].position,
                    self.emitter_agents[b]
                )
        self.timing.add_time('SIM_get_signal_strength', self.tim)

    def update_wheels_emitter_agents(self, t,i):
        """Set wheels and emitter of both agents for step i of trial t."""
        for a in range(2):
            if self.isolation and a==1:
                # the second agent is silent when running in isolation
                self.emitter_agents[a] = 0
            elif a == self.ghost_index:
                # the ghost replays its recorded emitter; it must not be
                # overwritten by the output of its (not updated) network
                self.emitter_agents[a] = self.original_data_record['emitter'][t][a][i]
            else:
                motor_outputs = self.agents_pair_net[a].compute_motor_outputs()
                self.agents_pair_body[a].wheels = np.take(motor_outputs, [0,2])  # index 0,2: MOTORS
                self.emitter_agents[a] = motor_outputs[1]  # index 1: EMITTER
        self.timing.add_time('SIM_compute_motors_emitter', self.tim)

    def get_agents_distance(self):
        """Return the distance between the centers of the two agents."""
        return self.agents_pair_body[0].get_dist_centers(self.agents_pair_body[1].position)

    def store_values_for_entropy(self, t,i):
        """Store the entropy target value of step i of trial t."""
        if self.entropy_target_value == 'neural':  # neural outputs
            for a in [x for x in range(2) if x != self.ghost_index]:
                self.values_for_computing_entropy[t][a][i] = self.agents_pair_net[a].brain.output
        elif self.entropy_target_value == 'angle':  # angle
            for a in [x for x in range(2) if x != self.ghost_index]:
                self.values_for_computing_entropy[t][a][i] = self.agents_pair_body[a].angle
        else:  # distance
            self.values_for_computing_entropy[t][i] = self.get_agents_distance()

    def prepare_agents_for_trial(self, t):
        """Reset both agents to the initial conditions of trial t."""
        for a in range(2):
            agent_net = self.agents_pair_net[a]
            agent_body = self.agents_pair_body[a]
            # reset params that are due to change during the experiment
            agent_body.init_params(
                wheels = np.zeros(2),
                flag_collision = False
            )
            # set initial states to zeros
            agent_net.init_params(
                brain_states = np.zeros(self.num_brain_neurons),
            )
            agent_pos = np.copy(self.agents_pair_start_pos_trials[t][a])
            agent_angle = self.agents_pair_start_angle_trials[t][a]
            agent_body.set_position_and_angle(agent_pos, agent_angle)
            # compute output
            agent_net.brain.compute_output()
        # compute motor outputs (both agents are positioned at this point)
        self.update_wheels_emitter_agents(t, 0)
        # store the values of the first step
        self.store_values_for_entropy(t,0)
        # self.timing.add_time('SIM_prepare_agents_for_trials', self.tim)

    def compute_brain_input_agents(self):
        """Feed the sensed signal strength to the brain of the active agents."""
        for a in [x for x in range(2) if x != self.ghost_index]:
            if self.isolation and a==1:
                continue
            self.agents_pair_net[a].compute_brain_input(self.signal_strength_agents[a])
        self.timing.add_time('SIM_compute_brain_input', self.tim)

    def compute_brain_euler_step_agents(self):
        """Advance the brain of the active agents by one euler step."""
        for a in [x for x in range(2) if x != self.ghost_index]:
            if self.isolation and a==1:
                continue
            self.agents_pair_net[a].brain.euler_step()  # this sets agent.brain.output
        self.timing.add_time('SIM_euler_step', self.tim)

    def move_one_step_agents(self, t, i):
        """Move both agents for step i of trial t."""
        delta_xy_agents = [None, None]
        angle_agents = [None, None]
        for a in range(2):
            if self.ghost_index == a:
                # for ghost agent we need to retrieve position, delta_xy, and angle from data
                self.agents_pair_body[a].position = self.original_data_record['position'][t][a][i]
                delta_xy_agents[a] = self.original_data_record['delta_xy'][t][a][i]
                angle_agents[a] = self.original_data_record['angle'][t][a][i]
            else:
                # TODO: check if the agents didn't go too far from one another
                b = 1 - a
                delta_xy_agents[a], angle_agents[a] = self.agents_pair_body[a].move_one_step(
                    self.prev_delta_xy_agents[b],
                    self.prev_angle_agents[b]
                )
        # replaced only after both agents moved, so that each of them saw the
        # previous step of the other one
        self.prev_delta_xy_agents = delta_xy_agents
        self.prev_angle_agents = angle_agents
        self.timing.add_time('SIM_move_one_step', self.tim)

    def _agents_to_evaluate(self):
        """Return the indexes of the agents that contribute to the performance."""
        return [
            a for a in range(2)
            if a != self.ghost_index and not (self.isolation and a == 1)
        ]

    def _agent_values_for_entropy(self, t, a):
        """Return the stored values of agent a: all trials joined, or trial t only."""
        if self.concatenate:
            return np.concatenate([
                self.values_for_computing_entropy[trial][a]
                for trial in range(self.num_trials)
            ])
        return self.values_for_computing_entropy[t][a]

    def _distance_values_for_entropy(self, t):
        """Return the stored distances: all trials, or trial t only."""
        if self.concatenate:
            # NOTE(review): the list of trials is wrapped in another list, so
            # the result keeps one leading axis per trial instead of joining
            # the trials along the first axis - confirm this is intended.
            return np.concatenate([
                self.values_for_computing_entropy
            ])
        return self.values_for_computing_entropy[t]

    def compute_performance(self, t):
        """Compute the entropy based performance values.

        :param int t: index of the trial (ignored for the data selection when
            the trials are concatenated).
        :return: list of performance values (one or more per evaluated agent,
            a single one when the target value is the distance).
        """
        performance_agent_AB = []

        if self.entropy_type=='transfer':
            # it only applies to neural_outputs (with 2 neurons)
            for a in self._agents_to_evaluate():
                all_values_for_computing_entropy = self._agent_values_for_entropy(t, a)
                # add random noise to data before calculating transfer entropy
                all_values_for_computing_entropy = utils.add_noise(
                    all_values_for_computing_entropy,
                    self.random_state,
                    noise_level=self.data_noise_level
                )
                # calculate performance
                # TODO: understand what happens if reciprocal=False
                performance_agent_AB.append(
                    get_transfer_entropy(all_values_for_computing_entropy, binning=True)
                )

        elif self.entropy_type in ['shannon-1d', 'shannon-dd']:
            if self.entropy_target_value == 'distance':
                all_values_for_computing_entropy = self._distance_values_for_entropy(t)
                min_v, max_v= 0., 100.
                performance_agent_AB = [
                    get_shannon_entropy_dd_simplified(
                        all_values_for_computing_entropy, min_v, max_v)
                ]
            # elif: the distance target must not fall through to the neural
            # branch below, which indexes the values per agent
            elif self.entropy_target_value == 'angle':
                # min_v, max_v= 0., 2*np.pi
                min_v, max_v= -np.pi/4, np.pi/4
                for a in self._agents_to_evaluate():
                    all_values_for_computing_entropy = self._agent_values_for_entropy(t, a)
                    # all_values_for_computing_entropy = all_values_for_computing_entropy % 2*np.pi
                    all_values_for_computing_entropy = all_values_for_computing_entropy.flatten()
                    # entropy is computed on the change of angle between steps
                    all_values_for_computing_entropy = np.diff(all_values_for_computing_entropy)
                    performance_agent_AB.append(
                        get_shannon_entropy_1d(all_values_for_computing_entropy, min_v, max_v)
                    )
            else:
                # neural
                min_v, max_v= 0., 1.
                for a in self._agents_to_evaluate():
                    all_values_for_computing_entropy = self._agent_values_for_entropy(t, a)
                    if self.entropy_type == 'shannon-dd':
                        performance_agent_AB.append(
                            get_shannon_entropy_dd_simplified(all_values_for_computing_entropy, min_v, max_v)
                        )
                    else:
                        # shannon-1d: one value per neuron
                        for c in range(self.num_brain_neurons):
                            column_values = all_values_for_computing_entropy[:,c]
                            performance_agent_AB.append(
                                get_shannon_entropy_1d(column_values, min_v, max_v)
                            )

        else:
            # sample entropy
            # only applies to 1d data
            if self.entropy_target_value == 'neural':
                for a in self._agents_to_evaluate():
                    all_values_for_computing_entropy = self._agent_values_for_entropy(t, a)
                    for c in range(self.num_brain_neurons):
                        column_values = all_values_for_computing_entropy[:,c]
                        mean = column_values.mean()
                        std = column_values.std()
                        normalize_values = (column_values - mean) / std
                        performance_agent_AB.append(
                            _numba_sampen(normalize_values, order=2, r=(0.2 * DEFAULT_SAMPLE_ENTROPY_NEURAL_STD))
                        )
            elif self.entropy_target_value == 'distance':
                all_values_for_computing_entropy = self._distance_values_for_entropy(t)
                mean = all_values_for_computing_entropy.mean()
                std = all_values_for_computing_entropy.std()
                normalize_values = (all_values_for_computing_entropy - mean) / std
                performance_agent_AB = [
                    _numba_sampen(normalize_values.flatten(), order=2, r=(0.2 * DEFAULT_SAMPLE_ENTROPY_DISTANCE_STD))
                ]
            else:
                assert self.entropy_target_value == 'angle'
                for a in self._agents_to_evaluate():
                    all_values_for_computing_entropy = self._agent_values_for_entropy(t, a)
                    # flatten first: the values are stored as a column, and
                    # np.diff works on the last axis (which has length 1)
                    all_values_for_computing_entropy = all_values_for_computing_entropy.flatten()
                    all_values_for_computing_entropy = np.diff(all_values_for_computing_entropy)
                    mean = all_values_for_computing_entropy.mean()
                    std = all_values_for_computing_entropy.std()
                    normalize_values = (all_values_for_computing_entropy - mean) / std
                    performance_agent_AB.append(
                        _numba_sampen(normalize_values.flatten(), order=2, r=(0.2 * DEFAULT_SAMPLE_ENTROPY_ANGLE_STD))
                    )

        return performance_agent_AB

    #################
    # MAIN FUNCTION
    #################
    def run_simulation(self, genotype_population=None, genotype_index=None,
                       rnd_seed=0, data_record_list=None, ghost_index=None,
                       original_data_record_list=None):
        '''
        Main function to compute shannon/transfer/sample entropy performance

        :param genotype_population: sequence of genotypes (None: keep the
            phenotypes currently set in the agents)
        :param genotype_index: index in the population of the genotype to evaluate
        :param rnd_seed: seed of the random generator of this run
        :param data_record_list: if given, one data record (dict) per
            simulation is appended to this list
        :param ghost_index: index (0 or 1) of the agent replaying the
            original data record instead of being simulated (None: no ghost)
        :param original_data_record_list: data records (one per simulation)
            the ghost agent replays
        :return: mean performance over all simulations
        :raises ValueError: if random pairings are requested but the
            population has no genotype other than the evaluated one
        '''
        self.tim = self.timing.init_tictoc()

        self.genotype_population = genotype_population
        self.genotype_index = genotype_index
        self.random_state = RandomState(rnd_seed)
        self.rand_agent_indexes = []
        self.ghost_index = ghost_index

        if self.num_random_pairings > 0:
            population_size = len(self.genotype_population)
            if population_size == 0 or (population_size == 1 and self.genotype_index == 0):
                # the loop below would never find a valid partner
                raise ValueError(
                    "Random pairing needs at least one genotype other than the evaluated one"
                )

        # fill rand_agent_indexes with n indexes i (different from genotype_index)
        while len(self.rand_agent_indexes) != self.num_random_pairings:
            next_rand_index = self.random_state.randint(len(self.genotype_population))
            if next_rand_index != self.genotype_index:
                self.rand_agent_indexes.append(next_rand_index)

        num_simulations = max(1, self.num_random_pairings)
        sim_performances = []

        for self.sim_index in range(num_simulations):

            self.data_record = None
            if data_record_list is not None:
                self.data_record = {}
                data_record_list.append(self.data_record)

            self.original_data_record = (
                None if original_data_record_list is None
                else original_data_record_list[self.sim_index]
            )

            self.values_for_computing_entropy = []  # initialized in init_values_for_computing_entropy

            if self.genotype_population is not None:
                self.set_agents_genotype_phenotype()
                self.timing.add_time('SIM_init_agent_phenotypes', self.tim)

            trial_performances = []
            self.signal_strength_agents = [None, None]
            self.emitter_agents = [None, None]
            self.prev_delta_xy_agents, self.prev_angle_agents = None, None  # pylint: disable=W0612

            self.init_values_for_computing_entropy()

            # INITIALIZE DATA RECORD
            self.init_data_record()

            # EXPERIMENT START
            for t in range(self.num_trials):

                # SETUP AGENTS FOR TRIAL
                self.prepare_agents_for_trial(t)

                # initialize prev_delta_xy with zeros (zero displacement)
                self.prev_delta_xy_agents = [np.array([0.,0.]), np.array([0.,0.])]
                # initialize prev_angle as initial angle of each agent
                self.prev_angle_agents = [self.agents_pair_body[a].angle for a in range(2)]

                # INIT DATA for TRIAL
                self.init_data_record_trial(t)
                self.save_data_record(t, 0)

                # TRIAL START
                for i in range(1, self.num_data_points):
                    # 1) Agent senses strength of emitter from the two sensors
                    self.compute_signal_strength_agents()
                    # 2) compute brain input
                    self.compute_brain_input_agents()
                    # 3) Update agent's neural system
                    self.compute_brain_euler_step_agents()
                    # 4) Agent updates wheels and emitter
                    self.update_wheels_emitter_agents(t,i)
                    # 5) Move one step agents
                    self.move_one_step_agents(t, i)
                    # 6) Store the values for computing entropy
                    self.store_values_for_entropy(t,i)
                    self.save_data_record(t, i)
                # TRIAL END

                if self.concatenate and t!=self.num_trials-1:
                    # do not compute performance until the last trial
                    continue

                performance_agent_AB = self.compute_performance(t)
                if self.num_random_pairings==0:
                    # when agents are evolved in pairs the
                    # performance is the mean between the two agents
                    agents_perf = np.mean(performance_agent_AB)
                else:
                    # otherwise it's the performance of the first agent
                    agents_perf = np.mean(performance_agent_AB[0])

                # appending mean performance between two agents in trial_performances
                trial_performances.append(agents_perf)
                self.timing.add_time('SIM_compute_performace', self.tim)

            # SIMULATION END

            # mean performance between all trials
            sim_perf = np.mean(trial_performances)
            sim_performances.append(sim_perf)

            if self.data_record is not None:
                self.data_record['summary'] = {
                    'rand_agent_indexes': self.rand_agent_indexes,
                    'performance_trials': trial_performances,
                    'performance_sim': sim_perf
                }

        return np.mean(sim_performances)

    # POPULATION EVALUATION FUNCTION
    def evaluate(self, population, random_seeds):
        """Run one simulation per genotype of the population.

        :param population: sequence of genotypes
        :param random_seeds: one random seed per genotype
        :return: the performance of every genotype (in population order)
        """
        population_size = len(population)
        assert population_size == len(random_seeds)

        if self.num_cores > 1:
            # run parallel job: one copy of this simulation per core
            sim_array = [Simulation(**asdict(self)) for _ in range(self.num_cores)]
            performances = Parallel(n_jobs=self.num_cores)(  # prefer="threads" does not work
                delayed(sim_array[i%self.num_cores].run_simulation)(population, i, rnd_seed)
                for i, (_, rnd_seed) in enumerate(zip(population, random_seeds))
            )
        else:
            # single core
            performances = [
                self.run_simulation(population, i, rnd_seed)
                for i, (_, rnd_seed) in enumerate(zip(population, random_seeds))
            ]

        return performances
class TestAnalyzer:
    """Tests for the ``Analyzer`` helpers.

    Covers ``correlation_helper``, ``metrics_helper``, ``compute_pca`` and
    ``compute_disattenuated_correlations``. The shared fixtures are created
    in ``setUp`` from a ``RandomState`` with a fixed seed, so the expected
    numeric values in the tests are tied to that seed and to the order in
    which the random draws are made.
    """

    def setUp(self):
        # Fixture hook; the class does not subclass ``unittest.TestCase``, so
        # this is presumably picked up by a nose-style runner before each test.

        # NOTE: all draws below consume the same seeded stream. Do not reorder
        # these statements (or the dict entries): doing so changes every
        # generated value and breaks the hard-coded expectations.
        self.prng = RandomState(133)
        self.df_features = pd.DataFrame({'sc1': [1, 2, 3, 4, 1, 2, 3, 4, 1, 2],
                                         'f1': self.prng.normal(0, 1, 10),
                                         'f2': self.prng.normal(1, 0.1, 10),
                                         'f3': self.prng.normal(2, 0.1, 10),
                                         'group': ['group1'] * 10},
                                        index=range(0, 10))

        # same features, but every response has the same human score
        self.df_features_same_score = self.df_features.copy()
        self.df_features_same_score[['sc1']] = [3] * 10

        # same features, split into two groups of five rows each
        self.df_features_with_groups = self.df_features.copy()
        self.df_features_with_groups['group'] = ['group1'] * 5 + ['group2'] * 5

        # grouped features with an additional `length` column
        self.df_features_with_groups_and_length = self.df_features_with_groups.copy()
        self.df_features_with_groups_and_length['length'] = self.prng.normal(50, 250, 10)

        self.human_scores = pd.Series(self.prng.randint(1, 5, size=10))
        self.system_scores = pd.Series(self.prng.random_sample(10) * 5)
        self.same_human_scores = pd.Series([3] * 10)

        # get the directory containing the tests
        self.test_dir = dirname(__file__)

    def test_correlation_helper(self):
        # test that there are no nans for data frame with 10 values
        retval = Analyzer.correlation_helper(self.df_features, 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_equal(retval[1].isnull().values.sum(), 0)

    def test_correlation_helper_for_data_with_one_row(self):
        # this should return two data frames with nans
        retval = Analyzer.correlation_helper(self.df_features[:1], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 3)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_correlation_helper_for_data_with_two_rows(self):
        # this should return 1/-1 for marginal correlations and nans for
        # partial correlations
        retval = Analyzer.correlation_helper(self.df_features[:2], 'sc1', 'group')
        assert_equal(abs(retval[0].values).sum(), 3)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_correlation_helper_for_data_with_three_rows(self):
        # this should compute marginal correlations but return Nans for
        # partial correlations
        retval = Analyzer.correlation_helper(self.df_features[:3], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_correlation_helper_for_data_with_four_rows(self):
        # this should compute marginal correlations and return a unity
        # matrix for partial correlations
        # it should also raise a UserWarning
        with warnings.catch_warnings(record=True) as warning_list:
            # make sure the warning is recorded regardless of the filters
            # that are active when the test runs
            warnings.simplefilter('always')
            retval = Analyzer.correlation_helper(self.df_features[:4], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_almost_equal(np.abs(retval[1].values).sum(), 0.9244288637889855)
        # other warnings may be recorded as well, so do not rely on the
        # UserWarning being the last one
        assert any(issubclass(warning.category, UserWarning)
                   for warning in warning_list)

    def test_correlation_helper_for_data_with_groups(self):
        # two groups in the data: each returned data frame has two rows
        retval = Analyzer.correlation_helper(self.df_features_with_groups, 'sc1', 'group')
        assert_equal(len(retval[0]), 2)
        assert_equal(len(retval[1]), 2)

    def test_correlation_helper_for_one_group_with_one_row(self):
        # this should return a data frames with nans for group with 1 row
        retval = Analyzer.correlation_helper(self.df_features_with_groups[:6], 'sc1', 'group')
        assert_equal(len(retval[0]), 2)
        assert_equal(len(retval[1]), 2)
        assert_equal(retval[0].isnull().values.sum(), 3)

    def test_correlation_helper_for_groups_and_length(self):
        # with `include_length=True` every returned data frame has one row
        # per group and three columns
        retval = Analyzer.correlation_helper(self.df_features_with_groups_and_length,
                                             'sc1', 'group', include_length=True)
        for df in retval:
            assert_equal(len(df), 2)
            assert_equal(len(df.columns), 3)

    def test_correlation_helper_for_group_with_one_row_and_length(self):
        # this should return a data frames with nans for group with 1 row
        retval = Analyzer.correlation_helper(self.df_features_with_groups_and_length[:6],
                                             'sc1', 'group', include_length=True)
        for df in retval:
            assert_equal(len(df), 2)
            assert_equal(len(df.columns), 3)

    def test_that_correlation_helper_works_for_data_with_the_same_human_score(self):
        # this test should raise UserWarning because the determinant is very close to
        # zero. It also raises Runtime warning because
        # variance of human scores is 0.
        with warnings.catch_warnings(record=True) as warning_list:
            # record everything except RuntimeWarnings; the `ignore` filter is
            # added last so that it takes precedence over `always`
            warnings.simplefilter('always')
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            retval = Analyzer.correlation_helper(self.df_features_same_score, 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 3)
        assert_equal(retval[1].isnull().values.sum(), 3)
        assert any(issubclass(warning.category, UserWarning)
                   for warning in warning_list)

    def test_that_metrics_helper_works_for_data_with_one_row(self):
        # There should be NaNs for SMD, correlations and both sds
        # note that we will get a value for QWK since we are
        # dividing by N and not N-1
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            evals = Analyzer.metrics_helper(self.human_scores[0:1],
                                            self.system_scores[0:1])
            assert_equal(evals.isnull().values.sum(), 5)

    def test_that_metrics_helper_works_for_data_with_the_same_label(self):
        # There should be NaNs for correlation and SMD.
        # Note that for a dataset with a single response
        # kappas will be 0 or 1
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            evals = Analyzer.metrics_helper(self.same_human_scores,
                                            self.system_scores)
            assert_equal(evals.isnull().values.sum(), 2)

    def test_metrics_helper_population_sds(self):
        df_new_features = pd.read_csv(join(self.test_dir, 'data', 'files', 'train.csv'))

        # compute the metrics when not specifying the population SDs
        computed_metrics1 = Analyzer.metrics_helper(df_new_features['score'],
                                                    df_new_features['score2'])
        expected_metrics1 = pd.Series({'N': 500.0,
                                       'R2': 0.65340566606389394,
                                       'RMSE': 0.47958315233127197,
                                       'SMD': 0.03679030063229779,
                                       'adj_agr': 100.0,
                                       'corr': 0.82789026370069529,
                                       'exact_agr': 77.0,
                                       'h_max': 6.0,
                                       'h_mean': 3.4199999999999999,
                                       'h_min': 1.0,
                                       'h_sd': 0.81543231461565147,
                                       'kappa': 0.6273493195074531,
                                       'sys_max': 6.0,
                                       'sys_mean': 3.4500000000000002,
                                       'sys_min': 1.0,
                                       'sys_sd': 0.81782496620652367,
                                       'wtkappa': 0.8273273273273274})

        # and now compute them specifying the population SDs
        computed_metrics2 = Analyzer.metrics_helper(df_new_features['score'],
                                                    df_new_features['score2'],
                                                    population_human_score_sd=0.5,
                                                    population_system_score_sd=0.4,
                                                    smd_method='williamson')

        # the only number that should change is the SMD
        expected_metrics2 = expected_metrics1.copy()
        expected_metrics2['SMD'] = 0.066259

        assert_series_equal(computed_metrics1.sort_index(), expected_metrics1.sort_index())
        assert_series_equal(computed_metrics2.sort_index(), expected_metrics2.sort_index())

    def test_metrics_helper_zero_system_sd(self):
        # all system scores are identical
        human_scores = [1, 3, 4, 2, 3, 1, 3, 4, 2, 1]
        system_score = [2.54] * 10
        computed_metrics1 = Analyzer.metrics_helper(human_scores,
                                                    system_score)
        expected_metrics1 = pd.Series({'N': 10,
                                       'R2': -0.015806451612903283,
                                       'RMSE': 1.122319027727856,
                                       'SMD': 0.11927198519188371,
                                       'adj_agr': 50.0,
                                       'corr': None,
                                       'exact_agr': 0,
                                       'h_max': 4,
                                       'h_mean': 2.4,
                                       'h_min': 1.0,
                                       'h_sd': 1.1737877907772674,
                                       'kappa': 0,
                                       'sys_max': 2.54,
                                       'sys_mean': 2.54,
                                       'sys_min': 2.54,
                                       'sys_sd': 0,
                                       'wtkappa': 0})

        # now compute DSM
        computed_metrics2 = Analyzer.metrics_helper(human_scores,
                                                    system_score,
                                                    use_diff_std_means=True)

        # the only number that should change is the SMD
        expected_metrics2 = expected_metrics1.copy()
        expected_metrics2.drop("SMD", inplace=True)
        expected_metrics2['DSM'] = None

        assert_series_equal(computed_metrics1.sort_index(),
                            expected_metrics1.sort_index(),
                            check_dtype=False)
        assert_series_equal(computed_metrics2.sort_index(),
                            expected_metrics2.sort_index(),
                            check_dtype=False)

    def test_compute_pca_less_samples_than_features(self):
        # test pca when we have less samples than
        # features. In this case the number of components
        # equals to the number of samples.
        df = pd.DataFrame({'a': range(50)})
        for i in range(100):
            df[i] = df['a'] * i
        (components, variance) = Analyzer.compute_pca(df, df.columns)
        assert_equal(len(components.columns), 50)
        assert_equal(len(variance.columns), 50)

    def test_compute_disattenuated_correlations_single_human(self):
        # a single human-human correlation is given for three human-machine
        # correlations
        hm_corr = pd.Series([0.9, 0.8, 0.6],
                            index=['raw', 'raw_trim', 'raw_trim_round'])
        hh_corr = pd.Series([0.81], index=[''])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_equal(df_dis_corr.loc['raw', 'corr_disattenuated'], 1.0)

    def test_compute_disattenuated_correlations_matching_human(self):
        # both series share the same index
        hm_corr = pd.Series([0.9, 0.4, 0.6],
                            index=['All data', 'GROUP1', 'GROUP2'])
        hh_corr = pd.Series([0.81, 0.64, 0.36],
                            index=['All data', 'GROUP1', 'GROUP2'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, 0.5, 1.0])

    def test_compute_disattenuated_correlations_single_matching_human(self):
        # the human-human series has a value for only one of the three labels;
        # the other two are expected to be NaN
        hm_corr = pd.Series([0.9, 0.4, 0.6],
                            index=['All data', 'GROUP1', 'GROUP2'])
        hh_corr = pd.Series([0.81], index=['All data'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, np.nan, np.nan])

    def test_compute_disattenuated_correlations_mismatched_indices(self):
        # the two series only share the 'All data' label; the result is
        # expected to have three rows, with NaN for the other two
        hm_corr = pd.Series([0.9, 0.6],
                            index=['All data', 'GROUP2'])
        hh_corr = pd.Series([0.81, 0.64],
                            index=['All data', 'GROUP1'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, np.nan, np.nan])

    def test_compute_disattenuated_correlations_negative_human(self):
        # a negative human-human correlation is expected to give NaN
        hm_corr = pd.Series([0.9, 0.8],
                            index=['All data', 'GROUP1'])
        hh_corr = pd.Series([-0.03, 0.64],
                            index=['All data', 'GROUP1'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 2)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [np.nan, 1.0])
experiment setup, running & evaluation CK 2014 """ import copy import numpy as np import networkx as nx import hcWrapSim as wrp import hcNetworks as net import hcPlotting as plo from hcUtil import printprogress from matplotlib import pyplot as plt from numpy.random import RandomState # provide a fixed set of random seeds rng = RandomState(1) standard_seeds = rng.randint(0, 100000, 200) class measure: def __init__(self, roi=None, wait=0, name=None, window=None, windowlength=1000, increment=1): """ A measure runs some analysis on raw simulation data & remembers the result. It accumulates results when applied repeatedly. Args: