def generate_coasting_beam(Beam, t_start, t_stop, spread=1E-3, spread_type='dp/p',
                           energy_offset=0, distribution='gaussian',
                           user_distribution=None, user_probability=None):

    if spread_type == 'dp/p':
        energy_spread = Beam.energy * Beam.beta**2 * spread
    elif spread_type == 'dE/E':
        energy_spread = spread * Beam.energy
    elif spread_type == 'dp':
        energy_spread = Beam.energy * Beam.beta**2 * spread / Beam.momentum
    elif spread_type == 'dE':
        energy_spread = spread
    else:
        # DistributionError
        raise RuntimeError("spread_type not recognised")

    if distribution == 'gaussian':
        Beam.dE = rand.normal(loc=energy_offset, scale=energy_spread,
                              size=Beam.n_macroparticles)
    elif distribution == 'parabolic':
        energyRange = np.linspace(-energy_spread, energy_spread, 10000)
        probabilityDistribution = 1 - (energyRange / energy_spread)**2
        probabilityDistribution /= np.cumsum(probabilityDistribution)[-1]
        Beam.dE = (rand.choice(energyRange, size=Beam.n_macroparticles,
                               p=probabilityDistribution)
                   + (rand.rand(Beam.n_macroparticles) - 0.5)
                   * (energyRange[1] - energyRange[0])
                   + energy_offset)
    # If distribution == 'user' is selected the user must supply a uniformly
    # spaced distribution and the associated probability for each bin.
    # spread and energy_offset are not used in this case.
    elif distribution == 'user':
        Beam.dE = (rand.choice(user_distribution, size=Beam.n_macroparticles,
                               p=user_probability)
                   + (rand.rand(Beam.n_macroparticles) - 0.5)
                   * (user_distribution[1] - user_distribution[0]))
    else:
        # DistributionError
        raise RuntimeError("distribution type not recognised")

    Beam.dt = rand.rand(Beam.n_macroparticles) * (t_stop - t_start) + t_start
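A minimal usage sketch for generate_coasting_beam, assuming only that the beam object exposes the attributes the function reads (energy, beta, momentum, n_macroparticles, dE, dt) and that numpy and numpy.random are available under the np/rand aliases the function uses. The _ToyBeam class and all numeric values are hypothetical, not part of the original code.

import numpy as np
import numpy.random as rand  # aliases the function above resolves at module level

class _ToyBeam:
    # hypothetical stand-in exposing only what generate_coasting_beam touches
    energy = 450e9            # illustrative total energy [eV]
    beta = 0.999
    momentum = 449.9e9        # only read when spread_type='dp'
    n_macroparticles = 10000
    dE = None
    dt = None

beam = _ToyBeam()
generate_coasting_beam(beam, t_start=0.0, t_stop=2.5e-6,
                       spread=1e-3, spread_type='dp/p', distribution='parabolic')
print(beam.dE.std(), beam.dt.min(), beam.dt.max())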
def get_pairs(clique_dict): """Get all pairs of cover songs in a clique dataset, and a sample of non-cover pairs of the same size. Args: clique_dict (dict): clique dataset as a dict with clique names as keys and lists of song URI's as values Returns: pairs (list): list of pairs (each a tuple) """ pairs = [] non_pairs = [] for this_clique in clique_dict: # clique uris clique_uris = clique_dict[this_clique] # non-clique uris other_cliques = [clique for clique in clique_dict if not clique == this_clique] non_clique_uris = [uri for clique in other_cliques for uri in clique_dict[clique]] # clique pairs clique_pairs = list(combinations(clique_uris, 2)) # clique non-pairs = [some clique uri, some non-clique uri] x len(clique pairs) n_clique_pairs = len(clique_pairs) clique_sample = choice(clique_uris, n_clique_pairs, replace=True) non_clique_sample = choice(non_clique_uris, n_clique_pairs, replace=False) clique_non_pairs = zip(clique_sample, non_clique_sample) pairs.extend(clique_pairs) non_pairs.extend(clique_non_pairs) return pairs, non_pairs
def run(self, num_epochs=1, num_episodes=1): num_tasks = len(self.tasks) for epoch in range(num_epochs): # choose task based on weights. ti = -1 if npr.rand() < self.mab_gamma: ti = npr.choice(range(num_tasks), 1)[0] else: p = np.exp(prob.normalize_log(self.log_weights)) ti = npr.choice(range(num_tasks), 1, replace=True, p=p)[0] task = self.tasks[ti] # (TODO) this breaks away the abstraction. self.deepQlearn.task = task self.dqn.task = task # run training. self.deepQlearn.run(num_episodes) # update weights. self.cumulative_epochs += 1 if self.cumulative_epochs >= self.mab_batch_size: self.log_weights[:] = 0. else: for ti, task in enumerate(self.tasks): performance_gain = eval_policy_reward(self.dqn, task, num_episodes=10000) self.log_weights[ti] += self.mab_gamma * self.mab_scale * performance_gain / num_tasks
def recommendNumberBySum(lottoDictByNumber, lottoDictBySum):
    # number of sum values to pick
    pickSumCount = 5
    total = sum(list(lottoDictBySum.values()))
    for key in lottoDictBySum:
        lottoDictBySum[key] = lottoDictBySum[key] / total
    pickSumList = choice(list(lottoDictBySum.keys()), pickSumCount, p=list(lottoDictBySum.values()))

    total = sum(list(lottoDictByNumber.values()))
    for key in lottoDictByNumber:
        lottoDictByNumber[key] = lottoDictByNumber[key] / total

    # list of recommended numbers
    pickNumberList = []
    for n in pickSumList:
        # find the combinations of numbers whose sum is n, and their probabilities
        listNumbers = getNumberCombinationListBySum(n)
        pList = []
        for numbers in listNumbers:
            p = 0
            for number in numbers:
                p += lottoDictByNumber[number]
            pList.append(p)
        pTotal = sum(pList)
        pList = [x / pTotal for x in pList]
        idx = choice(list(range(0, len(listNumbers))), 1, p=pList)
        pickNumberList.append(listNumbers[idx[0]])
    return pickNumberList
def get_train(self, shape=(100, 20)):
    """Build the train sample from the data"""
    train = []
    for key in self.base.keys():
        if len(self.base[key]) >= 2:
            for _ in range(0, len(self.base[key]), 2):
                values = choice(list(self.base[key]), 2)
                a = np.asarray(self.base[key][values[0]])
                b = np.asarray(self.base[key][values[1]])
                other = choice(list(self.base), 1)[0]
                c = np.asarray(self.base[other][choice(list(self.base[other]), 1)[0]])
                other_value = c.copy()
                value_first = a.copy()
                value_second = b.copy()
                other_value.resize(shape)
                value_first.resize(shape)
                value_second.resize(shape)
                train.append((value_first, other_value, value_second))
    return train
def next_edge_uniformn(G, start, explore_prob, n, candidates=None): ''' Picks edges with probability proportional to the edge weights raised to the n-th power ''' if candidates == None: candidates = G.neighbors(start) total_wt = 0.0 explored = [] unexplored = [] explored_weights = [] for candidate in candidates: wt = G[start][candidate]['weight'] if (wt ** n) <= MIN_DETECTABLE_PHEROMONE: unexplored.append(candidate) else: explored.append(candidate) explored_weights.append(wt ** n) total_wt += wt ** n flip = random() if (flip < explore_prob and len(unexplored) > 0) or (len(explored) == 0): next = choice(len(unexplored)) next = unexplored[next] return next, True elif total_wt == 0: print explored_weights next = choice(len(candidates)) next = candidates[next] return next, True else: explored_weights = np.array(explored_weights) explored_weights /= total_wt next = explored[choice(len(explored), 1, p=explored_weights)[0]] return next, False
def mswdd(x, alpha=1e-5, nlevels=6, boundary=100, prop=0.1):
    # pad to the next power of two in size
    N = len(x)
    maxlevs = int(np.ceil(np.log2(N)))
    newlen = 2 ** (1 + maxlevs)
    padlen = newlen - N
    boundary = int(np.min((boundary, np.floor(prop * N))))
    padbefore = rng.choice(x[0:boundary], int(np.ceil(padlen / 2)))
    padafter = rng.choice(x[(N - boundary + 1):N], int(np.floor(padlen / 2)))
    padded = np.concatenate((padbefore, x, padafter))

    # get wavelet transform
    J = int(np.min((nlevels + 1, maxlevs + 1)))
    vsg = wv.dwt.swt(padded, J, 'db1')[0].reshape((J, newlen))

    # shift rows to align the scale levels
    shift = newlen // 2
    for ii in range(1, vsg.shape[0]):
        idx = list(range(newlen - shift, newlen))
        idx.extend(range(newlen - shift))
        vsg[ii, :] = vsg[ii, idx]
        shift = shift // 2

    # drop 1st (DC) row and padding
    vsg = vsg[1:, len(padbefore):(len(padbefore) + N)]
    return discontinuities(vsg, alpha)
def next_edge_maxa(G, start, explore_prob, candidates=None):
    '''
    With some probability, picks equally among the edges whose weight is lower
    than the maximum. Otherwise, picks equally among all edges tied for the
    highest edge weight. Note that on explore steps it can still pick the
    highest-weighted edge.
    '''
    if candidates is None:
        candidates = list(G.neighbors(start))

    # compute highest adjacent edge weight
    max_wt = float("-inf")
    for candidate in candidates:
        max_wt = max(max_wt, G[start][candidate]['weight'])

    # split neighbors into maximally weighted and non-maximally weighted edges
    max_neighbors = []
    nonmax_neighbors = []
    for candidate in candidates:
        wt = G[start][candidate]['weight']
        # Edges with too small weight not considered maximal
        if wt == max_wt and wt > MIN_DETECTABLE_PHEROMONE:
            max_neighbors.append(candidate)
        else:
            nonmax_neighbors.append(candidate)

    flip = random()
    # Explores non-maximal edge with probability explore_prob
    if (flip < explore_prob and len(nonmax_neighbors) > 0) or (len(max_neighbors) == 0):
        next = choice(len(candidates))
        next = candidates[next]
        return next, True
    else:
        next = choice(len(max_neighbors))
        next = max_neighbors[next]
        return next, False
def next_edge_maxz(G, start, explore_prob, candidates=None): ''' With some probability, picks equally among zero edges, otherwise picks equally among maximal edges. This choice function ignores all edges in the 'middle', i.e. edges that are neither maximal nor minimal ''' if candidates == None: candidates = G.neighbors(start) max_wt = float("-inf") for candidate in candidates: max_wt = max(max_wt, G[start][candidate]['weight']) max_neighbors = [] nonmax_neighbors = [] for candidate in candidates: wt = G[start][candidate]['weight'] if wt == max_wt and wt > MIN_DETECTABLE_PHEROMONE: max_neighbors.append(candidate) elif wt <= MIN_DETECTABLE_PHEROMONE: nonmax_neighbors.append(candidate) flip = random() if (flip < explore_prob and len(nonmax_neighbors) > 0) or (len(max_neighbors) == 0): next = choice(len(nonmax_neighbors)) next = nonmax_neighbors[next] return next, True else: next = choice(len(max_neighbors)) next = max_neighbors[next] return next, False
def recursive(self, opened): sample = choice(len(self.probabilities), 1, p=self.probabilities) sample = sample[0] length = 2 # ans = self.open_mark[opened] ans = [opened] while (sample != opened): if (sample < opened): # ans.append(0) # length += 1 pass # Open a new depth of recursion else: (new_depth, new_length) = self.recursive(sample) ans.extend(new_depth) length += new_length sample = choice(len(self.probabilities), 1, p=self.probabilities) sample = sample[0] # Close the depth of recursion # return (ans + self.close_mark[sample], length) ans.append(opened + self.depth) return (ans, length)
def _deplacement_alea(individu):
    """Move an individual by one cell.

    We first choose the move the individual is going to make, then check
    that it stays within the bounds of the matrix and does not end up in
    the water or on another enemy. The individual's position attribute is
    updated so that it gets moved later on.
    """
    alea_ligne = rd.choice((-1, 0, 1))
    alea_colonne = rd.choice((-1, 0, 1))
    x_ini = deepcopy(individu.position[0])
    y_ini = deepcopy(individu.position[1])
    # Check that the move stays within the bounds of the matrix.
    if 0 <= x_ini + alea_ligne < self.matrice.shape[0]:
        if int(str(self.matrice[individu.position[0] + alea_ligne, individu.position[1]])) < 100 and int(str(self.matrice[individu.position[0] + alea_ligne, individu.position[1]])) != 0:
            individu.position[0] += alea_ligne
    if 0 <= y_ini + alea_colonne < self.matrice.shape[1]:
        if int(str(self.matrice[individu.position[0], individu.position[1] + alea_colonne])) < 100 and int(str(self.matrice[individu.position[0], individu.position[1] + alea_colonne])) != 0:
            individu.position[1] += alea_colonne
    individu.ex_position = [x_ini, y_ini]
def shell_move(inAtom,atomIndex): # we're going to be changing the position of atomIndex inside inAtom # make sure that you remove any crazy outliers before you do this # or else it'll just make a bunch more outliers, which is a poor idea # make sure atomIndex comes from range(len(inAtom.get_positions())) so we don't get out of bounds try: inCOM = inAtom.get_center_of_mass() inDistances = distanceCenter(inAtom) ninetyNinthRadius = stats.scoreatpercentile(inDistances,99) ninetyFifthRadius = stats.scoreatpercentile(inDistances,95) outerFourRadius = ninetyNinthRadius - ninetyFifthRadius randomNewRadius = random.gauss( (ninetyNinthRadius+ninetyFifthRadius)/2 , (ninetyNinthRadius - ninetyFifthRadius)/2 ) xFromCenter = random.uniform(0,randomNewRadius) randomNewRadius = ((randomNewRadius**2) - (xFromCenter**2))**0.5 yFromCenter = random.uniform(0,randomNewRadius) zFromCenter = ((randomNewRadius**2) - (yFromCenter**2))**0.5 newXPosition = inCOM[0] + random.choice([-1,1])*xFromCenter newYPosition = inCOM[1] + random.choice([-1,1])*yFromCenter newZPosition = inCOM[2] + random.choice([-1,1])*zFromCenter positionArray = inAtom.get_positions() positionArray[atomIndex] = (newXPosition,newYPosition,newZPosition) inAtom.set_positions(positionArray) return inAtom except IndexError: print "The index of the atom you wanted to move is too high or too low." print "Please check your function call of shell_move(a,b)" print "-Jeff"
def transition(self): text = "Transition to" if self.current_state is not None: self.current_state = choice(range(self.nStates), p=self.transitionMatrix[self.current_state,]) else: text = "Start at" self.current_state = choice(range(self.nStates), p=self.initialProbabilities)
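A self-contained sketch of the same sampling pattern used above: a row of a transition matrix serves as the p argument to numpy.random.choice. The two-state matrix and probabilities below are made up for illustration.

import numpy as np
from numpy.random import choice

transition_matrix = np.array([[0.9, 0.1],
                              [0.5, 0.5]])
initial_probabilities = np.array([0.8, 0.2])

state = choice(range(2), p=initial_probabilities)              # "Start at"
for _ in range(5):
    state = choice(range(2), p=transition_matrix[state, :])    # "Transition to"
    print(state)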
def setup_images(current_uuid, subreddit, num_questions): thresholds, weights = get_thresholds(subreddit) image_classes = [] query_1 = Post.query.filter(Post.year_posted == 2014, Post.show_to_users == "t", Post.subreddit == subreddit) for t in thresholds: temp_images = query_1.filter(Post.score >= t[0], Post.score <= t[1]).order_by(db.func.random()) image_classes.append(temp_images) indices = [0] * len(thresholds) image_pairs = [] for i in np.arange(num_questions): first_threshold = choice(np.arange(len(thresholds)), p=weights) second_threshold = choice(np.arange(len(thresholds)), p=weights) first_index = indices[first_threshold] first_image = image_classes[first_threshold].offset(first_index).first() indices[first_threshold] = first_index + 1 second_index = indices[second_threshold] second_image = image_classes[second_threshold].offset(second_index).first() while first_image.score == second_image.score: second_index += 1 second_image = image_classes[second_threshold].offset(second_index).first() indices[second_threshold] = second_index + 1 image_pairs.append([first_image, second_image]) mc = pylibmc.Client(["127.0.0.1"], binary=True, behaviors={"tcp_nodelay": True, "ketama": True}) mc.set(current_uuid + "_images", image_pairs, time=10 * 60) return image_pairs
def ball_move(inAtom,atomIndex): """takes an atom defined by atomIndex inside of inAtom and moves it somewhere within the core of the atom randomly. Atoms will almost always end up inside the sphere which contains 85% of the atoms, centered at the center of mass.""" # we're going to be changing the position of atomIndex inside inAtom # we'll take atom of index atomIndex and throw it somewhere inside the core # make sure that you remove any crazy outliers before you do this # or else it'll just make a bunch more outliers, which is a poor idea try: #get all the distances from the center of mass inCOM = inAtom.get_center_of_mass() inDistances = distanceCenter(inAtom) #figure out the distance from the core to the 85th percentile #we'll consider "the core" to be the sphere which contains 85% of the atoms eightyFifthRadius = stats.scoreatpercentile(inDistances,85) #pick a new distance from center somewhere inside that 85th percentile limit randomNewRadius = random.gauss(eightyFifthRadius/2, eightyFifthRadius/3 ) xFromCenter = random.uniform(0,randomNewRadius) randomNewRadius = ((randomNewRadius**2) - (xFromCenter**2))**0.5 yFromCenter = random.uniform(0,randomNewRadius) zFromCenter = ((randomNewRadius**2) - (yFromCenter**2))**0.5 newXPosition = inCOM[0] + random.choice([-1,1])*xFromCenter newYPosition = inCOM[1] + random.choice([-1,1])*yFromCenter newZPosition = inCOM[2] + random.choice([-1,1])*zFromCenter positionArray = inAtom.get_positions() positionArray[atomIndex] = (newXPosition,newYPosition,newZPosition) inAtom.set_positions(positionArray) return inAtom except IndexError: print "The index of the atom you wanted to move is too high or too low." print "Please check your function call of ball_move(a,b)" print "-Jeff"
def qlearn2(m, f, alpha=0.5, gamma=1.0, epsilon=0.2, num_episodes=1000): actions = m.actions() theta = dict() for a in actions: theta[a] = np.zeros((f.num_features,)) for episode in range(num_episodes): t = 0 s = m.start() a = npr.choice(m.actions(s)) q = 0 while not m.is_terminal(s): s2, r2 = m.act(s, a) # if r2 < -1 or s2[0] > 5 and s2[1] < 5: # print "wat", s2, r2 qp = sum(theta[a] * f.features(s)) # print a, s, s2, r2, [(a, r2 + gamma * qp) for a in m.actions(s)] actions = m.actions(s) random.shuffle(actions) a2, q2 = max([(a2, r2 + gamma * sum(theta[a2] * f.features(s2))) for a2 in actions], key=lambda x: x[1]) delta = q2 - qp # if random.random() < 0.01: # print delta # print a, delta, s, s2 theta[a] += alpha * delta * (f.features(s)) if npr.random() < epsilon: a2 = npr.choice(m.actions(s)) q = q2 s = s2 a = a2 t += 1 print episode+1, t, s, r2 pi = lambda s: max([(a, sum(theta[a] * f.features(s))) for a in m.actions(s)], key=lambda x: x[1])[0] f_exp = lambda s: max(sum(theta[a] * f.features(s)) for a in m.actions(s)) return pi, f_exp, theta
def qlearn1(m, f, lmbda=0.1, alpha=0.3, gamma=0.9999, epsilon=0.2, num_episodes=100): theta = np.zeros((f.num_features,)) t = 1 for _ in range(num_episodes): s = m.start() a = npr.choice(m.actions(s)) e = np.zeros((theta.size,)) print t while not m.is_terminal(s): #print t #print s phi_a1 = feature_estimate(m, s, a, f) e += phi_a1 s2, r2 = m.act(s, a) delta = r2 - sum(theta * phi_a1) q = [] for a2 in m.actions(s2): phi_a2 = feature_estimate(m, s2, a2, f) q.append(sum(theta * phi_a2)) delta += (gamma**t) * max(q) theta = (1-alpha) * theta + alpha * delta * e if npr.random() < 1 - epsilon: a2 = max(zip(m.actions(s2), q), key=lambda x: x[1])[0] e *= (gamma**t) * lmbda else: a2 = npr.choice(m.actions(s2)) e = np.zeros((theta.size,)) s = s2 a = a2 t += 1 pi = lambda s: max([(a, sum(theta * feature_estimate(m, s, a, f))) for a in m.actions(s)], key=lambda x: x[1])[0] return pi, theta
def choose_action(self, state): """ The choose_action function is called when the agent is asked to choose which action to take, based on the 'state' the smartcab is in. """ # Set the agent state and default action self.state = state self.next_waypoint = self.planner.next_waypoint() action = None ########### ## TO DO ## ########### # When not learning, choose a random action # When learning, choose a random action with 'epsilon' probability # Otherwise, choose an action with the highest Q-value for the current state if self.learning: rando = random.randint(0, 99) if rando < ( self.epsilon * 100 ) : action = choice(self.valid_actions) else: maxQ = self.get_maxQ(state) Qs= self.Q[state] possible_A = [action for action, value in Qs.items() if value == maxQ] action = choice(possible_A) else: action = choice(self.valid_actions) return action
def resample(gen, scores, percent_eliminate = .5, mutation_rate = 5): """ Breeds generation together based on their scores. Returns new generation. """ assert(len(gen)==len(scores)) # They must be the same length N = len(gen) # N is equal to that length Ns = int(len(gen)*(1.-percent_eliminate)) # Get the sample size P = (np.array(scores, dtype='float')+abs(min(scores))+.001) / np.sum(np.array(scores)+abs(min(scores))+.001) # normalize scores to get probabilities # Kill off percent_eliminate of the worst worlds survive_index = choice(np.arange(len(gen)), size = Ns, replace = False, p = P) gen, scores = [gen[i] for i in survive_index], [scores[i] for i in survive_index] P = (np.array(scores, dtype='float')+abs(min(scores))+.001) / np.sum(np.array(scores)+abs(min(scores))+.001) # normalize scores to get probabilities # Sample the generation based on score, higher the more probable to breed A = choice(np.arange(Ns), size = N, replace = True, p = P) B = choice(np.arange(Ns), size = N, replace = True, p = P) A, B = [gen[i] for i in A], [gen[i] for i in B] # Generate the next generation new_gen, new_scores = [], [] for a, b in zip(A,B): # Generate new world via gene combination and mutation new_world = combine(a,b) new_world = mutate(new_world, rate = mutation_rate) # Score the new world new_score = score(new_world, safety_weight = Safety, freedom_weight = Freedom) # Append it to the new generation new_gen.append(new_world) new_scores.append(new_score) return new_gen, new_scores
def __init__(self, test_data_fn): start = now() if os.path.isfile(test_data_fn): print("Reading test data...") self.prepop_rows, self.idens, self.props, self.rows = \ pickle.load(open(test_data_fn, 'rb')) else: print("Generating test data...") random.seed(4) # 4 chosen by fair dice roll. Guaranteed to be random forms = [gen_random_form() for x in range(NUM_FORMS)] # FIXME: don't use random.choice!!! Super duper slow self.prepop_rows = flatten(_rows_from_tufo(gen_random_tufo(random.choice(forms))) for x in range(NUM_PREEXISTING_TUFOS)) tufos = [gen_random_tufo(random.choice(forms)) for x in range(NUM_TUFOS)] self.idens = [t[0] for t in tufos] self.props = [get_random_keyval(t[1]) for t in tufos] random.shuffle(self.idens) random.shuffle(self.props) self.rows = flatten(_rows_from_tufo(x) for x in tufos) pickle.dump((self.prepop_rows, self.idens, self.props, self.rows), open(test_data_fn, 'wb')) print("Test data generation took: %.2f" % (now() - start)) print('addRows: # Tufos:%8d, # Rows: %8d' % (NUM_TUFOS, len(self.rows))) print('len count: small:%d, medium:%d, large:%d, huge:%d' % (small_count, medium_count, large_count, huge_count))
def sample_profiles(base, num): # pylint: disable=inconsistent-return-statements """Generate unique profiles from a game Parameters ---------- base : RsGame Game to generate random profiles from. num : int Number of profiles to sample from the game. """ if num == base.num_all_profiles: # pylint: disable=no-else-return return base.all_profiles() elif num == 0: return np.empty((0, base.num_strats), int) elif base.num_all_profiles <= np.iinfo(int).max: inds = rand.choice(base.num_all_profiles, num, replace=False) return base.profile_from_id(inds) else: # Number of times we have to re-query ratio = (sps.digamma(float(base.num_all_profiles)) - sps.digamma(float(base.num_all_profiles - num))) # Max is for underflow num_per = max(round(float(ratio * base.num_all_profiles)), num) profiles = set() while len(profiles) < num: profiles.update( utils.hash_array(p) for p in base.random_profiles(num_per)) profiles = np.stack([h.array for h in profiles]) inds = rand.choice(profiles.shape[0], num, replace=False) return profiles[inds]
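The digamma expression above is the usual coupon-collector estimate: to see num distinct profiles out of P equally likely ones, roughly P*(digamma(P) - digamma(P - num)) draws are needed. A small standalone check with made-up numbers (total_profiles and num_wanted are hypothetical):

import scipy.special as sps

total_profiles = 10 ** 6   # hypothetical number of distinct profiles
num_wanted = 10 ** 3       # unique samples requested
ratio = sps.digamma(float(total_profiles)) - sps.digamma(float(total_profiles - num_wanted))
draws_needed = max(round(float(ratio * total_profiles)), num_wanted)
print(draws_needed)        # slightly above num_wanted, accounting for collisions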
def main():
    from matplotlib.pyplot import figure, plot, close
    from numpy.random import standard_normal, choice
    from numpy.linalg import qr, norm
    from numpy import dot, sqrt, real, imag
    import CAMP_C
    #from myOmp import omp_naive as omp

    N = 2000
    M = 900
    K = 100
    sigma_n = 0.001
    A = standard_normal((N, N)) + 1j * standard_normal((N, N))
    (Q, R) = qr(A)
    i = choice(N, M, False)
    A = Q[i, :]
    x = (standard_normal((N, 1)) + 1j * standard_normal((N, 1))) / sqrt(2)
    j = choice(N, N - K, False)
    x[j, :] = 0
    y = dot(A, x) + sigma_n * standard_normal((M, 1))
    xhat = CAMP_C.CAMP(A, y, 1, True)
    print(norm(x - xhat) / N)
    close('all')
    plot(real(x))
    plot(real(xhat))
    figure()
    plot(imag(x))
    plot(imag(xhat))
def assemble_data(output_file, anno_file_list=[]): # assemble the annotations to one file size = 12 if len(anno_file_list) == 0: return 0 if os.path.exists(output_file): os.remove(output_file) for anno_file in anno_file_list: with open(anno_file, 'r') as f: anno_lines = f.readlines() base_num = 250000 if len(anno_lines) > base_num * 3: idx_keep = npr.choice(len(anno_lines), size=base_num * 3, replace=True) elif len(anno_lines) > 100000: idx_keep = npr.choice(len(anno_lines), size=len(anno_lines), replace=True) else: idx_keep = np.arange(len(anno_lines)) np.random.shuffle(idx_keep) chose_count = 0 with open(output_file, 'a+') as f: for idx in idx_keep: f.write(anno_lines[idx]) chose_count += 1 return chose_count
def choose_action(self, state): """ The choose_action function is called when the agent is asked to choose which action to take, based on the 'state' the smartcab is in. """ # Set the agent state and default action self.state = state self.next_waypoint = self.planner.next_waypoint() action = random.choice(self.valid_actions) # When not learning, choose a random action # When learning, choose a random action with 'epsilon' probability # Otherwise, choose an action with the highest Q-value for the current state if self.learning == True: maxQ = self.get_maxQ(state) n_maxQ = sum(1 for v in self.Q[str(state)].values() if v == maxQ) if n_maxQ > 1: maxQ_actions = [] for k,v in self.Q[str(state)].iteritems(): if v == maxQ: maxQ_actions.append(k) action = choice(maxQ_actions) else: if choice([True,False], 1, p=[1-self.epsilon, self.epsilon]): for k,v in self.Q[str(state)].iteritems(): if v == maxQ: action = k return action
def subsample_arr(arr, N=None, frac_keep=None):
    """
    Subsample a Series, DataFrame, or ndarray along axis 0.

    Parameters
    ----------
    arr : Series, DataFrame, or ndarray
    N : Integer
        Number of samples to keep
    frac_keep : Real in [0, 1]
        Fraction of samples to keep

    Returns
    -------
    subsampled : Series, DataFrame, or ndarray
        A copy
    """
    # Input checking: exactly one of N and frac_keep must be given
    assert ((N is None) and (frac_keep is not None)) \
        or ((N is not None) and (frac_keep is None))

    if N is None:
        N = int(len(arr) * frac_keep)

    if isinstance(arr, np.ndarray):
        index = choice(range(len(arr)), size=N, replace=False)
        return arr[np.ix_(index)]
    elif isinstance(arr, (pd.Series, pd.DataFrame)):
        index = choice(arr.index, size=N, replace=False)
        return arr.loc[index]   # .ix is deprecated; label-based .loc does the same here
    else:
        raise ValueError("arr of unhandled type: %s" % type(arr))
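A quick usage sketch for subsample_arr; the toy DataFrame and array below are made up.

import numpy as np
import pandas as pd

df = pd.DataFrame({'x': np.arange(10), 'y': np.arange(10) ** 2})
half = subsample_arr(df, frac_keep=0.5)               # keeps 5 randomly chosen rows
arr = subsample_arr(np.arange(100).reshape(50, 2), N=10)
print(half.shape, arr.shape)                          # (5, 2) (10, 2)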
def transform(self, Xb, yb): Xb, yb = super(AffineTransformBatchIteratorMixin, self).transform(Xb, yb) # Skip if affine_p is 0. Setting affine_p may be useful for quickly # disabling affine transformation if self.affine_p == 0: return Xb, yb idx = get_random_idx(Xb, self.affine_p) Xb_transformed = Xb.copy() for i in idx: scale = choice(self.affine_scale_choices) rotation = choice(self.affine_rotation_choices) shear = choice(self.affine_shear_choices) affine_translation_y_choices = self.affine_translation_choices if self.affine_translation_y_choices is None else self.affine_translation_y_choices affine_translation_x_choices = self.affine_translation_choices if self.affine_translation_x_choices is None else self.affine_translation_x_choices translation_y = choice(affine_translation_y_choices) translation_x = choice(affine_translation_x_choices) img_transformed, tform = im_affine_transform( Xb[i], return_tform=True, scale=scale, rotation=rotation, shear=shear, translation_y=translation_y, translation_x=translation_x, center_rel=self.center_rel, ) Xb_transformed[i] = img_transformed return Xb_transformed, yb
def genArgumentMNIST(self): print "generating Argument MNIST image in progress..." dataset = MNISTDataset("MNIST") gendataset = [] genlabelset = [] stepsizeX = array([0, 2, 6, 8]) stepsizeY = array([0, 2, 6, 8]) rd_idx0 = random.choice(60000, 10000, replace=False) for i in rd_idx0: labelTr, itemTr = dataset.getTrainingItem(i) temp_tr = reshape( itemTr, (28,28)) itemTr_pad = zeropadding(temp_tr, 4, 4) for x in stepsizeX: for y in stepsizeY: TEMP = itemTr_pad[x:x+28, y:y+28].flatten() gendataset.append( TEMP ) genlabelset.append( labelTr.flatten() ) print "Argument MNIST image complete!" rd_idx1 = random.choice(len(gendataset), self.numArgu, replace=False) popdatamatrix = zeros((self.numArgu, 28*28)) poplabelmatrix = zeros((self.numArgu, 10)) for i in range(self.numArgu): popdatamatrix[i,:] = gendataset[rd_idx1[i]] poplabelmatrix[i,:] = genlabelset[rd_idx1[i]] return popdatamatrix, poplabelmatrix
def __generate_random_walks(self, graph, length_of_path, number_per_node):
    """
    For each node in the graph object, this method will compute a number of
    random walks of a certain length.

    Parameters:
        graph: The networkx graph object
        length_of_path: The length of the random walk generated for each node
        number_per_node: The number of random walks generated for each node

    Returns:
        The list of generated random walks
    """
    random_walks = []
    for i in graph.nodes():
        for j in range(0, number_per_node):
            path = [i]
            for k in range(0, length_of_path):
                # candidate neighbours that are not already on the path
                sample_set = list(filter(lambda x: x not in path,
                                         graph.all_neighbors(i, i[-1])))
                if len(sample_set) == 0:
                    break
                sample = npr.choice(sample_set, size=1)
                while sample in path:
                    sample = npr.choice(sample_set, size=1)
                # extend the walk with the sampled neighbour
                path.append(sample[0])
            random_walks.append(path)
    return random_walks
def update(self, t): # Gather inputs self.next_waypoint = self.planner.next_waypoint() # from route planner, also displayed by simulator inputs = self.env.sense(self) deadline = self.env.get_deadline(self) if self.next_waypoint == None: self.state = 'Destination' else: self.state = (self.next_waypoint, inputs['light'],inputs['oncoming'],inputs['right'],inputs['left']) # TODO: Select action according to your policy valid_actions = ['forward','right','left', None] epsilon = 0.1 #0.01 best_action = max(self.Q_hat[self.state],key=self.Q_hat[self.state].get) random_action = choice(valid_actions) action = choice([best_action, random_action],p=[1-epsilon,epsilon]) # Execute action and get reward reward = self.env.act(self, action) # Learn policy based on state, action, reward new_next_waypoint = self.planner.next_waypoint() # from route planner, also displayed by simulator new_inputs = self.env.sense(self) new_state = (new_next_waypoint, new_inputs['light'],new_inputs['oncoming'],new_inputs['right'],new_inputs['left']) alpha = 0.5 #opt 0.7 gamma = 0.5 #opt 0.1 max_Qhat_ahat = max(self.Q_hat[new_state].values()) self.Q_hat[self.state][action] = (1-alpha)*self.Q_hat[self.state][action]+alpha*(reward+gamma*max_Qhat_ahat) print "LearningAgent.update(): deadline = {}, inputs = {}, action = {}, reward = {}".format(deadline, inputs, action, reward) # [debug]
def AddLeafToTree(self, id, diameter):
    """ Add a single leaf to the tree. The position is guided by the diameter
    parameter which indicates the percentage of the maximum possible tree
    diameter to use. """
    # Handle the cases where the tree is empty (or a single node)
    if len(self.__tree.vertices) == 0:
        Leaf(label=id, tree=self.__tree)
    elif len(self.__tree.vertices) == 1:
        Edge(nodes=[self.__tree.vertices[0], Leaf(label=id, tree=self.__tree)], tree=self.__tree)
    else:
        # Find the edges which will (and will not) increase the diameter
        (will, willnot) = self.__PartitionEdges()
        # if no unmarked edges exist or we want to increase diameter
        # randomly select an edge to split
        if not len(willnot) or random.random() > (1.0 - diameter):
            esplit = random.choice(will)
        else:
            esplit = random.choice(willnot)
        # Add the new taxon by splitting the edge
        self.__SplitEdge(esplit, id)
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride=[16, ], anchor_scales=[16, ]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.
    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not
    dontcare_areas: (D, 4), some areas may contain small objs but no labelling. D may be 0
    im_info: a list of [image_height, image_width, scale_ratios]
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_labels : (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
    rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes (may contain some transform) that are the regression objectives
    rpn_bbox_inside_weights: (HxWxA, 4) weights of each box, mainly accepts hyper params in cfg
    rpn_bbox_outside_weights: (HxWxA, 4) used to balance the fg/bg, because the numbers of bgs and fgs may differ significantly
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))  # generate the base anchors, 9 in total
    _num_anchors = _anchors.shape[0]  # 9 anchors

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print((np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        ))))
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0
    # map of shape (..., H, W)
    #height, width = rpn_cls_score.shape[1:3]

    im_info = im_info[0]  # image height, width and number of channels
    # locate the anchors on the feature map and add the deltas to get their
    # real coordinates in the input image

    # Algorithm:
    # for each (H, W) location i
    #   generate 9 anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    # measure GT overlap
    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]  # height and width of the feature map

    if DEBUG:
        print(('AnchorTargetLayer: height', height, 'width', width))
        print('')
        print(('im_size: ({}, {})'.format(im_info[0], im_info[1])))
        print(('scale: {}'.format(im_info[2])))
        print(('height, width: ({}, {})'.format(height, width)))
        print(('rpn: gt_boxes.shape', gt_boxes.shape))
        print(('rpn: gt_boxes', gt_boxes))

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order
    # K is H x W
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()  # offsets between anchor positions on the feature map and on the real image
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors  # 9 anchors
    K = shifts.shape[0]  # e.g. 50*37, width times height of the feature map
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))  # broadcast over the spatial positions and add
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image; anchors that stick out are dropped
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)    # height
    )[0]

    if DEBUG:
        print(('total_anchors', total_anchors))
        print(('inds_inside', len(inds_inside)))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]  # keep only the anchors inside the image
    if DEBUG:
        print(('anchors.shape', anchors.shape))

    # at this point the anchors are ready
    # --------------------------------------------------------------
    # label: 1 is positive, 0 is negative, -1 is dont care
    # (A)
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)  # initialise all labels to -1

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt), shape is A x G
    # compute the overlaps between anchors and gt boxes, used to label the anchors
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float),
        np.ascontiguousarray(gt_boxes, dtype=np.float))  # with x anchors and y gt_boxes this returns an (x, y) array
    # holding the overlap between every anchor and every gt box
    argmax_overlaps = overlaps.argmax(axis=1)  # (A)  for each anchor, the index of the gt box with the largest overlap
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # G  for each gt box, the anchor with the largest overlap
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0  # label background first: anchors whose overlap is below 0.3

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1  # the anchor with the largest overlap with each gt box is treated as foreground

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1  # overlaps above 0.7 are treated as foreground

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # preclude dontcare areas
    # for now we do not consider dontcare areas
    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:
        # intersec shape is D x A
        intersecs = bbox_intersections(
            np.ascontiguousarray(dontcare_areas, dtype=np.float),  # D x 4
            np.ascontiguousarray(anchors, dtype=np.float)  # A x 4
        )
        intersecs_ = intersecs.sum(axis=0)  # A x 1
        labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1

    # for now we also do not consider the hard-sample issue
    # preclude hard samples that are highly occlusioned, truncated or difficult to see
    if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[0] > 0:
        assert gt_ishard.shape[0] == gt_boxes.shape[0]
        gt_ishard = gt_ishard.astype(int)
        gt_hardboxes = gt_boxes[gt_ishard == 1, :]
        if gt_hardboxes.shape[0] > 0:
            # H x A
            hard_overlaps = bbox_overlaps(
                np.ascontiguousarray(gt_hardboxes, dtype=np.float),  # H x 4
                np.ascontiguousarray(anchors, dtype=np.float))  # A x 4
            hard_max_overlaps = hard_overlaps.max(axis=0)  # (A)
            labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
            max_intersec_label_inds = hard_overlaps.argmax(axis=1)  # H x 1
            labels[max_intersec_label_inds] = -1

    # subsample positive labels if we have too many
    # cap the number of positives at 128
    # TODO: this may need revisiting later; if character fragments are used,
    # the number of positives is large
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)  # randomly drop some positives
        labels[disable_inds] = -1  # set to -1

    # subsample negative labels if we have too many
    # positives and negatives total 256, with at most 128 positives;
    # if there are fewer than 128 positives, the shortfall is filled with
    # negatives to reach 256 samples
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1
        #print "was %s inds, disabling %s, now %s inds" % (
        #len(bg_inds), len(disable_inds), np.sum(labels == 0))

    # labels are assigned; now compute the rpn-box regression targets
    # --------------------------------------------------------------
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])  # targets computed from each anchor and its gt box (the offset between them)

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)  # inside weights: 1 for foreground, 0 otherwise

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:  # use uniform weights for now, i.e. 1 for positives and 0 for negatives
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0) + 1
        # positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        # negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            (np.sum(labels == 1)) + 1)
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            (np.sum(labels == 0)) + 1)
    bbox_outside_weights[labels == 1, :] = positive_weights  # outside weights: 1 for foreground, 0 for background
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors
    # anchors outside the image were dropped at the start; add them back now
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)  # those anchors get label -1, i.e. dontcare
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)  # those anchors get target 0, i.e. no value
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)  # inside weights padded with 0
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)  # outside weights padded with 0

    if DEBUG:
        print(('rpn: max max_overlap', np.max(max_overlaps)))
        print(('rpn: num_positive', np.sum(labels == 1)))
        print(('rpn: num_negative', np.sum(labels == 0)))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print(('rpn: num_positive avg', _fg_sum / _count))
        print(('rpn: num_negative avg', _bg_sum / _count))

    # labels
    labels = labels.reshape((1, height, width, A))  # reshape the labels
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))  # reshape
    rpn_bbox_targets = bbox_targets

    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
random.seed(42) n_tests = 10000 winning_doors = random.randint(0, 3, n_tests) change_mind_wins = 0 insist_wins = 0 for winning_door in winning_doors: first_try = random.randint(0, 3) remaining_choices = [i for i in range(3) if i != first_try] wrong_choices = [i for i in range(3) if i != winning_door] if first_try in wrong_choices: wrong_choices.remove(first_try) screened_out = random.choice(wrong_choices) remaining_choices.remove(screened_out) changed_mind_try = remaining_choices[0] change_mind_wins += 1 if changed_mind_try == winning_door else 0 insist_wins += 1 if first_try == winning_door else 0 print( 'You win {1} out of {0} tests if you changed your mind\n' 'You win {2} out of {0} tests if you insist on the initial choice'.format( n_tests, change_mind_wins, insist_wins))
def sample_except(limit, excluded): candidate = nr.choice(limit - 1) if candidate >= excluded: candidate += 1 return candidate
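A hypothetical sanity check for sample_except (assuming numpy.random is imported as nr, as the function expects): the excluded value should never come back and the remaining values should be roughly uniform.

from collections import Counter
import numpy.random as nr

counts = Counter(sample_except(5, excluded=2) for _ in range(10000))
assert 2 not in counts
print(sorted(counts.items()))   # the other four values appear ~2500 times each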
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg, labels=None, overlaps=None, bbox_targets=None, gt_boxes=None): """ generate random sample of ROIs comprising foreground and background examples :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index :param fg_rois_per_image: foreground roi number :param rois_per_image: total roi number :param num_classes: number of classes :param labels: maybe precomputed :param overlaps: maybe precomputed (max_overlaps) :param bbox_targets: maybe precomputed :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls) :return: (labels, rois, bbox_targets, bbox_weights) """ if labels is None: overlaps = bbox_overlaps(rois[:, 1:].astype(np.float), gt_boxes[:, :4].astype(np.float)) gt_assignment = overlaps.argmax(axis=1) overlaps = overlaps.max(axis=1) labels = gt_boxes[gt_assignment, 4] # foreground RoI with FG_THRESH overlap fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size) # Sample foreground regions without replacement if len(fg_indexes) > fg_rois_per_this_image: fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] # Compute number of background RoIs to take from this image (guarding against there being fewer than desired) bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size) # Sample foreground regions without replacement if len(bg_indexes) > bg_rois_per_this_image: bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False) # indexes selected keep_indexes = np.append(fg_indexes, bg_indexes) # pad more to ensure a fixed minibatch size while keep_indexes.shape[0] < rois_per_image: gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0]) gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False) keep_indexes = np.append(keep_indexes, gap_indexes) # select labels labels = labels[keep_indexes] # set labels of bg_rois to be 0 labels[fg_rois_per_this_image:] = 0 rois = rois[keep_indexes] # load or compute bbox_target if bbox_targets is not None: bbox_target_data = bbox_targets[keep_indexes, :] else: targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4]) if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED: targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) / np.array(cfg.TRAIN.BBOX_STDS)) bbox_target_data = np.hstack((labels[:, np.newaxis], targets)) bbox_targets, bbox_weights = \ expand_bbox_regression_targets(bbox_target_data, num_classes, cfg) return rois, labels, bbox_targets, bbox_weights
def split(_fts, _lbs, test_ratio):
    indices = np.arange(_lbs.size)
    # sample test indices without replacement so train and test do not overlap
    i_test = rnd.choice(indices, size=int(round(test_ratio * _lbs.size)), replace=False)
    i_train = np.array([i for i in indices if i not in i_test])
    return (_fts[i_train, :], _lbs[i_train]), (_fts[i_test, :], _lbs[i_test])
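A usage sketch for split with made-up data, assuming numpy and numpy.random are available under the np/rnd aliases the function uses.

import numpy as np
import numpy.random as rnd

fts = np.arange(40).reshape(20, 2)
lbs = np.arange(20)
(train_x, train_y), (test_x, test_y) = split(fts, lbs, test_ratio=0.25)
print(train_x.shape, test_x.shape)   # (15, 2) (5, 2)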
sys.stderr.write("Calculating intra-gene LD...\n") # calculate SFS for gene_name in allele_counts_map.keys(): print gene_name locations = numpy.array([location for chromosome, location in allele_counts_map[gene_name]['4D']['locations']])*1.0 allele_counts = allele_counts_map[gene_name]['4D']['alleles'] if len(allele_counts)==0: # no diversity to look at! continue # pick a random gene somewhere else as a control control_gene_name = gene_name control_allele_counts = [] while gene_name==control_gene_name or len(control_allele_counts)==0: control_gene_name = choice(allele_counts_map.keys()) control_allele_counts = allele_counts_map[control_gene_name]['4D']['alleles'] allele_counts = allele_counts[:,desired_samples,:] control_allele_counts = control_allele_counts[:,desired_samples,:] #compute the distances between all pairs of sites # None in the two index positions results in a transpose of the vector relative to each other # Subtraction between the two vectors results in pairwise subtraction of each element in each vector. distances = numpy.fabs(locations[:,None]-locations[None,:]) low_freq=0.3 high_freq=0.5 rsquared_numerators, rsquared_denominators = diversity_utils.calculate_rsquared_condition_freq(allele_counts, allele_counts, low_freq, high_freq) control_rsquared_numerators, control_rsquared_denominators = diversity_utils.calculate_rsquared_condition_freq(allele_counts, control_allele_counts, low_freq, high_freq)
def ADAM(A_input, b_input, x_input, probDist, maxIters, errorType=0, utType="Reg", tpType="Reg", momentumMult=0.1): numStates, numFeatures = A_input.shape A_proc, b_proc = np.array(A_input, copy=True), np.array(b_input, copy=True) x_first = np.array(x_input, copy=True).reshape(numFeatures) x_last = x_first x_proc = np.array(x_input, copy=True).reshape(numFeatures) x_prev = x_proc # Error vectors errors = np.zeros(maxIters) errors[0] = errorCalcs.getErrorMethod(A_proc, b_proc, x_proc, probDist, errorType, norm=2) iters = 1 beta2 = 0.999 beta1 = 0.9 meanSquareGradientAccumulator = np.zeros(x_proc.shape) momentumAccumulator = np.zeros(x_proc.shape) while iters < maxIters: numSamples = 3 sampledRows = random.choice(numStates, numSamples, p=probDist) tp1 = TPCore.TPAlgosampledRows(A_proc, b_proc, x_proc, sampledRows=sampledRows) tp2 = TPCore.TPAlgosampledRows(A_proc, b_proc, tp1, sampledRows=sampledRows) dTP1 = tp1 - x_proc dTP2 = tp2 - tp1 ddTP = dTP2 - dTP1 kappa = utils.twoNorm(ddTP) / (utils.twoNorm(dTP1))**2 # Radius of osculating circle radius = 1 / kappa radiusByNorm_dTP1 = radius / utils.twoNorm(dTP1) alpha = 1 / (iters * numSamples / numStates + 1) alpha = alpha * radiusByNorm_dTP1 # Notice that we have multiplied and divided by utils.twoNorm(dTP1) one, # which was done for clarity and may be skipped. # alpha = alpha * radius momentumAccumulator = beta1 * momentumAccumulator + (1 - beta1) * dTP1 meanSquareGradientAccumulator = beta2 * meanSquareGradientAccumulator + ( 1 - beta2) * dTP1**2 mHat = momentumAccumulator / (1 - beta1**(iters + 1)) vHat = meanSquareGradientAccumulator / (1 - beta2**(iters + 1)) epsilon = 1e-6 # momentumTerm = alpha * mHat / ((vHat) ** 0.5 + epsilon) - alpha * dTP1 momentumTerm = alpha * mHat / ((vHat)**0.5 + epsilon) x_prev = x_proc x_proc = x_proc + momentumTerm errors[iters] = errorCalcs.getErrorMethod(A_proc, b_proc, x_proc, probDist, errorType, norm=2) iters += 1 return x_proc, errors
def forward(self, arguments, outputs, device=None, outputs_to_retain=None): # Algorithm: # # for each (H, W) location i # generate 9 anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the 9 anchors # filter out-of-image anchors # measure GT overlap bottom = arguments # map of shape (..., H, W) height, width = bottom[0].shape[-2:] # GT boxes (x1, y1, x2, y2, label) gt_boxes = bottom[1][0, :] # im_info im_info = bottom[2] # remove zero padded ground truth boxes keep = np.where(((gt_boxes[:, 2] - gt_boxes[:, 0]) > 0) & ((gt_boxes[:, 3] - gt_boxes[:, 1]) > 0)) gt_boxes = gt_boxes[keep] if DEBUG: print('') print('im_size: ({}, {})'.format(im_info[0], im_info[1])) print('scale: {}'.format(im_info[2])) print('height, width: ({}, {})'.format(height, width)) print('rpn: gt_boxes.shape', gt_boxes.shape) #print ('rpn: gt_boxes', gt_boxes) # 1. Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * self._feat_stride shift_y = np.arange(0, height) * self._feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = self._num_anchors K = shifts.shape[0] all_anchors = (self._anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2))) all_anchors = all_anchors.reshape((K * A, 4)) total_anchors = int(K * A) # only keep anchors inside the image inds_inside = np.where( (all_anchors[:, 0] >= -self._allowed_border) & (all_anchors[:, 1] >= -self._allowed_border) & (all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width (all_anchors[:, 3] < im_info[0] + self._allowed_border) # height )[0] if DEBUG: print('total_anchors', total_anchors) print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] if DEBUG: print('anchors.shape', anchors.shape) # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside), ), dtype=np.float32) labels.fill(-1) # overlaps between the anchors and the gt boxes # overlaps (ex, gt) overlaps = bbox_overlaps( np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float)) argmax_overlaps = overlaps.argmax(axis=1) max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] gt_argmax_overlaps = overlaps.argmax(axis=0) gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] if not cfg["TRAIN"].RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them labels[max_overlaps < cfg["TRAIN"].RPN_NEGATIVE_OVERLAP] = 0 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 # fg label: above threshold IOU labels[max_overlaps >= cfg["TRAIN"].RPN_POSITIVE_OVERLAP] = 1 if cfg["TRAIN"].RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg["TRAIN"].RPN_NEGATIVE_OVERLAP] = 0 # subsample positive labels if we have too many num_fg = int(cfg["TRAIN"].RPN_FG_FRACTION * cfg["TRAIN"].RPN_BATCHSIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: if self._determininistic_mode: disable_inds = fg_inds[:(len(fg_inds) - num_fg)] else: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) labels[disable_inds] = -1 # subsample negative labels if we 
have too many num_bg = cfg["TRAIN"].RPN_BATCHSIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: if self._determininistic_mode: disable_inds = bg_inds[:(len(bg_inds) - num_bg)] else: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0)) if DEBUG: self._sums += bbox_targets[labels == 1, :].sum(axis=0) self._squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0) self._counts += np.sum(labels == 1) means = self._sums / self._counts stds = np.sqrt(self._squared_sums / self._counts - means**2) print('means:') print(means) print('stdevs:') print(stds) # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) if DEBUG: print('rpn: max max_overlap', np.max(max_overlaps)) print('rpn: num_positive', np.sum(labels == 1)) print('rpn: num_negative', np.sum(labels == 0)) self._fg_sum += np.sum(labels == 1) self._bg_sum += np.sum(labels == 0) self._count += 1 print('rpn: num_positive avg', self._fg_sum / self._count) print('rpn: num_negative avg', self._bg_sum / self._count) # labels labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) outputs[self.outputs[0]] = np.ascontiguousarray(labels) # bbox_targets bbox_targets = bbox_targets.reshape( (1, height, width, A * 4)).transpose(0, 3, 1, 2) outputs[self.outputs[1]] = np.ascontiguousarray(bbox_targets) # bbox_inside_weights bbox_inside_weights = bbox_inside_weights \ .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) assert bbox_inside_weights.shape[2] == height assert bbox_inside_weights.shape[3] == width outputs[self.outputs[2]] = np.ascontiguousarray(bbox_inside_weights) # No state needs to be passed to backward() so we just pass None return None
def randomLocPair(self, study_id):
    activeLocID = self.study2activeLocID[study_id]
    # np.random.choice needs a sequence, not a dict view
    a, b = rnd.choice(list(activeLocID.keys()), size=2, replace=False)
    return self.locs[a], self.locs[b]
def getNextRule(self, validRulesRS, validRulesRSint, time): #for x in self.communicatedIntentions: #print("TL is", self.getName(),". The intention is", self.communicatedIntentions[x].getAction()) self.numOfRulesSelected += 1 # First, select a rule from RS and communicate it intendedRule = self.getAssignedIndividual().selectRule( validRulesRS) # Get intended rule to apply #print("Intended rule is", intendedRule, "!\n\n\n") if intendedRule == -1: self.numOfTimesNoRSRuleWasValid += 1 if self.currentRule is None or self.currentRule == -1: self.setIntention( Intention(self, len(self.getAgentPool().getActionSet()) - 1, time)) # Return the Do Nothing action else: #print("Using current rule instead. It is", self.currentRule) self.setIntention( Intention(self, self.currentRule.getAction(), time)) else: if self.currentRule is None or self.currentRule == -1: #print('In else. Intended rule is', intendedRule) self.setIntention( Intention(self, len(self.getAgentPool().getActionSet()) - 1, time)) else: self.setIntention( Intention(self, intendedRule.getAction(), time)) # If intended rule isn't user-defined, select a rule from RSint and then decide between the two coopRule = self.getAssignedIndividual().selectCoopRule(validRulesRSint) if coopRule == -1: self.numOfTimesNoCoopRuleWasValid += 1 #print("No valid rule from RSint.") if intendedRule == -1 and coopRule == -1: #print("Neither intended nor coopRule valid.") if self.currentRule is None or self.currentRule == -1: #print('In if statement. Current rule is', self.currentRule) self.setIntention( Intention(self, len(self.getAgentPool().getActionSet()) - 1, time)) return -1 else: #print("Returning currentRule with action", self.currentRule.getAction()) self.setIntention( Intention(self, self.currentRule.getAction(), time)) return self.currentRule # If no valid rules apply from RSint, return the intented rule from RS elif coopRule == -1 and intendedRule != -1: #print("CoopRule invalid. Applying intended rule:", intendedRule) self.setIntention(Intention(self, intendedRule.getAction(), time)) return intendedRule elif coopRule != -1 and intendedRule == -1: #print("Intended rule invalid. Applying coop rule:", coopRule) self.setIntention(Intention(self, coopRule.getAction(), time)) return coopRule elif coopRule.getWeight() >= intendedRule.getWeight(): #print("CoopRule has higher weight than intended rule. Applying it:", coopRule) self.setIntention(Intention(self, coopRule.getAction(), time)) return coopRule else: rule = choice([coopRule, intendedRule], 1, p=[ pCoop, (1 - pCoop) ]) # Select one of the two rules based on pCoop value #print("The rule options are", rule, "and we chose", rule[0]) self.setIntention(Intention(self, rule[0].getAction(), time)) return rule[ 0] # Choice returns an array, so we take the only element of it
def my_rand(i, w): normed = [elem / sum(w) for elem in w] return choice(i, p=normed)
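A small usage sketch for my_rand, assuming the choice it calls is numpy.random.choice; the items and weights below are made up.

from numpy.random import choice

items = ['a', 'b', 'c']
weights = [1.0, 2.0, 7.0]
picks = [my_rand(items, weights) for _ in range(1000)]
print(picks.count('c') / 1000.0)   # ~0.7, since 'c' carries 70% of the weight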
def getRandomStudy(self): return self.getStudy(self.biased_studs[rnd.choice( len(self.biased_studs))]['_id'])
def __call__(self, image, masks, boxes=None, labels=None): height, width, _ = image.shape while True: # randomly choose a mode mode = random.choice(self.sample_options) if mode is None: return image, masks, boxes, labels min_iou, max_iou = mode if min_iou is None: min_iou = float('-inf') if max_iou is None: max_iou = float('inf') # max trails (50) for _ in range(50): current_image = image w = random.uniform(0.3 * width, width) h = random.uniform(0.3 * height, height) # aspect ratio constraint b/t .5 & 2 if h / w < 0.5 or h / w > 2: continue left = random.uniform(width - w) top = random.uniform(height - h) # convert to integer rect x1,y1,x2,y2 rect = np.array( [int(left), int(top), int(left + w), int(top + h)]) # calculate IoU (jaccard overlap) b/t the cropped and gt boxes overlap = jaccard_numpy(boxes, rect) # This piece of code is bugged and does nothing: # https://github.com/amdegroot/ssd.pytorch/issues/68 # # However, when I fixed it with overlap.max() < min_iou, # it cut the mAP in half (after 8k iterations). So it stays. # # is min and max overlap constraint satisfied? if not try again if overlap.min() < min_iou and max_iou < overlap.max(): continue # cut the crop from the image current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], :] # keep overlap with gt box IF center in sampled patch centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 # mask in all gt boxes that above and to the left of centers m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1]) # mask in all gt boxes that under and to the right of centers m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1]) # mask in that both m1 and m2 are true mask = m1 * m2 # [0 ... 0 for num_gt and then 1 ... 1 for num_crowds] num_crowds = labels['num_crowds'] crowd_mask = np.zeros(mask.shape, dtype=np.int32) if num_crowds > 0: crowd_mask[-num_crowds:] = 1 # have any valid boxes? try again if not # Also make sure you have at least one regular gt if not mask.any() or np.sum(1 - crowd_mask[mask]) == 0: continue # take only the matching gt masks current_masks = masks[mask, :, :].copy() # take only matching gt boxes current_boxes = boxes[mask, :].copy() # take only matching gt labels labels['labels'] = labels['labels'][mask] current_labels = labels # We now might have fewer crowd annotations if num_crowds > 0: labels['num_crowds'] = np.sum(crowd_mask[mask]) # should we use the box left and top corner or the crop's current_boxes[:, :2] = np.maximum(current_boxes[:, :2], rect[:2]) # adjust to crop (by substracting crop's left,top) current_boxes[:, :2] -= rect[:2] current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], rect[2:]) # adjust to crop (by substracting crop's left,top) current_boxes[:, 2:] -= rect[:2] # crop the current masks to the same dimensions as the image current_masks = current_masks[:, rect[1]:rect[3], rect[0]:rect[2]] return current_image, current_masks, current_boxes, current_labels
def main(_): sns.set() sns.set_palette(sns.color_palette('hls', 10)) npr.seed(FLAGS.seed) logging.info('Starting experiment.') # Create model folder for outputs try: gfile.MakeDirs(FLAGS.work_dir) except gfile.GOSError: pass stdout_log = gfile.Open('{}/stdout.log'.format(FLAGS.work_dir), 'w+') # use mean/std of svhn train train_images, _, _ = datasets.get_dataset_split( name=FLAGS.train_split.split('-')[0], split=FLAGS.train_split.split('-')[1], shuffle=False) train_mu, train_std = onp.mean(train_images), onp.std(train_images) del train_images # BEGIN: fetch test data and candidate pool test_images, test_labels, _ = datasets.get_dataset_split( name=FLAGS.test_split.split('-')[0], split=FLAGS.test_split.split('-')[1], shuffle=False) pool_images, pool_labels, _ = datasets.get_dataset_split( name=FLAGS.pool_split.split('-')[0], split=FLAGS.pool_split.split('-')[1], shuffle=False) n_pool = len(pool_images) test_images = (test_images - train_mu) / train_std # normalize w train mu/std pool_images = (pool_images - train_mu) / train_std # normalize w train mu/std # augmentation for train/pool data if FLAGS.augment_data: augmentation = data.chain_transforms(data.RandomHorizontalFlip(0.5), data.RandomCrop(4), data.ToDevice) else: augmentation = None # END: fetch test data and candidate pool # BEGIN: load ckpt opt_init, opt_update, get_params = optimizers.sgd(FLAGS.learning_rate) if FLAGS.pretrained_dir is not None: with gfile.Open(FLAGS.pretrained_dir, 'rb') as fpre: pretrained_opt_state = optimizers.pack_optimizer_state( pickle.load(fpre)) fixed_params = get_params(pretrained_opt_state)[:7] ckpt_dir = '{}/{}'.format(FLAGS.root_dir, FLAGS.ckpt_idx) with gfile.Open(ckpt_dir, 'wr') as fckpt: opt_state = optimizers.pack_optimizer_state(pickle.load(fckpt)) params = get_params(opt_state) # combine fixed pretrained params and dpsgd trained last layers params = fixed_params + params opt_state = opt_init(params) else: ckpt_dir = '{}/{}'.format(FLAGS.root_dir, FLAGS.ckpt_idx) with gfile.Open(ckpt_dir, 'wr') as fckpt: opt_state = optimizers.pack_optimizer_state(pickle.load(fckpt)) params = get_params(opt_state) stdout_log.write('finetune from: {}\n'.format(ckpt_dir)) logging.info('finetune from: %s', ckpt_dir) test_acc, test_pred = accuracy(params, shape_as_image(test_images, test_labels), return_predicted_class=True) logging.info('test accuracy: %.2f', test_acc) stdout_log.write('test accuracy: {}\n'.format(test_acc)) stdout_log.flush() # END: load ckpt # BEGIN: setup for dp model @jit def update(_, i, opt_state, batch): params = get_params(opt_state) return opt_update(i, grad_loss(params, batch), opt_state) @jit def private_update(rng, i, opt_state, batch): params = get_params(opt_state) rng = random.fold_in(rng, i) # get new key for new random numbers return opt_update( i, private_grad(params, batch, rng, FLAGS.l2_norm_clip, FLAGS.noise_multiplier, FLAGS.batch_size), opt_state) # END: setup for dp model n_uncertain = FLAGS.n_extra + FLAGS.uncertain_extra ### BEGIN: prepare extra points picked from pool data # BEGIN: on pool data pool_embeddings = [apply_fn_0(params[:-1], pool_images[b_i:b_i + FLAGS.batch_size]) \ for b_i in range(0, n_pool, FLAGS.batch_size)] pool_embeddings = np.concatenate(pool_embeddings, axis=0) pool_logits = apply_fn_1(params[-1:], pool_embeddings) pool_true_labels = np.argmax(pool_labels, axis=1) pool_predicted_labels = np.argmax(pool_logits, axis=1) pool_correct_indices = \ onp.where(pool_true_labels == pool_predicted_labels)[0] pool_incorrect_indices = \ onp.where(pool_true_labels 
!= pool_predicted_labels)[0] assert len(pool_correct_indices) + \ len(pool_incorrect_indices) == len(pool_labels) pool_probs = stax.softmax(pool_logits) if FLAGS.uncertain == 0 or FLAGS.uncertain == 'entropy': pool_entropy = -onp.sum(pool_probs * onp.log(pool_probs), axis=1) stdout_log.write('all {} entropy: min {}, max {}\n'.format( len(pool_entropy), onp.min(pool_entropy), onp.max(pool_entropy))) pool_entropy_sorted_indices = onp.argsort(pool_entropy) # take the n_uncertain most uncertain points pool_uncertain_indices = \ pool_entropy_sorted_indices[::-1][:n_uncertain] stdout_log.write('uncertain {} entropy: min {}, max {}\n'.format( len(pool_entropy[pool_uncertain_indices]), onp.min(pool_entropy[pool_uncertain_indices]), onp.max(pool_entropy[pool_uncertain_indices]))) elif FLAGS.uncertain == 1 or FLAGS.uncertain == 'difference': # 1st_prob - 2nd_prob assert len(pool_probs.shape) == 2 sorted_pool_probs = onp.sort(pool_probs, axis=1) pool_probs_diff = sorted_pool_probs[:, -1] - sorted_pool_probs[:, -2] assert min(pool_probs_diff) > 0. pool_uncertain_indices = onp.argsort(pool_probs_diff)[:n_uncertain] # END: on pool data # BEGIN: cluster uncertain pool points big_pca = sklearn.decomposition.PCA(n_components=pool_embeddings.shape[1]) big_pca.random_state = FLAGS.seed # fit PCA onto embeddings of all the pool points big_pca.fit(pool_embeddings) # For uncertain points, project embeddings onto the first K components pool_uncertain_projected_embeddings, _ = utils.project_embeddings( pool_embeddings[pool_uncertain_indices], big_pca, FLAGS.k_components) n_cluster = int(FLAGS.n_extra / FLAGS.ppc) cluster_method = get_cluster_method('{}_nc-{}'.format( FLAGS.clustering, n_cluster)) cluster_method.random_state = FLAGS.seed pool_uncertain_cluster_labels = cluster_method.fit_predict( pool_uncertain_projected_embeddings) pool_uncertain_cluster_label_indices = { x: [] for x in set(pool_uncertain_cluster_labels) } # local i within n_uncertain for i, c_label in enumerate(pool_uncertain_cluster_labels): pool_uncertain_cluster_label_indices[c_label].append(i) # find center of each cluster # aka, the most representative point of each 'tough' cluster pool_picked_indices = [] pool_uncertain_cluster_label_pick = {} for c_label, indices in pool_uncertain_cluster_label_indices.items(): cluster_projected_embeddings = \ pool_uncertain_projected_embeddings[indices] cluster_center = onp.mean(cluster_projected_embeddings, axis=0, keepdims=True) if FLAGS.distance == 0 or FLAGS.distance == 'euclidean': cluster_distances = euclidean_distances( cluster_projected_embeddings, cluster_center).reshape(-1) elif FLAGS.distance == 1 or FLAGS.distance == 'weighted_euclidean': cluster_distances = weighted_euclidean_distances( cluster_projected_embeddings, cluster_center, big_pca.singular_values_[:FLAGS.k_components]) sorted_is = onp.argsort(cluster_distances) sorted_indices = onp.array(indices)[sorted_is] pool_uncertain_cluster_label_indices[c_label] = sorted_indices center_i = sorted_indices[0] # center_i in 3000 pool_uncertain_cluster_label_pick[c_label] = center_i pool_picked_indices.extend( pool_uncertain_indices[sorted_indices[:FLAGS.ppc]]) # BEGIN: visualize cluster of picked pool if FLAGS.visualize: this_cluster = [] for i in sorted_indices: idx = pool_uncertain_indices[i] img = utils.denormalize(pool_images[idx], train_mu, train_std) if idx in pool_correct_indices: border_color = 'green' else: border_color = 'red' img = utils.mark_labels(img, pool_predicted_labels[idx], pool_true_labels[idx]) img = utils.add_border(img, 
width=2, color=border_color) this_cluster.append(img) utils.tile_image_list( this_cluster, '{}/picked_uncertain_pool_cid-{}'.format( FLAGS.work_dir, c_label)) # END: visualize cluster of picked pool # END: cluster uncertain pool points pool_picked_indices = list(set(pool_picked_indices)) n_gap = FLAGS.n_extra - len(pool_picked_indices) gap_indices = list(set(pool_uncertain_indices) - set(pool_picked_indices)) pool_picked_indices.extend(npr.choice(gap_indices, n_gap, replace=False)) stdout_log.write('n_gap: {}\n'.format(n_gap)) ### END: prepare extra points picked from pool data finetune_images = copy.deepcopy(pool_images[pool_picked_indices]) finetune_labels = copy.deepcopy(pool_labels[pool_picked_indices]) stdout_log.write('{} points picked via {}\n'.format( len(finetune_images), FLAGS.uncertain)) logging.info('%d points picked via %s', len(finetune_images), FLAGS.uncertain) assert FLAGS.n_extra == len(finetune_images) # END: gather points to be used for finetuning stdout_log.write('Starting fine-tuning...\n') logging.info('Starting fine-tuning...') stdout_log.flush() for epoch in range(1, FLAGS.epochs + 1): # BEGIN: finetune model with extra data, evaluate and save num_extra = len(finetune_images) num_complete_batches, leftover = divmod(num_extra, FLAGS.batch_size) num_batches = num_complete_batches + bool(leftover) finetune = data.DataChunk(X=finetune_images, Y=finetune_labels, image_size=32, image_channels=3, label_dim=1, label_format='numeric') batches = data.minibatcher(finetune, FLAGS.batch_size, transform=augmentation) itercount = itertools.count() key = random.PRNGKey(FLAGS.seed) start_time = time.time() for _ in range(num_batches): # tmp_time = time.time() b = next(batches) if FLAGS.dpsgd: opt_state = private_update( key, next(itercount), opt_state, shape_as_image(b.X, b.Y, dummy_dim=True)) else: opt_state = update(key, next(itercount), opt_state, shape_as_image(b.X, b.Y)) # stdout_log.write('single update in {:.2f} sec\n'.format( # time.time() - tmp_time)) epoch_time = time.time() - start_time stdout_log.write('Epoch {} in {:.2f} sec\n'.format(epoch, epoch_time)) logging.info('Epoch %d in %.2f sec', epoch, epoch_time) # accuracy on test data params = get_params(opt_state) test_pred_0 = test_pred test_acc, test_pred = accuracy(params, shape_as_image(test_images, test_labels), return_predicted_class=True) test_loss = loss(params, shape_as_image(test_images, test_labels)) stdout_log.write( 'Eval set loss, accuracy (%): ({:.2f}, {:.2f})\n'.format( test_loss, 100 * test_acc)) logging.info('Eval set loss, accuracy: (%.2f, %.2f)', test_loss, 100 * test_acc) stdout_log.flush() # visualize prediction difference between 2 checkpoints. if FLAGS.visualize: utils.visualize_ckpt_difference(test_images, np.argmax(test_labels, axis=1), test_pred_0, test_pred, epoch - 1, epoch, FLAGS.work_dir, mu=train_mu, sigma=train_std) # END: finetune model with extra data, evaluate and save stdout_log.close()
from_) altInpMsg, update_id, from_ = getMessage(update_id, from_) altInpMsg = altInpMsg.strip().lower().split() if "yes" in altInpMsg or "yup" in altInpMsg: ansMatch = match(inputMsg, "data/final_concat.csv") bot.sendMessage(ansMatch, from_) altInpMsg = '' elif "no" in altInpMsg or "nope" in altInpMsg: bot.sendMessage("Your choice.. I was just trying to help.", from_) altInpMsg = '' else: continue else: bot.sendMessage(ans, from_) randAltAns = choice(list(altans.keys()), size=2, replace=False) if len(altans) != 0: QUES = "Do you wish to know more about " # alternate answers bot.sendMessage(QUES + ", ".join(randAltAns) + "?", from_) altInpMsg, update_id, from_ = getMessage(update_id, from_) altInpMsg = altInpMsg.strip().lower().split() if ("yes" in altInpMsg): string = "" if len(altans1) != 0: for w in randAltAns: if w in altInpMsg: string += altans[w] + "\n" if string == "": for w in randAltAns: string += altans[w] + "\n"
def meta_strategy(self, results, opponent):
    """Sample one of the results at random, weighted by self.distribution, via numpy.random.choice."""
    return choice(results, p=self.distribution)
import pandas as pd
from scipy.spatial.distance import pdist, squareform
from scipy import exp
from scipy.linalg import eigh

Dataset_originall = pd.read_csv('../parkinson.csv')  # load the file
Dataset = Dataset_originall.values  # put the DataFrame object into a NumPy array
m = 5  # number of dimensions to keep for the feature-space projection
sigma = 10  # variance of the kernel function
beta = 0.5  # parameter for the Gaussian process regression

# Of the 42 subjects' data, use 30 for training and 12 for testing
from numpy import random
Domain_number = 30
training_person = random.choice(list(range(1, 43)), Domain_number, replace=False)  # draw without replacement
training_person.sort()  # sorted order is easier to follow when feeding the Gaussian process

# First, store each subject's data separately
Training_Dataset = []  # the training subjects' data is accumulated here
for i in training_person:
    X = Dataset_originall[Dataset_originall['subject#'] == i]  # all rows for subject i, as a DataFrame
    X1 = [X.iloc[:, 4].values, X.iloc[:, 5].values, X.iloc[:, 6:].values]  # labels y and features X from that DataFrame
    Training_Dataset.append(X1)
# The result is a list: [[data of subject 1], [data of subject 2], ...]
# [data of subject 1] = [array of 149 y values, array of 149 y2 values, array of X values with shape (149, 16)]
# The test data is stored in the same way
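A small sketch of the subject split used above: sample 30 of the 42 subject IDs without replacement for training and take the remaining 12 as test subjects (the variable names mirror the snippet; np.setdiff1d is just one way to get the complement).

import numpy as np
from numpy import random

all_subjects = np.arange(1, 43)
training_person = random.choice(all_subjects, 30, replace=False)   # no duplicates
test_person = np.setdiff1d(all_subjects, training_person)          # the other 12 subjects
print(sorted(training_person), test_person)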
from pandas import array
from numpy import nan  # needed for the nan entries below

xs = array([1, 2, 3])
print(f'{xs * 2 = }')

xs = array([1, 2, 3, nan, nan])
print(f'{xs * 2 = }')

from pandas import Series
s = Series([1, 2, 3], index=[2, 1, 0])
print(s)
print(f'{s[0] = }')       # label-based: value at index label 0
print(f'{s[0:1] = }')     # positional slice: the first row
print(f'{s.loc[0] = }')   # explicit label-based lookup
print(f'{s.iloc[0] = }')  # explicit positional lookup

from pandas import DataFrame
from numpy.random import choice, normal
from string import ascii_lowercase

df = DataFrame({
    'ticker': choice([*ascii_lowercase], size=((size := 10), 4)).view('<U4').ravel(),
    'price': normal(size=size),
})
df = df.set_index('ticker')
print(df)
print(df.index)
print(df.columns)
print(df._data)  # private block-manager attribute (internal API)
print(df.stack())
print(df.unstack())
print(df.melt())
def sample(self, X, Y, params):
    sample_size = int(self.minibatch_frac * len(Y))
    idxs = np_rnd.choice(np.arange(len(Y)), sample_size, replace=False)
    return idxs, X[idxs, :], Y[idxs], params[idxs, :]
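The same minibatch-subsampling idea as a self-contained sketch using the newer numpy Generator API; the array shapes and the 0.25 fraction are illustrative.

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 5))
Y = rng.normal(size=100)
minibatch_frac = 0.25

sample_size = int(minibatch_frac * len(Y))
idxs = rng.choice(len(Y), sample_size, replace=False)   # distinct row indices
X_sub, Y_sub = X[idxs, :], Y[idxs]
print(idxs.shape, X_sub.shape, Y_sub.shape)             # (25,) (25, 5) (25,)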
def makeComboPics(vcf_list, sample_list, outdir, bam_dir, samplot_directory, bcftools_executable, num_pics, num_samps, ref_id, length_threshold=100000): for i in vcf_list: if os.path.exists(i[1]): if i[1].endswith("vcf"): suf = i[0] vcf = VCF(i[1]) vcf_dir = i[1].split("/")[-1].replace(".vcf", "_combos") # pdb.set_trace() Outdir = f"{outdir}/{vcf_dir}" if not os.path.exists(Outdir): os.mkdir(Outdir) if sample_list == "-9": samps = vcf.samples else: samps = sample_list.split(",") for variant in vcf: svtype = variant.INFO.get('SVTYPE') if svtype == "CNV": svtype = variant.INFO.get('GSCNCATEGORY') svLen = variant.INFO.get('GSELENGTH') if svtype == "None": print( "Change Type to String for GSCNCATEGORY in VCF header" ) genos = variant.format('CN').tolist() genos = [x[0] for x in genos] if variant.format('FT') is not None: filts = [ j for j, x in enumerate(variant.format('FT')) if x != "PASS" ] else: filts = [] if samps.index(ref_id) in filts: continue else: ref_allele = genos[samps.index(ref_id)] genos = [ 0 if x == ref_allele else 3 for x in genos ] genos = [ -9 if j in filts else x for j, x in enumerate(genos) ] else: svLen = variant.INFO.get('SVLEN') genos = variant.gt_types if svLen < length_threshold: alts = [j for j, x in enumerate(genos) if x == 3] refs = [j for j, x in enumerate(genos) if x == 0] if len(alts) > num_samps and len( refs ) > num_samps: #CHANGE NEEDED HERE TO ALLOW FOR 3 AND 3 OR X AND X ALT/REF SAMPS for k in range(0, num_pics): Samps = [ samps[ii] for ii in random.choice( alts, num_samps, replace=False) ] + [ samps[ii] for ii in random.choice( refs, num_samps, replace=False) ] # alt = [samps[i] for i in random.choice(alts, num_samps, replace=False)] # ref = [samps[i] for i in random.choice(refs, num_samps, replace=False)] Bams = [f"{bam_dir}/{x}{suf}" for x in Samps] png_file = f"{svtype}_{variant.CHROM}_{variant.start}_{variant.end}.png" cmd = f"{samplot_directory}/samplot.py -n {','.join(Samps)} -b {','.join(Bams)} -o {Outdir}/{png_file} -s {variant.start} -e {variant.end} -c {variant.CHROM} -a -t {svtype}" print(cmd) elif i[1].endswith("gz"): print("unzip vcf file: ", i[1]) else: print(i[1], "does not exist") return ()
def IBPFM(X, iteration, burnin=0, design=None, stdData=False, initZA=None, initSB=None, initK=None, initF=None, proposeK=True, updateZA=True, updateSB=True, updateF=True, nonGaussianF=False, updateIBPa_fm=True, updateIBPa_reg=True, updatePrec_x=True, updatePrec_a=True, updatePrec_b=True, prec_x_iso=False, learn_scale_xb=True, prec_a_iso=False, learn_scale_ab=True, prec_b_iso=False, learn_scale_bb=True, prec_x=None, prec_xa=1, prec_xb=1, prec_xb_a=1, prec_xb_b=1, prec_a=None, prec_aa=1, prec_ab=1, prec_ab_a=1, prec_ab_b=1, prec_b=None, prec_ba=1, prec_bb=1, prec_bb_a=1, prec_bb_b=1, fmIBPa=None, fmIBPa_a=1, fmIBPa_b=1, regIBPa=None, regIBPa_a=1, regIBPa_b=1, DPa=None, DPa_a=1, DPa_b=1, saveIteration=False, printIteration=100): """Factor model with IBP prior using Gibbs sampler.""" # Model: X = BH + (Z o A)F + noise # @X: (D x N) data matrix # @B: (D x P) regression coefficient for observed covariates # @H: (P x N) design matrix for observed covariates # @Z: (D x K) binary factor assignment matrix # @A: (D x K) factor loading matrix # @noise: (D x N) residuals # @iteration: # of simulation # @data: (D x N) data matrix # @design: (D x P) design matrix for covariates # OPTIONAL ARGUMENTS # @stdData: standardize data if necessary # @initZA: initial state of (Z o A) matrix. override initK if it's not None # @initK: initial number of features # @initF: initial state of F matrix # @proposeK: enable non-parametric approach for feature count K # @updateZA: update matrix Z and A # @updateF: update matrix F # @nonGaussianF: use Dirichlet process for F # @updatePrec_x: update inverse of residual variance # @updatePrec_a: update inverse of factor loading variance # #prec_x_iso: use isotropic residual variance? # @prec_a_iso: use isotropic factor loading variance? # @updateIBPa: update IBP parameter # @prec_x: initial state of prec_x (scalar) # @prec_xa: Gamma shape parameter for P(prec_x) # @prec_xb: Gamma rate parameter for P(prec_x) # @prec_xb_a: Gamma shape parameter for P(prec_xb) # @prec_xb_b: Gamma rate parameter for P(prec_xb) # @prec_a: initial state of prec_a (scalar) # @prec_aa: Gamma shape parameter for P(prec_a) # @prec_ab: Gamma rate parameter for P(prec_a) # @prec_ab_a: Gamma shape parameter for P(prec_ab) # @prec_ab_b: Gamma rate parameter for P(prec_ab) # @fmIBPa: IBP alpha Parameter for factor model part # @fmIBPa_a: Gamma shape parameter for P(fmIBPa) # @fmIBPa_b: Gamma rate parameter for P(fmIBPa) # @regIBPa: IBP alpha Parameter for regression part # @regIBPa_a: Gamma shape parameter for P(regIBPa) # @regIBPa_b: Gamma rate parameter for P(regIBPa) # @DPa: concentration parameter for Dirichlet process # @DPa_b: Gamma shape parameter for P(DPa) # @DPa_b: Gamma rate parameter for P(DPa) # @saveIteration: save output for each iteration as file D, N = X.shape # Create a matrix with missing indicators Xmask = np.isnan(X).astype(np.int) if stdData: X = (X - np.nanmean(X, axis=1, keepdims=True)) /\ np.nanstd(X, axis=1, keepdims=True) # Initialize noise variance from Gamma prior if prec_x is None: prec_x = np.ones(D) * nr.gamma(prec_xa, 1. 
/ prec_xb) else: prec_x = np.ones(D) * prec_x # Initialize IBP parameter alpha from Gamma prior if fmIBPa is None: fmIBPa = nr.gamma(fmIBPa_a, fmIBPa_b) if design is not None and regIBPa is None: regIBPa = nr.gamma(regIBPa_a, regIBPa_b) if initZA is None: # Generate binary feature assignment matrix Z if initK is not None: K = initK Z = nr.binomial(1, 0.5, (D, K)) else: Z = simulateIBP(regIBPa, D) Z = Z.astype(np.int) K = Z.shape[1] # Initialize feature loading variance from Gamma prior if prec_a is None: prec_a = np.ones(K) * nr.gamma(prec_aa, 1. / prec_ab) else: prec_a = np.ones(K) * prec_a # Simulate feature loading matrix A based on N(A_dk | 0, sigma_a) A = np.copy(Z) A = A.astype(np.float) for (d, k) in zip(*A.nonzero()): A[d, k] = nr.normal(0, np.sqrt(1 / prec_a[k])) else: A = initZA.astype(np.float) Z = (A != 0).astype(np.int) K = Z.shape[1] # Initialize feature loading variance from Gamma prior if prec_a is None: prec_a = np.ones(K) * nr.gamma(prec_aa, 1. / prec_ab) else: prec_a = np.ones(K) * prec_a # Check A is float type and Z is integer type assert (np.issubsctype(A, np.float) and np.issubsctype(Z, np.int)) # Initialize feature score matrix F if initF is None: if nonGaussianF: from utils.sampleF_DP import sampleF if DPa is None: # Draw DP concentration parameter DPa = nr.gamma(DPa_a, 1. / DPa_b) while True: # Redraw DP concentration parameter until E(# of cluster) > 0 J = int(DPa * np.log(N)) if (J > 0): break else: DPa = nr.gamma(DPa_a, 1. / DPa_b) # Initialize cluster indicators for samples clus_ind = nr.choice(range(J), size=N) # Initialize unique factor vectors clus_theta = nr.normal(0, 1, (K, J)) F = np.empty((K, N)) * np.nan for i in range(N): F[:, i] = clus_theta[:, clus_ind[i]] else: from utils.sampleF import sampleF F = nr.normal(0, 1, size=(K, N)) else: F = initF.astype(np.float) assert (initF.shape == (K, N)) if nonGaussianF: from utils.sampleF_DP import sampleF if (DPa is None): # Draw DP concentration parameter DPa = nr.gamma(DPa_a, 1. / DPa_b) # Extract unique score vectors and class indicators clus_theta = np.vstack({tuple(row) for row in F.T}).T assert (clus_theta.shape[0] == K) J = clus_theta.shape[1] clus_ind = np.array([], dtype=np.int) for i in range(N): label = np.where((F[:, [i]] == clus_theta).all(axis=0)) clus_ind = np.append(clus_ind, label) assert (len(clus_ind) == N) else: from utils.sampleF import sampleF # Impute missing values if they exist if np.sum(Xmask) > 0: Xtemp = np.dot(A, F) X[Xmask == 1] = Xtemp[Xmask == 1] nonMissing = X[Xmask == 0] # Regression components for design and control variables if design is not None: H = design P = H.shape[0] assert (H.shape[1] == N) if initSB is None: # Generate binary feature assignment matrix Z intercept = np.ones(D).reshape(D, 1) S1 = nr.binomial(1, 0.5, (D, P - 1)) S = np.hstack((intercept, S1)).astype(np.int) # Initialize feature loading variance from Gamma prior if prec_b is None: prec_b = np.ones(P) * nr.gamma(prec_ba, 1. / prec_bb) else: prec_b = np.ones(P) * prec_b # Simulate coefficient matrix B based on N(B_dk | 0, sigma_b) B = np.copy(S) B = S.astype(np.float) mean = np.nanmean(X, axis=1) for (d, p) in zip(*B.nonzero()): if p == 0: # Start intercept with variable mean B[d, p] = mean[d] else: B[d, p] = nr.normal(0, np.sqrt(1 / prec_b[p])) else: B = initZA.astype(np.float) S = (B != 0).astype(np.int) assert (B.shape == (D, P)) # Initialize feature loading variance from Gamma prior if prec_b is None: prec_b = np.ones(P) * nr.gamma(prec_ba, 1. 
/ prec_bb) else: prec_b = np.ones(P) * prec_b # Check B is float type and S is integer type assert (np.issubsctype(B, np.float) and np.issubsctype(S, np.int)) for s in range(iteration): # Save initial parameters if (s == 0): K_save = K fmIBPa_save = fmIBPa if design is not None: regIBPa_save = regIBPa psi_x_save = 1. / prec_x if nonGaussianF: DPa_save = DPa if design is not None: loglikelihood = logLik(X, F, A, prec_x, N, D, prec_x_iso, H, B) else: loglikelihood = logLik(X, F, A, prec_x, N, D, prec_x_iso, H=None, B=None) logLik_save = loglikelihood if proposeK is False: tau_a_save = 1. / prec_a Z_sum = np.zeros((D, K)) A_sum = np.zeros((D, K)) F_sum = np.zeros((K, N)) if design is not None: S_sum = np.zeros((D, P)) B_sum = np.zeros((D, P)) time = datetime.datetime.now() print("=========================================================") print("Started at " + str(time)) print("Data shape: observations = %d\t variables = %d" % (N, D)) print("K = %d\tIBP_alpha = %.3f" % (K, fmIBPa)) print("=========================================================") # Update coefficient matrix if design is not None: X_reg = X - np.dot(A, F) if updateSB: (S, B) = sampleSB(X_reg, H, S, B, prec_x, prec_b, regIBPa, P, N, D, prec_ba, prec_bb, prec_b_iso) X_fm = X - np.dot(B, H) # Update IBP parameter for regression part if updateIBPa_reg: regIBPa = sampleIBPa(regIBPa_a, regIBPa_b, P, D) # Update coefficient variance if updatePrec_b: if (learn_scale_bb and not prec_b_iso): from utils.samplePrec_b import samplePrec_bb prec_bb = samplePrec_bb(P, prec_b, prec_ba, prec_bb, prec_bb_a, prec_bb_b) prec_b = samplePrec_b(X_reg, S, B, P, prec_ba, prec_bb, prec_b_iso) else: X_fm = X # Update factor assignment matrix Z and factor loading matrix A if updateZA: (F, Z, A, K, prec_a) = sampleZA(X_fm, F, Z, A, prec_x, prec_a, fmIBPa, K, N, D, proposeK, prec_aa, prec_ab, prec_a_iso) # Update factor score matrix if updateF and nonGaussianF: (F, clus_ind, clus_theta, J, DPa) = sampleF(X_fm, F, A, prec_x, clus_ind, clus_theta, J, N, D, K, DPa, DPa_a, DPa_b) if updateF and not nonGaussianF: F = sampleF(X_fm, A, prec_x, N, D, K) # Update factor loading variance if updatePrec_a: if (learn_scale_ab and not prec_a_iso): from utils.samplePrec_a import samplePrec_ab prec_ab = samplePrec_ab(K, prec_a, prec_aa, prec_ab, prec_ab_a, prec_ab_b) prec_a = samplePrec_a(X_fm, Z, A, K, prec_aa, prec_ab, prec_a_iso) # Update IBP parameter for factor model part if updateIBPa_fm: fmIBPa = sampleIBPa(fmIBPa_a, fmIBPa_b, K, D) # Update residual variance if updatePrec_x: if (learn_scale_xb and not prec_x_iso): from utils.samplePrec_x import samplePrec_xb prec_xb = samplePrec_xb(D, prec_x, prec_xa, prec_xb, prec_xb_a, prec_xb_b) if design is not None: residue = X - np.dot(B, H) - np.dot(A, F) else: residue = X - np.dot(A, F) prec_x = samplePrec_x(residue, N, D, prec_xa, prec_xb, prec_x_iso) # Update missing values based on posterior distribution if np.sum(Xmask > 0): # Predictive mean if design is not None: Xpred = np.dot(B, H) + np.dot(A, F) else: Xpred = np.dot(A, F) # Add noise covNoise = np.diag(1. 
/ prec_x) noise = nr.multivariate_normal(np.zeros(D), covNoise, N).T Xpred += noise # Update missing values X[Xmask == 1] = Xpred[Xmask == 1] assert (all(nonMissing == X[Xmask == 0])) if design is not None: loglikelihood = logLik(X, F, A, prec_x, N, D, prec_x_iso, H, B) else: loglikelihood = logLik(X, F, A, prec_x, N, D, prec_x_iso, H=None, B=None) if (s + 1) % printIteration == 0: print("Iteration %d: K = %d\tIBP_alpha = %.3f\tlogLik= %.3f" % ((s + 1), K, fmIBPa, loglikelihood)) # Save parameters for each iteration K_save = np.append(K_save, K) fmIBPa_save = np.append(fmIBPa_save, fmIBPa) if design is not None: regIBPa_save = np.append(regIBPa_save, regIBPa) psi_x_save = np.vstack((psi_x_save, 1. / prec_x)) if nonGaussianF: DPa_save = np.append(DPa_save, DPa) logLik_save = np.append(logLik_save, loglikelihood) if proposeK is False: tau_a_save = np.vstack((tau_a_save, 1. / prec_a)) # Accumulate Z, A, F to calculate posterior mean if (s >= burnin): Z_sum = Z_sum + Z A_sum = A_sum + A F_sum = F_sum + F if design is not None: S_sum = S_sum + S B_sum = B_sum + B if saveIteration and s >= burnin: saveIter(s, Z, A, F, prec_x, prec_a) fmIBPa_mean = np.mean(fmIBPa_save[(burnin + 1):]) psi_mean = psi_x_save[(burnin + 1):, :].mean(axis=0) np.savetxt("mIBPalpha_Fm.txt", np.array([fmIBPa_mean]), delimiter="\t") np.savetxt("mPsi.txt", psi_mean.reshape(1, psi_mean.shape[0]), delimiter="\t") np.savetxt("logLik.txt", logLik_save, delimiter="\t") if proposeK is False: NMCsample = iteration - burnin Z_mean = Z_sum.astype(np.float) / NMCsample A_mean = A_sum / NMCsample F_mean = F_sum / NMCsample tau_mean = tau_a_save[(burnin + 1):, :].mean(axis=0) np.savetxt("mZ.txt", Z_mean, delimiter="\t") np.savetxt("mA.txt", A_mean, delimiter="\t") np.savetxt("mF.txt", F_mean, delimiter="\t") np.savetxt("mTau.txt", tau_mean.reshape(1, tau_mean.shape[0]), delimiter="\t") if nonGaussianF: DPa_mean = np.mean(DPa_save[(burnin + 1):]) np.savetxt("mDPalpha.txt", np.array([DPa_mean]), delimiter="\t") else: np.savetxt("K.txt", K_save, delimiter="\t") if design is not None: regIBPa_mean = np.mean(regIBPa_save[(burnin + 1):]) np.savetxt("mRegIBPalpha_Reg.txt", np.array([regIBPa_mean]), delimiter="\t") NMCsample = iteration - burnin S_mean = S_sum / NMCsample B_mean = B_sum / NMCsample np.savetxt("mS.txt", S_mean, delimiter="\t") np.savetxt("mB.txt", B_mean, delimiter="\t") return
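A hedged simulation sketch of the generative model described in the IBPFM docstring, X = BH + (Z o A)F + noise, with made-up small dimensions; it only illustrates the matrix shapes, not the Gibbs sampler itself.

import numpy as np
import numpy.random as nr

D, N, K, P = 8, 50, 3, 2
H = np.vstack([np.ones(N), nr.normal(size=N)])   # (P x N) design matrix
B = nr.normal(size=(D, P))                       # (D x P) regression coefficients
Z = nr.binomial(1, 0.5, size=(D, K))             # (D x K) binary factor assignments
A = nr.normal(size=(D, K))                       # (D x K) factor loadings
F = nr.normal(size=(K, N))                       # (K x N) factor scores
X = B @ H + (Z * A) @ F + nr.normal(scale=0.1, size=(D, N))
print(X.shape)                                    # (8, 50)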
def __call__(self, image, boxes=None, labels=None): height, width, _ = image.shape while True: # randomly choose a mode mode = random.choice(self.sample_options) if mode is None: return image, boxes, labels min_iou, max_iou = mode if min_iou is None: min_iou = float('-inf') if max_iou is None: max_iou = float('inf') # max trails (50) for _ in range(50): current_image = image w = random.uniform(0.3 * width, width) h = random.uniform(0.3 * height, height) # aspect ratio constraint b/t .5 & 2 if h / w != 1: continue left = random.uniform(width - w) top = random.uniform(height - h) # convert to integer rect x1,y1,x2,y2 rect = np.array( [int(left), int(top), int(left + w), int(top + h)]) # calculate IoU (jaccard overlap) b/t the cropped and gt boxes overlap = object_converage_numpy(boxes, rect) # is min and max overlap constraint satisfied? if not try again if overlap.max() < min_iou or overlap.min() > max_iou: continue # cut the crop from the image current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], :] # keep overlap with gt box IF center in sampled patch centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 # mask in all gt boxes that above and to the left of centers m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1]) # mask in all gt boxes that under and to the right of centers m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1]) # mask in that both m1 and m2 are true mask = m1 * m2 # have any valid boxes? try again if not if not mask.any(): continue # take only matching gt boxes current_boxes = boxes[mask, :].copy() # take only matching gt labels current_labels = labels[mask] # should we use the box left and top corner or the crop's current_boxes[:, :2] = np.maximum(current_boxes[:, :2], rect[:2]) # adjust to crop (by substracting crop's left,top) current_boxes[:, :2] -= rect[:2] current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], rect[2:]) # adjust to crop (by substracting crop's left,top) current_boxes[:, 2:] -= rect[:2] return current_image, current_boxes, current_labels
def _get_any_hidden_neuron(self) -> HiddenNeuron:
    return choice(self.hidden_neurons)
def train(self): s_size = int(self.pop_size / self.pp) sf_pop = [self.create_solution() for _ in range(0, self.pop_size)] s_pop = [self.create_solution() for _ in range(0, s_size)] sf_gbest = self.get_global_best_solution(sf_pop, self.ID_FIT, self.ID_MIN_PROB) s_gbest = self.get_global_best_solution(s_pop, self.ID_FIT, self.ID_MIN_PROB) for epoch in range(0, self.epoch): ## Calculate lamda_i using Eq.(7) ## Update the position of sailfish using Eq.(6) for i in range(0, self.pop_size): PD = 1 - len(sf_pop) / (len(sf_pop) + len(s_pop)) lamda_i = 2 * uniform() * PD - PD sf_pop[i][self.ID_POS] = s_gbest[self.ID_POS] - lamda_i * ( uniform() * (sf_gbest[self.ID_POS] + s_gbest[self.ID_POS]) / 2 - sf_pop[i][self.ID_POS]) ## Calculate AttackPower using Eq.(10) AP = self.A * (1 - 2 * (epoch + 1) * self.epxilon) if AP < 0.5: alpha = int(len(s_pop) * abs(AP)) beta = int(self.problem_size * abs(AP)) ### Random choice number of sardines which will be updated their position list1 = choice(range(0, len(s_pop)), alpha) for i in range(0, len(s_pop)): if i in list1: #### Random choice number of dimensions in sardines updated, remove third loop by numpy vector computation list2 = choice(range(0, self.problem_size), beta, replace=False) s_pop[i][self.ID_POS][list2] = ( uniform(0, 1, self.problem_size) * (sf_gbest[self.ID_POS] - s_pop[i][self.ID_POS] + AP))[list2] else: ### Update the position of all sardine using Eq.(9) for i in range(0, len(s_pop)): s_pop[i][self.ID_POS] = uniform() * ( sf_gbest[self.ID_POS] - s_pop[i][self.ID_POS] + AP) ## Recalculate the fitness of all sardine for i in range(0, len(s_pop)): s_pop[i][self.ID_FIT] = self.get_fitness_position( s_pop[i][self.ID_POS], self.ID_MIN_PROB) ## Sort the population of sailfish and sardine (for reducing computational cost) sf_pop = sorted(sf_pop, key=lambda temp: temp[self.ID_FIT]) s_pop = sorted(s_pop, key=lambda temp: temp[self.ID_FIT]) for i in range(0, self.pop_size): for j in range(0, len(s_pop)): ### If there is a better position in sardine population. if sf_pop[i][self.ID_FIT] > s_pop[j][self.ID_FIT]: sf_pop[i] = deepcopy(s_pop[j]) del s_pop[j] break #### This simple keyword helped reducing ton of comparing operation. #### Especially when sardine pop size >> sailfish pop size s_pop = s_pop + [ self.create_solution() for _ in range(0, s_size - len(s_pop)) ] sf_gbest = self.update_global_best_solution( sf_pop, self.ID_MIN_PROB, sf_gbest) s_gbest = self.update_global_best_solution(s_pop, self.ID_MIN_PROB, s_gbest) self.loss_train.append(sf_gbest[self.ID_FIT]) if self.verbose: print("> Epoch: {}, Best fit: {}".format( epoch + 1, sf_gbest[self.ID_FIT])) self.solution = sf_gbest return sf_gbest[self.ID_POS], sf_gbest[self.ID_FIT], self.loss_train
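A tiny sketch of the partial-dimension update used when AP < 0.5: pick beta distinct dimensions with numpy.random.choice and overwrite only those entries; the vector and beta here are placeholders.

import numpy as np
from numpy.random import choice, uniform

problem_size = 10
position = np.zeros(problem_size)
beta = 3
dims = choice(range(problem_size), beta, replace=False)   # distinct dimension indices
position[dims] = uniform(0, 1, problem_size)[dims]        # update only those dimensions
print(dims, position)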
def _get_any_axon(self) -> Axon:
    any_neuron = self._get_any_non_input_neuron()
    if any_neuron.incoming_axons:
        return choice(any_neuron.incoming_axons)
    return None
def _get_any_non_output_neuron(self) -> Neuron:
    return choice(self.input_neurons + self.hidden_neurons)
def run_simulation(repeat): infection_reporting_prob = npr.choice([0.1, 0.2, 0.3, 0.4, 0.5]) haz_rate_scale = pairs_dict[infection_reporting_prob] contact_tracing_success_prob = npr.uniform(0.7, 0.95) contact_trace_delay_par = npr.uniform(1.5, 2.5) reduce_contacts_by = npr.uniform(0.0, 0.9) # Scenario A # reduce_contacts_by=(0.68, 0.83, 0.83, 0.821, 0.846, 0.836) # Scenario B # reduce_contacts_by=(0.638, 0.786, 0.76, 0.733, 0.765, 0.755) # Scenario C # reduce_contacts_by=(0.628, 0.76, 0.685, 0.632, 0.668, 0.668) #Scenario D # reduce_contacts_by=(0.561, 0.698, 0.61, 0.543, 0.589, 0.577) # Scenario E # reduce_contacts_by = (0.413, 0.544, 0.393, 0.278, 0.348, 0.315) #do_2_step = npr.choice([True, False]) prob_has_trace_app = npr.uniform(0, 0.5) backwards_trace = True probable_infections_need_test = False backwards_tracing_time_limit = npr.choice(list(range(7, 22))) simulation = hct.uk_model(haz_rate_scale=haz_rate_scale, household_haz_rate_scale=0.77729, contact_tracing_success_prob=contact_tracing_success_prob, contact_trace_delay_par=contact_trace_delay_par, overdispersion=0.36, infection_reporting_prob=infection_reporting_prob, contact_trace=True, reduce_contacts_by=reduce_contacts_by, test_before_propagate_tracing=False, probable_infections_need_test=probable_infections_need_test, backwards_trace=backwards_trace, backwards_tracing_time_limit=float('inf'), number_of_days_to_trace_forwards=float('inf'), number_of_days_to_trace_backwards=float('inf'), prob_has_trace_app=prob_has_trace_app, starting_infections=starting_infections) simulation.run_simulation(days_to_simulate) parameters = [ haz_rate_scale, infection_reporting_prob, contact_tracing_success_prob, contact_trace_delay_par, reduce_contacts_by, prob_has_trace_app, backwards_trace, probable_infections_need_test ] return(parameters + simulation.inf_counts)
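A sketch of the prior-sampling pattern above, repeated to build a small design table of parameter draws; the simulation call itself (hct.uk_model) is not reproduced here.

import numpy.random as npr

def draw_parameters():
    return {
        'infection_reporting_prob': npr.choice([0.1, 0.2, 0.3, 0.4, 0.5]),
        'contact_tracing_success_prob': npr.uniform(0.7, 0.95),
        'contact_trace_delay_par': npr.uniform(1.5, 2.5),
        'reduce_contacts_by': npr.uniform(0.0, 0.9),
        'prob_has_trace_app': npr.uniform(0, 0.5),
    }

design = [draw_parameters() for _ in range(5)]   # one row per simulation repeat
print(design[0])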
def _get_any_neuron(self) -> Neuron:
    return choice(self.get_all_neurons())
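A quick sketch of why these getter helpers work: numpy.random.choice over a plain Python list of objects converts it to a 1-D object array and returns one element uniformly at random. Neuron below is a minimal stand-in class, not the project's own.

from numpy.random import choice

class Neuron:
    def __init__(self, name):
        self.name = name

neurons = [Neuron('h1'), Neuron('h2'), Neuron('h3')]
picked = choice(neurons)          # one Neuron, chosen uniformly
print(picked.name)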
def add_places(self, places, p):
    draw = choice(np.arange(len(places)), 200, p=p)
    self.places = [places[i] for i in draw]
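A minimal sketch of the index-then-map pattern in add_places: draw weighted indices (with replacement, the default) and map them back to the objects; the places list, probabilities, and draw size are illustrative.

import numpy as np
from numpy.random import choice

places = ['park', 'cafe', 'museum']
p = np.array([0.5, 0.3, 0.2])
draw = choice(np.arange(len(places)), 5, p=p)   # indices may repeat (replace=True)
picked = [places[i] for i in draw]
print(draw, picked)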
def _sample_graph(roidb, num_fg_rois, num_rois, rels_per_image): """ Sample a graph from the foreground rois of an image :param: roidb: roidb of an image rois_per_image: maximum number of rois per image :return: roi_inds: 1d-array, the indexes of rois that are considered in the sampled graph. fg:bg ~ 1:3, fg may less than num_fg_rois(32) rels: (N, 3)-array for (sub, obj, rel), N is not certain. negative rel is no more than num_neg_rels """ gt_rels = roidb['gt_relations'] # index of assigned gt box for foreground boxes fg_gt_ind_assignments = roidb['fg_gt_ind_assignments'] # find all fg proposals that are mapped to a gt gt_to_fg_roi_inds = {} all_fg_roi_inds = [] for ind, gt_ind in fg_gt_ind_assignments.items(): if gt_ind not in gt_to_fg_roi_inds: gt_to_fg_roi_inds[gt_ind] = [] gt_to_fg_roi_inds[gt_ind].append(ind) all_fg_roi_inds.append(ind) # print('gt rois = %i' % np.where(roidb['max_overlaps']==1)[0].shape[0]) # print('assigned gt = %i' % len(gt_to_fg_roi_inds.keys())) # dedup the roi inds all_fg_roi_inds = np.array(list(set(all_fg_roi_inds))) # find all valid relations in fg objects pos_rels = [] for rel in gt_rels: for sub_i in gt_to_fg_roi_inds[rel[0]]: for obj_i in gt_to_fg_roi_inds[rel[1]]: pos_rels.append([sub_i, obj_i, rel[2]]) # print('num fg rois = %i' % all_fg_roi_inds.shape[0]) rels = [] rels_inds = [] roi_inds = [] if len(pos_rels) > 0: # de-duplicate the relations _, indices = np.unique(["{} {}".format(i, j) for i,j,k in pos_rels], return_index=True) pos_rels = np.array(pos_rels)[indices, :] pos_inds = pos_rels[indices, :2].tolist() #print('num pos rels = %i' % pos_rels.shape[0]) # construct graph based on valid relations for rel in pos_rels: roi_inds += rel[:2].tolist() roi_inds = list(set(roi_inds)) # keep roi inds unique rels.append(rel) rels_inds.append(rel[:2].tolist()) if len(roi_inds) >= num_fg_rois or len(rels_inds) >= rels_per_image: # here it usually limit the num of pos rel break #print('sampled rels = %i' % len(rels)) roi_candidates = np.setdiff1d(all_fg_roi_inds, roi_inds) num_rois_to_sample = min(num_fg_rois - len(roi_inds), len(roi_candidates)) # if not enough rois, sample fg rois if num_rois_to_sample > 0: roi_sample = npr.choice(roi_candidates.astype(np.int32), size=num_rois_to_sample, replace=False) roi_inds = np.hstack([roi_inds, roi_sample]) #print('sampled fg rois = %i' % num_rois_to_sample) # sample background relations sample_rels = [] sample_rels_inds = [] for i in roi_inds: for j in roi_inds: if i != j and [i, j] not in rels_inds and [i, j] not in pos_inds: sample_rels.append([i,j,0]) sample_rels_inds.append([i,j]) #print('background rels= %i' % len(sample_rels)) if len(sample_rels) > 0: # randomly sample negative edges to prevent no edges num_neg_rels = np.minimum(len(sample_rels), rels_per_image-len(rels_inds)) #fprint('sampled background rels= %i' % num_neg_rels) inds = npr.choice(np.arange(len(sample_rels)), size=num_neg_rels, replace=False) rels += [sample_rels[i] for i in inds] rels_inds += [sample_rels_inds[i] for i in inds] # if still not enough rois, sample bg rois num_rois_to_sample = num_rois - len(roi_inds) if num_rois_to_sample > 0: bg_roi_inds = _sample_bg_rois(roidb, num_rois_to_sample) roi_inds = np.hstack([roi_inds, bg_roi_inds]) roi_inds = np.array(roi_inds).astype(np.int64) # print('sampled rois = %i' % roi_inds.shape[0]) return roi_inds.astype(np.int64), np.array(rels).astype(np.int64)