def get_optimizer(model):
    """This is where users choose their optimizer and define the
    hyperparameter space they'd like to search."""
    optimizer_class = optim.SGD
    lr = choose(np.logspace(-5, 0, base=10))
    momentum = choose(np.linspace(0.1, .9999))
    return optimizer_class(model.parameters(), lr=lr, momentum=momentum)
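# The get_optimizer variants here and below assume a `choose` helper that
# draws one hyperparameter value uniformly at random from a candidate array.
# The original helper is not shown; this is a minimal sketch of that assumed
# behavior, not the library's actual implementation.
import random

def choose(candidates):
    """Return one element of `candidates`, selected uniformly at random."""
    return random.choice(list(candidates))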
def train_pairwise_model2(motif, pc=1 / 16.0, decay_timescale=10000,
                          take_stock=1000, eta=0.01, stop_crit=0.01):
    L = len(motif[0])
    N = len(motif)
    fs = get_pairwise_freqs(motif, pc=pc)
    ws = [{(b1, b2): 0 for (b1, b2) in dinucs}
          for _ in range(int(choose(L, 2)))]
    iteration = 0
    while True:
        # Resample every site of the motif under the current weights.
        cur_motif = [sample_model(ws, x0=site, iterations=10 * L)[-1]
                     for site in motif]
        current_fs = get_pairwise_freqs(cur_motif)
        sse = 0
        for w, f, cur_f in zip(ws, fs, current_fs):
            for b1, b2 in dinucs:
                delta = f[b1, b2] - cur_f[b1, b2]
                sse += delta**2
                w[b1, b2] += eta * delta  # * exp(-iteration/float(decay_timescale))
        sse_per_col_pair = sse / choose(L, 2)
        print iteration, sse_per_col_pair, ws[0]['A', 'A']
        print "motif_ic:", motif_ic(cur_motif)
        if iteration > 0 and sse_per_col_pair < stop_crit:
            print "breaking:", sse, sse_per_col_pair
            break
        iteration += 1
    return ws
def get_optimizer():
    global tf
    """This is where users choose their optimizer and define the
    hyperparameter space they'd like to search."""
    optimizer_class = tf.keras.optimizers.SGD
    lr = choose(np.logspace(-5, 0, base=10))
    momentum = choose(np.linspace(0.1, .9999))
    return optimizer_class(lr=lr, momentum=momentum)
def create_game_data(word_size: int = 8, min_words: int = 7,
                     max_words: int = 15) -> ([str], str):
    words = tuple(words_of_size(word_size))
    chosen_words = {
        w: None
        for w in choose(random.randint(min_words, max_words), words)
    }
    return chosen_words, next(choose(1, chosen_words.keys()))
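# create_game_data uses a different flavor of `choose`: pick k distinct
# random elements from a sequence, returned lazily (note the next() call on
# the result). A minimal sketch of that assumed helper:
import random

def choose(k, seq):
    """Yield k distinct elements drawn at random from seq."""
    for item in random.sample(list(seq), k):
        yield item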
def get_optimizer():
    import numpy as np
    from tensorflow.keras.optimizers import SGD
    from utils import choose
    """This is where users choose their optimizer and define the
    hyperparameter space they'd like to search."""
    optimizer_class = SGD
    lr = choose(np.logspace(-5, 0, base=10))
    momentum = choose(np.linspace(0.1, .9999))
    return optimizer_class(lr=lr, momentum=momentum)
def num_genotypes(s, r, n, L):
    """return number of genotypes with r sites in recognizer, s in motif
    recognized"""
    K = 4**L
    num_recs_with_r = choose(K, r)
    # perms_of_rec_sites = N_dig_balls_in_k_bins(s, r)
    # perms_of_unrec_sites = N_dig_balls_in_k_bins(n - s, (4**L) - r)
    # num_motifs = perms_of_rec_sites * perms_of_unrec_sites * choose(n, s)
    num_motifs = (r**s) * ((K - r)**(n - s))
    ans = num_recs_with_r * num_motifs * choose(n, s)  # why this extra factor of choose(n,s)???
    print "s,r,n,L:", s, r, n, L, "num_recs_with_r:", num_recs_with_r, \
        "num_motifs:", num_motifs, "ans:", ans
    return ans
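# Many snippets in this collection use `choose(n, k)` as the binomial
# coefficient rather than a random picker. A minimal sketch of that assumed
# helper, delegating to the standard library (math.comb, Python 3.8+):
from math import comb

def choose(n, k):
    """Binomial coefficient C(n, k); 0 when k is out of range."""
    if k < 0 or k > n:
        return 0
    return comb(n, k)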
def train_pairwise_model(motif, pc=1 / 16.0, decay_timescale=10000,
                         take_stock=1000, eta=0.01, stop_crit=0.01):
    L = len(motif[0])
    N = len(motif)
    fs = get_pairwise_freqs(motif, pc=pc)
    ws = [{(b1, b2): 0 for (b1, b2) in dinucs}
          for _ in range(int(choose(L, 2)))]
    x = random_site(L)
    log_y = score(ws, x)
    chain = []
    iteration = 0
    stock_counter = take_stock
    while True:
        # Metropolis step on the current site under the current weights.
        xp = mutate_site(x)
        log_yp = score(ws, xp)
        if log(random.random()) < log_yp - log_y:
            x = xp
            log_y = log_yp
        chain.append(x)
        if iteration > 0 and iteration % stock_counter == 0:
            # Take stock: compare sampled pairwise frequencies to the target.
            current_fs = get_pairwise_freqs(
                sample(N, chain[iteration - stock_counter:iteration],
                       replace=False))
            sse = 0
            for w, f, cur_f in zip(ws, fs, current_fs):
                for b1, b2 in dinucs:
                    delta = f[b1, b2] - cur_f[b1, b2]
                    sse += delta**2
                    w[b1, b2] += eta * delta  # * exp(-iteration/float(decay_timescale))
            sse_per_col_pair = sse / choose(L, 2)
            print iteration, stock_counter, sse_per_col_pair, \
                exp(-iteration / float(decay_timescale)), ws[0]['A', 'A']
            stock_counter += random.randrange(2)
            if iteration > 0 and sse_per_col_pair < stop_crit:
                print "breaking:", sse, sse_per_col_pair
                break
            log_y = score(ws, x)  # recalculate this because weights change
        iteration += 1
    return ws
def analyze_collection(prok_motifs, euk_motifs):
    prok_correlated_pairses = map(analyze_motif,
                                  tqdm(prok_motifs, desc='motifs'))
    with open("prok_correlated_pairses.pkl", 'w') as f:
        cPickle.dump(prok_correlated_pairses, f)
    euk_correlated_pairses = map(analyze_motif,
                                 tqdm(euk_motifs, desc='motifs'))
    with open("euk_correlated_pairses.pkl", 'w') as f:
        cPickle.dump(euk_correlated_pairses, f)
    prok_corrs = np.array(map(len, prok_correlated_pairses))
    euk_corrs = np.array(map(len, euk_correlated_pairses))
    prok_depths = np.array([len(motif) for motif in prok_motifs])
    euk_depths = np.array([len(motif) for motif in euk_motifs])
    prok_lens = np.array([len(motif[0]) for motif in prok_motifs])
    euk_lens = np.array([len(motif[0]) for motif in euk_motifs])
    prok_lc2s = np.array([choose(L, 2) for L in prok_lens])
    euk_lc2s = np.array([choose(L, 2) for L in euk_lens])
def loadGame(name=None):
    global usr, usrFile, previousVendor
    users = []
    for file in os.listdir(utils.fileDir + '/saves'):
        if file.endswith('.save') or file.endswith('.db'):
            users.append(file.split('.')[0])
    try:
        usr = utils.choose('List of users:', users, 'What is your username?')
        usrFile = usr + '.save'
    except KeyboardInterrupt:
        play()
    try:
        entities.worldEntities = utils.loadInfo(usr, 'worldEntities')
        entities.player = utils.loadInfo(usr, 'player.' + usr)
        previousVendor = utils.loadInfo(usr, 'previousVendor')
    except KeyError:
        print('Savefile is broken. Creating new savefile...')
        newGame(usr)
    print('Game save loaded.')
    try:
        if entities.player.location == entities.getLocation('Inventory'):
            locations.inventory()
        elif entities.player.location == entities.getLocation('Market'):
            utils.goToVendor(previousVendor)
        elif entities.player.location == entities.getLocation('Interact'):
            utils.fight(entities.player.location.entity,
                        entities.player.location.entity.weapon)
            inventory()
        else:
            commandLine()
    except (KeyboardInterrupt, EOFError):
        sys.exit(1)
def L_vs_sigma_plot(filename=None, with_bio=False):
    if with_bio:
        tfdf = extract_motif_object_from_tfdf()
        motifs = [getattr(tfdf, tf) for tf in tfdf.tfs]
        Ls = [len(motif[0]) for motif in motifs]
        cs = [len(motif) for motif in motifs]
        ics = [motif_ic(motif) for motif in motifs]
        ic_density = [ic / L for ic, L in zip(ics, Ls)]
        sigmas = [mean(map(sd, make_pssm(motif))) for motif in motifs]
        ginis = [motif_gini(motif, correct=False) for motif in motifs]
        mi_density = [total_motif_mi(motif) / choose(L, 2)
                      for motif, L in zip(motifs, Ls)]
    min_sigma = 0.1
    max_sigma = 10
    plt.xlim(0, max_sigma)
    plt.ylim(0, 60)
    plt.plot(*pl(crit_L, np.linspace(min_sigma, max_sigma, 1000)),
             label="Binding Transition")
    plt.plot([min_sigma, max_sigma], [log(G, 2) / 2, log(G, 2) / 2],
             linestyle='--', label="Info Theory Threshold")
    # plt.plot(*pl(lambda sigma: log(G) / sigma,
    #              np.linspace(min_sigma, max_sigma, 1000)),
    #          linestyle='--', label="Zero Discrimination Asymptote")
    if with_bio:
        plt.scatter(sigmas, Ls, label="Biological Motifs")
    plt.xlabel("sigma")
    plt.ylabel("L")
    plt.legend()
    maybesave(filename)
def expected_mi(sigma, Ne, L, copies):
    """How much MI should you expect due to ROR effect?"""
    ps = ps_from_copies(sigma, Ne, L, copies)
    misX = sum(k * p for k, p in enumerate(ps)) / L
    matX = 1 - misX
    misY = misX
    matY = matX
    L = float(L)
    matYmatX = sum(ps[k] * ((L - k) / L) * (L - k - 1) / (L - 1)
                   for k in range(int(L + 1)))
    matYmisX = sum(ps[k] * ((L - k) / L) * k / (L - 1)
                   for k in range(int(L + 1)))
    misYmatX = matYmisX
    misYmisX = sum(ps[k] * (k / L) * (k - 1) / (L - 1)
                   for k in range(int(L + 1)))
    # print "joints sum to:", (matYmatX + matYmisX + misYmatX + misYmisX)
    HX = HY = -(matX * log2(matX) + 3 * (misX / 3) * log2(misX / 3))
    # print "HX:", HX
    MI_ref = (misYmisX * log2(misYmisX / (misY * misX)) +
              matYmisX * log2(matYmisX / (matY * misX)) +
              misYmatX * log2(misYmatX / (misY * matX)) +
              matYmatX * log2(matYmatX / (matY * matX)))
    MI = (9 * (misYmisX / 9) * log2((misYmisX / 9) / ((misY / 3) * (misX / 3))) +
          3 * (matYmisX / 3) * log2((matYmisX / 3) / (matY * (misX / 3))) +
          3 * (misYmatX / 3) * log2((misYmatX / 3) / ((misY / 3) * matX)) +
          matYmatX * log2(matYmatX / (matY * matX)))
    return MI * choose(int(L), 2)
def test(env: gym.Wrapper, model: tf.keras.Model, log_dir: Path) -> None:
    """Test the DQN on Pong.

    Args:
        env: The Atari Pong environment
        model: The model to be evaluated
        log_dir: Path where to save the video
    """
    env = Monitor(
        env,
        log_dir,
        force=True,  # overwrite existing videos
        video_callable=lambda count: True,  # force save this episode
    )
    state = Deque[tf.Tensor](maxlen=STATE_FRAMES)
    state.append(preprocess(env.reset()))  # initial state
    print("Starting testing...")
    while True:
        if len(state) < STATE_FRAMES:
            initial = None
            action = env.action_space.sample()
        else:
            initial = tf.stack(state, axis=-1)
            action = choose(model, initial, 0)  # choose greedily
        state_new, _, done, _ = env.step(action)
        state_new = preprocess(state_new)
        state.append(state_new)
        if done:
            break
    print("Testing done")
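# In the DQN snippets, `choose` is an epsilon-greedy action selector whose
# definition is not shown. A minimal sketch of the assumed behavior: with
# probability epsilon take a random action, otherwise take the model's
# argmax-Q action (so epsilon=0 above is fully greedy).
import random
import tensorflow as tf

def choose(model: tf.keras.Model, state: tf.Tensor, epsilon: float) -> int:
    """Epsilon-greedy action selection over the model's Q-values."""
    q_values = model(tf.expand_dims(state, 0))  # add a batch dimension
    num_actions = int(q_values.shape[-1])
    if random.random() < epsilon:
        return random.randrange(num_actions)  # explore
    return int(tf.argmax(q_values[0]).numpy())  # exploit greedily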
def fitness(motif):
    eps = [sigma * sum(b != "A" for b in site) for site in motif]
    Zb = G * sum(exp(-sigma * i) * choose(L, i) * (1 / 4.0)**i * (3 / 4.0)**(L - i)
                 for i in range(L + 1))
    fg = sum(exp(-ep) for ep in eps)
    return fg / (fg + Zb)
def train(self):
    """
    Gibbs sampling
    """
    self._initialize()
    for i in range(self.max_iteration):
        print('iteration:{}'.format(i + 1))
        for doc_index, doc in enumerate(self.document):
            for word_index, word in enumerate(doc):
                current_topic_index = \
                    self.current_word_topic_matrix[doc_index][word_index]
                # exclude the counts related to current topic
                self.doc_topic_matrix[doc_index, current_topic_index] -= 1
                self.topic_word_matrix[current_topic_index, word_index] -= 1
                self.topic_matrix[current_topic_index] -= 1
                # (n_{d,-i}^k + a_k) * (n_{k,-i}^t + b_t) / sum_t(n_{k,-i}^t + b_t)
                topic_distribution = (self.doc_topic_matrix[doc_index] + self.alpha) * \
                    (self.topic_word_matrix[:, word_index] + self.eta) / \
                    (self.topic_matrix[current_topic_index] + self.N * self.eta)
                new_topic_index = choose(range(self.K), topic_distribution)
                self.current_word_topic_matrix[doc_index][word_index] = \
                    new_topic_index
                # add the counts related to new topic
                self.doc_topic_matrix[doc_index, new_topic_index] += 1
                self.topic_word_matrix[new_topic_index, word_index] += 1
                self.topic_matrix[new_topic_index] += 1
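# In the two LDA samplers here, `choose(options, weights)` draws one option
# with probability proportional to an unnormalized weight vector. A minimal
# sketch of that assumed helper using numpy:
import numpy as np

def choose(options, weights):
    """Sample one element of `options` proportionally to `weights`."""
    weights = np.asarray(weights, dtype=float)
    probs = weights / weights.sum()  # normalize; weights need not sum to one
    return list(options)[np.random.choice(len(probs), p=probs)]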
def market():
    entities.player.location = entities.getLocation('Market')
    print('''
+-----------------------------------------------------+
| Welcome to the Market!                               |
| Type an item's name to purchase it.                  |
| Type "info <item>" for more information on an item.  |
| Type "exit" to leave the store.                      |
+-----------------------------------------------------+''')
    isVendor = False
    while not isVendor:
        vendors = []
        for vendor in entities.vendors:
            vendors.append(vendor.name)
        command = utils.choose('\nPlease type the vendor you want to visit.',
                               vendors)
        for vendor in entities.vendors:
            if vendor.name == command:
                vendorToVisit = vendor
                isVendor = True
                break
        if command == 'exit':
            print('You left the store.')
            return
        elif not isVendor:
            print('Vendor or command not found.')
    utils.goToVendor(vendorToVisit)
def rate_matrix(q, koffs, verbose=False):
    """Generate the stochastic rate matrix for the given system."""
    # Chromosome states can be represented by binary numerals; order the
    # states this way.
    G = len(koffs)
    states = enumerate_states(G, q)
    num_states = len(states)
    assert len(states) == sum(choose(G, i) for i in range(q + 1))
    R = np.zeros((num_states, num_states))
    for i, state_i in enumerate(states):
        for j, state_j in enumerate(states):
            if verbose:
                print "considering:", i, state_i, "->", j, state_j
            dist = hamming(state_i, state_j)
            if dist != 1:  # deal with diagonal elements later...
                if verbose:
                    print "distance is:", dist, "continuing..."
                continue
            if sum(state_j) == sum(state_i) + 1:
                R[i][j] = q - sum(state_i)
                if verbose:
                    print i, state_i, "->", j, state_j, \
                        "is an on-reaction, rate:", R[i][j]
            elif sum(state_j) == sum(state_i) - 1:
                diff_idx, diff_site = find(lambda (idx, (si, sj)): si != sj,
                                           enumerate(zip(state_i, state_j)))
                R[i][j] = koffs[diff_idx]
                if verbose:
                    print i, state_i, "->", j, state_j, \
                        "is an off-reaction (at site", diff_idx, ") rate:", R[i][j]
    # deal with diagonal elements
    for i in range(num_states):
        R[i][i] = -sum(R[i])
    print "finished rate matrix"
    return R
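# `enumerate_states` is assumed to list every binary occupancy vector of
# length G with at most q sites bound; the assertion above counts exactly
# those states as sum_i C(G, i). A minimal sketch of that assumed helper:
from itertools import combinations

def enumerate_states(G, q):
    """All length-G 0/1 tuples with at most q ones, grouped by occupancy."""
    states = []
    for k in range(q + 1):
        for on_sites in combinations(range(G), k):
            states.append(tuple(1 if i in on_sites else 0 for i in range(G)))
    return states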
def lda(self, number_of_topics, iterations, alpha, beta):
    '''
    Model topics.
    '''
    print "Gibbs sampling process..."
    # Get vocabulary and number of documents.
    self.build_vocabulary()
    number_of_documents = len(self.documents)
    vocabulary_size = len(self.vocabulary)
    # Create the counter arrays.
    self.document_topic_counts = np.zeros(
        [number_of_documents, number_of_topics], dtype=np.int)
    self.topic_word_counts = np.zeros(
        [number_of_topics, len(self.vocabulary)], dtype=np.int)
    self.current_word_topic_assignments = []
    self.topic_counts = np.zeros(number_of_topics)
    # Initialize.
    print "Initializing..."
    for d_index, document in enumerate(self.documents):
        word_topic_assignments = []
        for word in document.words:
            if word in self.vocabulary:
                # Select random starting topic assignment for word.
                w_index = self.vocabulary.index(word)
                # randomly assign topic to every word
                starting_topic_index = np.random.randint(number_of_topics)
                word_topic_assignments.append(starting_topic_index)
                # Set current topic assignment, increment doc-topic and
                # word-topic counters.
                self.document_topic_counts[d_index, starting_topic_index] += 1
                self.topic_word_counts[starting_topic_index, w_index] += 1
                self.topic_counts[starting_topic_index] += 1
        self.current_word_topic_assignments.append(
            np.array(word_topic_assignments))
    # Run the sampler.
    for iteration in range(iterations):
        print "Iteration #" + str(iteration + 1) + "..."
        for d_index, document in enumerate(self.documents):
            for w, word in enumerate(document.words):
                if word in self.vocabulary:
                    w_index = self.vocabulary.index(word)
                    # Get the topic that the word is currently assigned to.
                    current_topic_index = \
                        self.current_word_topic_assignments[d_index][w]
                    # Decrement counts.
                    self.document_topic_counts[d_index, current_topic_index] -= 1
                    self.topic_word_counts[current_topic_index, w_index] -= 1
                    self.topic_counts[current_topic_index] -= 1
                    # Get new topic.
                    topic_distribution = (self.topic_word_counts[:, w_index] + beta) * \
                        (self.document_topic_counts[d_index] + alpha) / \
                        (self.topic_counts + beta)  # changed by hitalex
                    # new_topic_index = np.random.multinomial(1, np.random.dirichlet(topic_distribution)).argmax()
                    # choose a new topic index according to topic distribution
                    new_topic_index = choose(range(number_of_topics),
                                             topic_distribution)
                    # Reassign and notch up counts.
                    self.current_word_topic_assignments[d_index][w] = \
                        new_topic_index
                    self.document_topic_counts[d_index, new_topic_index] += 1
                    self.topic_word_counts[new_topic_index, w_index] += 1
                    self.topic_counts[new_topic_index] += 1
def test_expected_mi(L, n):
    plt.plot([expected_mi(L, k) * choose(L, 2) for k in range(L + 1)],
             label="Expected MI")
    plt.plot([expected_mi2(L, k) for k in range(L + 1)],
             label="Expected MI2")
    plt.scatter(range(L + 1),
                [mean(total_motif_mi(mm_motif(L, n, k)) for i in range(10))
                 for k in trange(L + 1)])
def V(xj, xjp):
    if xj == xjp > 0:
        retval = 10**10
    else:
        # divide by choose(J, 2) since we're summing over pairs
        retval = (eps[xj] + eps[xjp]) / choose(J, 2)
    if random.random() < 0:
        print "V(%s,%s) = %s" % (xj, xjp, retval)
    return retval
def main():
    game_type = choose('Game Type?', {'Normal': 'n', 'Fast Forward': 's'})
    if game_type == 'n':
        normal_game()
    elif game_type == 's':
        generations_game()
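# The game snippets use yet another `choose`: an interactive menu prompt that
# shows labeled options and returns the value mapped to the user's pick. A
# minimal sketch of that assumed helper:
def choose(prompt, options):
    """Prompt until the user types one of the labels in `options` (a dict of
    label -> value); return the chosen value."""
    labels = list(options)
    while True:
        selection = input('{} ({})\n> '.format(prompt, ', '.join(labels)))
        if selection in options:
            return options[selection]
        print('Invalid option: {}'.format(selection))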
def on_off_occ2(sigma, L, G=5 * 10**6):
    ef = -sigma * L
    p = 1 / 4.0
    log_Zb = log(G) + log(sum(choose(L, i) * p**i * (1 - p)**(L - i) * exp(sigma * i)
                              for i in range(L + 1)))
    log_actual_occ = -log(1 + exp(log_Zb + ef))
    return exp(log_actual_occ)
def propensity(self, stoich_vector, rate_constant):
    choices = [choose(x_j, -v_j)
               for x_j, v_j in zip(self.state, stoich_vector) if v_j < 0]
    # print choices
    propensity = rate_constant * product(choices)
    # print "state:", self.state, "stoich:", stoich_vector, \
    #     "rate const:", rate_constant, "prop:", propensity
    if propensity < 0:
        print "propensity less than zero:", stoich_vector, rate_constant
        raise Exception
    return propensity
def logP(k, n):
    total = 0
    # k / 2 + k % 2 is ceil(k / 2) under Python 2 integer division.
    curr = choose(n, k / 2 + k % 2)
    for r in xrange(k / 2 + k % 2, k + 1):
        total += curr
        # C(n, r + 1) = C(n, r) * (n - r) / (r + 1)
        curr = curr * (n - r) / (r + 1)
    return log(total) + log(fact(k))
def p(k, n):
    total = 0
    # Same running sum of C(n, r) for r = ceil(k/2) .. k as in logP above.
    curr = choose(n, k / 2 + k % 2)
    for r in xrange(k / 2 + k % 2, k + 1):
        total += curr
        curr = curr * (n - r) / (r + 1)
    return total * fact(k)
def results_of_analyze_bio_motifs(results):
    # IC
    Ls = np.array([len(getattr(Escherichia_coli, tf)[0])
                   for tf in Escherichia_coli.tfs])
    Ls_choose_2 = np.array([choose(L, 2) for L in Ls])
    bio_ics = np.array([motif_ic(getattr(Escherichia_coli, tf))
                        for tf in Escherichia_coli.tfs])
    sim_ics = np.array([results[tf][0] for tf in Escherichia_coli.tfs])
    sim_ic_errs = np.array([1.96 * results[tf][1]
                            for tf in Escherichia_coli.tfs])
    bio_ics_norm = bio_ics / Ls
    sim_ics_norm = sim_ics / Ls
    sim_ic_norm_errs = sim_ic_errs / Ls
    bio_ginis = np.array([motif_gini(getattr(Escherichia_coli, tf))
                          for tf in Escherichia_coli.tfs])
    sim_ginis = np.array([results[tf][2] for tf in Escherichia_coli.tfs])
    sim_gini_errs = np.array([1.96 * results[tf][3]
                              for tf in Escherichia_coli.tfs])
    bio_mis_norm = np.array([total_motif_mi(getattr(Escherichia_coli, tf)) / choose(L, 2)
                             for tf, L in zip(Escherichia_coli.tfs, Ls)])
    sim_mis_norm = np.array([results[tf][4] / choose(L, 2)
                             for tf, L in zip(Escherichia_coli.tfs, Ls)])
    sim_mis_norm_errs = np.array([1.96 * results[tf][5] / choose(L, 2)
                                  for tf, L in zip(Escherichia_coli.tfs, Ls)])
    plt.subplot(1, 4, 1)
    plt.errorbar(bio_ics, sim_ics, yerr=sim_ic_errs, fmt='o')
    plt.plot([0, 20], [0, 20])
    plt.xlabel("IC")
    plt.subplot(1, 4, 2)
    plt.errorbar(bio_ics_norm, sim_ics_norm, yerr=sim_ic_norm_errs, fmt='o')
    plt.plot([0, 2], [0, 2])
    plt.xlabel("IC/base")
    plt.subplot(1, 4, 3)
    plt.errorbar(bio_ginis, sim_ginis, yerr=sim_gini_errs, fmt='o')
    plt.plot([0, 1], [0, 1])
    plt.xlabel("Gini coefficient")
    plt.subplot(1, 4, 4)
    plt.errorbar(bio_mis_norm, sim_mis_norm, yerr=sim_mis_norm_errs, fmt='o')
    plt.plot([0, 0.5], [0, 0.5])
    plt.xlabel("MI/pair")
    print "IC:", pearsonr(bio_ics, sim_ics)
    print "normalized IC:", pearsonr(bio_ics_norm, sim_ics_norm)
    print "Gini:", pearsonr(bio_ginis, sim_ginis)
    print "normalized MI:", pearsonr(bio_mis_norm, sim_mis_norm)
def train_episode(
    self, epsilon: float, global_step: int
) -> Tuple[Optional[tf.Tensor], int]:
    """Run one episode and train the model on it.

    Args:
        epsilon: Current value of epsilon for the epsilon-greedy policy
        global_step: The no. of frames processed so far

    Returns:
        The first state encountered
        The updated global step
    """
    state = Deque[tf.Tensor](maxlen=STATE_FRAMES)
    state.append(preprocess(self.env.reset()))  # initial state
    first: Optional[tf.Tensor] = None
    while True:
        if len(state) < STATE_FRAMES:
            initial = None
            action = self.env.action_space.sample()
        else:
            initial = tf.stack(state, axis=-1)
            action = choose(self.model, initial, epsilon)
        state_new, reward, done, _ = self.env.step(action)
        state_new = preprocess(state_new)
        state.append(state_new)
        if initial is not None:
            # The inputs for this transition are well-defined, i.e. a
            # proper x-frames state, so add it to the replay buffer.
            self.replay.append((initial, state_new, action, reward, done))
            if first is None:
                first = initial
        if len(self.replay) >= self.config.batch_size:
            loss = self.exp_replay(
                sample_replay(self.replay, self.config.batch_size))
            if global_step % self.log_steps == 0:
                with self.writer.as_default(), tf.name_scope("losses"):
                    tf.summary.scalar("loss", loss, step=global_step)
        if global_step % self.config.reset_steps == 0:
            self.fixed.set_weights(self.model.get_weights())
        global_step += 1
        if done:
            break
    # Needed for logging metrics
    return first, global_step
def do(N):
    ways = choose(25, N) * product(xrange(75, 100 - N)) * fact(75)
    all = fact(100)
    ret = ways * 10**50 / all
    import sys
    print >>sys.stderr, (ret, N)
    return ret
def count_partitions(n):
    d = defaultdict(int)
    for kmer in make_kmers(n):
        d[tuple(partition_sequence(kmer))] += 1
    for k, v in sorted(d.items(), key=lambda (k, v): k):
        abstract_sequences = exp(log_fac(n) - sum(log_fac(i) for i in k))
        assignments = choose(4, sum(1 for i in k if i > 0))
        foo = [k.count(i) for i in set(k)]
        bar = fac(4) / prod(fac(i) for i in foo)
        print k, v, abstract_sequences * bar  # , abstract_sequences, float(v)/abstract_sequences, foo, bar
    return d
def get_pairwise_freqs(motif, pc=1/16.0):
    cols = transpose(motif)
    L = len(cols)
    N = len(motif)
    fs = [{(b1, b2): 0 for (b1, b2) in dinucs}
          for _ in range(int(choose(L, 2)))]
    for f, (col1, col2) in zip(fs, choose2(cols)):
        for b1, b2 in zip(col1, col2):
            f[b1, b2] += 1
        for b1, b2 in dinucs:
            f[b1, b2] += pc
            f[b1, b2] /= float(N + 16 * pc)
    return fs
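# `choose2` (used throughout the motif snippets) is assumed to yield all
# unordered pairs of its input in index order, i.e. the int(choose(L, 2))
# pairs that the fs list is sized for. A minimal sketch:
from itertools import combinations

def choose2(xs):
    """Yield every unordered pair (xs[i], xs[j]) with i < j."""
    return combinations(xs, 2)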
def normal_game():
    player_types = {
        'Human': (HumanPlayer, 'Human'),
        'Random': (RandomPlayer, 'Random'),
        'PolicyNet': (PolicyNetPlayer, 'PolicyNet')
    }
    display = choose('Display?', {'Display': True, 'No Display': False})
    delay = choose('Delay?', {
        'None': None,
        '50 milliseconds': 0.050,
        '250 milliseconds': 0.250,
        '1 second': 1,
        '5 seconds': 5
    })
    num_moves = choose('Moves Before Draw?', {
        '100': 100,
        '50': 50,
        '200': 200,
        '1,000': 1000
    })
    player1, player1_name = choose('Player 1 Type?', player_types)
    player1 = player1()
    player2, player2_name = choose('Player 2 Type?', player_types)
    player2 = player2()
    game = Game(player1, player2,
                player1_name=player1_name, player2_name=player2_name,
                display=display, moves=num_moves, delay=delay)
    game.run()
def basic_statistics(tfdf=None, filename="basic_motif_statistics.png"):
    if tfdf is None:
        tfdf = extract_motif_object_from_tfdf()
    motifs = [getattr(tfdf, tf) for tf in tfdf.tfs]
    Ls = [len(motif[0]) for motif in motifs]
    ns = [len(motif) for motif in motifs]
    ics = [motif_ic(motif) for motif in motifs]
    ic_density = [ic / L for ic, L in zip(ics, Ls)]
    sigmas = [mean(map(sd, make_pssm(motif))) for motif in motifs]
    ginis = [motif_gini(motif, correct=False) for motif in motifs]
    mi_density = [total_motif_mi(motif) / choose(L, 2)
                  for motif, L in zip(motifs, Ls)]
    plt.subplot(2, 3, 1)
    plt.xticks(rotation=90)
    plt.hist(Ls)
    plt.xlabel("Length (bp)")
    plt.subplot(2, 3, 2)
    plt.xticks(rotation=90)
    plt.hist(ns)
    plt.xlabel("Number of sites")
    plt.subplot(2, 3, 3)
    plt.hist(ics)
    plt.xticks(rotation=90)
    plt.xlabel("IC (bits)")
    plt.subplot(2, 3, 4)
    plt.xticks(rotation=90)
    plt.hist(ic_density)
    plt.xlabel("IC Density (bits/bp)")
    plt.subplot(2, 3, 5)
    plt.xticks(rotation=90)
    plt.hist(ginis)
    plt.xlabel("Gini coeff")
    plt.subplot(2, 3, 6)
    plt.xticks(rotation=90)
    plt.hist(mi_density)
    plt.xlabel("MI Density (bits/comparison)")
    plt.tight_layout()
    if filename:
        plt.savefig(filename, dpi=600)
    plt.close()
def analyze_motif(motif, trials=1000):
    cols = transpose(motif)
    L = len(cols)
    ps = []
    for col1, col2 in choose2(cols):
        actual_mi = dna_mi(col1, col2)
        perm_mis = [dna_mi(col1, permute(col2)) for i in xrange(trials)]
        p = percentile(actual_mi, perm_mis)
        ps.append(p)
    q = fdr(ps)
    correlated_pairs = [(i, j) for (i, j), p in zip(choose2(range(L)), ps)
                        if p < q]
    num_correlated = len(correlated_pairs)
    print "correlated column pairs:", num_correlated, \
        "%1.2f" % (num_correlated / choose(L, 2))
    return correlated_pairs
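# `fdr` is assumed to return the Benjamini-Hochberg significance cutoff for a
# list of permutation p-values, so that pairs with p < q are called
# correlated at a controlled false-discovery rate. A minimal sketch,
# assuming the conventional alpha = 0.05:
def fdr(ps, alpha=0.05):
    """Benjamini-Hochberg cutoff: largest p_(k) with p_(k) <= (k / m) * alpha."""
    m = len(ps)
    cutoff = 0.0
    for k, p in enumerate(sorted(ps), start=1):
        if p <= k * alpha / m:
            cutoff = p
    return cutoff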
def runcommand(cmdline, options, command_dict):
    """Split commands in fabric style, returns (cmdname, cmdkwargs)"""
    if ':' in cmdline:
        cmdname, args = cmdline.split(':', 1)
    else:
        cmdname, args = cmdline, ''
    command_name = utils.choose(cmdname, command_dict.keys())
    name = command_name
    command_func = command_dict[name]
    try:
        kwargs = utils.make_kwargs(command_func, args)
    except ValueError as e:
        raise errors.CommandArgumentError(unicode(e))
    return command_func(options, **kwargs)
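# In this snippet `utils.choose` resolves a possibly-abbreviated command name
# against the known command names, fabric-style. A plausible sketch, assuming
# unique-prefix matching (the real utils.choose is not shown):
def choose(name, candidates):
    """Return the unique candidate that starts with `name`; raise otherwise."""
    matches = [c for c in candidates if c.startswith(name)]
    if len(matches) != 1:
        raise ValueError('ambiguous or unknown command: %s' % name)
    return matches[0]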
def analyze_correlation_positions(all_tests, alpha="fdr"):
    if alpha == "fdr":
        alpha = fdr(concat(all_tests))
    print "alpha:", alpha
    ds = []
    d_controls = []
    for tests in all_tests:
        K = len(tests)
        L = find(lambda l: round(choose(l, 2)) == K, range(50))
        if L is None:
            print K
            raise Exception()
        for k, (i, j) in enumerate(choose2(range(L))):
            if j == i + 1 and tests[k] <= alpha:
                d = i / float(L)
                ds.append(d)
                d_controls.append(random.randrange(L - 1) / float(L))
                plt.scatter(d, tests[k])
    return ds, d_controls
def play():
    while True:
        try:
            print('''
+----------------------------------------------+
| Welcome to textbasedgame!                    |
| This game is released under the GPL.         |
| Copyright V1Soft 2016                        |
+----------------------------------------------+''')
            choice = utils.choose('\nDo you want to:',
                                  [['Start a new game', 'new'],
                                   ['Continue from a previous save', 'continue'],
                                   ['Exit the game', 'quit']],
                                  '', False)
            if choice == 'NEW':
                newGame()
            elif choice == 'CONTINUE':
                loadGame()
            elif choice == 'QUIT':
                sys.exit(0)
            else:
                while True:
                    if utils.confirm('Invalid option. Do you want to quit?'):
                        sys.exit(0)
                    else:
                        break
        except (KeyboardInterrupt, EOFError):
            sys.exit(0)
def test_choose_premium_price_raise_type_error(self):
    with self.assertRaises(TypeError):
        ut.choose('a')
def num_combs_with_replacement(n, L):
    return choose(L + n, L)
def w(n, L, rho):
    """count number of motifs rho mismatches from ringer"""
    return choose(n * L, rho) * 3**rho
def approx_Zf_ref(rho, n, L, sigma):
    p = rho / float(n * L)  # probability of mismatch per base
    q = 1 - p
    return n * sum(exp(-sigma * k) * choose(L, k) * p**k * q**(L - k)
                   for k in range(L + 1))
def total_occupancy(L, sigma, G, mu):
    return G * sum(choose(L, k) * 3**k * 1**(L - k) *
                   (1.0 / (1 + exp(k * sigma + mu)))
                   for k in range(L + 1)) / (4.0**L)
def compute_Zb(n, L, sigma, G):
    return G * sum(exp(-sigma * i) * choose(L, i) * (3 / 4.0)**i * (1 / 4.0)**(L - i)
                   for i in range(L + 1))
def log_choose(N, k):
    return log(choose(N, k))
def expected_mi2(L, k):
    possible_sites = choose(L, k) * (1**(L - k)) * (3**k)
    total_IC = 2 * L - log2(possible_sites)
    return total_IC - expected_ic(L, k)
def random_pairwise_model(L, sigma=1):
    return [{(b1, b2): random.gauss(0, sigma)
             for b1 in "ACGT" for b2 in "ACGT"}
            for _ in range(int(choose(L, 2)))]
def dbinom(k, N, p):
    """Compute the probability of k out of N successes at probability p"""
    return choose(N, k) * p**k * (1 - p)**(N - k)
def log_dbinom(k, N, p):
    """Compute the log probability of k out of N successes at probability p"""
    return log(choose(N, k)) + k * log(p) + (N - k) * log(1 - p)
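# A quick sanity check for dbinom/log_dbinom against scipy (an assumed extra
# dependency, used here only for verification, with the helpers above in
# scope): scipy.stats.binom.pmf and .logpmf compute the same quantities.
from scipy.stats import binom

assert abs(dbinom(3, 10, 0.5) - binom.pmf(3, 10, 0.5)) < 1e-12
assert abs(log_dbinom(3, 10, 0.5) - binom.logpmf(3, 10, 0.5)) < 1e-9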
import pygame
import utils

fpsClock = pygame.time.Clock()
reusedValue = -1
screen, screct = utils.pygameSetup(1280, 720, "Code Playground")
choice = utils.choose("Are you epileptic? 1 or True = yes, 0 or False = no")
if choice:
    while True:
        fpsClock.tick(75)
        reusedValue = utils.strobe(screen, reusedValue)
        pygame.display.flip()
def prior(k, L):
    """prior probability of observing k mismatches in L sites"""
    return choose(L, k) * (1 / 4.0)**(L - k) * (3 / 4.0)**k
def prob_motif_with_mismatch(n, L, k):
    N = n * L
    return choose(N, k) * (3 / 4.0)**k * (1 / 4.0)**(N - k)
import os
from typing import Any, Callable, Dict, List, Optional, Union, Tuple
import csv

import pandas as pd

from utils import choose
from bit_manipulations import nums_to_bits, bits_to_nums, popcount64d

# Need to figure out how to have one file for bit manip.

CASH5_FIELD_COUNT = 43
CASH5_PICKED_COUNT = 5
CASH5_PRIZE = 100000.0
MAX_BITS = 63

# Hypergeometric odds of matching (CASH5_PICKED_COUNT - i) of the 5 picked
# numbers out of the 43-number field.
CASH5_ODDS_DICT = {
    CASH5_PICKED_COUNT - i:
    choose(CASH5_PICKED_COUNT, CASH5_PICKED_COUNT - i) *
    choose(CASH5_FIELD_COUNT - CASH5_PICKED_COUNT, i) /
    choose(CASH5_FIELD_COUNT, CASH5_PICKED_COUNT)
    for i in range(CASH5_PICKED_COUNT + 1)
}

CASH5_PRIZE_DICT = {5: 100000, 4: 250, 3: 5, 2: 1, 1: 0}


def back_test(nums: str, cash5_df: pd.DataFrame, date: str = "") -> pd.DataFrame:
    bits = nums_to_bits(nums=nums, bit_length=MAX_BITS,
                        max_num=CASH5_FIELD_COUNT + 1,
def generations_game():
    player_types = {
        'Random': (RandomPlayer, 'Random'),
        'PolicyNet': (PolicyNetPlayer, 'PolicyNet')
    }
    num_generations = choose('Number of Simulations?', {
        '10': 10,
        '100': 100,
        '1,000': 1000,
        '10,000': 10000,
        '100,000': 100000,
        '1,000,000': 1000000
    })
    num_moves = choose('Moves Before Draw?', {
        '100': 100,
        '50': 50,
        '200': 200,
        '1,000': 1000
    })
    print_interval = choose('Update Interval?', {
        'Every Round': 1,
        '10 Rounds': 10,
        '100 Rounds': 100,
        '1,000 Rounds': 1000,
        '10,000 Rounds': 10000
    })
    player1, player1_name = choose('Player 1 Type?', player_types)
    player1 = player1()
    player2, player2_name = choose('Player 2 Type?', player_types)
    player2 = player2()
    game = Game(player1, player2,
                player1_name=player1_name, player2_name=player2_name,
                display=None, moves=num_moves)
    player1_wins = 0
    player2_wins = 0
    draws = 0
    for gen in range(num_generations):
        winner = game.run()
        if winner is not None:
            if winner is player1:
                player1_wins += 1
            else:
                player2_wins += 1
        else:
            draws += 1
        if gen % print_interval == 0:
            gen += 1  # prevents divide by zero
            clear()
            print('Complete: {}/{}, {} 1 Wins: {:5}%, {} 2 Wins: {:5}%, Draws: {:5}%'
                  .format(gen, num_generations,
                          player1_name, round(player1_wins / gen * 100, 2),
                          player2_name, round(player2_wins / gen * 100, 2),
                          round(draws / gen * 100, 2)))
# Problem 53
#
# real  0m0.072s
# user  0m0.052s
# sys   0m0.012s

from utils import choose

num = 0
for n in range(1, 101):
    for r in range(4, n - 2):
        num += 1 if choose(n, r) > 1000000 else 0
print(num)