def generate_text(session, model, config, starting_text='<eos>',
                  stop_length=100, stop_tokens=None, temp=1.0):
    """Generate text from the model.

    Hint: Create a feed-dictionary and use sess.run() to execute the model.
          Note that you will need to use model.initial_state as a key to feed_dict
    Hint: Fetch model.final_state and model.predictions[-1]. (You set
          model.final_state in add_model() and model.predictions is set in __init__)
    Hint: Store the outputs of running the model in local variables state and
          y_pred (used in the pre-implemented parts of this function.)

    Args:
        session: tf.Session() object
        model: Object of type RNNLM_Model
        config: A Config() object
        starting_text: Initial text passed to model.
    Returns:
        output: List of word idxs
    """
    state = model.initial_state.eval()
    # Imagine tokens as a batch size of one, length of len(tokens[0])
    tokens = [model.vocab.encode(word) for word in starting_text.split()]
    # Use starting_text to compute the initial_state
    for wd in tokens:
        feed = {model.input_placeholder: np.array([[wd]]),
                model.initial_state: state,
                model.dropout_placeholder: 1.0}
        state, y_pred = session.run([model.final_state, model.predictions[-1]],
                                    feed_dict=feed)
    # First word predicted from starting_text;
    # add it to tokens and use it as the input of the next step
    next_word_idx = sample(y_pred[0], temperature=temp)
    tokens.append(next_word_idx)
    for i in xrange(stop_length):
        ### YOUR CODE HERE
        # input_placeholder has shape (batch_size, num_steps);
        # here batch_size=1 and num_steps=1
        feed = {model.input_placeholder: np.array([[tokens[-1]]]),
                model.initial_state: state,
                model.dropout_placeholder: 1.0}
        state, y_pred = session.run([model.final_state, model.predictions[-1]],
                                    feed_dict=feed)
        ### END YOUR CODE
        # y_pred has shape (1, len(vocab)), so y_pred[0] is a 1-D array whose
        # elements give the probability of each word in the vocabulary;
        # next_word_idx is the index of the word sampled from that distribution
        next_word_idx = sample(y_pred[0], temperature=temp)
        tokens.append(next_word_idx)
        if stop_tokens and model.vocab.decode(tokens[-1]) in stop_tokens:
            break
    output = [model.vocab.decode(word_idx) for word_idx in tokens]
    return output
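# The sample() helper called above is not defined in this snippet. A minimal
# sketch of the temperature-based sampling it appears to assume (the body here
# is an assumption, not the assignment's actual utils.sample):
import numpy as np

def sample(probs, temperature=1.0):
    """Draw an index from a probability vector after rescaling by temperature."""
    probs = np.asarray(probs, dtype=np.float64)
    logits = np.log(probs + 1e-12) / temperature   # sharpen (<1) or flatten (>1) the distribution
    scaled = np.exp(logits - np.max(logits))       # numerically stable softmax
    scaled /= np.sum(scaled)
    return int(np.random.choice(len(scaled), p=scaled))

# With temperature close to 0 this approaches greedy argmax decoding; larger
# temperatures make the generated text more diverse.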
def main(*args): import argparse parser = argparse.ArgumentParser( description='Run Recommendations', formatter_class=argparse.RawTextHelpFormatter ) parser.add_argument('-u', '--user', type=str, choices=USER_FILES, default='test_user', metavar='USER', help='user file, e.g.\n' + '{{{}}}'.format(','.join(sample(USER_FILES, 3)))) parser.add_argument('-k', '--k', type=int, help='for k-means') parser.add_argument('-q', '--query', choices=CATEGORIES, metavar='QUERY', help='search for restaurants by category e.g.\n' '{{{}}}'.format(','.join(sample(CATEGORIES, 3)))) parser.add_argument('-p', '--predict', action='store_true', help='predict ratings for all restaurants') parser.add_argument('-r', '--restaurants', action='store_true', help='outputs a list of restaurant names') args = parser.parse_args() # Output a list of restaurant names if args.restaurants: print('Restaurant names:') for restaurant in sorted(ALL_RESTAURANTS, key=restaurant_name): print(repr(restaurant_name(restaurant))) exit(0) # Select restaurants using a category query if args.query: restaurants = search(args.query, ALL_RESTAURANTS) else: restaurants = ALL_RESTAURANTS # Load a user assert args.user, 'A --user is required to draw a map' user = load_user_file('{}.dat'.format(args.user)) # Collect ratings if args.predict: print(241, restaurants) ratings = rate_all(user, restaurants, feature_set()) else: restaurants = user_reviewed_restaurants(user, restaurants) names = [restaurant_name(r) for r in restaurants] ratings = {name: user_rating(user, name) for name in names} # Draw the visualization if args.k: centroids = k_means(restaurants, min(args.k, len(restaurants))) else: centroids = [restaurant_location(r) for r in restaurants] draw_map(centroids, restaurants, ratings)
def main(*args): import argparse parser = argparse.ArgumentParser(description="Run Recommendations", formatter_class=argparse.RawTextHelpFormatter) parser.add_argument( "-u", "--user", type=str, choices=USER_FILES, default="test_user", metavar="USER", help="user file, e.g.\n" + "{{{}}}".format(",".join(sample(USER_FILES, 3))), ) parser.add_argument("-k", "--k", type=int, help="for k-means") parser.add_argument( "-q", "--query", choices=CATEGORIES, metavar="QUERY", help="search for restaurants by category e.g.\n" "{{{}}}".format(",".join(sample(CATEGORIES, 3))), ) parser.add_argument("-p", "--predict", action="store_true", help="predict ratings for all restaurants") args = parser.parse_args() # Select restaurants using a category query if args.query: results = search(args.query, RESTAURANTS.values()) restaurants = {restaurant_name(r): r for r in results} else: restaurants = RESTAURANTS # Load a user assert args.user, "A --user is required to draw a map" user = load_user_file("{}.dat".format(args.user)) # Collect ratings if args.predict: ratings = rate_all(user, restaurants, feature_set()) else: restaurants = user_reviewed_restaurants(user, restaurants) ratings = {name: user_rating(user, name) for name in restaurants} # Draw the visualization restaurant_list = list(restaurants.values()) if args.k: centroids = k_means(restaurant_list, min(args.k, len(restaurant_list))) else: centroids = [restaurant_location(r) for r in restaurant_list] draw_map(centroids, restaurant_list, ratings)
def main(FLAGS): """ """ if FLAGS.mode == 'train': # Process the data train_data, test_data = process_data( data_dir=FLAGS.data_dir, split_ratio=FLAGS.split_ratio, ) # Sample sample( data=train_data, data_dir=FLAGS.data_dir, ) # Load components with open(os.path.join(basedir, FLAGS.data_dir, 'char2index.json'), 'r') as f: char2index = json.load(f) # Training train( data_dir=FLAGS.data_dir, char2index=char2index, train_data=train_data, test_data=test_data, num_epochs=FLAGS.num_epochs, batch_size=FLAGS.batch_size, num_filters=FLAGS.num_filters, learning_rate=FLAGS.lr, decay_rate=FLAGS.decay_rate, max_grad_norm=FLAGS.max_grad_norm, dropout_p=FLAGS.dropout_p, ) elif FLAGS.mode == 'infer': # Inference infer( data_dir=FLAGS.data_dir, model_name=FLAGS.model_name, sentence=FLAGS.sentence, ) else: raise Exception('Choose --mode train|infer')
def generate_text(session, model, config, starting_text='<eos>', stop_length=100, stop_tokens=None, temp=1.0): """Generate text from the model. Hint: Create a feed-dictionary and use sess.run() to execute the model. Note that you will need to use model.initial_state as a key to feed_dict Hint: Fetch model.final_state and model.predictions[-1]. (You set model.final_state in add_model() and model.predictions is set in __init__) Hint: Store the outputs of running the model in local variables state and y_pred (used in the pre-implemented parts of this function.) Args: session: tf.Session() object model: Object of type RNNLM_Model config: A Config() object starting_text: Initial text passed to model. Returns: output: List of word idxs """ state = model.initial_state.eval() # Imagine tokens as a batch size of one, length of len(tokens[0]) tokens = [model.vocab.encode(word) for word in starting_text.split()] for i in xrange(stop_length): ### YOUR CODE HERE raise NotImplementedError ### END YOUR CODE next_word_idx = sample(y_pred[0], temperature=temp) tokens.append(next_word_idx) if stop_tokens and model.vocab.decode(tokens[-1]) in stop_tokens: break output = [model.vocab.decode(word_idx) for word_idx in tokens] return output
def compute(self, size):
    """
    self.points is a matrix with n rows and d cols.
    bi is a matrix with k*log(n) rows and d cols.
    dist[i] represents sens(p_i) as in the formula discussed.
    """
    e = w_kmeans.Kmeans(self.points, np.expand_dims(self.weights, axis=0), self.k, 10)
    bi = e.compute()
    dist = utils.get_dist_to_centers(self.points, bi)  # distance of each point to its nearest cluster
    if self.weights is not None:  # it's always not None!
        dist /= np.sum(dist)  # normalize
        dist *= 2
        c = utils.get_centers(self.points, bi)  # get centers
        c = self.find_cluester_size_weighted(c, W=self.weights)  # get weighted size of each center's cluster
        dist += (4.0 / c)  # add to each point the size of its cluster, as in the formula
        t = np.sum(dist * self.weights)
        weights = 1 / (dist * size)
        weights *= t
        dist *= self.weights
        dist /= np.sum(dist)
    prob = dist  # this is actually the sampling probability
    points, weights = utils.sample(self.points, prob, size, weights=weights)
    return points, weights
def choose_random_class(self):
    """
    Choose a random class, weighted by its size
    :return: the class name
    """
    return sample([(class_name, len(data['class'].body) + 1)
                   for class_name, data in self._inheritance_graph.nodes_iter(data=True)])
def choose_random_method(self):
    """
    Choose a random method, weighted by its size
    :return: the method name
    """
    return sample([(method_name, len(data['method'].body) + 1)
                   for method_name, data in self._method_call_graph.nodes_iter(True)])
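# Both choose_random_class and choose_random_method rely on a sample() helper
# that picks one name from (name, weight) pairs with probability proportional
# to the weight (elsewhere in this corpus the same idea is also called as
# sample(items, weights)). A minimal sketch under that assumption, not
# necessarily the project's actual helper:
import random

def sample(weighted_items):
    """Weighted random choice over a sequence of (item, weight) pairs."""
    items, weights = zip(*weighted_items)
    threshold = random.uniform(0, sum(weights))
    running = 0
    for item, weight in zip(items, weights):
        running += weight
        if threshold <= running:
            return item
    return items[-1]  # guard against floating-point round-off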
def generate_caption(self, session, img_feature,toSample=False): dp = 1 img_template = np.zeros([self.config.batch_size, self.config.img_dim]) img_template[0,:] = img_feature sent_pred = np.ones([self.config.batch_size, 1])*3591 # <SOS> while sent_pred[0,-1] != 3339 and (sent_pred.shape[1] - 1) < 50: feed = {self._sent_placeholder: sent_pred, self._img_placeholder: img_template, self._targets_placeholder: np.ones([self.config.batch_size,1]), # dummy variable self._dropout_placeholder: dp} idx_next_pred = np.arange(1, self.config.batch_size + 1)*(sent_pred.shape[1] + 1) - 1 if toSample: logits = session.run(self.logits, feed_dict=feed) next_logits = logits[idx_next_pred,:] raw_predicted = [] for row_idx in range(next_logits.shape[0]): idx = sample(next_logits[row_idx,:]) raw_predicted.append(idx) raw_predicted = np.array(raw_predicted) else: raw_predicted = session.run(self._predictions, feed_dict=feed) raw_predicted = raw_predicted[idx_next_pred] next_pred = np.reshape(raw_predicted, (self.config.batch_size,1)) sent_pred = np.concatenate([sent_pred, next_pred], 1) predicted_sentence = ' '.join(self.index2token[idx] for idx in sent_pred[0,1:-1]) return predicted_sentence
def gini_vs_mi_comparison(filename=None): sys.path.append("/home/pat/jaspar") from parse_jaspar import euk_motifs euk_motifs = [motif if len(motif) <= 200 else sample(200,motif,replace=False) for motif in euk_motifs] prok_ginis = map(motif_gini,bio_motifs) prok_mis = map(total_motif_mi,tqdm(bio_motifs)) prok_mipps = map(motif_mi_pp,tqdm(bio_motifs)) eu_ginis = map(motif_gini,jaspar_motifs) eu_mis = map(total_motif_mi,tqdm(jaspar_motifs)) eu_mipps = map(motif_mi_pp,tqdm(jaspar_motifs)) plt.subplot(1,2,1) plt.scatter(prok_ginis,prok_mipps) plt.xlabel("Gini Coefficient") plt.ylabel("MI (bits / column pair)") plt.title("Prokaryotic Motifs") plt.xlim(-.1,.7) plt.ylim(-0.1,0.7) plt.subplot(1,2,2) plt.scatter(eu_ginis,eu_mipps) plt.xlabel("Gini Coefficient") plt.xlim(-.1,.7) plt.ylim(-0.1,0.7) plt.title("Eukaryotic Motifs") plt.suptitle("Mutual Information vs Gini Coefficient") maybesave(filename)
def bio_detector_experiment_prok_euk(filename=None,pickle_filename=None): #use data from prok_euk_ic_gini_experiment; Figure 4 in Gini Paper if pickle_filename is None: prok_motifs = bio_motifs euk_motifs = [motif if len(motif) <= 200 else sample(200,motif,replace=False) for motif in euk_motifs] with open("prok_euk_ic_gini_experiment.pkl") as f: (prok_maxents, prok_uniforms, euk_maxents, euk_uniforms) = cPickle.load(f) prok_bio_ginis = map(motif_gini, prok_motifs) euk_bio_ginis = map(motif_gini, euk_motifs) prok_ps = [percentile(bio_gini,map(motif_gini,spoofs)) for bio_gini,spoofs in zip(prok_bio_ginis,prok_maxents)] prok_spoofs = [spoofs[0] for spoofs in prok_maxents] prok_neg_ps = [percentile(motif_gini(spoof),map(motif_gini,spoofs)) for spoof,spoofs in zip(prok_spoofs,prok_maxents)] euk_ps = [percentile(bio_gini,map(motif_gini,spoofs)) for bio_gini,spoofs in zip(euk_bio_ginis,euk_maxents)] euk_spoofs = [spoofs[0] for spoofs in euk_maxents] euk_neg_ps = [percentile(motif_gini(spoof),map(motif_gini,spoofs)) for spoof,spoofs in zip(euk_spoofs,euk_maxents)] with open("bio_detector_experiment_prok_euk.pkl",'w') as f: cPickle.dump((prok_ps,euk_ps,prok_neg_ps,euk_neg_ps),f) else: with open(pickle_filename) as f: (prok_ps,euk_ps,prok_neg_ps,euk_neg_ps) = cPickle.load(f) sns.set_style('white') #sns.set_palette('gray') sns.set_palette(sns.cubehelix_palette(3)) roc_curve(prok_ps + euk_ps,prok_neg_ps + euk_neg_ps,color='black') plt.xlabel("FPR",fontsize='large') plt.ylabel("TPR",fontsize='large') maybesave(filename)
def compose_async(song_key): model = get_model() while True: diversity = random.uniform(0.7, 1.0) sentence = '#' * MEMORY_LENGTH + 'X:' sentence = sentence[-MEMORY_LENGTH:] generated = 'X:' while True: x = np.zeros((1, MEMORY_LENGTH, len(model.chars))) for t, char in enumerate(sentence): x[0, t, model.char_indices[char]] = 1. preds = model.predict(x, verbose=0)[0] next_index = utils.sample(preds, diversity) next_char = model.indices_char[next_index] sentence = sentence[-MEMORY_LENGTH + 1:] + next_char generated += next_char if generated.endswith('$$$'): try: song = Song.objects.get(key=song_key) song.song = generated.rstrip('$') song.save() writer.write(song_key) except WriterException: break else: return if len(generated) > MAX_SONG_LENGTH: break
def get_arca_reads(N=None):
    """Return N downsampled reads from ArcA dataset"""
    filename = '/home/pat/chip_seq_inference/data/chip_seq_datasets/ArcA_park_et_al/SRR835423/SRR835423.map'
    arca_reads = read_map(filename)
    sampled_arca_reads = sample(N, arca_reads) if N else arca_reads
    sampled_read_fraction = len(sampled_arca_reads)/float(len(arca_reads))
    print "sampled %1.2f%% of %s reads" % (sampled_read_fraction*100, len(arca_reads))
    return sampled_arca_reads
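# In these scripts sample(n, xs, replace=...) appears to downsample a sequence:
# it is called as sample(N, arca_reads) here and as sample(200, motif,
# replace=False) in the motif experiments in this file. A minimal sketch under
# that assumed signature (the real utility may differ):
import random

def sample(n, xs, replace=True):
    """Draw n items from xs, with or without replacement."""
    if replace:
        return [random.choice(xs) for _ in range(n)]
    return random.sample(xs, n)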
def solve_n_queens_problem(number_of_queens, population_size=10**3, max_iterations=10**4): assert 0 < number_of_queens < 256 indices = numpy.arange(number_of_queens) # def swap_2_random_rows(population, all_rows=indices): # perm, rand_rows = sample(population, 1)[0], sample(all_rows, 2) # new_perm = perm.copy() # new_perm[rand_rows[::-1]] = perm[rand_rows[0]], perm[rand_rows[1]] # return new_perm swap_2_random_rows = lambda population, all_rows=indices: swap(sample(population, 1)[0], sample(all_rows, 2)) numb_of_parents = 2 chromo_length = number_of_queens/numb_of_parents slices = tuple(imap( apply, repeat(slice), izip_longest(*imap(xrange, (0, chromo_length), repeat(number_of_queens - 1), repeat(chromo_length))), )) def merge_2_random_solutions(population): return permutation_from_inversion( # merge two solutions by merging their inversion sequence ... numpy.fromiter( chain.from_iterable( imap( # get inversion sequence from each donor parent ... item, imap(tuple, imap(permutation_inversion, sample(population, numb_of_parents))), slices ) ), count=number_of_queens, dtype=board_element_type ) ) operators = merge_2_random_solutions, swap_2_random_rows def genetic_operators(population, sample_size, prob_of_mutation=.3): return sorted( imap(apply, imap(operators.__getitem__, random(sample_size) < prob_of_mutation), repeat((population,))), key=fitness_function, reverse=True ) return genetic_algorithm( sorted( starmap( sample, repeat((numpy.arange(number_of_queens, dtype=board_element_type), number_of_queens), population_size) ), key=fitness_function, reverse=True ), selection, genetic_operators, sort_population, lambda perm: len(perm) - fitness_function(perm), max_iterations=max_iterations )
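# The swap() helper used by the swap_2_random_rows lambda above is not shown;
# judging from the commented-out version kept alongside it, it presumably
# returns a copy of the permutation with the two sampled row indices exchanged.
# A sketch to that effect (an assumption, not the module's actual definition):
def swap(perm, rand_rows):
    new_perm = perm.copy()
    new_perm[rand_rows[::-1]] = perm[rand_rows[0]], perm[rand_rows[1]]
    return new_perm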
def nn_classify(self, N, test_lc, train_files): best_matches = [] best_distances = [] best_files = [] # Read index of each lc file upto = 0 for filename in train_files: #if upto % 200 == 0: # print upto upto += 1 # Read all the light curve data into an array lc_data = open(self._testdir + '/' + filename) lc_class = filename.strip().split('_')[0] lc = [[], []] for line in lc_data: line = line.strip().split(',') lc[0].append(float(line[0])) lc[1].append(float(line[1])) lc_data.close() normalise(lc) lc = sample(lc, 400) lc = distribute(lc) # Update the nearest neighbour distance = self._distance_fn(test_lc, lc) # Find insert point insert_point = 0 found = False for insert_point, bd in enumerate(best_distances): if bd >= distance: found = True break if found or len(best_distances) == 0: best_distances.insert(insert_point, distance) best_matches.insert(insert_point, lc_class) best_files.insert(insert_point, filename) # Pop from the top of the list if it's too long if len(best_distances) > N: best_distances.pop() best_matches.pop() best_files.pop() # Compute nearest neighbor by majority near_count = {} for c in best_matches: if c not in near_count.keys(): near_count[c] = 1 else: near_count[c] += 1 #print sorted(near_count.items(), key=itemgetter(1)) return [sorted(near_count.items(), key=itemgetter(1))[-1][0], best_files]
def k_means(restaurants, k, max_updates=100):
    """Use k-means to group RESTAURANTS by location into K clusters."""
    assert len(restaurants) >= k, 'Not enough restaurants to cluster'
    old_centroids, n = [], 0
    # Select initial centroids randomly by choosing K different restaurants
    centroids = [restaurant_location(r) for r in sample(restaurants, k)]

    while old_centroids != centroids and n < max_updates:
        old_centroids = centroids
        "*** YOUR CODE HERE ***"
        n += 1
    return centroids
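# One possible body for the "YOUR CODE HERE" step above, following the solved
# variants of this exercise later in this file (it replaces the placeholder
# string inside the while loop):
clusters = group_by_centroid(restaurants, centroids)
centroids = [find_centroid(cluster) for cluster in clusters]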
def make_jaspar_spoofs(): sys.path.append("/home/pat/jaspar") from parse_jaspar import jaspar_motifs jaspar_motifs = [motif if len(motif) <= 200 else sample(200,motif,replace=False) for motif in jaspar_motifs] maxent_spoofs = [spoof_motifs_maxent(motif,10,verbose=True) for motif in tqdm(jaspar_motifs,desc='jaspar_motifs')] uniform_spoofs = [spoof_motifs_uniform(motif,10,verbose=True) for motif in tqdm(jaspar_motifs,desc='jaspar_motifs')] oo_spoofs = [spoof_motifs_oo(motif,10) for motif in tqdm(jaspar_motifs,desc='jaspar_motifs')] gle_spoofs = [spoof_motifs_gle(motif,10,verbose=True) for motif in tqdm(jaspar_motifs,desc='jaspar_motifs')]
def seed(self): k = self.k - 1 centers = [] prob = self.w / np.sum(self.w) center = utils.sample(self.p, 1, prob) centers.append(center[0]) min_dist = None while k > 0: np_centers = np.array(centers) if min_dist is None: d = utils.get_sq_distances(x=self.p, y=np_centers).ravel() min_dist = d else: d = utils.get_sq_distances(x=self.p, y=np.array([np_centers[-1]])).ravel() min_dist = np.minimum(min_dist, d) dist = np.array(min_dist) dist *= self.w prob = dist / np.sum(dist) center = utils.sample(self.p, 1, prob) centers.append(center[0]) k -= 1 return np.array(centers, dtype=np.float64)
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids # BEGIN Question 6 centroids=[find_centroid(x) for x in group_by_centroid(restaurants,centroids)] # END Question 6 n += 1 return centroids
def k_means(events, k, max_updates=100): """Use k-means to group events by location into K clusters.""" assert len(events) >= k, 'Not enough events to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing K different restaurants centroids = [event for event in sample(events, k)] with_centroids = [] while old_centroids != centroids and n < max_updates: old_centroids = centroids clusterlist = group_by_centroid(events, centroids) centroids = [find_centroid(r) for r in clusterlist] n += 1 return group_by_centroid(events, centroids)
def k_means(restaurants, k, max_updates=100): """Use k-means to group RESTAURANTS by location into K clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing K different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids groups = group_by_centroid(restaurants, centroids) centroids = [find_centroid(groups[i]) for i in range(len(groups))] n += 1 return centroids
def k_means(restaurants, k, max_updates=100): """Use k-means to group `restaurants` by location into `k` clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids # BEGIN Question 6 "*** REPLACE THIS LINE ***" # END Question 6 n += 1 return centroids
def merge_2_random_solutions(population): return permutation_from_inversion( # merge two solutions by merging their inversion sequence ... numpy.fromiter( chain.from_iterable( imap( # get inversion sequence from each donor parent ... item, imap(tuple, imap(permutation_inversion, sample(population, numb_of_parents))), slices ) ), count=number_of_queens, dtype=board_element_type ) )
def compute(self, size, grnds=10, ginit=1):
    q = w_KMeans.KMeans(self.p, np.expand_dims(self.w, axis=0), self.k, grnds, ginit).compute()  # this is my kmeans for the coreset.
    sq_d = utils.get_sq_distances(self.p, q)  # squared distances from each point to each center
    dist = utils.get_dist_to_centers(d=sq_d)  # the squared distance from each point to its center
    dist /= np.sum(dist)  # normalize
    dist *= 2  # according to the paper
    c = utils.get_centers(d=sq_d)  # the index of each point's center
    c = self._find_cluster_size(c)  # the size of the cluster each point belongs to
    s = dist + 4.0/c  # add it; the 4 is according to the paper
    t = np.sum(s*self.w)  # this is the t from the paper
    u = t/(s*size)  # the new weights for the coreset
    prob = s*self.w/t  # the probability for sampling
    p, w = utils.sample(self.p, size, prob=prob, weights=u)  # sample coreset: points + weights
    return p, w
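# utils.sample is not shown in these coreset snippets; as used here it draws
# `size` points according to the probability vector `prob` and returns them
# together with their new weights. A rough numpy sketch under that assumption
# (the real helper may differ, e.g. in how it handles replacement):
import numpy as np

def sample(points, size, prob, weights):
    idx = np.random.choice(len(points), size=size, p=prob)  # weighted draw with replacement
    return points[idx], weights[idx]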
def k_means(restaurants, k, max_updates=100): """Use k-means to group `restaurants` by location into `k` clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids clusters_lst=group_by_centroid(restaurants, old_centroids) centroids=[] for cluster in clusters_lst: centroids+=[find_centroid(cluster)] n += 1 return centroids
def TestPosModel(model, dimIn, dimOut): print('Testing model...') text = [w for s in texts[5] for w in s]+\ [w for s in texts[6] for w in s] success = [0,0,0,0] failure = [0,0,0,0] for run in range(0,10): for iteration in range(0,100): start = random.randint(0, len(text) - maxlen - 2) x = np.zeros((1, dimIn, dimOut), dtype=np.bool) for t,token in enumerate(text[start:start+maxlen]): for p in word_indices[getFuzzyMatch(token, word_indices)][1]: x[0,t,pos_indices[p]]=1 next = np.zeros((dimOut), dtype=np.bool) for p in word_indices[getFuzzyMatch(text[start+maxlen], word_indices)][1]: next[pos_indices[p]] = 1 preds = model.predict(x)[0] if next[sample(preds)]: success[0] += 1 success[1] += 1 success[2] += 1 success[3] += 1 else: failure[0] += 1 if next[sample(preds)]: success[1] += 1 success[2] += 1 else: failure[1] += 1 if next[sample(preds)] or next[sample(preds)]: success[2] += 1 success[3] += 1 else: failure[2] += 1 if next[sample(preds)] or next[sample(preds)] or next[sample(preds)] or next[sample(preds)]: success[3] += 1 else: failure[3] += 1 print('round: '+str(run+1)) print(rate(success[0],failure[0])) print(rate(success[1],failure[1])) print(rate(success[2],failure[2])) print(rate(success[3],failure[3]))
def k_means(restaurants, k, max_updates=100): """Use k-means to group RESTAURANTS by location into K clusters.""" assert len(restaurants) >= k, "Not enough restaurants to cluster" old_centroids, n = [], 0 # Select initial centroids randomly by choosing K different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids lst = group_by_centroid(restaurants, centroids) centroids = [] for r in lst: centroids.append(find_centroid(r)) n += 1 return centroids
def create_class(self): superclass_name = None if random() > self.p_no_inherit: class_names = [] num_subclasses = [] for node, in_degree in self.inheritance_graph.in_degree_iter(): class_names.append(node) num_subclasses.append(in_degree) superclass_name = sample(class_names, [n + 1 for n in num_subclasses]) klass = self.code_modifier.create_class(superclass_name) self.inheritance_graph.add_node(klass.name, {'class': klass}) if superclass_name: self.inheritance_graph.add_edge(klass.name, superclass_name) return klass
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids clusters = group_by_centroid(restaurants, centroids) new_centroid = [] for i in clusters: new_centroid.append(find_centroid(i)) centroids = new_centroid n += 1 return centroids
def k_means(restaurants, k, max_updates=100): """Use k-means to group RESTAURANTS by location into K clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing K different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids lst = group_by_centroid(restaurants, centroids) centroids = [] for r in lst: centroids.append(find_centroid(r)) n += 1 return centroids
def k_means(restaurants, k, max_updates=100): """Use k-means to group `restaurants` by location into `k` clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids # BEGIN Question 6 clus = group_by_centroid(restaurants, old_centroids) centroids = map_and_filter(clus, find_centroid, lambda x: len(x)) # END Question 6 n += 1 return centroids
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids clusters = group_by_centroid(restaurants, centroids) centroids = [find_centroid(cluster) for cluster in clusters] """ 1. group restaurants next to closest centroids 2. find new centroid for cluster """ n += 1 return centroids
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] #i guess eventually centroids becomes as optimized as possible? while old_centroids != centroids and n < max_updates: old_centroids = centroids # BEGIN Question 6 clusters = group_by_centroid(restaurants, centroids) centroids = [find_centroid(c) for c in clusters] # END Question 6 n += 1 return centroids
def k_means(restaurants, k, max_updates=100): """Use k-means to group RESTAURANTS by location into K clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing K different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids "*** YOUR CODE HERE ***" # group restaurants by the closest centroid centroid_restaurants = group_by_centroid(restaurants, centroids) # get the centroid of the each grouped restaurants centroids = [find_centroid(x) for x in centroid_restaurants] n += 1 return centroids
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids # BEGIN Question 6 "*** YOUR CODE HERE ***" clusters = group_by_centroid(restaurants, centroids) centroids = [find_centroid(cluster) for cluster in clusters] # END Question 6 n += 1 return centroids
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids # BEGIN Question 6 centroids = list( map(lambda restaurant: find_centroid(restaurant), group_by_centroid(restaurants, centroids))) # END Question 6 n += 1 return centroids
def generate_text(session, model, config, starting_text='<eos>',
                  stop_length=100, stop_tokens=None, temp=1.0):
    """Generate text from the model.

    Hint: Create a feed-dictionary and use sess.run() to execute the model.
          Note that you will need to use model.initial_state as a key to feed_dict
    Hint: Fetch model.final_state and model.predictions[-1]. (You set
          model.final_state in add_model() and model.predictions is set in __init__)
    Hint: Store the outputs of running the model in local variables state and
          y_pred (used in the pre-implemented parts of this function.)

    Args:
        session: tf.Session() object
        model: Object of type RNNLM_Model
        config: A Config() object
        starting_text: Initial text passed to model.  # takes text as input and outputs a list of word idxs to generate text
    Returns:
        output: List of word idxs
    """
    state = model.initial_state.eval()
    # Imagine tokens as a batch size of one, length of len(tokens[0])
    tokens = [model.vocab.encode(word)
              for word in starting_text.split()]  # split the input text into words and encode each as a vocabulary index
    for i in xrange(stop_length):
        ### YOUR CODE HERE
        feed = {
            model.input_placeholder: [tokens[-1:]],
            model.initial_state: state,
            model.dropout_placeholder: 1
        }
        state, y_pred = session.run(
            [model.final_state, model.predictions[-1]],
            feed_dict=feed)  # run the model to obtain state and y_pred
        ### END YOUR CODE
        next_word_idx = sample(y_pred[0], temperature=temp)  # index of the next word in the vocabulary
        tokens.append(next_word_idx)
        if stop_tokens and model.vocab.decode(tokens[-1]) in stop_tokens:
            break
    output = [model.vocab.decode(word_idx) for word_idx in tokens]  # decode the token indices back into words
    return output
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] # BEGIN Question 6 while old_centroids != centroids and n < max_updates: old_centroids = centroids "*** REPLACE THIS LINE ***" clusters = group_by_centroid(restaurants, centroids) centroids = [] for cluster in clusters: centroids.append(find_centroid(cluster)) # END Question 6 return centroids
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants #1) create a cluster for each centroid consisting of all elements closest to xthat centroid. centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids # BEGIN Question 6 "*** REPLACE THIS LINE ***" group = group_by_centroid(restaurants, old_centroids) centroids = [find_centroid(i) for i in group] # END Question 6 n += 1 return centroids
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] cluster = [] while old_centroids != centroids and n < max_updates: old_centroids = centroids cluster = group_by_centroid(restaurants, centroids) centroids = [] # BEGIN Question 6 for x in range(len(cluster)): centroids += [find_centroid(cluster[x])] # END Question 6 n += 1 return centroids
def on_epoch_end(epoch, logs):
    c = "S"
    print("\n", end="")
    for _ in range(predict_length):
        print(c, end="")
        inp = np.zeros([batch_size, 1, VOCAB_SIZE])
        for i in range(batch_size):
            inp[i][0][encode(ord(c))] = 1.0
        prob = model.predict(inp, batch_size=batch_size)
        prob = np.reshape(prob, [batch_size, VOCAB_SIZE])
        prob = np.sum(prob, axis=0)
        rc = sample(prob)
        c = chr(decode(rc))
    print("\n")
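# on_epoch_end above has the (epoch, logs) signature Keras expects from an
# epoch-end hook, so it is presumably registered via a LambdaCallback roughly
# like this (model, x_train and y_train come from the surrounding script and
# are assumptions here):
from keras.callbacks import LambdaCallback

print_callback = LambdaCallback(on_epoch_end=on_epoch_end)
# model.fit(x_train, y_train, batch_size=batch_size, callbacks=[print_callback])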
def k_means(restaurants, k, max_updates=100):
    """Use k-means to group restaurants by location into k clusters."""
    assert len(restaurants) >= k, 'Not enough restaurants to cluster'
    old_centroids, n = [], 0
    # Select initial centroids randomly by choosing k different restaurants
    centroids = [restaurant_location(r) for r in sample(restaurants, k)]

    while old_centroids != centroids and n < max_updates:
        old_centroids = centroids
        # BEGIN Question 6
        cluster = group_by_centroid(restaurants, old_centroids)  # first split them into clusters according to the centroids
        centroids = []
        for item in cluster:
            centroids.append(find_centroid(item))
        # END Question 6
        n += 1
    return centroids
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: # != is not equal to old_centroids = centroids # BEGIN Question 6 clusters = group_by_centroid(restaurants, old_centroids) for index in range(0, len(clusters)): centroids[index] = find_centroid(clusters[index]) # END Question 6 n += 1 return centroids
def mod_extra_robot_parts(self, visible=True): """add distractor parts of robots in the lower area of the camera frame""" N = 3 self._set_visible("robot_part", N, visible) if not visible: return # Project difference into camera coordinate frame cam_pos = self.model.cam_pos[0] cam_quat = np.quaternion(*self.model.cam_quat[0]) lower_range = Range3D([0.0, 0.0], [-0.2, -0.3], [-0.2, -0.3]) lower_size = Range3D([0.2, 0.6], [0.01, 0.15], [0.01, 0.15]) lower_angle = Range3D([-85.0, -95.0], [-180, 180], [-85, -95]) upper_range = Range3D([-0.6, 0.6], [-0.05, 0.05], [-0.05, 0.05]) upper_size = Range3D([0.005, 0.05], [0.005, 0.05], [0.01, 0.3]) upper_angle = Range3D([-85.0, -95.0], [-180, 180], [-85, -95]) name = "robot_part0" lower_bid = self.model.body_name2id(name) lower_gid = self.model.geom_name2id(name) lower_pos = cam_pos + quaternion.rotate_vectors( cam_quat, sample_xyz(lower_range)) self.model.body_pos[lower_bid] = lower_pos self.model.geom_size[lower_gid] = sample_xyz(lower_size) self.model.geom_quat[lower_gid] = sample_quat(lower_angle) self.model.geom_type[lower_gid] = sample_geom_type(reject=["capsule"]) if self.model.geom_type[lower_gid] == 5: self.model.geom_size[lower_gid][0] = self.model.geom_size[ lower_gid][2] for i in range(1, 10): name = "robot_part{}".format(i) upper_bid = self.model.body_name2id(name) upper_gid = self.model.geom_name2id(name) upper_pos = lower_pos + sample_xyz(upper_range) self.model.body_pos[upper_bid] = upper_pos self.model.geom_size[upper_gid] = sample_xyz(upper_size) self.model.geom_type[upper_gid] = sample_geom_type() # 50% of the time, choose random angle instead reasonable angle if sample([0, 1]) < 0.5: self.model.geom_quat[upper_gid] = sample_quat(upper_angle) else: self.model.geom_quat[upper_gid] = random_quat()
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids # BEGIN Question 6 listCentroids, clusterList = [], [] clustersList = group_by_centroid(restaurants, old_centroids) for i in clustersList: listCentroids.append(find_centroid(i)) centroids = listCentroids # END Question 6 n += 1 return centroids
def mod_lights(self):
    """Randomize pos, direction, and lights"""
    # light stuff
    LIGHT_RX = Range(LEFTX, RIGHTX)
    LIGHT_RY = Range(BINY, DIGY)
    LIGHT_RZ = Range(AFZ, AFZ + ZHIGH)
    LIGHT_RANGE3D = Range3D(LIGHT_RX, LIGHT_RY, LIGHT_RZ)
    LIGHT_UNIF = Range3D(Range(0, 1), Range(0, 1), Range(0, 1))

    for i, name in enumerate(self.model.light_names):
        lid = self.model.light_name2id(name)
        # random sample 80% of any given light being on
        self.light_modder.set_active(name, sample([0, 1]) < 0.8)
        #self.light_modder.set_active(name, 0)
        dir_xyz = sample_light_dir()
        self.light_modder.set_pos(name, sample_xyz(LIGHT_RANGE3D))
        self.light_modder.set_dir(name, dir_xyz)
        self.light_modder.set_specular(name, sample_xyz(LIGHT_UNIF))
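# The Range/Range3D sampling helpers used throughout these randomization
# methods are not shown. Judging from calls such as sample([0, 1]) < 0.8
# ("80% of any given light being on") and sample_xyz(LIGHT_RANGE3D), they
# appear to draw uniformly from an interval and per-axis from a 3-D range.
# A minimal sketch under those assumptions (not the project's actual utils):
import numpy as np

def sample(interval):
    """Uniform draw from a (low, high) pair."""
    low, high = interval
    return np.random.uniform(low, high)

def sample_xyz(range3d):
    """Independent uniform draw along each axis of a Range3D."""
    return np.array([sample(axis) for axis in range3d])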
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: # initializing new centroids new_centroids = [] old_centroids = centroids # return a group of restaurants that have same centroid groups = group_by_centroid(restaurants, centroids) for group in groups: new_centroids.append(find_centroid(group)) # Bind centroids to a new list of the centroids of all the clusters centroids = new_centroids n += 1 return centroids
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids # BEGIN Question 6 "*** REPLACE THIS LINE ***" centroids = [ find_centroid(cluster) for cluster in group_by_centroid(restaurants, centroids) ] #call find_centroid on each cluster of restaurants created by group_by_centroidsa # END Question 6 n += 1 return centroids
def main():
    codec, model, config = setup()
    from utils.sample import sample
    with open(os.path.join('submit', 'samples.txt'), 'w', encoding='utf-8') as f:
        for i in range(len(titles)):
            start_text = titles[i]
            start_text = codec.encode(start_text).to(device)
            text = sample(model, start_text, config, codec)
            text = codec.decode(text.tolist()[0])
            f.write('=' * 50 + " SAMPLE_{} ".format(i) + '=' * 50 + '\n')
            f.write(text + '\n')
            print('=' * 50 + " SAMPLE_{} ".format(i) + '=' * 50 + '\n')
            print("Prompt: " + titles[i])
            print(text)
    print("# Samples written to samples.txt.")
    return 0
def generate_text(session, model, config, starting_text='<eos>', stop_length=100, stop_tokens=None, temp=1.0): """Generate text from the model. Note that batch_size and num_steps are both 1. Hint: Create a feed-dictionary and use sess.run() to execute the model. Note that you will need to use model.initial_state as a key to feed_dict Hint: Fetch model.final_state and model.predictions[-1]. (You set model.final_state in add_model() and model.predictions is set in __init__) Hint: Store the outputs of running the model in local variables state and y_pred (used in the pre-implemented parts of this function.) Hint: Dropout rate should be 1 for this work. Args: session: tf.Session() object model: Object of type RNNLM_Model config: A Config() object starting_text: Initial text passed to model. Returns: output: List of word idxs """ state = model.initial_state.eval() # Imagine tokens as a batch size of one, length of len(tokens[0]) tokens = [model.vocab.encode(word) for word in starting_text.split()] print("tokens in 'generate_text': ", tokens) for i in range(stop_length): feed = { model.input_placeholder: [tokens[-1:]], model.initial_state: state, model.dropout_placeholder: 1 } state, y_pred = session.run([model.final_state, model.predictions[-1]], feed_dict=feed) next_word_idx = sample(y_pred[0], temperature=temp) tokens.append(next_word_idx) if stop_tokens and model.vocab.decode(tokens[-1]) in stop_tokens: break output = [model.vocab.decode(word_idx) for word_idx in tokens] return output
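# A small convenience wrapper one might pair with this generator to stop at the
# '<eos>' token (the wrapper name and its existence are assumptions, not part
# of the snippet above):
def generate_sentence(session, model, config, *args, **kwargs):
    """Generate text until the '<eos>' token is produced."""
    return generate_text(session, model, config, *args,
                         stop_tokens=['<eos>'], **kwargs)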
def mod_extra_judges(self, visible=True): """mod NASA judges around the perimeter of the arena""" # TODO: might want to add regions on the sides of the arena, but these # may be covered by the distractors already N = 5 self._set_visible("judge", N, visible) if not visible: return JUDGE_XRANGE = Range(0.1, 0.2) JUDGE_YRANGE = Range(0.1, 0.2) JUDGE_ZRANGE = Range(0.75, 1.0) JUDGE_SIZE_RANGE = Range3D(JUDGE_XRANGE, JUDGE_YRANGE, JUDGE_ZRANGE) digwall_bid = self.model.body_name2id("dig_wall") digwall_gid = self.model.geom_name2id("dig_wall") digwall_center = self.model.body_pos[digwall_bid] digwall_geo = self.model.geom_size[digwall_gid] digwall_xrange = Range(-1.0 + digwall_center[0] - digwall_geo[0], 1.0 + digwall_center[0] + digwall_geo[0]) digwall_yrange = Range(digwall_center[1] + 0.5, digwall_center[1] + 1.5) digwall_zrange = JUDGE_ZRANGE - 0.75 digwall_range = Range3D(digwall_xrange, digwall_yrange, digwall_zrange) for i in range(N): name = "judge{}".format(i) judge_bid = self.model.body_name2id(name) judge_gid = self.model.geom_name2id(name) #self.model.geom_quat[judge_gid] = jitter_quat(self.start_geom_quat[judge_gid], 0.05) self.model.geom_quat[judge_gid] = random_quat() self.model.geom_size[judge_gid] = sample_xyz(JUDGE_SIZE_RANGE) self.model.geom_type[judge_gid] = sample_geom_type() if self.model.geom_type[judge_gid] == 3 or self.model.geom_type[ judge_gid] == 5: self.model.geom_size[judge_gid][1] = self.model.geom_size[ judge_gid][2] self.model.body_pos[judge_bid] = sample_xyz(digwall_range) # 50% chance of invisible self.model.geom_rgba[judge_gid][-1] = sample([0, 1]) < 0.5
def build_b_kernel(self, velocity: ti.template(), kappa: ti.template(), n: ti.template(), b: ti.template()): offset = 0.5 * (1 - ti.Vector.unit(self.dim, self.d)) for I in ti.grouped(b): b[I] = velocity[I] / self.dt scale = self.sigma * self.simulator.level_set.delta( \ utils.sample(self.simulator.level_set.phi, I + offset)) # calculate Du/Dn grad_u = ti.Vector.zero(self.real, self.dim) for k in ti.static(range(self.dim)): unit = ti.Vector.unit(self.dim, k) # grad_u[k] -= velocity[I] # if I[k] + 1 < self.res[k]: grad_u[k] += velocity[I + unit] # grad_u[k] /= self.dx grad_u[k] += (utils.sample(velocity, I + unit * 0.5) - \ utils.sample(velocity, I - unit * 0.5)) / self.dx Du_Dn = grad_u.dot(n[I]) # calculate D2u/Dn2 D2 = ti.Matrix.zero(self.real, self.dim, self.dim) for k in ti.static(range(self.dim)): unit = ti.Vector.unit(self.dim, k) if I[k] - 1 >= 0: D2[k, k] += velocity[I - unit] - velocity[I] if I[k] + 1 < self.res[k]: D2[k, k] += velocity[I + unit] - velocity[I] D2[k, k] /= (self.dx**2) for k1 in ti.static(range(self.dim)): for k2 in ti.static(range(self.dim)): unit1 = ti.Vector.unit(self.dim, k1) unit2 = ti.Vector.unit(self.dim, k2) # v00 = velocity[I] # v10 = velocity[I + unit1] if I[k1] + 1 < self.res[k1] else 0 # v01 = velocity[I + unit2] if I[k2] + 1 < self.res[k2] else 0 # v11 = velocity[I + unit1 + unit2] if I[k1] + 1 < self.res[k1] and I[k2] + 1 < self.res[k2] else 0 v00 = utils.sample(velocity, I - unit1 * 0.5 - unit2 * 0.5) v10 = utils.sample(velocity, I + unit1 * 0.5 - unit2 * 0.5) v01 = utils.sample(velocity, I - unit1 * 0.5 + unit2 * 0.5) v11 = utils.sample(velocity, I + unit1 * 0.5 + unit2 * 0.5) D2[k1, k2] = (v11 + v00 - v10 - v01) / (self.dx**2) D2u_Dn2 = (n[I].transpose() @ D2 @ n[I])[0, 0] b[I] -= scale * (kappa[I] * n[I][self.d] + self.dt * (D2u_Dn2 + kappa[I] * Du_Dn))
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids # BEGIN Question 6 "*** REPLACE THIS LINE ***" "We make an array temp to gather values, which values will later be reassigned to centroids" temp=[] for cluster in group_by_centroid(restaurants,centroids): temp+=[find_centroid(cluster)] centroids=temp # END Question 6 n += 1 return centroids
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids new_list = [] # BEGIN Question 6 answer = group_by_centroid(restaurants, old_centroids) for elem in answer: answer_2 = find_centroid(elem) new_list += [answer_2] centroids = new_list # END Question 6 n += 1 return centroids
def _get_pool_data(self): IMAGE_NOISE_RVARIANCE = Range(0.0, 0.0001) cam_imgs, ground_truths = self.pool.render(640, 360, camera_name='camera1', randomize=True) ground_truths = list(ground_truths) cam_imgs = list( cam_imgs[:, ::-1, :, :]) # Rendered images are upside-down. for i in range(len(cam_imgs)): image_noise_variance = sample(IMAGE_NOISE_RVARIANCE) cam_imgs[i] = (skimage.util.random_noise( cam_imgs[i], mode='gaussian', var=image_noise_variance) * 255).astype(np.uint8) cam_imgs[i] = preproc_image(cam_imgs[i]) return cam_imgs, ground_truths
def start_text(start_text, n_words=250):
    # Here we have loaded in a model that trained over 20 epochs `rnn_20_epoch.net`
    with open('saved_model/rnn_20_epoch.net', 'rb') as f:
        checkpoint = torch.load(f)

    loaded = CharRNN(checkpoint['tokens'],
                     n_hidden=checkpoint['n_hidden'],
                     n_layers=checkpoint['n_layers'])
    loaded.load_state_dict(checkpoint['state_dict'])

    generated_text = sample(loaded, n_words, top_k=5, prime='{} '.format(start_text))
    generated_text = generated_text.replace('\n', ' ')
    generated_text = '{}.'.format(generated_text.split('.')[0])
    return generated_text
def generate_samples(): weights_fpath = 'weights.pickle' # weights from which to initialize text_fpath = 'parsed.txt' # training data text file, to build vocabulary grad_clipping = 100. num_hidden = 512 train_seq_length, sample_seq_length = 20, 200 text, vocab = utils.parse(text_fpath) # need to build the same encoder as during training, could pickle encoder = LabelEncoder() encoder.fit(list(vocab)) vocab_size = len(vocab) layers = char_rnn.build_model( (None, train_seq_length, vocab_size), # input_shape num_hidden, vocab_size, grad_clipping) print('loading model weights from %s' % (weights_fpath)) char_rnn.load_weights(layers['l_out'], weights_fpath) print('compiling theano function for sampling') sample = theano_funcs.create_sample_func(layers) try: while True: # prompt the user for a phrase to initialize the sampling phrase = raw_input('start a phrase of at least %d chars:\n' % (train_seq_length)) if len(phrase) < train_seq_length: print('len(phrase) = %d, need len(phrase) >= %d' % (len(phrase), train_seq_length)) continue generated_phrase = utils.sample(sample, phrase, train_seq_length, sample_seq_length, vocab_size, encoder) print('%s\n' % (generated_phrase)) except KeyboardInterrupt: print('caught ctrl-c') print('done')
def generate_text(session, model, config, starting_text='<eos>', stop_length=100, stop_tokens=None, temp=1.0): """Generate text from the model. Hint: Create a feed-dictionary and use sess.run() to execute the model. Note that you will need to use model.initial_state as a key to feed_dict Hint: Fetch model.final_state and model.predictions[-1]. (You set model.final_state in add_model() and model.predictions is set in __init__) Hint: Store the outputs of running the model in local variables state and y_pred (used in the pre-implemented parts of this function.) Args: session: tf.Session() object model: Object of type RNNLM_Model config: A Config() object starting_text: Initial text passed to model. Returns: output: List of word idxs """ state = model.initial_state.eval() # Imagine tokens as a batch size of one, length of len(tokens[0]) tokens = [model.vocab.encode(word) for word in starting_text.split()] for i in xrange(stop_length): ### YOUR CODE HERE # raise NotImplementedError # todo, load embeddings # todo, predict self.final_state # todo, get projections # todo, sample from projections ### END YOUR CODE next_word_idx = sample(y_pred[0], temperature=temp) tokens.append(next_word_idx) if stop_tokens and model.vocab.decode(tokens[-1]) in stop_tokens: break output = [model.vocab.decode(word_idx) for word_idx in tokens] return output
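# A sketch of one way to complete the todos above, following the finished
# variants of this function elsewhere in this file (batch_size and num_steps
# are both 1 here, and dropout is disabled by feeding 1.0); these lines would
# replace the placeholder inside the for loop:
feed = {model.input_placeholder: [tokens[-1:]],
        model.initial_state: state,
        model.dropout_placeholder: 1.0}
state, y_pred = session.run([model.final_state, model.predictions[-1]],
                            feed_dict=feed)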
def k_means(restaurants, k, max_updates=100): """Use k-means to group restaurants by location into k clusters.""" assert len(restaurants) >= k, 'Not enough restaurants to cluster' old_centroids, n = [], 0 # Select initial centroids randomly by choosing k different restaurants centroids = [restaurant_location(r) for r in sample(restaurants, k)] while old_centroids != centroids and n < max_updates: old_centroids = centroids # BEGIN Question 6 "*** YOUR CODE HERE ***" # Create a cluster for each centroid consisting of all elements closest to # that centroid. clusters = group_by_centroid(restaurants, old_centroids) # Find the centroid (average position) of each cluster. centroids = [find_centroid(cluster) for cluster in clusters] # END Question 6 n += 1 return centroids