def load_classifier(self, path):
    """Loads a trained classifier from file.

    IMPORTANT: Doesn't load the associated corpus, so it can't train,
    just classify/test.
    """
    main_clf = utils.load_object(path)
    sub_clfs = utils.load_object(path + '1')
    self.clf.set_classifier(main_clf)
    self.clf.subclassifiers = sub_clfs

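# Most snippets here assume pickle-based helpers named load_object / save_object
# (often exposed through a utils module). A minimal hypothetical sketch is given
# below -- note that the snippets are not consistent about save_object's
# argument order, so this is only one plausible reading:
import pickle


def load_object(path):
    # Read a pickled object back from disk.
    with open(path, 'rb') as f:
        return pickle.load(f)


def save_object(obj, path):
    # Write an object to disk as a pickle.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)
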
def crawl(self, **kwargs):
    # Treat the engine as the main function; links are read from the database.
    from crawler.settings import spider, pipeline
    spider = load_object(spider)(**kwargs)      # needs start_urls
    pipeline = load_object(pipeline)(**kwargs)  # needs a logger
    self.engine.open_spider(spider, pipeline, iter(spider.start_requests()))

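# The crawler and middleware snippets call load_object with a dotted path
# string rather than a file path (in the style of Scrapy's load_object).
# A minimal sketch of that variant, assuming standard importlib behaviour:
import importlib


def load_object(dotted_path):
    # "package.module.ClassName" -> the ClassName attribute of package.module
    module_path, _, attr_name = dotted_path.rpartition('.')
    module = importlib.import_module(module_path)
    return getattr(module, attr_name)
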
def run_test(config):
    models = config['models']
    total_mse = [0 for _ in range(len(models))]
    total_rmse = [0 for _ in range(len(models))]
    total_r2 = [0 for _ in range(len(models))]
    total_adj_r2 = [0 for _ in range(len(models))]
    total_accuracy = [0 for _ in range(len(models))]
    total_balanced_accuracy = [0 for _ in range(len(models))]

    print("Training and testing")
    for i, model in enumerate(models):
        print(model)
        temp_config = get_config({**config, 'model': model})
        temp_config['print'] = config['print']
        data_train, labels_train = load_object(temp_config['data_path'])
        data_test, labels_test = load_object(temp_config['test_path'])
        baseline_targets_train = np.array(
            load_object('./data_processed/baseline_targets_train.pkl'),
            dtype=object)
        baseline_targets_test = np.array(
            load_object('./data_processed/baseline_targets_test.pkl'),
            dtype=object)
        fold = [data_train, labels_train, data_test, labels_test]
        fold_base = [baseline_targets_train, baseline_targets_test]
        if model == 'baseline':
            mse, rmse, r2, adj_r2, accuracy, balanced_accuracy = run_model(
                temp_config, fold, fold_base)
        else:
            mse, rmse, r2, adj_r2, accuracy, balanced_accuracy = run_model(
                temp_config, fold)
        total_mse[i] += mse
        total_rmse[i] += rmse
        total_r2[i] += r2
        total_adj_r2[i] += adj_r2
        total_accuracy[i] += accuracy
        total_balanced_accuracy[i] += balanced_accuracy

    # Print the results in a table: each row is a metric name followed by one
    # value per model.
    table = [['mse'] + total_mse,
             ['root_mse'] + total_rmse,
             ['r2_score'] + total_r2,
             ['adj_r2_score'] + total_adj_r2,
             ['accuracy'] + total_accuracy,
             ['bal_accuracy'] + total_balanced_accuracy]
    pd.DataFrame(table, columns=['metrics'] + models).to_csv("final_results.csv")
    print(tabulate(table, headers=['metrics'] + models, tablefmt="fancy_grid"))  # plain

def load(model_root, model_name):
    path = os.path.join(model_root, model_name)
    loss = utils.load_object(os.path.join(path, model_name + "_loss"))
    params = utils.load_object(os.path.join(path, model_name + "_params"))
    vocab = utils.load_object(os.path.join(path, model_name + "_vocab"))
    model = SkipGramModel(vocab, params["embedding_dim"], params["use_cuda"])
    model.load_state_dict(torch.load(os.path.join(path, model_name)))
    return (model, loss, params)

def crawl(self, **kwargs):
    # To decide: should two spiders share one engine? Keeping it on self would
    # use a lot of memory.
    engine = Engine(self)
    # Treat the engine as the main function; links are read from the database.
    from crawler.settings import spider, pipeline
    if 'spider' in kwargs:
        spider = kwargs['spider']
    spider = load_object(spider)(**kwargs)      # needs start_urls
    if 'pipeline' in kwargs:
        pipeline = kwargs['pipeline']
    pipeline = load_object(pipeline)(**kwargs)  # needs a logger
    engine.open_spider(spider, pipeline, iter(spider.start_requests()))

def __init__(self, path, api: ApiAdapter):
    pushed_snapshot_path = os.path.join(
        path, ".course_git", "pushed_snapshot.pkl")
    commited_snapshot_path = os.path.join(
        path, ".course_git", "commited_snapshot.pkl")
    if not os.path.exists(".course_git"):
        raise Exception("There is no repository in this directory")
    if not os.path.exists(pushed_snapshot_path) or not os.path.exists(
            commited_snapshot_path):
        raise Exception("Invalid sync directory")
    self.pushed_snapshot = utils.load_object(pushed_snapshot_path)
    self.commited_snapshot = utils.load_object(commited_snapshot_path)

def generate_rdm_all_gradient(nnet, name, blanks, rdm_type=analysis.SPEARMAN,
                              save_files=True, title="RDM training combined",
                              from_file=False, delete_blank_states=True):
    if not from_file:
        if rdm_type != analysis.SPEARMAN:
            raise Exception("not implemented")
        hidden_both, accuracy_totals_both, accuracy_fullseqs_both = test_network_all(nnet)
        hidden_ari, accuracy_totals_ari, accuracy_fullseqs_ari = test_network_ari(nnet, blanks)
        hidden_bev, accuracy_totals_bev, accuracy_fullseqs_bev = test_network_bev(nnet, blanks)
        print("Both: {0}, {1}".format(accuracy_totals_both, accuracy_fullseqs_both))
        print("Ari: {0}, {1}".format(accuracy_totals_ari, accuracy_fullseqs_ari))
        print("Bev: {0}, {1}".format(accuracy_totals_bev, accuracy_fullseqs_bev))

        hidden = utils.flatten_onelevel(hidden_bev) + \
                 utils.flatten_onelevel(hidden_ari) + \
                 utils.flatten_onelevel(hidden_both)

        # Now cut the hidden layer in two.
        hidden_left = []
        hidden_right = []
        for vector in hidden:
            hidden_left.append(vector[:len(vector) // 2])
            hidden_right.append(vector[len(vector) // 2:])

        rdmatrix_left = analysis.rdm_spearman(hidden_left)
        rdmatrix_right = analysis.rdm_spearman(hidden_right)
        # Save the massive RDMs for debug purposes (so they don't have to be
        # regenerated every time).
        utils.save_object(name + "rdmatright", rdmatrix_right)
        utils.save_object(name + "rdmatleft", rdmatrix_left)
    else:
        rdmatrix_left = utils.load_object(name + "rdmatleft")
        rdmatrix_right = utils.load_object(name + "rdmatright")

    rdmatrix_left, labels = model2.process_matrix(rdmatrix_left, delete_blank_states)
    rdmatrix_right, _ = model2.process_matrix(rdmatrix_right, delete_blank_states)
    return rdmatrix_left, rdmatrix_right, labels

def generate_rdm_all(nnet, name, rdm_type=analysis.SPEARMAN, save_files=True,
                     title="RDM training combined", from_file=False,
                     delete_blank_states=True, collapse_rdm=True):
    if not from_file:
        if rdm_type != analysis.SPEARMAN:
            raise Exception("not implemented")
        hidden_both, accuracy_totals_both, accuracy_fullseqs_both = test_network_all(nnet)
        hidden_ari, accuracy_totals_ari, accuracy_fullseqs_ari = test_network_ari(nnet, blanks=True)
        hidden_bev, accuracy_totals_bev, accuracy_fullseqs_bev = test_network_bev(nnet, blanks=True)
        print("Both: {0}, {1}".format(accuracy_totals_both, accuracy_fullseqs_both))
        print("Ari: {0}, {1}".format(accuracy_totals_ari, accuracy_fullseqs_ari))
        print("Bev: {0}, {1}".format(accuracy_totals_bev, accuracy_fullseqs_bev))

        hidden = utils.flatten_onelevel(hidden_bev) + \
                 utils.flatten_onelevel(hidden_ari) + \
                 utils.flatten_onelevel(hidden_both)
        rdmatrix = analysis.rdm_euclidian(hidden)
        utils.save_object(name + "rdmat", rdmatrix)
    else:
        rdmatrix = utils.load_object(name + "rdmat")

    return model2.process_matrix(rdmatrix, delete_blank_states)

def make_rdm_and_mds_reg_hierarchy(name):
    model = utils.load_object(name, 1)  # e.g. 'noise_test2'
    hidden = accuracy_test_reg_hierarchy(model)
    # Turn a list of tensors into a list of np vectors
    for i, tensor in enumerate(hidden):
        hidden[i] = tensor.numpy().reshape(-1)

    # Now cut each hidden vector in two
    left_units = [vector[:len(vector) // 2] for vector in hidden]
    right_units = [vector[len(vector) // 2:] for vector in hidden]

    # Make the labels for the rdms
    labels = []
    for i, sequence in enumerate(pnas2018task.seqs):
        for action in sequence[1:]:
            labels.append(str(i) + '_' + action)

    for side in [[left_units, "left units"], [right_units, "right units"]]:
        rdm = analysis.rdm_spearman(side[0])
        analysis.plot_rdm(rdm, labels, "Spearman rho matrix " + side[1])
        # for i in range(4):
        #     mdsy = analysis.mds(side[0][6*i:6*i+6])
        #     analysis.plot_mds_points(mdsy, range(len(mdsy)), labels=labels[6*i:6*i+6])
        mdsy = analysis.mds(side[0])
        for i, style in enumerate(['ro-', 'b|--', 'gx-.', 'k_:']):
            analysis.plot_mds_points(mdsy[6 * i:6 * i + 6], range(6),
                                     labels=labels[6 * i:6 * i + 6], style=style)

def plot_pd_overlap(subjects=[1], fix_pd=True, threshold=0.3):
    face_dataset = utils.load_object("data_1_50_fixPD_Label_False.pkl")
    figs = []
    for subject_idx in subjects:
        # Each subject spans 70 consecutive samples: starts at [0, 70, 140, ...]
        start_idx = (subject_idx * 70) - 70
        # and stops at [69, 139, 209, ...]
        stop_idx = (subject_idx * 70) - 1
        # Prepare pd_signal numpy array
        pd_signals = []
        fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(14, 12))
        axes.grid(True)
        for i in range(start_idx, stop_idx + 1):
            if fix_pd:
                output, _, _ = detect_glitch(face_dataset[i]['PD_avg_filtered'],
                                             threshold=threshold)
            else:
                output = face_dataset[i]['PD_avg_filtered']
            pd_signals.append(output)
            axes.plot(output)
        fig.suptitle("Testsubject: " + str(subject_idx))
        figs.append(fig)
        print(subject_idx)
    return figs

def results():
    if request.method == 'GET':
        data = utils.load_object("data.pkl")
    elif request.method == 'POST':
        if 'file' not in request.files:
            print('No file part')
        file = request.files['file']
        if file.filename == '':
            print('No selected file')
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            raw_reviews_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            print(raw_reviews_path)
            data = main.main(raw_reviews_path)
            utils.save_object(data, "data.pkl")
    new_data = get_processed_data(data)
    return render_template('results.html',
                           bar_chart=new_data['bar_chart'],
                           pie_chart=new_data['pie_chart'],
                           ratings=data[5],
                           stars=new_data['stars'],
                           table=zip(new_data['markup_sents'],
                                     new_data['sentiment_colors']),
                           categories=Const.CATEGORIES,
                           tuples=new_data['pretty_tuples'])

def get_faps_np_df(pickle_file='data_1_51.pkl'):
    face_dataset = utils.load_object(pickle_file)
    faps_df = pd.DataFrame(face_dataset[:]['faceFAP'])
    faps_df.columns = ['faps']
    faps_df['ori_idx'] = [i for i in range(len(face_dataset))]
    faps_df['faps'] = faps_df['faps'].apply(lambda x: np.array(x))
    return faps_df

def make_rdm_and_mds(name, with_goals=False):
    model = utils.load_object(name, 1)  # e.g. 'noise_test2'
    if with_goals:
        hidden = pnashierarchy.accuracy_test_with_goals(model)
    else:
        hidden = accuracy_test(model, noise)
    # Turn into a list of simple vectors
    for i, tensor in enumerate(hidden):
        hidden[i] = tensor.numpy().reshape(-1)

    rdmatrix = analysis.rdm_spearman(hidden)
    labels = []
    for i, sequence in enumerate(pnas2018task.seqs):
        for action in sequence[1:]:
            labels.append(str(i) + '_' + action)
    analysis.plot_rdm(rdmatrix, labels, "Spearman rho matrix")

    for i in range(4):
        mdsy = analysis.mds(hidden[6 * i:6 * i + 6])
        analysis.plot_mds_points(mdsy, range(len(mdsy)),
                                 labels=labels[6 * i:6 * i + 6])

    mdsy = analysis.mds(hidden)
    for i, style in enumerate(['ro-', 'b|--', 'gx-.', 'k_:']):
        analysis.plot_mds_points(mdsy[6 * i:6 * i + 6], range(6),
                                 labels=labels[6 * i:6 * i + 6], style=style,
                                 show=(i == 3))

def read_score(folder, dat_id, descr_id, clf_id):
    """Read test scores from a file and compute the average value.

    Parameters
    ----------
    folder : string
        Full path of the folder where data are saved.
    dat_id : string
        Short name of a dataset.
    descr_id : string
        Short name of a descriptor.
    clf_id : string
        Short name of a classifier.

    Returns
    -------
    ts_avg : float
        Average of test scores (as a percentage), or None if the results
        file does not exist.
    """
    result_path = utils.filepath(folder, dat_id, descr_id, clf_id)
    if os.path.isfile(result_path):
        result = utils.load_object(result_path)
        test_scores = [ts for _, ts in result]
        ts_avg = 100 * np.mean(test_scores)
        return ts_avg
    else:
        return None

def get_user_embedding(sess, user_id):
    # If user_to_behave has already been computed, load it from disk
    path_user_to_behave = os.path.join(os.path.dirname(config.save_path),
                                       'user_to_behave.pkl')
    if os.path.isfile(path_user_to_behave):
        user_to_behave = load_object(path_user_to_behave)
    else:
        user_to_behave = get_user_behave()
    # Get embeddings
    embeddings = get_embeddings(sess)
    # Get embeddings for each event
    embed_events = []
    for event in user_to_behave[user_id]:
        embed_event = []
        for feature in range(len(event)):
            num_cat_value = config.feature_desc[feature]
            if num_cat_value == 1:
                embed_event.append(list(event[feature]))
            else:
                embed_event.append(embeddings[num_cat_value][event[feature]])
        embed_events.append(embed_event)
    return embed_events

def load(model_root, model_name):
    path = os.path.join(model_root, model_name)
    loss = utils.load_object(os.path.join(path, model_name + "_loss"))
    params = utils.load_object(os.path.join(path, model_name + "_params"))
    vocab_x = utils.load_object(os.path.join(path, model_name + "_vocab_x"))
    vocab_y = utils.load_object(os.path.join(path, model_name + "_vocab_y"))
    model = EmbedAlignModel(vocab_x, vocab_y, params["embedding_dim"],
                            random_state=params["random_state"],
                            use_cuda=params["use_cuda"])
    model.load_state_dict(torch.load(os.path.join(path, model_name)))
    return (model, loss, params)

def aggregate_participants_by_mean():
    dfs = load_object("data_processed/subdata_pr_su.pkl")[0]
    averages_df = []
    for df in dfs:
        averages_per_variable = pd.DataFrame(df).mean(axis=0)
        averages_df.append(averages_per_variable)
    return pd.DataFrame(averages_df)

class Config:
    main_path = os.path.abspath("/home/mikey/Data/POETdataset/PascalImages/")
    class_names = [
        'dog', 'aeroplane', 'boat', 'bicycle', 'cat', 'cow', 'diningtable',
        'horse', 'motorbike', 'sofa'
    ]
    IMG_SIZE = 60 * 4 * 2
    PATCH_WIDTH = 120
    SMALLER_IMG_SIZE = 150
    T = int(IMG_SIZE / PATCH_WIDTH) ** 2
    new_dir = 'soft_attention_features_' + str(PATCH_WIDTH)
    new_dir_img = 'soft_attention_images_' + str(PATCH_WIDTH)
    name_to_class_dict = {
        class_name: i for i, class_name in enumerate(class_names)
    }
    train_ids, test_ids = (utils.load_object('../train_ids.pkl'),
                           utils.load_object('../test_ids.pkl'))

def main(bo_input_directory, random_input_directory, output_directory):
    BO_iter = load_object(bo_input_directory + '/iterations.dat')
    random_iter = load_object(random_input_directory + '/iterations.dat')  # 1.654
    BO_vals = load_object(bo_input_directory + '/best_vals.dat')  # 0.682
    random_vals = load_object(random_input_directory + '/best_vals.dat')

    plt.figure()
    plt.plot(BO_iter, BO_vals, label='Bayesian Optimisation')
    plt.plot(random_iter, random_vals, label='Random Sampling')
    plt.xlabel('Number of Function Evaluations')
    plt.ylabel('Best Feasible Objective Function Value')
    plt.legend()
    pylab.savefig(output_directory + "/BO_vs_random.png")

def get_iterator(self, path):
    dataset = [
        {
            'sentence': torch.from_numpy(data_point['sentence']),
            'score': torch.from_numpy(data_point['score']),
        }
        for data_point in load_object(path)
    ]
    return DataLoader(dataset, batch_size=args.batch_size,
                      num_workers=args.n_cpus)

def run_model1_combined():
    # COMBINED #
    # num_training_steps = 100000
    # nnet = nn.ElmanGoalNet(size_hidden=15, initialization=nn.UNIFORM,
    #                        size_goal1=0, size_goal2=0,
    #                        size_observation=len(task.symbols),
    #                        size_action=len(task.symbols),
    #                        learning_rate=0.01, algorithm=nn.ADAM)
    # nnet.L2_regularization = 0.00001
    # train_all(nnet, num_training_steps)
    # utils.save_object("cogloadtasknet", nnet)
    nnet = utils.load_object("cogloadtasknet")
    generate_rdm_all(nnet, name="cogloadtasknet")

def main():
    # Load configuration
    config = Config()

    # Parse user_list representations
    user_list, user_ids = [], []
    with open(config.rep_path, 'r') as data_file:
        lines = data_file.readlines()
        for line in lines:
            user_ = line.split(':')[1].replace('[', '').replace(']"}', '').split()
            id_ = line.split(':')[0].replace('{', '').replace('"', '')
            user = [float(u) for u in user_[1:len(user_)]]
            user_list.append(user)
            user_ids.append(id_)
    user_list = np.array(user_list)

    # If t-SNE has already been run, load the cached result
    path_user_tsne = os.path.join(os.path.dirname(config.save_path), 'user_tsne')
    if os.path.isfile(path_user_tsne):
        user_tsne = load_object(path_user_tsne)
    else:
        # Run TSNE
        model = TSNE(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        user_tsne = model.fit_transform(user_list)
        # Save TSNE objects
        print("Save user_tsne.")
        save_object(user_tsne, 'save/user_tsne')

    # Run DBSCAN
    db = DBSCAN(eps=3, min_samples=50, algorithm='brute').fit(user_tsne)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

    # Save clustering results
    save_object(user_ids, 'save/user_ids_db')
    save_object(labels, 'save/labels_db')

    # Draw the clustering
    unique_labels = set(labels)
    colors = plt.get_cmap('Spectral')(np.linspace(0, 1, len(unique_labels)))
    for k, col in zip(unique_labels, colors):
        if k == -1:
            # -1 marks noise points in DBSCAN; skip them
            continue
        class_member_mask = (labels == k)
        xy = user_tsne[class_member_mask & core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=6)
        xy = user_tsne[class_member_mask & ~core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=3)
    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.show()

def classify(text):
    driver = Processor()
    doc = Document(raw_text=text)
    driver.process_document(doc)
    driver.clf = utils.load_object('saved_classifier-367')
    class_label = driver.clf.classify(doc)
    subclass_label = driver.clf.subclassify(doc, class_label)
    labels = [class_label, subclass_label]
    print('Labels:')
    print(labels)
    return labels

def make_rdm_and_mds(name):
    model = utils.load_object(name)  # e.g. 'noise_test2'
    hidden = get_model_hidden_activations(model)
    rdmatrix = analysis.rdm_spearman(hidden)
    labels = []
    for goal in tce.action_list:
        for action in tce.action_list[goal]:
            labels.append(goal + '_' + action)
    analysis.plot_rdm(rdmatrix, labels, "Spearman rho matrix")
    mdsy = analysis.mds(hidden)
    analysis.plot_mds_points(mdsy, range(len(mdsy)), labels=labels)

def main(_):
    # Rebuild the graph
    def_graph = tf.Graph().as_default()
    auto_encoder = AutoEncoder(config)
    auto_encoder.build_encoder(config.feature_desc)

    # Create session
    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    # Load the auto-encoding model from the latest checkpoint
    saver = tf.train.Saver(tf.all_variables())
    ckpt = tf.train.get_checkpoint_state('save')
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)

    # Analyse DBSCAN results on t-SNE
    user_ids_db = np.array(load_object('save/user_ids_db'))
    labels_db = load_object('save/labels_db')
    user_ids1 = user_ids_db[(labels_db == 2)][0:30]
    user_ids2 = user_ids_db[(labels_db == 6)][0:30]
    cluster1 = cluster_feature_analysis(sess, user_ids1)
    cluster2 = cluster_feature_analysis(sess, user_ids2)
    save_object(cluster1, 'save/cluster1_db')
    save_object(cluster2, 'save/cluster2_db')

    # Analyse K-means results on the representations
    user_ids_km = np.array(load_object('save/user_ids_km'))
    labels_km = load_object('save/labels_km')
    user_ids1 = user_ids_km[(labels_km == 2)][0:30]
    user_ids2 = user_ids_km[(labels_km == 6)][0:30]
    cluster1 = cluster_feature_analysis(sess, user_ids1)
    cluster2 = cluster_feature_analysis(sess, user_ids2)
    save_object(cluster1, 'save/cluster1_km')
    save_object(cluster2, 'save/cluster2_km')

def make_rdm_noisy(name, num_networks, noise, num_runs_per_network=10,
                   title="-", save_files=True, skips=[],
                   rdm_type=analysis.SPEARMAN):
    # Make one rdm for each network
    rdmatrices = []
    for i in range(num_networks + len(skips)):
        if i in skips:
            continue
        model = utils.load_object(name, i)
        hiddens = []
        for j in range(num_runs_per_network):
            hidden, _ = accuracy_test(model, name=str(i), noise=noise)
            for k, tensor in enumerate(hidden):
                hidden[k] = tensor.numpy().reshape(-1)
            hiddens.append(hidden)
        rdmatrix = analysis.rdm_noisy_mahalanobis(hiddens)
        rdmatrices.append(rdmatrix)

    # Now average over all matrices
    avg_matrix = None
    for matrix in rdmatrices:
        if avg_matrix is None:
            avg_matrix = matrix
        else:
            avg_matrix += matrix
    avg_matrix = avg_matrix / num_networks

    name = name + '_' + rdm_type
    np.savetxt(name + "_rdm_mat.txt", avg_matrix, delimiter="\t", fmt='%.2e')
    labels = []
    for i, sequence in enumerate(pnas2018task.seqs):
        for action in sequence[1:]:
            labels.append(str(i) + '_' + action)

    analysis.plot_rdm(avg_matrix, labels, title + " spearman rho matrix")
    if save_files:
        plt.savefig(name + '_rdm')
    plt.clf()

    mdsy = analysis.mds(avg_matrix)
    for i, style in enumerate(['ro-', 'b|--', 'gx-.', 'k_:']):
        analysis.plot_mds_points(mdsy[6 * i:6 * i + 6], range(6),
                                 labels=labels[6 * i:6 * i + 6], style=style)
    plt.title(title)
    if save_files:
        plt.savefig(name + '_mds')
    plt.clf()
    return avg_matrix

def make_rdm_multiple_predictive(name, num_networks, title="-", save_files=True):
    # Make one rdm for each network
    optimal_list = []
    rdmatrices = []
    for i in range(num_networks):
        model = utils.load_object(name, i)
        hidden, optimal = accuracy_test_predictive(model, i)
        optimal_list.append(optimal)
        if optimal:
            # Turn into a list of simple vectors
            for k, tensor in enumerate(hidden):
                hidden[k] = tensor.numpy().reshape(-1)
            rdmatrix = analysis.rdm_spearman(hidden)
            rdmatrices.append(rdmatrix)
    print("{0} networks, of which {1} achieve optimal accuracy".format(
        num_networks, optimal_list.count(True)))

    # Now average over all matrices
    avg_matrix = None
    for matrix in rdmatrices:
        if avg_matrix is None:
            avg_matrix = matrix
        else:
            avg_matrix += matrix
    avg_matrix = avg_matrix / len(rdmatrices)

    # Delete the unwanted rows and columns:
    # avg_matrix = np.delete(avg_matrix, [0, 6, 12], 0)
    # avg_matrix = np.delete(avg_matrix, [0, 6, 12], 1)
    nps = 6  # number of elements per sequence
    if save_files:
        np.savetxt(name + ".csv", avg_matrix, delimiter=",")
    labels = []
    for i, sequence in enumerate(pnas2018task.seqs):
        for action in sequence[0:-1]:
            labels.append(str(i) + '_' + action)

    analysis.plot_rdm(avg_matrix, labels, title + " spearman rho matrix")
    if save_files:
        plt.savefig(name + '_rdm')
    plt.clf()

    mdsy = analysis.mds(avg_matrix)
    for i, style in enumerate(['ro-', 'b|--', 'gx-.']):
        analysis.plot_mds_points(mdsy[nps * i:nps * i + nps], range(nps),
                                 labels=labels[nps * i:nps * i + nps], style=style)
    plt.title(title)
    if save_files:
        plt.savefig(name + '_mds')
    plt.clf()
    return avg_matrix

def from_settings(cls, settings, engine):
    """Build middleware pipeline from settings"""
    mwlist = cls._get_mwlist_from_settings(settings)
    middlewares = []
    for clspath in mwlist:
        try:
            mwcls = utils.load_object(clspath)
            if hasattr(mwcls, 'from_settings'):
                mw = mwcls.from_settings(settings, engine)
            else:
                mw = mwcls(engine)
            middlewares.append(mw)
        except NotConfigured as e:
            if e.args:
                clsname = clspath.split('.')[-1]
                log.msg('Disabled %s: %s' % (clsname, e.args[0]))

def run_model1_ari():
    # ARI #
    num_training_steps = 10000
    nnet = nn.ElmanGoalNet(size_hidden=15, initialization=nn.UNIFORM,
                           size_goal1=0, size_goal2=0,
                           size_observation=len(task.symbols),
                           size_action=len(task.symbols),
                           learning_rate=0.01, algorithm=nn.ADAM)
    nnet.L2_regularization = 0.00001
    train_ari(nnet, num_training_steps)
    utils.save_object("cogloadtasknet_ari", nnet)
    nnet = utils.load_object("cogloadtasknet_ari")
    generate_rdm_ari(nnet, name="cogloadtasknet_ari")

def get_features(folder, dataset, descriptor):
    """Return texture features for a single dataset and descriptor.

    Parameters
    ----------
    folder : string
        Full path of the folder where data are saved.
    dataset : texdata.TextureDataset
        Object that encapsulates data of a texture dataset.
    descriptor : hep.HEP
        Object that encapsulates data of a texture descriptor.

    Returns
    -------
    X : array
        Texture features. The number of rows is equal to the number of
        samples and the number of columns is equal to the dimensionality
        of the feature space. If an error occurs within the call to
        `apply_descriptor`, returns None.
    """
    multiscale_features = []
    dataset_id = dataset.acronym
    for rad in descriptor.radius:
        # Compute (or load from disk) single-scale features for each radius
        descr_single = copy.deepcopy(descriptor)
        descr_single.radius = [rad]
        descr_single_id = descr_single.abbrev()
        feat_path = utils.filepath(folder, dataset_id, descr_single_id)
        if os.path.isfile(feat_path):
            X = utils.load_object(feat_path)
        else:
            print(f'Computing {dataset_id}--{descr_single_id}')
            if hasattr(descr_single, 'components'):
                X = concatenate_feats(folder, dataset, descr_single)
            else:
                X = apply_descriptor(dataset, descr_single)
            if X is not None:
                utils.save_object(X, feat_path)
            else:
                break
        multiscale_features.append(X)
    else:
        # The for-else clause runs only if the loop completed without a break:
        # stack the single-scale features from all radii.
        X = np.concatenate(multiscale_features, axis=-1)
    return X

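# The for/else construct used above is easy to misread; a tiny standalone
# illustration of its control flow (hypothetical values, not part of the
# original code):
for value in [1, 2, 3]:
    if value is None:
        break
else:
    print("no break occurred, so the else clause runs")
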
def get_results(args):
    # -get param-stamp
    param_stamp = get_param_stamp_from_args(args)
    # -check whether already run, and if not do so
    if os.path.isfile("{}/dict-{}.pkl".format(args.r_dir, param_stamp)):
        print("{}: already run".format(param_stamp))
    else:
        print("{}: ...running...".format(param_stamp))
        main_cl.run(args)
    # -get average precisions
    fileName = '{}/prec-{}.txt'.format(args.r_dir, param_stamp)
    file = open(fileName)
    ave = float(file.readline())
    file.close()
    # -results-dict
    dict = utils.load_object("{}/dict-{}".format(args.r_dir, param_stamp))
    # -return tuple with the results
    return (dict, ave)

def __init__(self):
    imageproc_cls = utils.load_object(settings.IMAGE_PROCESSOR)
    self.imageproc = imageproc_cls.from_settings(settings, self)
    statsd.Connection.set_defaults(host=settings.STATSD_HOST,
                                   port=settings.STATSD_PORT,
                                   sample_rate=1)

def main():
    # Load configuration
    config = Config()

    # Parse user_list representations
    user_list = []
    user_id_list = []
    with open(config.rep_path, 'r') as data_file:
        lines = data_file.readlines()
        for line in lines:
            user_ = line.split(':')[1].replace('[', '').replace(']"}', '').split()
            user = [float(u) for u in user_[1:len(user_)]]
            user_list.append(user)
            user_id_list.append(line.split(':')[0].replace('{', '').replace('"', ''))
    user_list = np.array(user_list)
    user_id_list = np.array(user_id_list)

    # If t-SNE has already been run, load the cached result
    path_user_tsne = os.path.join(os.path.dirname(config.save_path), 'user_tsne')
    if os.path.isfile(path_user_tsne):
        user_tsne = load_object(path_user_tsne)
    else:
        # Run TSNE
        model = TSNE(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        user_tsne = model.fit_transform(user_list)
        # Save TSNE objects
        print("Save user_tsne.")
        save_object(user_tsne, 'save/user_tsne')

    # Run KMeans clustering
    kmeans = KMeans(init='k-means++', n_clusters=8, n_init=10)
    km = kmeans.fit(user_list)

    # Get cluster labels
    labels = km.labels_
    unique_labels = set(labels)

    # Save clustering results
    save_object(user_id_list, 'save/user_ids_km')
    save_object(labels, 'save/labels_km')

    # Save the cluster_to_user dict
    cluster_to_user = dict()
    for k in unique_labels:
        class_member_mask = (labels == k)
        class_k = user_id_list[class_member_mask]
        cluster_to_user[k] = class_k
    save_object(cluster_to_user, 'save/cluster_to_user')

    # Save the user_to_cluster dict
    user_to_cluster = dict()
    for user, label in zip(user_id_list, labels):
        user_to_cluster[user] = label
    save_object(user_to_cluster, 'save/user_to_cluster')

    # Plot results
    colors = plt.get_cmap('Spectral')(np.linspace(0, 1, len(unique_labels)))
    for k, col in zip(unique_labels, colors):
        class_member_mask = (labels == k)
        xy = user_tsne[class_member_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=3)
    plt.title('KMeans Clustering')
    plt.show()

def get_netflix_details_from_title(title):
    netflix_dict = utils.load_object('netflix_dict')
    unog_json = utils.load_object('unog_details')
    if title in netflix_dict:
        return netflix_dict[title]

def get_user_of_cluster(cluster_id):
    cluster_to_user = load_object("save/cluster_to_user")
    return cluster_to_user[cluster_id]

def past_count():
    if os.path.isfile('unog_details.pkl'):
        return utils.load_object('unog_details')['COUNT']
    else:
        return 0