def generate_rdm_all(nnet, name, rdm_type=analysis.SPEARMAN, save_files=True,
                     title="RDM training combined", from_file=False,
                     delete_blank_states=True, collapse_rdm=True):
    if not from_file:
        if rdm_type != analysis.SPEARMAN:
            raise Exception("not implemented")
        hidden_both, accuracy_totals_both, accuracy_fullseqs_both = test_network_all(nnet)
        hidden_ari, accuracy_totals_ari, accuracy_fullseqs_ari = test_network_ari(nnet, blanks=True)
        hidden_bev, accuracy_totals_bev, accuracy_fullseqs_bev = test_network_bev(nnet, blanks=True)
        print("Both: {0}, {1}".format(accuracy_totals_both, accuracy_fullseqs_both))
        print("Ari: {0}, {1}".format(accuracy_totals_ari, accuracy_fullseqs_ari))
        print("Bev: {0}, {1}".format(accuracy_totals_bev, accuracy_fullseqs_bev))
        hidden = utils.flatten_onelevel(hidden_bev) +\
                 utils.flatten_onelevel(hidden_ari) +\
                 utils.flatten_onelevel(hidden_both)
        rdmatrix = analysis.rdm_euclidian(hidden)
        utils.save_object(name + "rdmat", rdmatrix)
    else:
        rdmatrix = utils.load_object(name + "rdmat")
    return model2.process_matrix(rdmatrix, delete_blank_states)

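# All of the snippets in this collection persist Python objects through small
# `save_object` / `load_object` helpers whose implementation is not shown here;
# the argument order even differs between projects (some pass the path first,
# others the object first). The following is a minimal sketch assuming a plain
# pickle round-trip with a path-first signature -- an illustration, not the
# implementation used by any of these projects.
import pickle

def save_object(path, obj):
    # Serialize `obj` to disk at `path`.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)

def load_object(path):
    # Restore and return the object previously saved at `path`.
    with open(path, 'rb') as f:
        return pickle.load(f)
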
def results():
    if request.method == 'GET':
        data = utils.load_object("data.pkl")
    elif request.method == 'POST':
        if 'file' not in request.files:
            print('No file part')
        file = request.files['file']
        if file.filename == '':
            print('No selected file')
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            raw_reviews_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            print(raw_reviews_path)
            data = main.main(raw_reviews_path)
            utils.save_object(data, "data.pkl")
    new_data = get_processed_data(data)
    return render_template('results.html',
                           bar_chart=new_data['bar_chart'],
                           pie_chart=new_data['pie_chart'],
                           ratings=data[5],
                           stars=new_data['stars'],
                           table=zip(new_data['markup_sents'], new_data['sentiment_colors']),
                           categories=Const.CATEGORIES,
                           tuples=new_data['pretty_tuples'],
                           )

def train_and_save(num_models, name, num_episodes):
    for i in range(num_models):
        model = nn.ElmanGoalNet()
        train_supervised_teacoffeeenv(model, num_episodes)
        utils.save_object(name, model)
        print('Trained and saved model #{0} of {1}\n'.format(i + 1, num_models))

def benchmark_cost_function(data_sizes):
    polygon = pickle.load(open('utils/good_path_for_rectangle.pickle', 'rb'))
    save_folder = generate_folder_name()
    iterations = 10
    results = np.empty((len(data_sizes), iterations, 2), dtype=float)
    for i, dsize in enumerate(data_sizes):
        data = dg.generate_rectangle_set(dsize)
        print("\nRun {} with value {}".format(i + 1, dsize))
        # Compile functions and warm up GPU
        acoc.cost_function_gpu(data.T, polygon)
        for j in range(iterations):
            utils.print_on_current_line('Iteration {}/{}'.format(j, iterations))
            start_cpu = time.clock()
            acoc.cost_function(data.T, polygon)
            end_cpu = time.clock()
            results[i][j][0] = end_cpu - start_cpu

            start_gpu = time.clock()
            acoc.cost_function_gpu(data.T, polygon)
            end_gpu = time.clock()
            results[i][j][1] = end_gpu - start_gpu
    mean_results = np.mean(results, axis=1).T
    acoc_plotter.plot_bar_chart_gpu_benchmark(mean_results, data_sizes,
                                              ['CPython', 'GPU'], save_folder, 'results')
    np.set_printoptions(precision=7, suppress=False)
    print("\nResults: \n{}".format(mean_results))
    utils.save_object(mean_results, save_folder, 'results')

def build_netflix_details_dict(unog_json):
    # TODO: get details of countries available in
    unog_dict = {}
    for movie in unog_json['ITEMS']:
        unog_dict[movie[1]] = movie[0]
    utils.save_object(unog_dict, 'netflix_dict')

def create_cls(img_path, ae_path, out_path):
    img_frame = images.read_image_frame(img_path)
    n_cats = data.get_n_cats(img_frame)
    print(n_cats)
    cls = comp.create_extractor(n_cats, ae_path)
    # cls = nn.built_nn_cls(n_cats)
    cls = deep.learning_iter(img_frame, cls, n_epochs=1000)
    utils.save_object(cls.get_model(), out_path)
    return cls

def create_cls(in_path, ae_path, out_path):
    imgs = data.read_image_frame(in_path)
    X = imgs['Images'].tolist()
    y = imgs['Category'].tolist()
    n_cats = max(y) + 1
    cls = comp.create_extractor(n_cats, ae_path)
    deep.learning_iter_super(cls, X, y, n_epochs=1000)
    utils.save_object(cls.get_model(), out_path)
    return cls

def save_estimators(self, save_path=Const.CE_ROOT + 'model/ann/best_{}.model'):
    ann_sklearn_model_index = len(self.pipeline.steps) - 1
    estimators = self.pipeline.steps[ann_sklearn_model_index][1].estimators_
    for i, estimator in enumerate(estimators):
        estimator.model.save(save_path.format(i))
    utils.save_object(
        self.pipeline.steps[ann_sklearn_model_index][1].label_binarizer_,
        save_path.format('labelbinarizer'))

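# A matching loader for `save_estimators` above would rebuild the per-output
# Keras models and the fitted label binarizer. Sketch only: it assumes the
# estimators were saved with Keras's `save` and that `utils.load_object`
# mirrors `utils.save_object`; `load_estimators` and its `n_estimators`
# parameter are hypothetical names, not part of the source.
from tensorflow.keras.models import load_model

def load_estimators(n_estimators, save_path=Const.CE_ROOT + 'model/ann/best_{}.model'):
    # Load each per-output Keras model, then the pickled label binarizer.
    models = [load_model(save_path.format(i)) for i in range(n_estimators)]
    label_binarizer = utils.load_object(save_path.format('labelbinarizer'))
    return models, label_binarizer
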
def create_sda(in_path, out_path, cls_config_path):
    imgs = data.read_image_frame(in_path)
    X = imgs['Images'].tolist()
    y = imgs['Category'].tolist()
    n_cats = max(y) + 1
    hyper_params = tools.read_hyper_params(cls_config_path)
    hyper_params['n_out'] = n_cats
    cls = sda.built_sda_cls(hyper_params)
    deep.learning_iter_super(cls, X, y, n_epochs=1000)
    utils.save_object(cls.model, out_path)
    return cls

def save_reduction(in_path, out_path, nn_path, csv=False):
    dataset = load_data(in_path, 1)
    dataset = [inst for inst in dataset]
    autoencoder = AutoEncoderReduction(nn_path)
    projected = autoencoder.transform(dataset)
    print("Save to file")
    utils.save_object(out_path, projected)
    if csv:
        print("Save to csv file")
        csv_path = out_path.replace(".obj", ".csv")
        utils.to_csv_file(csv_path, projected)

def train_multiple(number, filename, from_file=False):
    # train the models
    if not from_file:
        for i in range(number):
            print(i)
            model, _ = train()
            accuracy_test(model)
            utils.save_object(filename, model)
    # make the rdms
    make_rdm_multiple(filename, number)

def create_time_series(conf, dim=0):
    action_path = conf['action']
    cls_path = conf['cls_ts']
    cls_config = conf['cls_config']
    out_path = conf['series']
    actions = data.read_actions(action_path)
    extractor = sda.read_sda(cls_path, cls_config)
    all_t_series = [make_action_ts(extractor, action, dim) for action in actions]
    utils.make_dir(out_path)
    for action_ts in all_t_series:
        full_path = out_path + action_ts.name
        utils.save_object(action_ts, full_path)

def generate_rdm_all_gradient(nnet, name, blanks, rdm_type=analysis.SPEARMAN,
                              save_files=True, title="RDM training combined",
                              from_file=False, delete_blank_states=True):
    if not from_file:
        if rdm_type != analysis.SPEARMAN:
            raise Exception("not implemented")
        hidden_both, accuracy_totals_both, accuracy_fullseqs_both = test_network_all(nnet)
        hidden_ari, accuracy_totals_ari, accuracy_fullseqs_ari = test_network_ari(nnet, blanks)
        hidden_bev, accuracy_totals_bev, accuracy_fullseqs_bev = test_network_bev(nnet, blanks)
        print("Both: {0}, {1}".format(accuracy_totals_both, accuracy_fullseqs_both))
        print("Ari: {0}, {1}".format(accuracy_totals_ari, accuracy_fullseqs_ari))
        print("Bev: {0}, {1}".format(accuracy_totals_bev, accuracy_fullseqs_bev))
        hidden = utils.flatten_onelevel(hidden_bev) +\
                 utils.flatten_onelevel(hidden_ari) +\
                 utils.flatten_onelevel(hidden_both)
        # Now cut the hidden layer in two.
        hidden_left = []
        hidden_right = []
        for vector in hidden:
            hidden_left.append(vector[:len(vector) // 2])
            hidden_right.append(vector[len(vector) // 2:])
        rdmatrix_left = analysis.rdm_spearman(hidden_left)
        rdmatrix_right = analysis.rdm_spearman(hidden_right)
        # Save the massive RDM for debug purposes (so that I don't have to
        # generate it all over again every time).
        utils.save_object(name + "rdmatright", rdmatrix_right)
        utils.save_object(name + "rdmatleft", rdmatrix_left)
    else:
        rdmatrix_left = utils.load_object(name + "rdmatleft")
        rdmatrix_right = utils.load_object(name + "rdmatright")
    rdmatrix_left, labels = model2.process_matrix(rdmatrix_left, delete_blank_states)
    rdmatrix_right, _ = model2.process_matrix(rdmatrix_right, delete_blank_states)
    return rdmatrix_left, rdmatrix_right, labels

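# For context: a representational dissimilarity matrix (RDM) over a list of
# hidden-state vectors can be computed as pairwise 1 - Spearman correlation.
# A minimal sketch of what `analysis.rdm_spearman` might do, assuming scipy is
# available; this is an illustration, not the project's implementation.
import numpy as np
from scipy.stats import spearmanr

def rdm_spearman(vectors):
    # vectors: list of equal-length 1-D arrays of hidden activations.
    n = len(vectors)
    rdm = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            rho, _ = spearmanr(vectors[i], vectors[j])
            rdm[i, j] = 1.0 - rho  # dissimilarity: 0 = identical rank order
    return rdm
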
def get_unog_json():
    s = requests.session()
    r = s.get(base_url)
    php_session_cookie = r.cookies['PHPSESSID']
    # Collect the total movies available "COUNT"
    r = s.get(unog_url.format(php_session_cookie=php_session_cookie, count=1),
              headers=headers)
    count = r.json()['COUNT']
    if int(count) > past_count():
        # re-fire request to get all available movies
        r = s.get(unog_url.format(php_session_cookie=php_session_cookie, count=count),
                  headers=headers)
        utils.save_object(r.json(), 'unog_details')
        build_netflix_details_dict(r.json())

def train_model(dataframe, feature_names):
    X = dataframe
    y = dataframe['y'].values.reshape(-1,)
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=11)
    grid_search_parameters = {'max_features': [15, 20]}
    final_clf = grid_search_random_forest(X_train[feature_names].copy(), y_train,
                                          grid_search_parameters, cv=5, n_jobs=-1)
    save_object(final_clf, directory_config['root_dir'],
                directory_config['model_object_dir'],
                directory_config['model_object_name'])
    train_with_pred, test_with_pred = get_datasets_with_predictions(
        final_clf, X_train.copy(), X_test.copy(), feature_names)
    train_with_pred.to_csv(directory_config['root_dir'] + directory_config['train_predictions_dir'] +
                           directory_config['model_object_name'] + '_train_predictions' + '.csv',
                           index=False)
    test_with_pred.to_csv(directory_config['root_dir'] + directory_config['test_predictions_dir'] +
                          directory_config['model_object_name'] + '_test_predictions' + '.csv',
                          index=False)
    print("Evaluation on train")
    model_evaluation(train_with_pred)
    print("Evaluation on test")
    model_evaluation(test_with_pred)
    return final_clf, train_with_pred, test_with_pred

def save(self, file_name, artifact):
    if isinstance(artifact, NeuralNetwork):
        base = os.path.join(self.__root_dir, self.__models_dir)
        save_object(os.path.join(base, file_name + '.cfg'),
                    (artifact.get_input_metadata(), artifact.get_output_metadata()))
        artifact.get_model().save_weights(os.path.join(base, file_name + '.wt'))
    elif isinstance(artifact, Experiment):
        basedir = os.path.join(self.__root_dir, self.__experiments_dir)
        save_object(os.path.join(basedir, file_name + '.exp'), artifact)
    else:
        raise Exception('Attempt to save unsupported artifact.')

def run_model1_ari():
    # ARI #
    num_training_steps = 10000
    nnet = nn.ElmanGoalNet(size_hidden=15, initialization=nn.UNIFORM,
                           size_goal1=0, size_goal2=0,
                           size_observation=len(task.symbols),
                           size_action=len(task.symbols),
                           learning_rate=0.01, algorithm=nn.ADAM)
    nnet.L2_regularization = 0.00001
    train_ari(nnet, num_training_steps)
    utils.save_object("cogloadtasknet_ari", nnet)
    nnet = utils.load_object("cogloadtasknet_ari")
    generate_rdm_ari(nnet, name="cogloadtasknet_ari")

def save_model(self, model_root, model_name, loss, params):
    path = os.path.join(model_root, model_name)
    utils.mkdir(path)
    torch.save(self.state_dict(), os.path.join(path, model_name))
    utils.save_object(self.vocab_x, os.path.join(path, model_name + "_vocab_x"))
    utils.save_object(self.vocab_y, os.path.join(path, model_name + "_vocab_y"))
    utils.save_object(loss, os.path.join(path, model_name + "_loss"))
    utils.save_object(params, os.path.join(path, model_name + "_params"))

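# A matching `load_model` for the method above would reverse the saves: restore
# the state dict, then the pickled vocabularies. Sketch only; it assumes
# `utils.load_object` mirrors `utils.save_object`, and the standalone function
# name and signature are hypothetical, not taken from the source.
import os
import torch

def load_model(model, model_root, model_name):
    path = os.path.join(model_root, model_name)
    # Restore network weights, then the serialized vocabularies.
    model.load_state_dict(torch.load(os.path.join(path, model_name)))
    model.vocab_x = utils.load_object(os.path.join(path, model_name + "_vocab_x"))
    model.vocab_y = utils.load_object(os.path.join(path, model_name + "_vocab_y"))
    return model
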
def get_features(folder, dataset, descriptor):
    """Return texture features for a single dataset and descriptor.

    Parameters
    ----------
    folder : string
        Full path of the folder where data are saved.
    dataset : texdata.TextureDataset
        Object that encapsulates data of a texture dataset.
    descriptor : hep.HEP
        Object that encapsulates data of a texture descriptor.

    Returns
    -------
    X : array
        Texture features. The number of rows is equal to the number of
        samples and the number of columns is equal to the dimensionality
        of the feature space. If an error occurs within the call to
        `apply_descriptor`, returns None.
    """
    multiscale_features = []
    dataset_id = dataset.acronym
    for rad in descriptor.radius:
        descr_single = copy.deepcopy(descriptor)
        descr_single.radius = [rad]
        descr_single_id = descr_single.abbrev()
        feat_path = utils.filepath(folder, dataset_id, descr_single_id)
        if os.path.isfile(feat_path):
            X = utils.load_object(feat_path)
        else:
            print(f'Computing {dataset_id}--{descr_single_id}')
            if hasattr(descr_single, 'components'):
                X = concatenate_feats(folder, dataset, descr_single)
            else:
                X = apply_descriptor(dataset, descr_single)
            if X is not None:
                utils.save_object(X, feat_path)
            else:
                break
        multiscale_features.append(X)
    else:
        X = np.concatenate(multiscale_features, axis=-1)
    return X

def __init__(self, nodes_file, edges_file, org_id):
    self.nodes_file = nodes_file
    self.edges_file = edges_file
    self.org_id = org_id
    self.file_name = 'organism-{}.bak'.format(org_id)
    node_data = utils.load_json(nodes_file)
    self.id_to_node = {ind: node for ind, node in enumerate(node_data)}
    self.node_to_id = {node: ind for ind, node in enumerate(node_data)}
    edge_data = utils.load_json(edges_file)
    # dimensions of Incidence Matrix
    self.node_count = len(node_data)
    # self.edge_count = len(edge_data)
    self.dimensions = (len(node_data), len(edge_data))
    # # incidence matrix would be too big -> ignored
    # self.incidence = np.zeros(dimensions)
    self.edges = set()
    self.adjacency = np.zeros((self.node_count, self.node_count))
    for index, edge in enumerate(edge_data):
        n1 = self.node_to_id[edge[0]]
        n2 = self.node_to_id[edge[1]]
        self.edges.add((min(n1, n2), max(n1, n2)))
        # ignore edge weights
        self.adjacency[n1][n2] = 1
        self.adjacency[n2][n1] = 1
    self.degree = sum(self.adjacency)
    # # P = D^-1 * A
    # self.transition = self.adjacency / self.degree
    message = ('{} - Organism imported successfully').format(org_id)
    utils.print_log(message)
    message = ('{} - number of nodes and edges = {}').format(org_id, self.dimensions)
    utils.print_log(message)
    utils.save_object(self, self.file_name)

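# The commented-out transition matrix above (P = D^-1 * A) can be formed by
# row-normalizing the adjacency matrix by node degree. A minimal standalone
# sketch assuming numpy, with a guard for isolated nodes; this is an
# illustration, not part of the original class.
import numpy as np

def transition_matrix(adjacency):
    # Row-normalize the adjacency matrix; rows with degree 0 stay all-zero.
    degree = adjacency.sum(axis=1)
    safe_degree = np.where(degree == 0, 1, degree)
    return adjacency / safe_degree[:, None]
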
def save_subset(data_train, targets_train, data_test, targets_test, baseline_targets, config):
    # Now we want to save all the data
    if not os.path.exists(config['save_folder']):
        os.makedirs(config['save_folder'])

    # Now we immediately divide the data into subsets
    nmbr_columns = len(config['columns'])
    total_columns = len(data_train[0].iloc[0])
    # Some values that we need to determine until where the columns go
    nmbr_ma = nmbr_columns * 3
    nmbr_tsfp = nmbr_columns * 787  # note that this only works if you use the comprehensive tsfresh pack
    subset_names = ['m', 'pr', 'pr_su', 'pr_su_bf', 'pr_su_bf_ma']  # 'pr_su_bf_ma_tsfp', 'pr_su_bf_ma_tsfp_tsfd'
    subset_indices = [(0, 1), (0, 4), (0, 20), (0, 43), (0, 43 + nmbr_ma)]  # (0, 43 + nmbr_ma + nmbr_tsfp), (0, total_columns)
    for name, indices in zip(subset_names, subset_indices):
        subset = []
        for i in range(len(data_train)):
            subset.append(data_train[i].iloc[:, indices[0]:indices[1]])
        subset_test = []
        for j in range(len(data_test)):
            subset_test.append(data_test[j].iloc[:, indices[0]:indices[1]])
        save_data(subset, targets_train, config['save_folder'] + '/subdata_' + name + '.pkl')
        save_data(subset_test, targets_test, config['save_folder'] + '/subdata_' + name + '_test.pkl')
        save_pandas(subset, targets_train, config['save_folder'] + '/subdata_' + name + '.csv')

    save_data(data_train, targets_train, filename=config['save_folder'] + '/processed_data_basic_train.pkl')
    save_data(data_test, targets_test, filename=config['save_folder'] + '/processed_data_basic_test.pkl')

    # Saves the data to a pandas file before saving it as a pickle object in a different format
    if config['save_panda']:
        save_pandas(data_train, targets_train, filename=config['save_folder'] + '/processed_data_pandas.csv')

    # Now we do the same for the baseline targets
    targets_train, targets_test = split_test(None, baseline_targets, seed=config['seed'],
                                             split=config['test_split'])
    save_object(targets_train, filename=config['save_folder'] + '/baseline_targets_train.pkl')
    save_object(targets_test, filename=config['save_folder'] + '/baseline_targets_test.pkl')

def get_user_behave():
    # The user_to_behave dict
    user_to_behave = dict()
    # Read data chunk by chunk
    with open(config.data_path, 'r') as data_file:
        while True:
            # Get one batch at a time
            exit = True
            lines_gen = islice(data_file, config.batch_size)
            for line in lines_gen:
                user_behave = json.loads(line).values()[0]
                # Remove those users who have less than num_min_event
                if len(user_behave) <= config.num_min_event:
                    continue
                if len(user_behave) < config.num_event + 1:
                    zero_pad = [[0 for _ in range(len(user_behave[0]))]
                                for _ in range(config.num_event - len(user_behave) + 1)]
                    user_behave = user_behave + zero_pad
                elif len(user_behave) > config.num_event + 1:
                    user_behave = user_behave[(len(user_behave) - config.num_event - 1):len(user_behave)]
                # Add to dict
                user_id = json.loads(line).keys()[0]
                user_to_behave[user_id] = np.array(user_behave)[:, list([1, 2, 3, 7, 8, 9, 10, 11, 12, 13])]
                exit = False
            # Batch is empty then exit
            if exit:
                break
    # Save user_to_behave object
    save_object(user_to_behave,
                os.path.join(os.path.dirname(config.save_path), 'user_to_behave.pkl'))
    return user_to_behave

def concatenate_feats(data_folder, dataset, descriptor):
    """Compute features through concatenation of texture models.

    Parameters
    ----------
    data_folder : str
        Full path of the folder where data are saved.
    dataset : texdata.TextureDataset
        Object that encapsulates data of a texture dataset.
    descriptor : hep.HEP
        Object that encapsulates data of a texture descriptor.

    Returns
    -------
    X : array
        Computed features. The number of rows is equal to the number of
        samples and the number of columns is equal to the sum of the
        dimensionalities of the concatenated texture models. If an error
        occurs in the call to `apply_descriptor`, it returns `None`.
    """
    dat_id = dataset.acronym
    params = {k: v for k, v in descriptor.__dict__.items()}
    feats = []
    for component in descriptor.components:
        descr = component(**params)
        descr_id = descr.abbrev()
        feat_path = utils.filepath(data_folder, dat_id, descr_id)
        if os.path.isfile(feat_path):
            X = utils.load_object(feat_path)
        else:
            X = apply_descriptor(dataset, descr)
            if X is not None:
                utils.save_object(X, feat_path)
            else:
                break
        feats.append(X)
    else:
        X = np.concatenate(feats, axis=-1)
    return X

def build_modelling_dataset(dataset, mode, total_features):
    vectorizer = build_vectorizer(mode, total_features)
    output = vectorizer.fit_transform(dataset['ITM_KEY'].tolist())
    save_object(vectorizer, directory_config['root_dir'],
                directory_config['transformer_dir'],
                directory_config['transformer_object_name'])
    output_array = output.toarray()
    output_df = pd.DataFrame(output_array)
    feature_names = vectorizer.get_feature_names()
    output_df.columns = feature_names
    save_object(feature_names, directory_config['root_dir'],
                directory_config['features_list_dir'],
                directory_config['features_list_object_name'])
    modelling_dataset = pd.concat([output_df, dataset], axis=1)
    modelling_dataset.to_csv(
        directory_config['root_dir'] + directory_config['modelling_data_dir'] +
        directory_config['filename'] + '_' +
        directory_config['features_list_object_name'] + '.csv',
        index=False)
    return modelling_dataset, feature_names

def extract_features(data_folder, imgs_folder, args):
    """Compute texture features.

    Check whether features have been already computed. If they haven't,
    extract features from each dataset using each descriptor in `args`
    and save them to disk. If the descriptor is multi-scale, a separate
    file is created for each single value of the radius.

    Parameters
    ----------
    data_folder : string
        Full path of the folder where data are saved.
    imgs_folder : string
        Full path of the folder where texture datasets are stored.
    args : argparse.Namespace
        Command line arguments.
    """
    utils.boxed_text('Extracting features...', symbol='*')
    for dat in gen_datasets(imgs_folder, args.dataset):
        dat_id = dat.acronym
        for descr in gen_descriptors(args):
            for rad in descr.radius:
                descr_rad = copy.deepcopy(descr)
                descr_rad.radius = [rad]
                descr_rad_id = descr_rad.abbrev()
                feat_path = utils.filepath(data_folder, dat_id, descr_rad_id)
                if os.path.isfile(feat_path):
                    print(f'Found {dat_id}--{descr_rad_id}', flush=True)
                else:
                    print(f'Computing {dat_id}--{descr_rad_id}', flush=True)
                    if hasattr(descr_rad, 'components'):
                        X = concatenate_feats(data_folder, dat, descr_rad)
                    else:
                        X = apply_descriptor(dat, descr_rad)
                    if X is not None:
                        utils.save_object(X, feat_path)
                    del X

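# `utils.filepath` builds the cache location for a (dataset, descriptor) pair
# in the three feature-extraction functions above. Its real naming scheme is
# not shown in the source; a plausible minimal sketch, assuming one pickle
# file per pair:
import os

def filepath(folder, dataset_id, descriptor_id, ext='.pkl'):
    # e.g. <folder>/<dataset_id>--<descriptor_id>.pkl
    return os.path.join(folder, '{}--{}{}'.format(dataset_id, descriptor_id, ext))
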
def train_model(algorithm, feats_pct, lang, output, profile_name):
    """ Prepares arguments to train and saves a NodeClassif object

    Arguments:
    ----------
    algorithm:
        type: string
        info: name of the algorithm to train

    feats_pct:
        type: int
        info: percentage of features to keep

    lang:
        type: string
        info: language to perform the tokenizer process

    output:
        type: string
        info: output file name including extension

    profile_name:
        type: string
        info: name of the JSON training profile file
    """
    if (feats_pct < 0) or (feats_pct > 100):
        exit('The specified features percentage is invalid')

    profile_data = read_json(file_name=profile_name, file_type='profile_t')
    node_classif = NodeClassif(
        algorithm=algorithm.lower(),
        feats_pct=feats_pct,
        lang=lang,
    )
    node_classif.train(profile_data)
    save_object(node_classif, output, 'model')

def main():
    # Load configuration
    config = Config()

    # Parse user_list representations
    user_list, user_ids = [], []
    with open(config.rep_path, 'r') as data_file:
        lines = data_file.readlines()
        for line in lines:
            user_ = line.split(':')[1].replace('[', '').replace(']"}', '').split()
            id_ = line.split(':')[0].replace('{', '').replace('"', '')
            user = [float(u) for u in user_[1:len(user_)]]
            user_list.append(user)
            user_ids.append(id_)
    user_list = np.array(user_list)

    # If tsne is already run
    path_user_tsne = os.path.join(os.path.dirname(config.save_path), 'user_tsne')
    if os.path.isfile(path_user_tsne):
        user_tsne = load_object(path_user_tsne)
    else:
        # Run TSNE
        model = TSNE(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        user_tsne = model.fit_transform(user_list)
        # Save TSNE objects
        print "Save user_tsne."
        save_object(user_tsne, 'save/user_tsne')

    # Run DBSCAN
    db = DBSCAN(eps=3, min_samples=50, algorithm='brute').fit(user_tsne)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

    # Save clustering results
    save_object(user_ids, 'save/user_ids_db')
    save_object(labels, 'save/labels_db')

    # Drawing clustering
    unique_labels = set(labels)
    colors = plt.get_cmap('Spectral')(np.linspace(0, 1, len(unique_labels)))
    for k, col in zip(unique_labels, colors):
        if k == -1:
            continue
        class_member_mask = (labels == k)
        xy = user_tsne[class_member_mask & core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=6)
        xy = user_tsne[class_member_mask & ~core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=3)
    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.show()

def main(_):
    # Rebuild the graph
    def_graph = tf.Graph().as_default()
    auto_encoder = AutoEncoder(config)
    auto_encoder.build_encoder(config.feature_desc)

    # Create session
    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    # Load the auto encoding model
    saver = tf.train.Saver(tf.all_variables())
    ckpt = tf.train.get_checkpoint_state('save')
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)

    # Analyse DBScan results on t-sne
    user_ids_db = np.array(load_object('save/user_ids_db'))
    labels_db = load_object('save/labels_db')
    user_ids1 = user_ids_db[(labels_db == 2)][0:30]
    user_ids2 = user_ids_db[(labels_db == 6)][0:30]
    cluster1 = cluster_feature_analysis(sess, user_ids1)
    cluster2 = cluster_feature_analysis(sess, user_ids2)
    save_object(cluster1, 'save/cluster1_db')
    save_object(cluster2, 'save/cluster2_db')

    # Analyse K-means results on reps
    user_ids_km = np.array(load_object('save/user_ids_km'))
    labels_km = load_object('save/labels_km')
    user_ids1 = user_ids_km[(labels_km == 2)][0:30]
    user_ids2 = user_ids_km[(labels_km == 6)][0:30]
    cluster1 = cluster_feature_analysis(sess, user_ids1)
    cluster2 = cluster_feature_analysis(sess, user_ids2)
    save_object(cluster1, 'save/cluster1_km')
    save_object(cluster2, 'save/cluster2_km')

def run(args):
    # Set default arguments & check for incompatible options
    args.lr_gen = args.lr if args.lr_gen is None else args.lr_gen
    args.g_iters = args.iters if args.g_iters is None else args.g_iters
    args.g_fc_lay = args.fc_lay if args.g_fc_lay is None else args.g_fc_lay
    args.g_fc_uni = args.fc_units if args.g_fc_uni is None else args.g_fc_uni
    # -if [log_per_task], reset all logs
    if args.log_per_task:
        args.prec_log = args.iters
        args.loss_log = args.iters
        args.sample_log = args.iters
    # -if [iCaRL] is selected, select all accompanying options
    if hasattr(args, "icarl") and args.icarl:
        args.use_exemplars = True
        args.add_exemplars = True
        args.bce = True
        args.bce_distill = True
    # -if XdG is selected but not the Task-IL scenario, give error
    if (not args.scenario == "task") and args.xdg:
        raise ValueError("'XdG' is only compatible with the Task-IL scenario.")
    # -if EWC, SI or XdG is selected together with 'feedback', give error
    if args.feedback and (args.ewc or args.si or args.xdg or args.icarl):
        raise NotImplementedError(
            "EWC, SI, XdG and iCaRL are not supported with feedback connections.")
    # -if binary classification loss is selected together with 'feedback', give error
    if args.feedback and args.bce:
        raise NotImplementedError(
            "Binary classification loss not supported with feedback connections.")
    # -if XdG is selected together with both replay and EWC, give error (either one of them alone with XdG is fine)
    if args.xdg and (not args.replay == "none") and (args.ewc or args.si):
        raise NotImplementedError(
            "XdG is not supported with both '{}' replay and EWC / SI.".format(args.replay))
        # --> problem is that applying different task-masks interferes with gradient calculation
        #     (should be possible to overcome by calculating backward step on EWC/SI-loss also for each mask separately)
    # -if 'BCEdistill' is selected for other than scenario=="class", give error
    if args.bce_distill and not args.scenario == "class":
        raise ValueError("BCE-distill can only be used for class-incremental learning.")
    # -create plots- and results-directories if needed
    if not os.path.isdir(args.r_dir):
        os.mkdir(args.r_dir)
    if args.pdf and not os.path.isdir(args.p_dir):
        os.mkdir(args.p_dir)

    scenario = args.scenario
    # If Task-IL scenario is chosen with single-headed output layer, set args.scenario to "domain"
    # (but note that when XdG is used, task-identity information is being used so the actual scenario is still Task-IL)
    if args.singlehead and args.scenario == "task":
        scenario = "domain"

    # If only want param-stamp, get it printed to screen and exit
    if hasattr(args, "get_stamp") and args.get_stamp:
        _ = get_param_stamp_from_args(args=args)
        exit()

    # Use cuda?
    cuda = torch.cuda.is_available() and args.cuda
    device = torch.device("cuda" if cuda else "cpu")

    # Set random seeds
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    #-------------------------------------------------------------------------------------------------#

    #----------------#
    #----- DATA -----#
    #----------------#

    # Prepare data for chosen experiment
    (train_datasets, test_datasets), config, classes_per_task = get_multitask_experiment(
        name=args.experiment, scenario=scenario, tasks=args.tasks, data_dir=args.d_dir,
        verbose=True, exception=True if args.seed == 0 else False,
    )
    # print(train_datasets, test_datasets)
    # a = input()

    #-------------------------------------------------------------------------------------------------#

    #------------------------------#
    #----- MODEL (CLASSIFIER) -----#
    #------------------------------#

    # Define main model (i.e., classifier, if requested with feedback connections)
    if args.feedback:
        model = AutoEncoder(
            image_size=config['size'], image_channels=config['channels'],
            classes=config['classes'], fc_layers=args.fc_lay, fc_units=args.fc_units,
            z_dim=args.z_dim, fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False, fc_nl=args.fc_nl,
        ).to(device)
        model.lamda_pl = 1.  # --> to make that this VAE is also trained to classify
    else:
        model = Classifier(
            image_size=config['size'], image_channels=config['channels'],
            classes=config['classes'], fc_layers=args.fc_lay, fc_units=args.fc_units,
            fc_drop=args.fc_drop, fc_nl=args.fc_nl,
            fc_bn=True if args.fc_bn == "yes" else False,
            excit_buffer=True if args.xdg and args.gating_prop > 0 else False,
            binaryCE=args.bce, binaryCE_distill=args.bce_distill,
        ).to(device)

    # Define optimizer (only include parameters that "requires_grad")
    model.optim_list = [{'params': filter(lambda p: p.requires_grad, model.parameters()),
                         'lr': args.lr}]
    model.optim_type = args.optimizer
    if model.optim_type in ("adam", "adam_reset"):
        model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
    elif model.optim_type == "sgd":
        model.optimizer = optim.SGD(model.optim_list)
    else:
        raise ValueError(
            "Unrecognized optimizer, '{}' is not currently a valid option".format(args.optimizer))

    #-------------------------------------------------------------------------------------------------#

    #----------------------------------#
    #----- CL-STRATEGY: EXEMPLARS -----#
    #----------------------------------#

    # Store in model whether, how many and in what way to store exemplars
    if isinstance(model, ExemplarHandler) and (args.use_exemplars or args.add_exemplars
                                               or args.replay == "exemplars"):
        model.memory_budget = args.budget
        model.norm_exemplars = args.norm_exemplars
        model.herding = args.herding

    #-------------------------------------------------------------------------------------------------#

    #-----------------------------------#
    #----- CL-STRATEGY: ALLOCATION -----#
    #-----------------------------------#

    # Elastic Weight Consolidation (EWC)
    if isinstance(model, ContinualLearner):
        model.ewc_lambda = args.ewc_lambda if args.ewc else 0
        if args.ewc:
            model.fisher_n = args.fisher_n
            model.gamma = args.gamma
            model.online = args.online
            model.emp_FI = args.emp_fi

    # Synaptic Intelligence (SI)
    if isinstance(model, ContinualLearner):
        model.si_c = args.si_c if args.si else 0
        if args.si:
            model.epsilon = args.epsilon

    # XdG: create for every task a "mask" for each hidden fully connected layer
    if isinstance(model, ContinualLearner) and (args.xdg and args.gating_prop > 0):
        mask_dict = {}
        excit_buffer_list = []
        for task_id in range(args.tasks):
            mask_dict[task_id + 1] = {}
            for i in range(model.fcE.layers):
                layer = getattr(model.fcE, "fcLayer{}".format(i + 1)).linear
                if task_id == 0:
                    excit_buffer_list.append(layer.excit_buffer)
                n_units = len(layer.excit_buffer)
                gated_units = np.random.choice(n_units, size=int(args.gating_prop * n_units),
                                               replace=False)
                mask_dict[task_id + 1][i] = gated_units
        model.mask_dict = mask_dict
        model.excit_buffer_list = excit_buffer_list

    #-------------------------------------------------------------------------------------------------#

    #-------------------------------#
    #----- CL-STRATEGY: REPLAY -----#
    #-------------------------------#

    # Use distillation loss (i.e., soft targets) for replayed data? (and set temperature)
    if isinstance(model, Replayer):
        model.replay_targets = "soft" if args.distill else "hard"
        model.KD_temp = args.temp

    # If needed, specify separate model for the generator
    train_gen = True if (args.replay == "generative" and not args.feedback) else False
    if train_gen:
        # -specify architecture
        generator = AutoEncoder(
            image_size=config['size'], image_channels=config['channels'],
            fc_layers=args.g_fc_lay, fc_units=args.g_fc_uni, z_dim=args.g_z_dim,
            classes=config['classes'], fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False, fc_nl=args.fc_nl,
        ).to(device)
        # -set optimizer(s)
        generator.optim_list = [{'params': filter(lambda p: p.requires_grad, generator.parameters()),
                                 'lr': args.lr_gen}]
        generator.optim_type = args.optimizer
        if generator.optim_type in ("adam", "adam_reset"):
            generator.optimizer = optim.Adam(generator.optim_list, betas=(0.9, 0.999))
        elif generator.optim_type == "sgd":
            generator.optimizer = optim.SGD(generator.optim_list)
    else:
        generator = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- REPORTING -----#
    #---------------------#

    # Get parameter-stamp (and print on screen)
    param_stamp = get_param_stamp(
        args, model.name, verbose=True,
        replay=True if (not args.replay == "none") else False,
        replay_model_name=generator.name if (args.replay == "generative"
                                             and not args.feedback) else None,
    )

    # Print some model-characteristics on the screen
    # -main model
    print("\n")
    utils.print_model_info(model, title="MAIN MODEL")
    # -generator
    if generator is not None:
        utils.print_model_info(generator, title="GENERATOR")

    # Prepare for plotting in visdom
    # -define [precision_dict] to keep track of performance during training for storing and for later plotting in pdf
    precision_dict = evaluate.initiate_precision_dict(args.tasks)
    precision_dict_exemplars = evaluate.initiate_precision_dict(args.tasks) if args.use_exemplars else None
    # -visdom-settings
    if args.visdom:
        env_name = "{exp}{tasks}-{scenario}".format(exp=args.experiment, tasks=args.tasks,
                                                    scenario=args.scenario)
        graph_name = "{fb}{replay}{syn}{ewc}{xdg}{icarl}{bud}".format(
            fb="1M-" if args.feedback else "",
            replay="{}{}".format(args.replay, "D" if args.distill else ""),
            syn="-si{}".format(args.si_c) if args.si else "",
            ewc="-ewc{}{}".format(
                args.ewc_lambda,
                "-O{}".format(args.gamma) if args.online else "") if args.ewc else "",
            xdg="" if (not args.xdg) or args.gating_prop == 0
                else "-XdG{}".format(args.gating_prop),
            icarl="-iCaRL" if (args.use_exemplars and args.add_exemplars and args.bce
                               and args.bce_distill) else "",
            bud="-bud{}".format(args.budget) if (args.use_exemplars or args.add_exemplars
                                                 or args.replay == "exemplars") else "",
        )
        visdom = {'env': env_name, 'graph': graph_name}
        if args.use_exemplars:
            visdom_exemplars = {'env': env_name, 'graph': "{}-EX".format(graph_name)}
    else:
        visdom = visdom_exemplars = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- CALLBACKS -----#
    #---------------------#

    # Callbacks for reporting on and visualizing loss
    generator_loss_cbs = [
        cb._VAE_loss_cb(log=args.loss_log, visdom=visdom,
                        model=model if args.feedback else generator, tasks=args.tasks,
                        iters_per_task=args.iters if args.feedback else args.g_iters,
                        replay=False if args.replay == "none" else True)
    ] if (train_gen or args.feedback) else [None]
    solver_loss_cbs = [
        cb._solver_loss_cb(log=args.loss_log, visdom=visdom, model=model, tasks=args.tasks,
                           iters_per_task=args.iters,
                           replay=False if args.replay == "none" else True)
    ] if (not args.feedback) else [None]

    # Callbacks for evaluating and plotting generated / reconstructed samples
    sample_cbs = [
        cb._sample_cb(log=args.sample_log, visdom=visdom, config=config,
                      test_datasets=test_datasets, sample_size=args.sample_n,
                      iters_per_task=args.iters if args.feedback else args.g_iters)
    ] if (train_gen or args.feedback) else [None]

    # Callbacks for reporting and visualizing accuracy
    # -visdom (i.e., after each [prec_log] iterations)
    eval_cb = cb._eval_cb(
        log=args.prec_log, test_datasets=test_datasets, visdom=visdom, precision_dict=None,
        iters_per_task=args.iters, test_size=args.prec_n, classes_per_task=classes_per_task,
        scenario=scenario,
    )
    # -pdf / reporting: summary plots (i.e., only after each task)
    eval_cb_full = cb._eval_cb(
        log=args.iters, test_datasets=test_datasets, precision_dict=precision_dict,
        iters_per_task=args.iters, classes_per_task=classes_per_task, scenario=scenario,
    )
    # -with exemplars (both for visdom & reporting / pdf)
    eval_cb_exemplars = cb._eval_cb(
        log=args.iters, test_datasets=test_datasets, visdom=visdom_exemplars,
        classes_per_task=classes_per_task, precision_dict=precision_dict_exemplars,
        scenario=scenario, iters_per_task=args.iters, with_exemplars=True,
    ) if args.use_exemplars else None
    # -collect them in <lists>
    eval_cbs = [eval_cb, eval_cb_full]
    eval_cbs_exemplars = [eval_cb_exemplars]

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- TRAINING -----#
    #--------------------#

    print("--> Training:" + args.name)
    print("Total tasks:" + str(args.tasks_to_complete))
    # Keep track of training-time
    start = time.time()
    # Train model
    train_cl(
        args.tasks_to_complete, args.name, model, train_datasets, test_datasets,
        replay_mode=args.replay, scenario=scenario, classes_per_task=classes_per_task,
        iters=args.iters, batch_size=args.batch,
        generator=generator, gen_iters=args.g_iters, gen_loss_cbs=generator_loss_cbs,
        sample_cbs=sample_cbs, eval_cbs=eval_cbs,
        loss_cbs=generator_loss_cbs if args.feedback else solver_loss_cbs,
        eval_cbs_exemplars=eval_cbs_exemplars, use_exemplars=args.use_exemplars,
        add_exemplars=args.add_exemplars,
    )
    # Get total training-time in seconds, and write to file
    training_time = time.time() - start
    time_file = open("{}/time-{}.txt".format(args.r_dir, param_stamp), 'w')
    time_file.write('{}\n'.format(training_time))
    time_file.close()

    #-------------------------------------------------------------------------------------------------#

    #----------------------#
    #----- EVALUATION -----#
    #----------------------#

    print("\n\n--> Evaluation ({}-incremental learning scenario):".format(args.scenario))

    # Evaluate precision of final model on full test-set
    precs = [
        evaluate.validate(
            model, test_datasets[i], verbose=False, test_size=None, task=i + 1,
            with_exemplars=False,
            allowed_classes=list(range(classes_per_task * i, classes_per_task * (i + 1)))
            if scenario == "task" else None
        ) for i in range(args.tasks)
    ]
    print("\n Precision on test-set (softmax classification):")
    for i in range(args.tasks):
        print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
    average_precs = sum(precs) / args.tasks
    print('=> average precision over all {} tasks: {:.4f}'.format(args.tasks, average_precs))

    # -with exemplars
    if args.use_exemplars:
        precs = [
            evaluate.validate(
                model, test_datasets[i], verbose=False, test_size=None, task=i + 1,
                with_exemplars=True,
                allowed_classes=list(range(classes_per_task * i, classes_per_task * (i + 1)))
                if scenario == "task" else None
            ) for i in range(args.tasks)
        ]
        print("\n Precision on test-set (classification using exemplars):")
        for i in range(args.tasks):
            print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
        average_precs_ex = sum(precs) / args.tasks
        print('=> average precision over all {} tasks: {:.4f}'.format(args.tasks, average_precs_ex))
    print("\n")

    #-------------------------------------------------------------------------------------------------#

    #------------------#
    #----- OUTPUT -----#
    #------------------#

    # Average precision on full test set
    output_file = open("{}/prec-{}.txt".format(args.r_dir, param_stamp), 'w')
    output_file.write('{}\n'.format(average_precs_ex if args.use_exemplars else average_precs))
    output_file.close()
    # -precision-dict
    file_name = "{}/dict-{}".format(args.r_dir, param_stamp)
    utils.save_object(precision_dict_exemplars if args.use_exemplars else precision_dict, file_name)

    # Average precision on full test set not evaluated using exemplars (i.e., using softmax on final layer)
    if args.use_exemplars:
        output_file = open("{}/prec_noex-{}.txt".format(args.r_dir, param_stamp), 'w')
        output_file.write('{}\n'.format(average_precs))
        output_file.close()
        # -precision-dict:
        file_name = "{}/dict_noex-{}".format(args.r_dir, param_stamp)
        utils.save_object(precision_dict, file_name)

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- PLOTTING -----#
    #--------------------#

    # If requested, generate pdf
    if args.pdf:
        # -open pdf
        pp = visual_plt.open_pdf("{}/{}.pdf".format(args.p_dir, param_stamp))
        # -show samples and reconstructions (either from main model or from separate generator)
        if args.feedback or args.replay == "generative":
            evaluate.show_samples(model if args.feedback else generator, config,
                                  size=args.sample_n, pdf=pp)
            for i in range(args.tasks):
                evaluate.show_reconstruction(model if args.feedback else generator,
                                             test_datasets[i], config, pdf=pp, task=i + 1)
        # -show metrics reflecting progression during training
        figure_list = []  # -> create list to store all figures to be plotted
        # -generate all figures (and store them in [figure_list])
        figure = visual_plt.plot_lines(
            precision_dict["all_tasks"], x_axes=precision_dict["x_task"],
            line_names=['task {}'.format(i + 1) for i in range(args.tasks)])
        figure_list.append(figure)
        figure = visual_plt.plot_lines(
            [precision_dict["average"]], x_axes=precision_dict["x_task"],
            line_names=['average all tasks so far'])
        figure_list.append(figure)
        if args.use_exemplars:
            figure = visual_plt.plot_lines(
                precision_dict_exemplars["all_tasks"],
                x_axes=precision_dict_exemplars["x_task"],
                line_names=['task {}'.format(i + 1) for i in range(args.tasks)])
            figure_list.append(figure)
        # -add figures to pdf (and close this pdf).
        for figure in figure_list:
            pp.savefig(figure)
        # -close pdf
        pp.close()

def save_composite(cls, out_path):
    utils.save_object(cls.get_model(), out_path)

user_email = input("Please enter the user email you want to delete: ")
users_hids = []
for participant in participants.values():
    if participant.hashed_email == encrypt(user_email) and participant.unsubscribe == False:
        users_hids.append(participant.hashed_subject_id)
string_hids = "\n".join([x.decode("utf-8") for x in users_hids])
print(f'Found {len(users_hids)} ids subscribed with that email; here is the list:\n{string_hids}')
user_to_unsubscribe = input(
    "Type the item number of appearance corresponding to the sID /!\\ start with 0, "
    "or type any letter to do nothing: ")
if user_to_unsubscribe.isdigit():
    indice = int(user_to_unsubscribe)
    participants[users_hids[indice]].unsubscribe = True
    participants[users_hids[indice]].unsubscribe_dt = datetime.datetime.now(utc)
    print(f"User {users_hids[indice].decode('utf-8')} has been unsubscribed; once emailing is "
          f"active, the user will receive an automatic unsubscription email")
else:
    print("Nothing done")
save_object(participants, DATA_PATH)

def create_autoencoder(in_path, out_path, dim=0):
    actions = data.read_actions(in_path)
    imgs = data.get_projections(dim, actions)
    cls = ae.built_ae_cls()
    deep.learning_iter_unsuper(cls, imgs, n_epochs=500)
    utils.save_object(cls.model, out_path)

def main():
    # Load configuration
    config = Config()

    # Parse user_list representations
    user_list = []
    user_id_list = []
    with open(config.rep_path, "r") as data_file:
        lines = data_file.readlines()
        for line in lines:
            user_ = line.split(":")[1].replace("[", "").replace(']"}', "").split()
            user = [float(u) for u in user_[1:len(user_)]]
            user_list.append(user)
            user_id_list.append(line.split(":")[0].replace("{", "").replace('"', ""))
    user_list = np.array(user_list)
    user_id_list = np.array(user_id_list)

    # If tsne is already run
    path_user_tsne = os.path.join(os.path.dirname(config.save_path), "user_tsne")
    if os.path.isfile(path_user_tsne):
        user_tsne = load_object(path_user_tsne)
    else:
        # Run TSNE
        model = TSNE(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        user_tsne = model.fit_transform(user_list)
        # Save TSNE objects
        print "Save user_tsne."
        save_object(user_tsne, "save/user_tsne")

    # Run KMeans clustering
    kmeans = KMeans(init="k-means++", n_clusters=8, n_init=10)
    km = kmeans.fit(user_list)

    # Get cluster labels
    labels = km.labels_
    unique_labels = set(labels)

    # Save clustering results
    save_object(user_id_list, "save/user_ids_km")
    save_object(labels, "save/labels_km")

    # Save the cluster_to_user dict
    cluster_to_user = dict()
    for k in unique_labels:
        class_member_mask = (labels == k)
        class_k = user_id_list[class_member_mask]
        cluster_to_user[k] = class_k
    save_object(cluster_to_user, "save/cluster_to_user")

    # Save the user_to_cluster dict
    user_to_cluster = dict()
    for user, label in zip(user_id_list, labels):
        user_to_cluster[user] = label
    save_object(user_to_cluster, "save/user_to_cluster")

    # Plot results
    colors = plt.get_cmap("Spectral")(np.linspace(0, 1, len(unique_labels)))
    for k, col in zip(unique_labels, colors):
        class_member_mask = (labels == k)
        xy = user_tsne[class_member_mask]
        plt.plot(xy[:, 0], xy[:, 1], "o", markerfacecolor=col,
                 markeredgecolor="k", markersize=3)
    plt.title("KMeans Clustering")
    plt.show()

def run(args, verbose=False):
    # Create plots- and results-directories if needed
    if not os.path.isdir(args.r_dir):
        os.mkdir(args.r_dir)
    if args.pdf and not os.path.isdir(args.p_dir):
        os.mkdir(args.p_dir)

    # If only want param-stamp, get it and exit
    if args.get_stamp:
        from param_stamp import get_param_stamp_from_args
        print(get_param_stamp_from_args(args=args))
        exit()

    # Use cuda?
    cuda = torch.cuda.is_available() and args.cuda
    device = torch.device("cuda" if cuda else "cpu")
    # Report whether cuda is used
    if verbose:
        print("CUDA is {}used".format("" if cuda else "NOT(!!) "))

    # Set random seeds
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    #-------------------------------------------------------------------------------------------------#

    #----------------#
    #----- DATA -----#
    #----------------#

    # Prepare data for chosen experiment
    if verbose:
        print("\nPreparing the data...")
    (train_datasets, test_datasets), config, classes_per_task = get_multitask_experiment(
        name=args.experiment, scenario=args.scenario, tasks=args.tasks, data_dir=args.d_dir,
        normalize=True if utils.checkattr(args, "normalize") else False,
        augment=True if utils.checkattr(args, "augment") else False,
        verbose=verbose, exception=True if args.seed < 10 else False,
        only_test=(not args.train))

    #-------------------------------------------------------------------------------------------------#

    #----------------------#
    #----- MAIN MODEL -----#
    #----------------------#

    # Define main model (i.e., classifier, if requested with feedback connections)
    if verbose and (utils.checkattr(args, "pre_convE") or utils.checkattr(args, "pre_convD")) and \
            (hasattr(args, "depth") and args.depth > 0):
        print("\nDefining the model...")
    if utils.checkattr(args, 'feedback'):
        model = define.define_autoencoder(args=args, config=config, device=device)
    else:
        model = define.define_classifier(args=args, config=config, device=device)

    # Initialize / use pre-trained / freeze model-parameters
    # -initialize (pre-trained) parameters
    model = define.init_params(model, args)
    # -freeze weights of conv-layers?
    if utils.checkattr(args, "freeze_convE"):
        for param in model.convE.parameters():
            param.requires_grad = False
    if utils.checkattr(args, 'feedback') and utils.checkattr(args, "freeze_convD"):
        for param in model.convD.parameters():
            param.requires_grad = False

    # Define optimizer (only optimize parameters that "requires_grad")
    model.optim_list = [
        {'params': filter(lambda p: p.requires_grad, model.parameters()), 'lr': args.lr},
    ]
    model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))

    #-------------------------------------------------------------------------------------------------#

    #----------------------------------------------------#
    #----- CL-STRATEGY: REGULARIZATION / ALLOCATION -----#
    #----------------------------------------------------#

    # Elastic Weight Consolidation (EWC)
    if isinstance(model, ContinualLearner) and utils.checkattr(args, 'ewc'):
        model.ewc_lambda = args.ewc_lambda if args.ewc else 0
        model.fisher_n = args.fisher_n
        model.online = utils.checkattr(args, 'online')
        if model.online:
            model.gamma = args.gamma

    # Synaptic Intelligence (SI)
    if isinstance(model, ContinualLearner) and utils.checkattr(args, 'si'):
        model.si_c = args.si_c if args.si else 0
        model.epsilon = args.epsilon

    # XdG: create for every task a "mask" for each hidden fully connected layer
    if isinstance(model, ContinualLearner) and utils.checkattr(args, 'xdg') and args.xdg_prop > 0:
        model.define_XdGmask(gating_prop=args.xdg_prop, n_tasks=args.tasks)

    #-------------------------------------------------------------------------------------------------#

    #-------------------------------#
    #----- CL-STRATEGY: REPLAY -----#
    #-------------------------------#

    # Use distillation loss (i.e., soft targets) for replayed data? (and set temperature)
    if isinstance(model, ContinualLearner) and hasattr(args, 'replay') and not args.replay == "none":
        model.replay_targets = "soft" if args.distill else "hard"
        model.KD_temp = args.temp

    # If needed, specify separate model for the generator
    train_gen = (hasattr(args, 'replay') and args.replay == "generative"
                 and not utils.checkattr(args, 'feedback'))
    if train_gen:
        # Specify architecture
        generator = define.define_autoencoder(args, config, device, generator=True)
        # Initialize parameters
        generator = define.init_params(generator, args)
        # -freeze weights of conv-layers?
        if utils.checkattr(args, "freeze_convE"):
            for param in generator.convE.parameters():
                param.requires_grad = False
        if utils.checkattr(args, "freeze_convD"):
            for param in generator.convD.parameters():
                param.requires_grad = False
        # Set optimizer(s)
        generator.optim_list = [
            {'params': filter(lambda p: p.requires_grad, generator.parameters()),
             'lr': args.lr_gen if hasattr(args, 'lr_gen') else args.lr},
        ]
        generator.optimizer = optim.Adam(generator.optim_list, betas=(0.9, 0.999))
    else:
        generator = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- REPORTING -----#
    #---------------------#

    # Get parameter-stamp (and print on screen)
    if verbose:
        print("\nParameter-stamp...")
    param_stamp = get_param_stamp(
        args, model.name, verbose=verbose,
        replay=True if (hasattr(args, 'replay') and not args.replay == "none") else False,
        replay_model_name=generator.name if (
            hasattr(args, 'replay') and args.replay in ("generative")
            and not utils.checkattr(args, 'feedback')
        ) else None,
    )

    # Print some model-characteristics on the screen
    if verbose:
        # -main model
        utils.print_model_info(model, title="MAIN MODEL")
        # -generator
        if generator is not None:
            utils.print_model_info(generator, title="GENERATOR")

    # Define [progress_dicts] to keep track of performance during training for storing and for later plotting in pdf
    precision_dict = evaluate.initiate_precision_dict(args.tasks)

    # Prepare for plotting in visdom
    visdom = None
    if args.visdom:
        env_name = "{exp}{tasks}-{scenario}".format(exp=args.experiment, tasks=args.tasks,
                                                    scenario=args.scenario)
        replay_statement = "{mode}{fb}{con}{gat}{int}{dis}{b}{u}".format(
            mode=args.replay,
            fb="Rtf" if utils.checkattr(args, "feedback") else "",
            con="Con" if (hasattr(args, "prior") and args.prior == "GMM"
                          and utils.checkattr(args, "per_class")) else "",
            gat="Gat{}".format(args.dg_prop) if (
                utils.checkattr(args, "dg_gates") and hasattr(args, "dg_prop") and args.dg_prop > 0
            ) else "",
            int="Int" if utils.checkattr(args, "hidden") else "",
            dis="Dis" if args.replay == "generative" and args.distill else "",
            b="" if (args.batch_replay is None or args.batch_replay == args.batch)
              else "-br{}".format(args.batch_replay),
            u="" if args.g_fc_uni == args.fc_units else "-gu{}".format(args.g_fc_uni),
        ) if (hasattr(args, "replay") and not args.replay == "none") else "NR"
        graph_name = "{replay}{syn}{ewc}{xdg}".format(
            replay=replay_statement,
            syn="-si{}".format(args.si_c) if utils.checkattr(args, 'si') else "",
            ewc="-ewc{}{}".format(
                args.ewc_lambda,
                "-O{}".format(args.gamma) if utils.checkattr(args, "online") else ""
            ) if utils.checkattr(args, 'ewc') else "",
            xdg="" if (not utils.checkattr(args, 'xdg')) or args.xdg_prop == 0
                else "-XdG{}".format(args.xdg_prop),
        )
        visdom = {'env': env_name, 'graph': graph_name}

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- CALLBACKS -----#
    #---------------------#

    g_iters = args.g_iters if hasattr(args, 'g_iters') else args.iters

    # Callbacks for reporting on and visualizing loss
    generator_loss_cbs = [
        cb._VAE_loss_cb(log=args.loss_log, visdom=visdom,
                        replay=(hasattr(args, "replay") and not args.replay == "none"),
                        model=model if utils.checkattr(args, 'feedback') else generator,
                        tasks=args.tasks,
                        iters_per_task=args.iters if utils.checkattr(args, 'feedback') else g_iters)
    ] if (train_gen or utils.checkattr(args, 'feedback')) else [None]
    solver_loss_cbs = [
        cb._solver_loss_cb(log=args.loss_log, visdom=visdom, model=model,
                           iters_per_task=args.iters, tasks=args.tasks,
                           replay=(hasattr(args, "replay") and not args.replay == "none"))
    ] if (not utils.checkattr(args, 'feedback')) else [None]

    # Callbacks for evaluating and plotting generated / reconstructed samples
    no_samples = (utils.checkattr(args, "no_samples")
                  or (utils.checkattr(args, "hidden") and hasattr(args, 'depth') and args.depth > 0))
    sample_cbs = [
        cb._sample_cb(log=args.sample_log, visdom=visdom, config=config,
                      test_datasets=test_datasets, sample_size=args.sample_n,
                      iters_per_task=g_iters)
    ] if ((train_gen or utils.checkattr(args, 'feedback')) and not no_samples) else [None]

    # Callbacks for reporting and visualizing accuracy, and visualizing representation extracted by main model
    # -visdom (i.e., after each [prec_log] iterations)
    eval_cb = cb._eval_cb(
        log=args.prec_log, test_datasets=test_datasets, visdom=visdom, precision_dict=None,
        iters_per_task=args.iters, test_size=args.prec_n, classes_per_task=classes_per_task,
        scenario=args.scenario,
    )
    # -pdf / reporting: summary plots (i.e., only after each task)
    eval_cb_full = cb._eval_cb(
        log=args.iters, test_datasets=test_datasets, precision_dict=precision_dict,
        iters_per_task=args.iters, classes_per_task=classes_per_task, scenario=args.scenario,
    )
    # -visualize feature space
    latent_space_cb = cb._latent_space_cb(
        log=args.iters, datasets=test_datasets, visdom=visdom, iters_per_task=args.iters,
        sample_size=400,
    )
    # -collect them in <lists>
    eval_cbs = [eval_cb, eval_cb_full, latent_space_cb]

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- TRAINING -----#
    #--------------------#

    if args.train:
        if verbose:
            print("\nTraining...")
        # Train model
        train_cl(
            model, train_datasets,
            replay_mode=args.replay if hasattr(args, 'replay') else "none",
            scenario=args.scenario, classes_per_task=classes_per_task,
            iters=args.iters, batch_size=args.batch,
            batch_size_replay=args.batch_replay if hasattr(args, 'batch_replay') else None,
            generator=generator, gen_iters=g_iters, gen_loss_cbs=generator_loss_cbs,
            feedback=utils.checkattr(args, 'feedback'),
            sample_cbs=sample_cbs, eval_cbs=eval_cbs,
            loss_cbs=generator_loss_cbs if utils.checkattr(args, 'feedback') else solver_loss_cbs,
            args=args, reinit=utils.checkattr(args, 'reinit'),
            only_last=utils.checkattr(args, 'only_last'))
        # Save evaluation metrics measured throughout training
        file_name = "{}/dict-{}".format(args.r_dir, param_stamp)
        utils.save_object(precision_dict, file_name)
        # Save trained model(s), if requested
        if args.save:
            save_name = "mM-{}".format(param_stamp) if (
                not hasattr(args, 'full_stag') or args.full_stag == "none"
            ) else "{}-{}".format(model.name, args.full_stag)
            utils.save_checkpoint(model, args.m_dir, name=save_name, verbose=verbose)
            if generator is not None:
                save_name = "gM-{}".format(param_stamp) if (
                    not hasattr(args, 'full_stag') or args.full_stag == "none"
                ) else "{}-{}".format(generator.name, args.full_stag)
                utils.save_checkpoint(generator, args.m_dir, name=save_name, verbose=verbose)
    else:
        # Load previously trained model(s) (if goal is to only evaluate previously trained model)
        if verbose:
            print("\nLoading parameters of the previously trained models...")
        load_name = "mM-{}".format(param_stamp) if (
            not hasattr(args, 'full_ltag') or args.full_ltag == "none"
        ) else "{}-{}".format(model.name, args.full_ltag)
        utils.load_checkpoint(model, args.m_dir, name=load_name, verbose=verbose,
                              add_si_buffers=(isinstance(model, ContinualLearner)
                                              and utils.checkattr(args, 'si')))
        if generator is not None:
            load_name = "gM-{}".format(param_stamp) if (
                not hasattr(args, 'full_ltag') or args.full_ltag == "none"
            ) else "{}-{}".format(generator.name, args.full_ltag)
            utils.load_checkpoint(generator, args.m_dir, name=load_name, verbose=verbose)

    #-------------------------------------------------------------------------------------------------#

    #------------------------------------#
    #----- EVALUATION of CLASSIFIER -----#
    #------------------------------------#

    if verbose:
        print("\n\nEVALUATION RESULTS:")

    # Evaluate precision of final model on full test-set
    precs = [
        evaluate.validate(
            model, test_datasets[i], verbose=False, test_size=None, task=i + 1,
            allowed_classes=list(range(classes_per_task * i, classes_per_task * (i + 1)))
            if args.scenario == "task" else None
        ) for i in range(args.tasks)
    ]
    average_precs = sum(precs) / args.tasks
    # -print on screen
    if verbose:
        print("\n Accuracy of final model on test-set:")
        for i in range(args.tasks):
            print(" - {} {}: {:.4f}".format(
                "For classes from task" if args.scenario == "class" else "Task",
                i + 1, precs[i]))
        print('=> Average accuracy over all {} {}: {:.4f}\n'.format(
            args.tasks * classes_per_task if args.scenario == "class" else args.tasks,
            "classes" if args.scenario == "class" else "tasks", average_precs))
    # -write out to text file
    output_file = open("{}/prec-{}.txt".format(args.r_dir, param_stamp), 'w')
    output_file.write('{}\n'.format(average_precs))
    output_file.close()

    #-------------------------------------------------------------------------------------------------#

    #-----------------------------------#
    #----- EVALUATION of GENERATOR -----#
    #-----------------------------------#

    if (utils.checkattr(args, 'feedback') or train_gen) and args.experiment == "CIFAR100" \
            and args.scenario == "class":
        # Dataset and model to be used
        test_set = ConcatDataset(test_datasets)
        gen_model = model if utils.checkattr(args, 'feedback') else generator
        gen_model.eval()

        # Evaluate log-likelihood of generative model on combined test-set (with S=100 importance samples per datapoint)
        ll_per_datapoint = gen_model.estimate_loglikelihood(test_set, S=100, batch_size=args.batch)
        if verbose:
            print('=> Log-likelihood on test set: {:.4f} +/- {:.4f}\n'.format(
                np.mean(ll_per_datapoint), np.sqrt(np.var(ll_per_datapoint))))
        # -write out to text file
        output_file = open("{}/ll-{}.txt".format(args.r_dir, param_stamp), 'w')
        output_file.write('{}\n'.format(np.mean(ll_per_datapoint)))
        output_file.close()

        # Evaluate reconstruction error (averaged over number of input units)
        re_per_datapoint = gen_model.calculate_recon_error(test_set, batch_size=args.batch,
                                                           average=True)
        if verbose:
            print('=> Reconstruction error (per input unit) on test set: {:.4f} +/- {:.4f}\n'.format(
                np.mean(re_per_datapoint), np.sqrt(np.var(re_per_datapoint))))
        # -write out to text file
        output_file = open("{}/re-{}.txt".format(args.r_dir, param_stamp), 'w')
        output_file.write('{}\n'.format(np.mean(re_per_datapoint)))
        output_file.close()

        # Try loading the classifier (our substitute for InceptionNet) for calculating IS, FID and Recall & Precision
        # -define model
        config['classes'] = 100
        pretrained_classifier = define.define_classifier(args=args, config=config, device=device)
        pretrained_classifier.hidden = False
        # -load pretrained weights
        eval_tag = "" if args.eval_tag == "none" else "-{}".format(args.eval_tag)
        try:
            utils.load_checkpoint(pretrained_classifier, args.m_dir, verbose=True,
                                  name="{}{}".format(pretrained_classifier.name, eval_tag))
            FileFound = True
        except FileNotFoundError:
            if verbose:
                print("= Could
not find model {}{} in {}".format( pretrained_classifier.name, eval_tag, args.m_dir)) print("= IS, FID and Precision & Recall not computed!") FileFound = False pretrained_classifier.eval() # Only continue with computing these measures if the requested classifier network (using --eval-tag) was found if FileFound: # Preparations total_n = len(test_set) n_repeats = int(np.ceil(total_n / args.batch)) # -sample data from generator (for IS, FID and Precision & Recall) gen_x = gen_model.sample(size=total_n, only_x=True) # -generate predictions for generated data (for IS) gen_pred = [] for i in range(n_repeats): x = gen_x[(i * args.batch):int(min(((i + 1) * args.batch), total_n))] with torch.no_grad(): gen_pred.append( F.softmax(pretrained_classifier.hidden_to_output(x) if args.hidden else pretrained_classifier(x), dim=1).cpu().numpy()) gen_pred = np.concatenate(gen_pred) # -generate embeddings for generated data (for FID and Precision & Recall) gen_emb = [] for i in range(n_repeats): with torch.no_grad(): gen_emb.append( pretrained_classifier.feature_extractor( gen_x[(i * args.batch ):int(min(((i + 1) * args.batch), total_n))], from_hidden=args.hidden).cpu().numpy()) gen_emb = np.concatenate(gen_emb) # -generate embeddings for test data (for FID and Precision & Recall) data_loader = utils.get_data_loader(test_set, batch_size=args.batch, cuda=cuda) real_emb = [] for real_x, _ in data_loader: with torch.no_grad(): real_emb.append( pretrained_classifier.feature_extractor( real_x.to(device)).cpu().numpy()) real_emb = np.concatenate(real_emb) # Calculate "Inception Score" (IS) py = gen_pred.mean(axis=0) is_per_datapoint = [] for i in range(len(gen_pred)): pyx = gen_pred[i, :] is_per_datapoint.append(entropy(pyx, py)) IS = np.exp(np.mean(is_per_datapoint)) if verbose: print('=> Inception Score = {:.4f}\n'.format(IS)) # -write out to text file output_file = open( "{}/is{}-{}.txt".format(args.r_dir, eval_tag, param_stamp), 'w') output_file.write('{}\n'.format(IS)) output_file.close() ## Calculate "Frechet Inception Distance" (FID) FID = fid.calculate_fid_from_embedding(gen_emb, real_emb) if verbose: print('=> Frechet Inception Distance = {:.4f}\n'.format(FID)) # -write out to text file output_file = open( "{}/fid{}-{}.txt".format(args.r_dir, eval_tag, param_stamp), 'w') output_file.write('{}\n'.format(FID)) output_file.close() # Calculate "Precision & Recall"-curves precision, recall = pr.compute_prd_from_embedding( gen_emb, real_emb) # -write out to text files file_name = "{}/precision{}-{}.txt".format(args.r_dir, eval_tag, param_stamp) with open(file_name, 'w') as f: for item in precision: f.write("%s\n" % item) file_name = "{}/recall{}-{}.txt".format(args.r_dir, eval_tag, param_stamp) with open(file_name, 'w') as f: for item in recall: f.write("%s\n" % item) #-------------------------------------------------------------------------------------------------# #--------------------# #----- PLOTTING -----# #--------------------# # If requested, generate pdf if args.pdf: # -open pdf plot_name = "{}/{}.pdf".format(args.p_dir, param_stamp) pp = evaluate.visual.plt.open_pdf(plot_name) # -show metrics reflecting progression during training if args.train and (not utils.checkattr(args, 'only_last')): # -create list to store all figures to be plotted. 
figure_list = [] # -generate figures (and store them in [figure_list]) figure = evaluate.visual.plt.plot_lines( precision_dict["all_tasks"], x_axes=[ i * classes_per_task for i in precision_dict["x_task"] ] if args.scenario == "class" else precision_dict["x_task"], line_names=[ '{} {}'.format( "episode / task" if args.scenario == "class" else "task", i + 1) for i in range(args.tasks) ], xlabel="# of {}s so far".format("classe" if args.scenario == "class" else "task"), ylabel="Test accuracy") figure_list.append(figure) figure = evaluate.visual.plt.plot_lines( [precision_dict["average"]], x_axes=[ i * classes_per_task for i in precision_dict["x_task"] ] if args.scenario == "class" else precision_dict["x_task"], line_names=[ 'Average based on all {}s so far'.format(( "digit" if args.experiment == "splitMNIST" else "classe") if args.scenario else "task") ], xlabel="# of {}s so far".format("classe" if args.scenario == "class" else "task"), ylabel="Test accuracy") figure_list.append(figure) # -add figures to pdf for figure in figure_list: pp.savefig(figure) gen_eval = (utils.checkattr(args, 'feedback') or train_gen) # -show samples (from main model or separate generator) if gen_eval and not no_samples: evaluate.show_samples( model if utils.checkattr(args, 'feedback') else generator, config, size=args.sample_n, pdf=pp, title="Generated samples (by final model)") # -plot "Precision & Recall"-curve if gen_eval and args.experiment == "CIFAR100" and args.scenario == "class" and FileFound: figure = evaluate.visual.plt.plot_pr_curves([[precision]], [[recall]]) pp.savefig(figure) # -close pdf pp.close() # -print name of generated plot on screen if verbose: print("\nGenerated plot: {}\n".format(plot_name))
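# A minimal, self-contained sketch of the freeze-then-optimize pattern used throughout
# the script above: parameters are frozen by clearing requires_grad, and only the
# remaining trainable parameters are handed to the optimizer. The two-layer net below
# is a hypothetical stand-in, not part of the original code.
import torch.nn as nn
import torch.optim as optim

sketch_model = nn.Sequential(nn.Linear(8, 16), nn.Linear(16, 2))
for param in sketch_model[0].parameters():  # freeze the first layer
    param.requires_grad = False
sketch_optim_list = [
    {'params': filter(lambda p: p.requires_grad, sketch_model.parameters()), 'lr': 0.001},
]
sketch_optimizer = optim.Adam(sketch_optim_list, betas=(0.9, 0.999))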
def mc_study(self):
    """
    Perform pseudo-experiments by generating random data and models
    (using statistical bin uncertainty).
    pseudo data:
        - sum all model hists
        - then randomise
    model:
        - randomise each input hist individually
    """
    samples = self.samples
    ntrials = 10000
    if self.plot_toy_fits:
        ntrials = 10

    ## save options before mc study
    tag = self.tag
    quiet = self.quiet
    self.quiet = True

    toy_arrays = {}
    for s in samples:
        if not s in toy_arrays:
            toy_arrays[s] = {}
        toy_arrays[s]['mean'] = []
        toy_arrays[s]['error'] = []
        toy_arrays[s]['pull'] = []
        toy_arrays[s]['diff'] = []
        toy_arrays[s]['mc'] = []
    data_array = []

    ## initialise toy fitter
    ## VERY important to prepare hists first
    ## before initialising, so TFractionFitter
    ## is not initialised with the real data
    self.prepare_toy_hists()
    self.init_fitter(toy=True)

    for i in xrange(ntrials):
        if i % 100 == 0:
            print 'trial ', i
        self.tag = '%s_trial%d' % (tag, i)
        self.randomise_hists()
        self.toy_fit()
        if not self.fit_status == 0:
            continue
        if self.plot_toy_fits:
            self.plot()
        temp_means = {}
        temp_errors = {}
        temp_pulls = {}
        temp_diffs = {}
        temp_mcs = {}
        has_zero = False
        n_tot_mc = self.ntot_orig()
        n_tot_fit = self.ntot_fit()
        for s in samples:
            n_mc = self.nsamp_orig(s)
            n_fit = self.nsamp_fit(s)
            en_fit = self.ensamp_fit(s)
            pull = (n_fit - n_mc) / en_fit if en_fit else 0.0
            diff = n_fit - n_mc
            temp_means[s] = n_fit
            temp_errors[s] = en_fit
            temp_pulls[s] = pull
            temp_diffs[s] = diff
            temp_mcs[s] = self.nsamp_curr(s)
            f_fit = self.fsamp_fit(s)
            #if f_fit == 0.: has_zero = True
            if f_fit < 0.0000001:
                has_zero = True
            f_mc = n_mc / n_tot_mc if n_tot_mc else 0.0
            f_fit = n_fit / n_tot_fit if n_tot_fit else 0.0
        ## remove cases where any component is fit to 0;
        ## argument is that we would not take this result
        ## if we got it in data. probably should try to
        ## do something better in future
        if not has_zero:
            for s in samples:
                toy_arrays[s]['mean'].append(temp_means[s])
                toy_arrays[s]['error'].append(temp_errors[s])
                toy_arrays[s]['pull'].append(temp_pulls[s])
                toy_arrays[s]['diff'].append(temp_diffs[s])
                toy_arrays[s]['mc'].append(temp_mcs[s])
            data_array.append(self.ndata_curr())
        else:
            print 'ERROR - component fit to zero'

    ## restore to original state before toys
    self.reset_hists()
    self.tag = tag
    self.quiet = quiet

    ## set corrections from toy study
    filename = 'toy_%s.root' % (self.tag)
    for s in samples:
        a_mean = toy_arrays[s]['mean']
        a_error = toy_arrays[s]['error']
        a_pull = toy_arrays[s]['pull']
        a_diff = toy_arrays[s]['diff']
        a_mc = toy_arrays[s]['mc']
        if not s in self.toy_results:
            self.toy_results[s] = {}
        self.toy_results[s]['meanm'] = numpy.mean(a_mean)
        self.toy_results[s]['meane'] = numpy.std(a_mean)
        self.toy_results[s]['errorm'] = numpy.mean(a_error)
        self.toy_results[s]['errore'] = numpy.std(a_error)
        self.toy_results[s]['pullm'] = numpy.mean(a_pull)
        self.toy_results[s]['pulle'] = numpy.std(a_pull)

        ## create plots
        h_mean = create_mean_hist(s)
        h_error = create_error_hist(s)
        h_pull = create_pull_hist(s)
        h_diff = create_diff_hist(s)
        h_mc = create_mc_hist(s)
        for v in a_mean:
            h_mean.Fill(v)
        for v in a_error:
            h_error.Fill(v)
        for v in a_pull:
            h_pull.Fill(v)
        for v in a_diff:
            h_diff.Fill(v)
        for v in a_mc:
            h_mc.Fill(v)
        utils.save_object(h_mean, filename)
        utils.save_object(h_error, filename)
        utils.save_object(h_pull, filename)
        utils.save_object(h_diff, filename)
        utils.save_object(h_mc, filename)

    h_mc_data = create_mc_hist('data')
    for v in data_array:
        h_mc_data.Fill(v)
    utils.save_object(h_mc_data, filename)

    for isamp in xrange(len(samples)):
        s1 = samples[isamp]
        for isamp2 in xrange(len(samples)):
            if not isamp2 < isamp:
                continue
            s2 = samples[isamp2]
            h = create_2d_mean_hist(s1, s2)
            for ns1, ns2 in zip(toy_arrays[s1]['mean'], toy_arrays[s2]['mean']):
                h.Fill(ns1, ns2)
            utils.save_object(h, filename)

    for s in samples:
        h = create_2d_mean_hist('%s_mc' % s, '%s_fit' % s)
        for ns1, ns2 in zip(toy_arrays[s]['mc'], toy_arrays[s]['mean']):
            h.Fill(ns1, ns2)
        utils.save_object(h, filename)

    f = utils.open_file(filename)
    f.Close()
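# A small numpy sketch of the pull statistic collected per toy above,
# pull = (n_fit - n_mc) / en_fit: for an unbiased fit with correct errors, the pull
# distribution should be approximately unit Gaussian. Values here are illustrative only.
import numpy as np
n_mc, en_fit = 100.0, 10.0
n_fit = np.random.normal(n_mc, en_fit, 10000)  # stand-in for 10000 toy fit results
pulls = (n_fit - n_mc) / en_fit
print("pull mean %.3f, std %.3f" % (np.mean(pulls), np.std(pulls)))  # expect ~0 and ~1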
def raw_to_pcloud(in_path, out_path):
    pcloud = action.make_action(in_path)
    pcloud.standarize()
    out_path = out_path.replace(".raw", ".cloud")
    print(out_path)
    utils.save_object(pcloud, out_path)
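# Hypothetical usage sketch (the file names are assumptions, not from the original):
# raw_to_pcloud("data/jump_01.raw", "data/jump_01.raw")  # writes data/jump_01.cloud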
def main(argv=None):
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    keep_probability_conv = tf.placeholder(tf.float32, name="keep_probability_conv")
    image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1],
                           name="input_image")
    annotation_labels = tf.placeholder(tf.int32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1],
                                       name="annotation_labels")
    annotation_objects = tf.placeholder(tf.int32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1],
                                        name="annotation_objects")

    # compute labels
    pred_annotation, logits = segment(image, keep_probability_conv, 1, NUM_OF_CLASSESS,
                                      "labels")
    if FLAGS.summary:
        tf.summary.image("input_image", image, max_outputs=2)
        tf.summary.image("ground_truth_labels", tf.cast(annotation_labels, tf.uint8),
                         max_outputs=2)
        tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2)
        tf.summary.image("gt_objects", tf.cast(annotation_objects, tf.uint8), max_outputs=2)
    loss_labels = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits,
            labels=tf.squeeze(annotation_labels, squeeze_dims=[3]),
            name="loss_labels"))

    # compute objective energy
    combination = tf.concat([image, tf.cast(logits, tf.float32)], -1)
    pred_annotation_o, logits_o = compute_energy(combination, keep_probability_conv,
                                                 NUM_OF_CLASSESS + 1, 5, "objects")
    with tf.variable_scope("loss_objects"):
        # subtract one from all the nonzero parts of annotation objects
        one = tf.constant(1, dtype=tf.int32, name="const_one")
        zero_m = tf.constant(0, dtype=tf.int32, name="const_zero")
        annotation_objects_sub = tf.subtract(annotation_objects, one, name="loss_obj_sub")
        if FLAGS.summary:
            tf.summary.histogram("annotations_o_minus_one",
                                 tf.cast(annotation_objects_sub, tf.uint8))
        annotation_objects_max = tf.maximum(annotation_objects_sub, zero_m,
                                            name="loss_obj_max")
        if FLAGS.summary:
            tf.summary.histogram("annotations_o_minus_one_1",
                                 tf.cast(annotation_objects_max, tf.uint8))

        # compute objective loss - cross-entropy based
        cross_ent_obj = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits_o,
            labels=tf.squeeze(annotation_objects_max, squeeze_dims=[3]),
            name="loss_objects")

        # mask out stuff not detected by segmentation
        zero = tf.constant(0, dtype=tf.int32)
        binary_mask = tf.cast(tf.not_equal(tf.cast(pred_annotation, tf.int32), zero),
                              tf.float32)
        if FLAGS.summary:
            tf.summary.tensor_summary("bin_mask", tf.cast(binary_mask, tf.uint8))

        # weight corners
        one = tf.constant(1, dtype=tf.float32)
        weight_mask = tf.multiply(tf.add(tf.cast(annotation_objects_max, tf.float32), one),
                                  100)

        # compute loss
        loss_objects = tf.reduce_mean(
            tf.divide(tf.multiply(cross_ent_obj, binary_mask), weight_mask))
        loss_objects = tf.multiply(loss_objects, 100)

    # Alternative objective loss (mean-square error), kept commented out:
    # manipulate gt: subtract 1 to reduce size of patches, square gt to make patches deeper.
    # one = tf.constant(1, dtype=tf.float32)
    # annotation_objects = tf.square(tf.subtract(tf.cast(annotation_objects, tf.float32), one))
    # square_diff = tf.square(tf.subtract(logits_o, annotation_objects))
    # # mask out non-segmented part
    # # zero = tf.constant(0, dtype=tf.float32)
    # # binary_mask = tf.cast(tf.not_equal(tf.cast(pred_annotation, tf.float32), zero), tf.float32)
    # # square_diff = tf.multiply(square_diff, binary_mask)
    # loss_objects = tf.multiply(1000.0, tf.reduce_mean(square_diff))

    if FLAGS.mode == "train_combined":
        loss = tf.add(loss_labels, loss_objects)
    else:
        loss = loss_labels
    tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    # if FLAGS.debug:
    #     for var in trainable_var:
    #         utils.add_to_regularization_and_summary(var)
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    summary_op = tf.summary.merge_all()

    if False:  # try to load cached data
        fname_train = "train_dsreader" + str(IMAGE_SIZE)
        fname_valid = "valid_dsreader" + str(IMAGE_SIZE)
        if os.path.isfile(fname_train) and os.path.isfile(fname_valid):
            # load cached
            train_dataset_reader = util.load_object(fname_train)
            validation_dataset_reader = util.load_object(fname_valid)
        else:
            # load data from disk and cache it
            print("Setting up image reader...")
            train_records, valid_records = util.scene_parsing.read_dataset(FLAGS.data_dir)
            train_records, valid_records = music_data.read_dataset(FLAGS.data_dir)
            print(len(train_records))
            print(len(valid_records))
            print("Setting up dataset reader")
            image_options = {'resize': False, 'resize_size': IMAGE_SIZE}
            if FLAGS.mode == 'train':
                train_dataset_reader = dataset.BatchDatset(train_records, image_options)
            validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)
            if train_dataset_reader is not None:
                util.save_object(train_dataset_reader, fname_train)
                util.save_object(validation_dataset_reader, fname_valid)
    else:
        # just load from disk
        print("Setting up image reader...")
        #train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
        train_records, valid_records = music_data.read_dataset(FLAGS.music_data_dir)
        print(len(train_records))
        print(len(valid_records))
        print("Setting up dataset reader")
        image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
        if "train" in FLAGS.mode:
            train_dataset_reader = dataset.BatchDatset(train_records, image_options)
        validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)

    sess = tf.Session()
    print("Setting up Saver...")
    saver = tf.train.Saver()
    summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        # get the step from the last checkpoint
        step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")
    else:
        step = 0

    if FLAGS.mode == "train_labels":
        for itr in xrange(step, MAX_ITERATION):
            train_images, train_m_annotations, train_o_annotations = \
                train_dataset_reader.next_batch(FLAGS.batch_size)
            feed_dict = {image: train_images,
                         annotation_labels: train_m_annotations,
                         keep_probability_conv: 0.85,
                         keep_probability: 0.85}
            sess.run(train_op, feed_dict=feed_dict)
            if itr % 10 == 0:
                train_loss, summary_str = sess.run([loss, summary_op], feed_dict=feed_dict)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))
                summary_writer.add_summary(summary_str, itr)
            if itr % 500 == 0 and itr != 0:
                valid_images, valid_m_annotations, valid_o_annotations = \
                    validation_dataset_reader.next_batch(FLAGS.batch_size)
                valid_loss = sess.run(loss,
                                      feed_dict={image: valid_images,
                                                 annotation_labels: valid_m_annotations,
                                                 annotation_objects: valid_o_annotations,
                                                 keep_probability_conv: 1.0,
                                                 keep_probability: 1.0})
                print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss))
                saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)
    elif FLAGS.mode == "train_combined":
        for itr in xrange(step, MAX_ITERATION):
            train_images, train_m_annotations, train_o_annotations = \
                train_dataset_reader.next_batch(FLAGS.batch_size)
            feed_dict = {image: train_images,
                         annotation_labels: train_m_annotations,
                         annotation_objects: train_o_annotations,
                         keep_probability_conv: 0.85,
                         keep_probability: 0.85}
            if FLAGS.debug_fetch:
                # fetch intermediate tensors for visual inspection; bind the results to
                # new names so the graph tensors above are not overwritten by numpy arrays
                (_, sub_val, max_val, logits_o_val, pred_annotation_val,
                 binary_mask_val) = sess.run(
                    [train_op, annotation_objects_sub, annotation_objects_max,
                     logits_o, pred_annotation, binary_mask],
                    feed_dict=feed_dict)
                # sub
                sub_val = sub_val.reshape((1, 224, 224))
                plt.imshow(sub_val[0])
                plt.show()
                # max
                max_val = max_val.reshape((1, 224, 224))
                plt.imshow(max_val[0])
                plt.show()
                # background of predicted segmentation
                pred_annotation_val = pred_annotation_val.reshape((1, 224, 224))
                plt.imshow(pred_annotation_val[0] == 0)
                plt.show()
                # binary mask
                binary_mask_val = binary_mask_val.reshape((1, 224, 224))
                plt.imshow(binary_mask_val[0])
                plt.show()
            else:
                sess.run(train_op, feed_dict=feed_dict)
            if itr % 10 == 0:
                train_loss = sess.run(loss, feed_dict=feed_dict)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))
                train_loss = sess.run(loss_labels, feed_dict=feed_dict)
                print("Step: %d, labels_loss:%g" % (itr, train_loss))
                train_loss = sess.run(loss_objects, feed_dict=feed_dict)
                print("Step: %d, objects_loss:%g" % (itr, train_loss))
            if itr % 250 == 0:
                train_loss, summary_str = sess.run([loss, summary_op], feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, itr)
            if itr % 500 == 0 and itr != 0:
                valid_images, valid_m_annotations, valid_o_annotations = \
                    validation_dataset_reader.next_batch(FLAGS.batch_size)
                valid_loss = sess.run(loss,
                                      feed_dict={image: valid_images,
                                                 annotation_labels: valid_m_annotations,
                                                 annotation_objects: valid_o_annotations,
                                                 keep_probability_conv: 1.0,
                                                 keep_probability: 1.0})
                print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss))
                saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)
    elif FLAGS.mode == "visualize":
        number_of_batches = 25
        for i in xrange(number_of_batches):
            valid_images, valid_m_annotations, valid_o_annotations = \
                validation_dataset_reader.next_batch(FLAGS.batch_size)
            pred_a, pred_o = sess.run([pred_annotation, pred_annotation_o],
                                      feed_dict={image: valid_images,
                                                 annotation_labels: valid_m_annotations,
                                                 annotation_objects: valid_o_annotations,
                                                 keep_probability_conv: 1.0,
                                                 keep_probability: 1.0})
            valid_o_annotations = np.squeeze(valid_o_annotations, axis=3)
            valid_m_annotations = np.squeeze(valid_m_annotations, axis=3)
            pred_a = np.squeeze(pred_a, axis=3)
            pred_o = np.squeeze(pred_o, axis=3)
            #water_s = util.do_wathershed(pred_a, pred_o)
            for itr in range(FLAGS.batch_size):
                utils.save_image(valid_m_annotations[itr].astype(np.uint8),
                                 "../" + FLAGS.logs_dir + "trained_images/ground_truth",
                                 name="gt_" + str(i * FLAGS.batch_size + itr) + "_m")
                utils.save_image(valid_o_annotations[itr].astype(np.uint8),
                                 "../" + FLAGS.logs_dir + "trained_images/ground_truth",
                                 name="gt_" + str(i * FLAGS.batch_size + itr) + "_o")
                utils.save_image(pred_a[itr].astype(np.uint8),
                                 "../" + FLAGS.logs_dir + "trained_images/prediction",
                                 name="pred_" + str(i * FLAGS.batch_size + itr) + "_m")
                utils.save_image(pred_o[itr].astype(np.uint8),
                                 "../" + FLAGS.logs_dir + "trained_images/prediction",
                                 name="pred_" + str(i * FLAGS.batch_size + itr) + "_o")
                print("Saved image: %d" % (i * FLAGS.batch_size + itr))
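# A numpy sketch of the masked, corner-weighted object loss defined above (toy shapes,
# illustrative values only): per-pixel cross-entropy is zeroed wherever the segmentation
# predicted background (binary_mask == 0) and divided by 100 * (gt + 1) elsewhere.
import numpy as np
cross_ent = np.random.rand(2, 4, 4)                          # per-pixel cross-entropy
binary_mask = (np.random.rand(2, 4, 4) > 0.5).astype(np.float32)
gt_objects = np.random.randint(0, 3, (2, 4, 4)).astype(np.float32)
weight_mask = (gt_objects + 1.0) * 100.0
loss_objects = 100.0 * np.mean(cross_ent * binary_mask / weight_mask)
print(loss_objects)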
def full_hyperparameter_test():
    counter = 0
    failed_networks = 0
    report = open("report.txt", 'a')
    report.write("\nSTART\n")
    for algorithm in [neuralnet.SGD, neuralnet.RMSPROP, neuralnet.ADAM]:
        for initialization in [neuralnet.NORMAL, neuralnet.UNIFORM]:
            for hidden_units in [8, 15, 50]:
                for l1reg in [0., 0.001]:
                    for l2reg in [0., 0.001]:
                        for loss_type in [pnas2018.CROSS_ENTROPY, pnas2018.MSE]:
                            learning_rate = 0.1
                            if algorithm == neuralnet.RMSPROP or algorithm == neuralnet.ADAM:
                                learning_rate *= 0.1
                            if hidden_units == 8:
                                learning_rate *= 2
                            elif hidden_units == 50:
                                learning_rate *= 0.5
                            elif loss_type == pnas2018.MSE and (
                                    algorithm != neuralnet.RMSPROP
                                    and algorithm != neuralnet.ADAM):
                                learning_rate *= 10
                            counter += 1
                            name = algorithm + initialization + str(hidden_units) + \
                                str(l1reg) + str(l2reg) + loss_type + \
                                "{:.3f}".format(learning_rate)
                            print("Hyperparameter test:" + name +
                                  '(' + str(counter) + ' out of 144)')
                            num_networks = 0
                            while num_networks < 50:
                                print(num_networks)
                                model, rng_avg_sequence = pnas2018.train(
                                    iterations=5000, algorithm=algorithm,
                                    size_hidden=hidden_units, learning_rate=learning_rate,
                                    l1reg=l1reg, l2reg=l2reg, loss_type=loss_type,
                                    initialization=initialization)
                                _, accuracy_totals = pnas2018.accuracy_test(model)
                                total_iterations = 5000
                                step = 5000
                                while (rng_avg_sequence < 0.22 or
                                       not np.array_equal(
                                           accuracy_totals,
                                           np.asarray([0.5, 0.5, 1, 1, 1, 1]))) \
                                        and total_iterations < 10000:
                                    model, rng_avg_sequence = pnas2018.train(
                                        model, iterations=step, algorithm=algorithm,
                                        size_hidden=hidden_units,
                                        learning_rate=learning_rate,
                                        l1reg=l1reg, l2reg=l2reg, loss_type=loss_type,
                                        initialization=initialization)
                                    _, accuracy_totals = pnas2018.accuracy_test(model)
                                    report.write(name + " extra iterations:" +
                                                 str(step) + '\n')
                                    total_iterations += step
                                    print("total iterations=" + str(total_iterations))
                                if np.array_equal(accuracy_totals,
                                                  np.asarray([0.5, 0.5, 1, 1, 1, 1])):
                                    num_networks += 1
                                else:
                                    print("-----------------------------------------"
                                          "!!!!!!!!!!!!!!NETWORK FAILED!!!!!!!!!!!!!!!"
                                          "----------------------------------------")
                                    report.write(name + " failed\n")
                                    failed_networks += 1
                                utils.save_object(
                                    algorithm + initialization + str(hidden_units) +
                                    str(l1reg) + str(l2reg) + loss_type, model)
    print(failed_networks)
    report.close()
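# Sanity-check sketch for the "144" in the progress message above: the grid is
# 3 algorithms x 2 initializations x 3 hidden sizes x 2 l1 x 2 l2 x 2 loss types.
import itertools
combos = list(itertools.product(range(3), range(2), range(3), range(2), range(2), range(2)))
assert len(combos) == 144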
def train_autoencoder(in_path, out_path, training_epochs=15,
                      learning_rate=0.1, batch_size=25):
    dataset = load_data(in_path, batch_size)
    da = learning_autoencoder(dataset, training_epochs, learning_rate, batch_size)
    utils.save_object(out_path, da)
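# Hypothetical call (paths and hyper-parameters below are assumptions for illustration):
# train_autoencoder("data/mnist.pkl.gz", "models/da.pkl",
#                   training_epochs=15, learning_rate=0.1, batch_size=25)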
def img_to_final(in_path, out_path):
    print(in_path + "/")
    final = td.read_im_action(in_path + "/")
    out_path = out_path.replace(".img", ".final")
    print(out_path)
    utils.save_object(final, out_path)
def save_data(data, targets, filename='processed_data_temporal.pkl'):
    new_data = [person.to_numpy() for person in data]
    targets = [target.to_numpy().squeeze(axis=1) for target in targets]
    save_object((new_data, targets), filename)
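# A minimal usage sketch, assuming `data` and `targets` are lists of pandas objects
# with to_numpy() (e.g., one DataFrame per person and one (n, 1) frame of targets);
# the example values below are hypothetical:
import pandas as pd
people = [pd.DataFrame({'hr': [60, 62], 'temp': [36.5, 36.7]})]
labels = [pd.DataFrame({'y': [0, 1]})]
# save_data(people, labels)  # writes processed_data_temporal.pkl via save_object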
sum_posts += len(df)
sum_deleted_posts += len(df.loc[['[deleted]']])

# iterate through each dataframe to analyze the percentages of posts
for j in range(len(df)):
    if not (df.iloc[j].subreddit in subreddits):
        subreddits.append(df.iloc[j].subreddit)
        subreddit_counts.append(1)
    else:
        subreddit_counts[subreddits.index(df.iloc[j].subreddit)] += 1

# sort the subreddits and counts by highest percentage
subreddit_counts, subreddits = (list(t) for t in zip(
    *sorted(zip(subreddit_counts, subreddits), reverse=True)))

# save subreddit_counts and subreddits
save_object(subreddit_counts, 'objects/subreddit_post_analysis/',
            model_name + "-subreddit_counts")
save_object(subreddits, 'objects/subreddit_post_analysis/',
            model_name + "-subreddits")

# open output file with header
csvfile = open("subreddit_post_analysis.csv", "a")
writer = csv.writer(csvfile, delimiter=',', quotechar='\"', quoting=csv.QUOTE_MINIMAL)
# writerow expects a sequence; passing a bare string would write one character per column
writer.writerow(["Percentage of Posts in each Subreddit:"])

# print the percentages of posts in each subreddit present
for subreddit in subreddits:
    writer.writerow([
        subreddit,
for i in range(CLASSIFIER_CONFIG['runs']):
    print("\nRun {}/{}\n".format(i + 1, CLASSIFIER_CONFIG['runs']))
    for j, c in enumerate(configurations):
        results[j].append(run(**c))
mean_results = np.array(results).mean(1).tolist()

def np_list_to_csv_string(npl):
    return ",".join(list(map(lambda f: "{:.4f}".format(f), npl)))

csv = []
for arr in mean_results:
    csv.append(np_list_to_csv_string(arr))
utils.save_object(mean_results, SAVE_FOLDER, 'results')
utils.save_string_to_file("\n".join(csv), SAVE_FOLDER, 'results.csv')
utils.save_dict(CLASSIFIER_CONFIG, SAVE_FOLDER, 'config.json')

data = np.array(mean_results)
x = range(data.shape[1])
fig, ax = plt.subplots()
plotter.hide_top_and_right_axis(ax)
ax.yaxis.grid(color='gray')
ax.set_xlabel('Time (seconds)')
ax.set_ylabel('Best polygon solution')
ax.set_prop_cycle(cycler('color', ['c', 'm', 'y', 'k', 'r', 'g', 'b']))
lines = []
def binary_to_raw(in_path, out_path):
    raw_action = binary.read_binary(in_path)
    out_path = out_path.replace(".bin", ".raw")
    print(out_path)
    utils.save_object(raw_action, out_path)
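# Hypothetical usage sketch (file names are assumptions, not from the original):
# binary_to_raw("data/jump_01.bin", "data/jump_01.bin")  # writes data/jump_01.raw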
def plot(self):
    line_width = 3
    cname = 'c_%s' % self.tag
    c = ROOT.TCanvas(cname, cname, 700, 700)
    c.cd()
    fr = c.DrawFrame(0., 0., 20., 1.2 * self.h_curr_data.GetMaximum(),
                     ';Number of Tracks;Events')

    ## construct legend
    leg = ROOT.TLegend(0.6, 0.2, 0.9, 0.5)
    leg.SetBorderSize(0)
    leg.SetFillColor(0)
    leg.SetFillStyle(0)

    ## get contributions
    h_bkg = self.fit_hists['bkg']
    h_bkg.Scale(self.nsamp_fit('bkg', corr=False) / h_bkg.Integral())
    if 'anti' in self.samples:
        h_anti = self.fit_hists['anti']
        h_anti.Scale(self.nsamp_fit('anti', corr=False) / h_anti.Integral())
    if self.orig_templates['sig_1p']:
        if self.fix_r1p3p:
            h_sig_1p = self.orig_templates['sig_1p'].Clone()
            h_sig_mp = self.orig_templates['sig_mp'].Clone()
        else:
            h_sig_1p = self.fit_hists['sig_1p']
            h_sig_mp = self.fit_hists['sig_mp']
        h_sig_1p.Scale(self.nsamp_fit('sig_1p', corr=False) / h_sig_1p.Integral())
        h_sig_mp.Scale(self.nsamp_fit('sig_mp', corr=False) / h_sig_mp.Integral())
    else:
        h_sig = self.fit_hists['sig']
        h_sig.Scale(self.nsamp_fit('sig', corr=False) / h_sig.Integral())

    ## draw error on total model
    ## TODO: fix this up to use orig hists
    h_total = None
    for s in self.samples:
        if not h_total:
            h_total = self.orig_templates[s].Clone('h_total')
            h_total.Scale(self.nsamp_fit(s) / h_total.Integral())
        else:
            htemp = self.orig_templates[s]
            h_total.Add(htemp, self.nsamp_fit(s) / htemp.Integral())
    h_total.SetFillColor(ROOT.kRed)
    h_total.SetLineColor(ROOT.kRed)
    h_total.SetLineStyle(1)
    h_total.SetLineWidth(0)
    h_total.SetMarkerSize(0)
    h_total.Draw("SAME,E2")

    fsum = 0.
    for s in self.samples:
        fsum += self.fsamp_fit(s)
    print 'fsum: ', fsum

    ## total model central value from fit
    #self.fit_hists['model'].Scale(self.ndata() / self.nhist(self.fit_hists['model']))
    #self.fit_hists['model'].Scale(self.ndata_curr() / self.nhist(self.fit_hists['model']))
    self.fit_hists['model'].SetLineWidth(line_width)
    self.fit_hists['model'].Draw("SAME")
    leg.AddEntry(self.h_curr_data, 'Data', 'PL')
    leg.AddEntry(self.fit_hists['model'], 'Model', 'L')
    leg.AddEntry(h_total, 'Model (stat.)', 'F')

    ## draw data
    self.h_curr_data.Draw("SAME")
    print 'nfit: ', self.ntot_fit()
    print 'nfit(corr): ', self.ntot_fit()
    print 'h_mod: ', self.fit_hists['model'].Integral()
    print 'h_tot: ', h_total.Integral()

    ## draw bkg
    h_bkg.SetLineColor(self.color_bkg)
    h_bkg.SetLineStyle(self.style_bkg)
    h_bkg.SetLineWidth(line_width)
    h_bkg.Draw("SAME,HIST")
    leg.AddEntry(h_bkg, 'Jet', 'L')
    if self.draw_true_hists:
        self.orig_templates['bkg'].Draw("SAME,HIST")

    ## draw anti
    if 'anti' in self.samples:
        h_anti.SetLineColor(self.color_anti)
        h_anti.SetLineStyle(self.style_anti)
        h_anti.SetLineWidth(line_width)
        h_anti.Draw("SAME,HIST")
        leg.AddEntry(h_anti, 'Lep', 'L')
        if self.draw_true_hists:
            self.orig_templates['anti'].Draw("SAME,HIST")

    ## 1p3p split signal
    if self.orig_templates['sig_1p']:
        h_sig_1p.SetLineColor(self.color_sig_1p)
        h_sig_1p.SetLineStyle(self.style_sig_1p)
        h_sig_1p.SetLineWidth(line_width)
        h_sig_mp.SetLineColor(self.color_sig_mp)
        h_sig_mp.SetLineStyle(self.style_sig_mp)
        h_sig_mp.SetLineWidth(line_width)
        h_sig_1p.Draw("SAME,HIST")
        h_sig_mp.Draw("SAME,HIST")
        leg.AddEntry(h_sig_1p, 'Tau (1p)', 'L')
        leg.AddEntry(h_sig_mp, 'Tau (mp)', 'L')
        if self.draw_true_hists:
            self.orig_templates['sig_1p'].Draw("SAME,HIST")
        if self.draw_true_hists:
            self.orig_templates['sig_mp'].Draw("SAME,HIST")
    else:
        h_sig = self.fit_hists['sig']
        h_sig.SetLineColor(self.color_sig)
        h_sig.SetLineStyle(self.style_sig)
        h_sig.SetLineWidth(line_width)
        h_sig.Draw("SAME,HIST")
        leg.AddEntry(h_sig, 'Tau', 'L')
        if self.draw_true_hists:
            self.orig_templates['sig'].Draw("SAME,HIST")

    leg.Draw()
    latex = ROOT.TLatex()
    latex.SetNDC()
    latex.SetTextFont(42)
    latex.DrawLatex(0.4, 0.85, self.tag)
    latex.DrawLatex(0.4, 0.75, '#chi^{2}/NDF = %.1f / %d' % (self.chi2, self.ndf))
    c.Update()
    c.SaveAs('fit_%s.eps' % self.tag)
    if self.fout:
        utils.save_object(c, self.fout)
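# Sketch of reading the saved canvas back, assuming utils.save_object writes into a ROOT
# file and the canvas keeps its 'c_<tag>' name (file and tag names here are hypothetical):
# f = ROOT.TFile.Open('fits.root')
# c = f.Get('c_nominal')
# c.Draw()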
def run(args):
    # Set default arguments
    args.g_fc_lay = args.fc_lay if args.g_fc_lay is None else args.g_fc_lay
    args.g_fc_uni = args.fc_units if args.g_fc_uni is None else args.g_fc_uni
    args.g_iters = args.iters if args.g_iters is None else args.g_iters
    # -if [log_per_task], reset all logs
    if args.log_per_task:
        args.prec_log = args.iters
        args.loss_log = args.iters
        args.sample_log = args.iters
    # -if XdG is selected but not the incremental task learning scenario, give error
    if (not args.scenario == "task") and args.gating_prop > 0:
        raise ValueError("'XdG' only works for the incremental task learning scenario.")
    # -if EWC, SI or XdG is selected together with 'feedback', give error
    if args.feedback and (args.ewc or args.si or args.gating_prop > 0):
        raise NotImplementedError(
            "EWC, SI and XdG are not supported with feedback connections.")
    # -if XdG is selected together with replay of any kind, give error
    if args.gating_prop > 0 and (not args.replay == "none"):
        raise NotImplementedError(
            "XdG is not supported with '{}' replay.".format(args.replay))
    # -create plots- and results-directories if needed
    if not os.path.isdir(args.r_dir):
        os.mkdir(args.r_dir)
    if args.pdf and not os.path.isdir(args.p_dir):
        os.mkdir(args.p_dir)

    # Use cuda?
    cuda = torch.cuda.is_available() and args.cuda
    device = torch.device("cuda" if cuda else "cpu")

    # Set random seeds
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    #-------------------------------------------------------------------------------------------------#

    #----------------#
    #----- DATA -----#
    #----------------#

    # Prepare data for chosen experiment
    (train_datasets, test_datasets), config, classes_per_task = get_multitask_experiment(
        name=args.experiment, scenario=args.scenario, tasks=args.tasks, data_dir=args.d_dir,
        verbose=True, exception=True if args.seed == 0 else False,
    )

    #-------------------------------------------------------------------------------------------------#

    #------------------------------#
    #----- MODEL (CLASSIFIER) -----#
    #------------------------------#

    # Define main model (i.e., classifier, if requested with feedback connections)
    if args.feedback:
        model = AutoEncoder(
            image_size=config['size'], image_channels=config['channels'],
            classes=config['classes'], fc_layers=args.fc_lay, fc_units=args.fc_units,
            z_dim=args.z_dim, fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False, fc_nl=args.fc_nl,
        ).to(device)
        model.lamda_pl = 1.  # --> so that this VAE is also trained to classify
    else:
        model = Classifier(
            image_size=config['size'], image_channels=config['channels'],
            classes=config['classes'], fc_layers=args.fc_lay, fc_units=args.fc_units,
            fc_drop=args.fc_drop, fc_nl=args.fc_nl,
            fc_bn=True if args.fc_bn == "yes" else False,
            excit_buffer=True if args.gating_prop > 0 else False,
        ).to(device)

    # Define optimizer (only include parameters that "requires_grad")
    model.optim_list = [{'params': filter(lambda p: p.requires_grad, model.parameters()),
                         'lr': args.lr}]
    model.optim_type = args.optimizer
    if model.optim_type in ("adam", "adam_reset"):
        model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
    elif model.optim_type == "sgd":
        model.optimizer = optim.SGD(model.optim_list)
    else:
        raise ValueError(
            "Unrecognized optimizer, '{}' is not currently a valid option".format(
                args.optimizer))

    # Set loss-function for reconstruction
    if args.feedback:
        model.recon_criterion = nn.BCELoss(size_average=True)

    #-------------------------------------------------------------------------------------------------#

    #-----------------------------------#
    #----- CL-STRATEGY: ALLOCATION -----#
    #-----------------------------------#

    # Elastic Weight Consolidation (EWC)
    if isinstance(model, ContinualLearner):
        model.ewc_lambda = args.ewc_lambda if args.ewc else 0
        model.fisher_n = args.fisher_n
        model.gamma = args.gamma
        model.online = args.online
        model.emp_FI = args.emp_fi

    # Synaptic Intelligence (SI)
    if isinstance(model, ContinualLearner):
        model.si_c = args.si_c if args.si else 0
        model.epsilon = args.epsilon

    # XdG: create for every task a "mask" for each hidden fully connected layer
    if isinstance(model, ContinualLearner) and args.gating_prop > 0:
        mask_dict = {}
        excit_buffer_list = []
        for task_id in range(args.tasks):
            mask_dict[task_id + 1] = {}
            for i in range(model.fcE.layers):
                layer = getattr(model.fcE, "fcLayer{}".format(i + 1)).linear
                if task_id == 0:
                    excit_buffer_list.append(layer.excit_buffer)
                n_units = len(layer.excit_buffer)
                gated_units = np.random.choice(n_units,
                                               size=int(args.gating_prop * n_units),
                                               replace=False)
                mask_dict[task_id + 1][i] = gated_units
        model.mask_dict = mask_dict
        model.excit_buffer_list = excit_buffer_list

    #-------------------------------------------------------------------------------------------------#

    #-------------------------------#
    #----- CL-STRATEGY: REPLAY -----#
    #-------------------------------#

    # Use distillation loss (i.e., soft targets) for replayed data? (and set temperature)
    model.replay_targets = "soft" if args.distill else "hard"
    model.KD_temp = args.temp

    # If needed, specify separate model for the generator
    train_gen = True if (args.replay == "generative" and not args.feedback) else False
    if train_gen:
        # -specify architecture
        generator = AutoEncoder(
            image_size=config['size'], image_channels=config['channels'],
            fc_layers=args.g_fc_lay, fc_units=args.g_fc_uni, z_dim=args.z_dim,
            classes=config['classes'], fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False, fc_nl=args.fc_nl,
        ).to(device)
        # -set optimizer(s)
        generator.optim_list = [{'params': filter(lambda p: p.requires_grad,
                                                  generator.parameters()),
                                 'lr': args.lr}]
        generator.optim_type = args.optimizer
        if generator.optim_type in ("adam", "adam_reset"):
            generator.optimizer = optim.Adam(generator.optim_list, betas=(0.9, 0.999))
        elif generator.optim_type == "sgd":
            generator.optimizer = optim.SGD(generator.optim_list)
        # -set reconstruction criterion
        generator.recon_criterion = nn.BCELoss(size_average=True)
    else:
        generator = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- REPORTING -----#
    #---------------------#

    # Get parameter-stamp (and print on screen)
    param_stamp = utils.get_param_stamp(
        args, model.name, verbose=True,
        replay=True if (not args.replay == "none") else False,
        replay_model_name=generator.name if (args.replay == "generative"
                                             and not args.feedback) else None,
    )

    # Print some model-characteristics on the screen
    # -main model
    print("\n")
    utils.print_model_info(model, title="MAIN MODEL")
    # -generator
    if generator is not None:
        utils.print_model_info(generator, title="GENERATOR")

    # Prepare for plotting
    # -open pdf
    pp = visual_plt.open_pdf("{}/{}.pdf".format(args.p_dir, param_stamp)) if args.pdf else None
    # -define [precision_dict] to keep track of performance during training for later plotting
    precision_dict = evaluate.initiate_precision_dict(args.tasks)
    # -visdom-settings
    if args.visdom:
        env_name = "{exp}{tasks}-{scenario}".format(exp=args.experiment, tasks=args.tasks,
                                                    scenario=args.scenario)
        graph_name = "{fb}{mode}{syn}{ewc}{XdG}".format(
            fb="1M-" if args.feedback else "",
            mode=args.replay,
            syn="-si{}".format(args.si_c) if args.si else "",
            ewc="-ewc{}{}".format(
                args.ewc_lambda,
                "-O{}".format(args.gamma) if args.online else ""
            ) if args.ewc else "",
            XdG="" if args.gating_prop == 0 else "-XdG{}".format(args.gating_prop))
        visdom = {'env': env_name, 'graph': graph_name}
    else:
        visdom = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- CALLBACKS -----#
    #---------------------#

    # Callbacks for reporting on and visualizing loss
    generator_loss_cbs = [
        cb._VAE_loss_cb(log=args.loss_log, visdom=visdom,
                        model=model if args.feedback else generator, tasks=args.tasks,
                        iters_per_task=args.g_iters,
                        replay=False if args.replay == "none" else True)
    ] if (train_gen or args.feedback) else [None]
    solver_loss_cbs = [
        cb._solver_loss_cb(log=args.loss_log, visdom=visdom, model=model, tasks=args.tasks,
                           iters_per_task=args.iters,
                           replay=False if args.replay == "none" else True)
    ] if (not args.feedback) else [None]

    # Callbacks for evaluating and plotting generated / reconstructed samples
    sample_cbs = [
        cb._sample_cb(log=args.sample_log, visdom=visdom, config=config,
                      test_datasets=test_datasets, sample_size=args.sample_n,
                      iters_per_task=args.g_iters)
    ] if (train_gen or args.feedback) else [None]

    # Callbacks for reporting and visualizing accuracy
    # -visdom (i.e., after each [prec_log])
    eval_cb = cb._eval_cb(
        log=args.prec_log, test_datasets=test_datasets, visdom=visdom,
        iters_per_task=args.iters, scenario=args.scenario,
        collate_fn=utils.label_squeezing_collate_fn, test_size=args.prec_n,
        classes_per_task=classes_per_task,
        task_mask=True if isinstance(model, ContinualLearner)
        and (args.gating_prop > 0) else False)
    # -pdf: for summary plots (i.e., only after each task)
    eval_cb_full = cb._eval_cb(
        log=args.iters, test_datasets=test_datasets, precision_dict=precision_dict,
        scenario=args.scenario, collate_fn=utils.label_squeezing_collate_fn,
        iters_per_task=args.iters, classes_per_task=classes_per_task,
        task_mask=True if isinstance(model, ContinualLearner)
        and (args.gating_prop > 0) else False)
    # -collect them in <lists>
    eval_cbs = [eval_cb, eval_cb_full]

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- TRAINING -----#
    #--------------------#

    print("--> Training:")
    # Keep track of training-time
    start = time.time()
    # Train model
    train_cl(
        model, train_datasets, replay_mode=args.replay, scenario=args.scenario,
        classes_per_task=classes_per_task, iters=args.iters, batch_size=args.batch,
        collate_fn=utils.label_squeezing_collate_fn,
        visualize=True if args.visdom else False,
        generator=generator, gen_iters=args.g_iters, gen_loss_cbs=generator_loss_cbs,
        sample_cbs=sample_cbs, eval_cbs=eval_cbs,
        loss_cbs=generator_loss_cbs if args.feedback else solver_loss_cbs,
    )
    # Get total training-time in seconds, and write to file
    training_time = time.time() - start
    time_file = open("{}/time-{}.txt".format(args.r_dir, param_stamp), 'w')
    time_file.write('{}\n'.format(training_time))
    time_file.close()

    #-------------------------------------------------------------------------------------------------#

    #----------------------#
    #----- EVALUATION -----#
    #----------------------#

    print('\n\n--> Evaluation ("incremental {} learning scenario"):'.format(args.scenario))

    # Generation (plot in pdf)
    if (pp is not None) and train_gen:
        evaluate.show_samples(generator, config, size=args.sample_n, pdf=pp)
    if (pp is not None) and args.feedback:
        evaluate.show_samples(model, config, size=args.sample_n, pdf=pp)

    # Reconstruction (plot in pdf)
    if (pp is not None) and (train_gen or args.feedback):
        for i in range(args.tasks):
            if args.feedback:
                evaluate.show_reconstruction(model, test_datasets[i], config, pdf=pp,
                                             task=i + 1)
            else:
                evaluate.show_reconstruction(generator, test_datasets[i], config, pdf=pp,
                                             task=i + 1)

    # Classifier (print on screen & write to file)
    if args.scenario == "task":
        precs = [
            evaluate.validate(
                model, test_datasets[i], verbose=False, test_size=None,
                task_mask=True if isinstance(model, ContinualLearner)
                and args.gating_prop > 0 else False,
                task=i + 1,
                allowed_classes=list(range(classes_per_task * i,
                                           classes_per_task * (i + 1))))
            for i in range(args.tasks)
        ]
    else:
        precs = [
            evaluate.validate(model, test_datasets[i], verbose=False, test_size=None,
                              task=i + 1)
            for i in range(args.tasks)
        ]
    print("\n Precision on test-set:")
    for i in range(args.tasks):
        print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
    average_precs = sum(precs) / args.tasks
    print('=> average precision over all {} tasks: {:.4f}\n'.format(args.tasks,
                                                                    average_precs))

    #-------------------------------------------------------------------------------------------------#

    #------------------#
    #----- OUTPUT -----#
    #------------------#

    # Average precision on full test set (no restrictions on which nodes can be
    # predicted: "incremental" / "singlehead")
    output_file = open("{}/prec-{}.txt".format(args.r_dir, param_stamp), 'w')
    output_file.write('{}\n'.format(average_precs))
    output_file.close()
    # Precision-dictionary
    file_name = "{}/dict-{}".format(args.r_dir, param_stamp)
    utils.save_object(precision_dict, file_name)

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- PLOTTING -----#
    #--------------------#

    # If requested, generate pdf
    if pp is not None:
        # -create list to store all figures to be plotted
        figure_list = []
        # -generate all figures (and store them in [figure_list])
        figure = visual_plt.plot_lines(
            precision_dict["all_tasks"], x_axes=precision_dict["x_task"],
            line_names=['task {}'.format(i + 1) for i in range(args.tasks)])
        figure_list.append(figure)
        figure = visual_plt.plot_lines(
            [precision_dict["average"]], x_axes=precision_dict["x_task"],
            line_names=['average all tasks so far'])
        figure_list.append(figure)
        # -add figures to pdf (and close this pdf)
        for figure in figure_list:
            pp.savefig(figure)
    # Close pdf
    if pp is not None:
        pp.close()
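# Hypothetical invocation sketch: this run() expects a fully populated argparse-style
# namespace; the flag names and values below are assumptions for illustration only.
# args = parser.parse_args(['--experiment', 'splitMNIST', '--scenario', 'task',
#                           '--tasks', '5', '--replay', 'generative'])
# run(args)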
def main():
    sets = generate_data_sets()
    utils.save_object(sets, 'data_sets')
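# Counterpart sketch for reading the saved object back, assuming utils exposes a
# load_object with the same name-based lookup used elsewhere in this collection:
# sets = utils.load_object('data_sets')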
def sean_solution(input_info, **kwargs):
    """
    This solution is designed to be performed as follows:
    1. Set up anything, such as arrays or sorting. Also break down info into components.
    2. objective can access anything from the main body part. As such, pass anything
       extra which may be needed (such as hyper-params) into objective as args (a dict)
       and perform the actual solving logic there.
    3. At the end, can search over hyper-parameters (the args) that are passed into
       objective. However, if doing a simple function, this part can be safely ignored.
    """
    # TODO main body part here - especially setup
    D, I, S, V, F, street_info, car_info = input_info
    s_dict = {}
    for s in street_info:
        s_dict[s[2]] = Street(*s)

    # Find busy streets
    for c in car_info:
        paths = c[1:]
        for p in paths:
            s_dict[p].cars_using += 1
    for s in s_dict.values():
        s.calc_score()
        # print(s)

    inter_list = []
    for i in range(I):
        intersection = Intersection(i)
        intersection.find_streets_at_intersection(street_info, D)
        intersection.weight_streets(s_dict)
        intersection.find_street_times()
        inter_list.append(intersection)
    # for I in inter_list:
    #     print(I)

    def objective(args):
        """Actually write the solution part here."""
        # TODO Parse out the args if needed
        val = args.get("name", None)

        # TODO Solve the thing here
        solution = []
        for I in inter_list:
            num_lights = len(I.best_times)
            street_names = [s[2] for s in I.streets]
            line_to_append = [I.id, num_lights]
            idxs = [i for i in range(len(street_names))]
            sorted_idx = [x for x, _ in sorted(zip(idxs, I.best_times),
                                               key=lambda pair: pair[1], reverse=True)]
            for i in sorted_idx:
                name = street_names[i]
                time_val = I.best_times[i]
                line_to_append.append((name, time_val))
            solution.append(line_to_append)
        score = 0

        # Return something flexible that can be used with hyperopt.
        # Main point is that it has score and solution.
        return {
            "loss": -score,
            "score": score,
            "solution": solution,
            "eval_time": time.time(),
            "status": STATUS_OK,
        }

    # Ignore this bit if not searching hyper-parameters!
    if kwargs.get("search", True):
        trials = Trials()
        # TODO Set up what values the args being searched over can have
        space = hp.choice(
            "args",
            [{"arg1": hp.lognormal("arg1", 1, 0.5), "arg2": hp.uniform("arg2", 1, 10)}],
        )
        # TODO If you know the best score achievable, pass loss_threshold=-best
        # Do hyper-param searching - possibly pass a per-filename num_evals
        best = fmin(
            objective,
            space=space,
            algo=tpe.suggest,
            max_evals=kwargs.get("num_evals", 10),
            trials=trials,
        )
        # Get the best hyper-params from fmin
        print("Best hyper-parameters found were:", best)
        args = space_eval(space, best)
        # Save the trials to disk.
        # These trials can be printed using print_trial_info in utils.
        out_name = os.path.join(
            kwargs["output_dir"], "_" + kwargs["input_name"][:-2] + "pkl"
        )
        save_object(trials, out_name)
    else:
        # By default, this is just an empty dictionary.
        args = kwargs.get("objective_args")
    result = objective(args)
    return result["solution"], result["score"]
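# Hypothetical driver sketch (the keyword names follow the kwargs read inside the
# function; the file names are assumptions):
# solution, score = sean_solution(parsed_input, search=True, num_evals=10,
#                                 output_dir='out', input_name='a.txt')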
def cluster_images(in_path, out_path):
    images = utils.read_array(in_path)
    clusters = clustering_mini_batch(images)
    utils.save_object(out_path, clusters)
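# Hypothetical usage sketch (paths are assumptions, not from the original):
# cluster_images("arrays/frames.npy", "models/clusters.pkl")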