def convert_matlab_pca_data(args, direction_matlab_name, direction_python_name):
    """Convert PCA results stored by MATLAB into a direction h5 file.

    The last checkpoint in ``args.model_folder`` (named
    ``args.prefix + str(args.max_epoch) + args.suffix``) is loaded only to
    obtain the reference weights/states whose layout the flat PCA vectors
    are reshaped to.

    Args:
        args: object providing ``dataset``, ``model``, ``model_folder``,
            ``prefix``, ``suffix``, ``max_epoch``, ``dir_type`` ('weights'
            or 'states') and ``ignore`` (e.g. 'biasbn').  Example values
            from the original inline comment: dataset='cifar10',
            model='resnet56', dir_type='weights', ignore='biasbn',
            prefix='model_', suffix='.t7', max_epoch=500.
        direction_matlab_name: path of the h5 file to read.  It must hold
            the datasets 'explained_variance_ratio_', 'explained_variance_',
            'directionx' and 'directiony'.
        direction_python_name: path of the h5 file to write (opened with
            mode 'w', so an existing file is replaced).

    Raises:
        ValueError: if ``args.dir_type`` is neither 'weights' nor 'states'.
    """
    if args.dir_type not in ('weights', 'states'):
        raise ValueError(
            "dir_type must be 'weights' or 'states', got %r" % (args.dir_type,))

    last_model_file = (args.model_folder + '/' + args.prefix
                       + str(args.max_epoch) + args.suffix)
    net = model_loader.load(args.dataset, args.model, last_model_file)

    # Reference structure that gives the flat vectors their per-tensor shapes.
    # The 'states' case previously read an undefined name; the state dict of
    # the loaded network is used instead.
    if args.dir_type == 'weights':
        reference = net_plotter.get_weights(net)
    else:
        reference = net.state_dict()

    # Read in the MATLAB PCA results; the context manager closes the file
    # even if one of the datasets is missing.
    with h5py.File(direction_matlab_name, 'r') as f:
        explained_variance_ratio = np.array(f['explained_variance_ratio_'])
        explained_variance = np.array(f['explained_variance_'])
        pc1 = np.array(f['directionx'])
        pc2 = np.array(f['directiony'])

    # Convert vectorized directions to the same shape as the model so they
    # can be saved in the h5 file.
    xdirection = npvec_to_tensorlist(pc1, reference)
    ydirection = npvec_to_tensorlist(pc2, reference)

    if args.ignore == 'biasbn':
        net_plotter.ignore_biasbn(xdirection)
        net_plotter.ignore_biasbn(ydirection)

    # The output file is only created once everything above succeeded, so a
    # failure does not leave a half-written direction file behind.
    with h5py.File(direction_python_name, 'w') as fpy:
        fpy['explained_variance_ratio_'] = explained_variance_ratio
        fpy['explained_variance_'] = explained_variance
        h5_util.write_list(fpy, 'xdirection', xdirection)
        h5_util.write_list(fpy, 'ydirection', ydirection)

    print('PCA directions saved in: %s' % direction_python_name)
def setup_PCA_directions(args, model_files):
    """
        Find PCA directions for the optimization path from the initial model
        to the final trained model.

        NOTE(review): ``w`` and ``s`` (the reference weights / state dict the
        checkpoints are diffed against) are neither parameters nor locals of
        this function; they must be resolvable from an enclosing or module
        scope -- confirm against the caller.
        NOTE(review): a second definition with the same name, taking
        ``(args, model_files, w, s)``, appears later in this source; if both
        live in one module the later one replaces this one.

        Args:
            args: object providing ``model_folder``, ``dir_type`` ('weights'
                or 'states'), ``ignore``, ``save_epoch``, ``dataset``,
                ``model`` and ``data_parallel``.
            model_files: iterable of checkpoint paths along the training path.

        Returns:
            dir_name: the h5 file that stores the directions.

        Raises:
            ValueError: if the directions have to be computed and
                ``args.dir_type`` is neither 'weights' nor 'states'.
    """
    # Name the .h5 file that stores the PCA directions.
    folder_name = args.model_folder + '/PCA_' + args.dir_type
    if args.ignore:
        folder_name += '_ignore=' + args.ignore
    folder_name += '_save_epoch=' + str(args.save_epoch)
    # makedirs instead of a shell `mkdir`: handles spaces, missing parents
    # and an already existing folder without spawning a shell.
    os.makedirs(folder_name, exist_ok=True)
    dir_name = folder_name + '/directions.h5'

    # Skip if a complete direction file exists.  The handle is always closed,
    # otherwise re-creating the file with mode 'w' below would hit a file
    # that is still open.
    if os.path.exists(dir_name):
        with h5py.File(dir_name, 'r') as f:
            if 'explained_variance_' in f.keys():
                return dir_name

    if args.dir_type not in ('weights', 'states'):
        raise ValueError(
            "dir_type must be 'weights' or 'states', got %r" % (args.dir_type,))
    use_weights = args.dir_type == 'weights'

    # Load models and prepare the optimization path matrix: one flattened
    # difference vector per checkpoint.
    matrix = []
    for model_file in model_files:
        print(model_file)
        net2 = model_loader.load(args.dataset, args.model, model_file,
                                 args.data_parallel)
        if use_weights:
            w2 = net_plotter.get_weights(net2)
            d = net_plotter.get_diff_weights(w, w2)
        else:
            s2 = net2.state_dict()
            d = net_plotter.get_diff_states(s, s2)
        if args.ignore == 'biasbn':
            net_plotter.ignore_biasbn(d)
        d = weights_to_vec(d)
        matrix.append(d.numpy())

    # Perform PCA on the optimization path matrix
    print("Perform PCA on the models")
    pca = PCA(n_components=2)
    pca.fit(np.array(matrix))
    pc1 = np.array(pca.components_[0])
    pc2 = np.array(pca.components_[1])
    print("angle between pc1 and pc2: " + str(cal_angle(pc1, pc2)))
    print(pca.explained_variance_ratio_)

    # Convert vectorized directions to the same shape as models to save in
    # the h5 file.
    if use_weights:
        xdirection = vec_to_weights(pc1, w)
        ydirection = vec_to_weights(pc2, w)
    else:
        xdirection = vec_to_states(pc1, s)
        ydirection = vec_to_states(pc2, s)

    if args.ignore == 'biasbn':
        net_plotter.ignore_biasbn(xdirection)
        net_plotter.ignore_biasbn(ydirection)

    with h5py.File(dir_name, 'w') as f:
        h5_util.write_list(f, 'xdirection', xdirection)
        h5_util.write_list(f, 'ydirection', ydirection)
        f['explained_variance_ratio_'] = pca.explained_variance_ratio_
        f['singular_values_'] = pca.singular_values_
        f['explained_variance_'] = pca.explained_variance_

    print('PCA directions saved in ' + dir_name)
    return dir_name
def setup_PCA_directions(args, model_files, w, s):
    """
        Find PCA directions for the optimization path from the initial model
        to the final trained model.

        Args:
            args: object providing ``model_folder``, ``dir_type`` ('weights'
                or 'states'), ``ignore``, ``save_epoch``, ``dataset`` and
                ``model``.
            model_files: iterable of checkpoint paths along the training path.
            w: reference weights; every checkpoint's weights are diffed
                against them when ``dir_type`` is 'weights'.
            s: reference state dict; used instead of ``w`` when ``dir_type``
                is 'states'.

        Returns:
            dir_name: the h5 file that stores the directions.

        Raises:
            ValueError: if the directions have to be computed and
                ``args.dir_type`` is neither 'weights' nor 'states'.
    """
    # Name the .h5 file that stores the PCA directions.
    folder_name = args.model_folder + '/PCA_' + args.dir_type
    if args.ignore:
        folder_name += '_ignore=' + args.ignore
    folder_name += '_save_epoch=' + str(args.save_epoch)
    # makedirs instead of a shell `mkdir`: handles spaces, missing parents
    # and an already existing folder without spawning a shell.
    os.makedirs(folder_name, exist_ok=True)
    dir_name = folder_name + '/directions.h5'

    # Skip if a complete direction file exists.  The handle is always closed,
    # otherwise re-creating the file with mode 'w' below would hit a file
    # that is still open.
    if os.path.exists(dir_name):
        with h5py.File(dir_name, 'r') as f:
            if 'explained_variance_' in f.keys():
                return dir_name

    if args.dir_type not in ('weights', 'states'):
        raise ValueError(
            "dir_type must be 'weights' or 'states', got %r" % (args.dir_type,))
    # Pick the reference structure once; it is used both for the diffs and
    # for reshaping the principal components afterwards.
    use_weights = args.dir_type == 'weights'
    reference = w if use_weights else s

    # Load models and prepare the optimization path matrix: one flattened
    # difference vector per checkpoint.
    matrix = []
    for model_file in model_files:
        print(model_file)
        net2 = model_loader.load(args.dataset, args.model, model_file)
        if use_weights:
            w2 = net_plotter.get_weights(net2)
            d = net_plotter.get_diff_weights(w, w2)
        else:
            s2 = net2.state_dict()
            d = net_plotter.get_diff_states(s, s2)
        if args.ignore == 'biasbn':
            net_plotter.ignore_biasbn(d)
        d = tensorlist_to_tensor(d)
        matrix.append(d.numpy())

    # Perform PCA on the optimization path matrix
    print("Perform PCA on the models")
    pca = PCA(n_components=2)
    pca.fit(np.array(matrix))
    pc1 = np.array(pca.components_[0])
    pc2 = np.array(pca.components_[1])
    print("angle between pc1 and pc2: %f" % cal_angle(pc1, pc2))
    print("pca.explained_variance_ratio_: %s" % str(pca.explained_variance_ratio_))

    # Convert vectorized directions to the same shape as models to save in
    # the h5 file.
    xdirection = npvec_to_tensorlist(pc1, reference)
    ydirection = npvec_to_tensorlist(pc2, reference)

    if args.ignore == 'biasbn':
        net_plotter.ignore_biasbn(xdirection)
        net_plotter.ignore_biasbn(ydirection)

    with h5py.File(dir_name, 'w') as f:
        h5_util.write_list(f, 'xdirection', xdirection)
        h5_util.write_list(f, 'ydirection', ydirection)
        f['explained_variance_ratio_'] = pca.explained_variance_ratio_
        f['singular_values_'] = pca.singular_values_
        f['explained_variance_'] = pca.explained_variance_

    print('PCA directions saved in: %s' % dir_name)
    return dir_name
def setup_othermodels_PCA_directions(args, ignore_embedding, model_files, w):
    """
        Find PCA directions for the optimization path from the initial model
        to the final trained model, for checkpoints read with ``torch.load``.

        Args:
            args: object providing ``save_dir``, ``lr`` (indexable; only
                ``lr[0]`` is used), ``optimizer`` and ``ignore``.
            ignore_embedding: when true, every state entry whose key contains
                'embed' is zeroed before the difference is taken.
            model_files: iterable of checkpoint paths; each checkpoint must
                hold its state dict under the key 'model'.
            w: reference list of tensors the checkpoint tensors are diffed
                against and whose layout the directions are reshaped to.

        Returns:
            dir_name: the h5 file that stores the directions.
    """
    # Name the .h5 file that stores the PCA directions.
    folder_name = 'fairseq_master' + '/' + args.save_dir + '/PCA_'
    folder_name += 'lr=' + str(args.lr[0])
    # The spelling '_optimier=' is part of the on-disk path; it is kept as is
    # so previously generated folders are still found.
    folder_name += '_optimier=' + str(args.optimizer)
    folder_name += '_ignore_embedding=' + str(ignore_embedding)
    if args.ignore:
        folder_name += '_ignoreBN'
    # makedirs instead of a shell `mkdir`: handles spaces, missing parents
    # and an already existing folder without spawning a shell.
    os.makedirs(folder_name, exist_ok=True)
    dir_name = folder_name + '/directions.h5'

    # Skip if a complete direction file exists.  The handle is always closed,
    # otherwise re-creating the file with mode 'w' below would hit a file
    # that is still open.
    if os.path.exists(dir_name):
        with h5py.File(dir_name, 'r') as f:
            if 'explained_variance_' in f.keys():
                return dir_name

    # Substrings marking state entries that are zeroed out before diffing.
    zeroed_markers = ['version', '_float_tensor']
    if ignore_embedding:
        zeroed_markers.append('embed')

    # Load models and prepare the optimization path matrix: one flattened
    # difference vector per checkpoint.
    matrix = []
    for model_file in model_files:
        print(model_file)
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        state = torch.load(
            model_file,
            map_location=lambda storage, location:
                torch.serialization.default_restore_location(storage, 'cpu'),
        )
        s2 = state['model']

        # Every entry is kept (so the list lines up with `w`); the marked
        # ones are zeroed in place first.
        w2 = []
        for key in s2:
            if any(marker in key for marker in zeroed_markers):
                s2[key].fill_(0)
            w2.append(s2[key])

        d = net_plotter.get_diff_weights(w, w2)
        if args.ignore == 'biasbn':
            net_plotter.ignore_biasbn(d)
        d = tensorlist_to_tensor(d)
        matrix.append(d.numpy())

    # Perform PCA on the optimization path matrix
    print("Perform PCA on the models")
    pca = PCA(n_components=2)
    pca.fit(np.array(matrix))
    pc1 = np.array(pca.components_[0])
    pc2 = np.array(pca.components_[1])
    print("angle between pc1 and pc2: %f" % cal_angle(pc1, pc2))
    print("pca.explained_variance_ratio_: %s" % str(pca.explained_variance_ratio_))

    # Convert vectorized directions to the same shape as `w` to save in the
    # h5 file.
    xdirection = npvec_to_tensorlist(pc1, w)
    ydirection = npvec_to_tensorlist(pc2, w)

    if args.ignore == 'biasbn':
        net_plotter.ignore_biasbn(xdirection)
        net_plotter.ignore_biasbn(ydirection)

    with h5py.File(dir_name, 'w') as f:
        h5_util.write_list(f, 'xdirection', xdirection)
        h5_util.write_list(f, 'ydirection', ydirection)
        f['explained_variance_ratio_'] = pca.explained_variance_ratio_
        f['singular_values_'] = pca.singular_values_
        f['explained_variance_'] = pca.explained_variance_

    print('PCA directions saved in: %s' % dir_name)
    return dir_name