def run_with_dogbreed_dataset(generator):
    """
    Runs the network on the dogbreed data set.

    :param generator: A lambda which takes a number of classes and a
        learning rate and returns a CNN instance.
    """
    # Load train, test, and validation datasets and merge train+valid,
    # since this network does not use a validation set.
    train_files, train_targets = load_dataset('CodeData/dogImages/train', onehot=False)
    valid_files, valid_targets = load_dataset('CodeData/dogImages/valid', onehot=False)
    test_files, y_test = load_dataset('CodeData/dogImages/test', onehot=False)
    train_files = np.concatenate((train_files, valid_files), axis=0)
    y_train = np.concatenate((train_targets, valid_targets), axis=0)

    # Shuffle the training data.
    train_files, y_train = unison_shuffle(train_files, y_train)

    # Load the images and normalize them by scaling pixel values into [0, 1].
    X_train = paths_to_tensor(train_files, data_format='channels_first').astype('float64') / 255
    X_test = paths_to_tensor(test_files, data_format='channels_first').astype('float64') / 255
    n_classes = np.unique(y_train).size

    # Create the network from the supplied generator,
    # e.g. ConvRslvqNetwork(n_classes, batch_size=256, learning_rate=0.001).
    nn = generator(n_classes, 0.001)

    # Train the network.
    nn.fit(X_train, y_train)

    # Evaluate on the test data.
    error = nn.evaluate(X_test, y_test)
    print('Test error rate: %.4f' % error)
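# A minimal usage sketch for run_with_dogbreed_dataset (not part of the
# original module). The generator is a factory taking (n_classes,
# learning_rate); the ConvRslvqNetwork keyword arguments are assumed from the
# example noted above, and any CNN with fit/evaluate would work.
def _example_dogbreed_run():
    run_with_dogbreed_dataset(
        lambda n_classes, lr: ConvRslvqNetwork(
            n_classes, batch_size=256, learning_rate=lr))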
def run_evaluation(args, query_embs, gallery_embs):
    # Load the query and gallery data from the CSV files.
    query_pids, query_fids = common.load_dataset(args.query_dataset, args.image_root, False)
    gallery_pids, gallery_fids = common.load_dataset(args.gallery_dataset, args.image_root, False)

    [mAP, rank1] = evaluate_embs(args, query_pids, query_fids, query_embs,
                                 gallery_pids, gallery_fids, gallery_embs)
    return [mAP, rank1]
def __init__(self, path="CodeData", big_features=True, filter='flatten',
             pca_dims=137, shuffle=False):
    """
    Initializes the class (and directly starts loading data).

    :param path: The path to the data set. Should look like this:
        - path
        |- dogImages
        ||- test
        |||- dog01
        ||||- 001.jpg
        ||||- ...
        ||- train
        ||| ...
        ||- valid
        ||| ...
    :param big_features: Whether to use big feature inputs (mean on axis 1,2
        instead of 2,3).
    :param filter: The function to use for reducing the data dimensions to two.
        Can be 'min', 'max', 'mean', 'pca' or 'flatten'.
    :param pca_dims: The dimensions to reduce to when using a PCA filter.
    :param shuffle: Whether to shuffle the input data.
    """
    self.mean_axis = (1, 2) if big_features else (2, 3)
    self.filter = DimensionReduction(filter, self.mean_axis, pca_dims)
    self.bottleneck_features = np.load(path + '/DogXceptionData.npz')
    self.train_set = self.filter.refine(self.bottleneck_features['train'])
    self.valid_set = self.filter.refine(self.bottleneck_features['valid'])
    self.test_set = self.filter.refine(self.bottleneck_features['test'])
    self.train_files, self.train_targets_indexed = load_dataset(path + '/dogImages/train', onehot=False)
    self.valid_files, self.valid_targets_indexed = load_dataset(path + '/dogImages/valid', onehot=False)
    self.test_files, self.test_targets_indexed = load_dataset(path + '/dogImages/test', onehot=False)

    # This part possibly shuffles, but always merges train and valid.
    if shuffle:
        combined_set = np.concatenate((self.train_set, self.valid_set, self.test_set), axis=0)
        combined_targets = np.concatenate((self.train_targets_indexed,
                                           self.valid_targets_indexed,
                                           self.test_targets_indexed), axis=0)
        combined_set_shuffled, combined_targets_shuffled = combined_shuffle(combined_set, combined_targets)
        # Take the test-set size once, before the attributes are reassigned.
        n_test = self.test_set.shape[0]
        self.train_set = combined_set_shuffled[n_test:]
        self.train_targets_indexed = combined_targets_shuffled[n_test:]
        self.test_set = combined_set_shuffled[:n_test]
        self.test_targets_indexed = combined_targets_shuffled[:n_test]
    else:
        self.train_set = np.concatenate((self.train_set, self.valid_set), axis=0)
        self.train_targets_indexed = np.concatenate((self.train_targets_indexed,
                                                     self.valid_targets_indexed), axis=0)

    self.tests = Queue(1)
    self.results = Queue(1)
    self.lock = threading.Lock()
    print("Prepared Tester: filter=%s big_features=%s pca_dims=%d"
          % (filter, str(big_features), pca_dims))
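# A hedged construction sketch for the loader above. The enclosing class is
# not shown in this snippet; `Tester` is inferred from the final print
# statement and is an assumption:
#
#     tester = Tester(path="CodeData", big_features=True, filter='pca',
#                     pca_dims=137, shuffle=False)
#     X_train, y_train = tester.train_set, tester.train_targets_indexed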
def main(argv):
    # Verify that parameters are set correctly.
    args = parser.parse_args(argv)
    gallery_pids, gallery_fids = common.load_dataset(args.gallery_dataset, None)

    log_file = os.path.join(exp_root, "recall_eval")
    logging.config.dictConfig(common.get_logging_dict(log_file))
    log = logging.getLogger('recall_eval')

    with h5py.File(args.gallery_embeddings, 'r') as f_gallery:
        gallery_embs = np.array(f_gallery['emb'])
        # gallery_embs_var = np.array(f_gallery['emb_var'])
        # print('gallery_embs_var.shape =>', gallery_embs_var.shape)

    num_clusters = len(np.unique(gallery_pids))
    print('Start clustering K = {}'.format(num_clusters))
    log.info(exp_root)
    kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(gallery_embs)
    log.info('NMI :: {}'.format(
        normalized_mutual_info_score(gallery_pids, kmeans.labels_)))
    # centroids, assignments = kmeans_cuda(gallery_embs, num_clusters, seed=3)
    # log.info('NMI :: {}'.format(normalized_mutual_info_score(gallery_pids, assignments)))
    log.info('Clustering complete')
    log.info('Eval with Recall-K')
    names, accs = evaluate_emb(gallery_embs, gallery_pids)
    log.info(names)
    log.info(accs)
def run_embedding(args, dataset):
    # Load the data from the CSV file.
    # pids - person ids (array corresponding to the images)
    # fids - array of the paths to the images ({str_})
    dataset = os.path.join(os.getcwd(), dataset)
    img_root = os.path.join(os.getcwd(), args.image_root)
    data_pids, data_fids = common.load_dataset(dataset, img_root, False)
    return calculate_emb_for_fids(args, data_fids)
def run_evaluation_with_args(args):
    # Load the query and gallery data from the CSV files.
    query_pids, query_fids = common.load_dataset(args.query_dataset, args.image_root, False)
    gallery_pids, gallery_fids = common.load_dataset(args.gallery_dataset, args.image_root, False)

    # Load the two embedding sets fully into memory.
    with h5py.File(os.path.join(args.experiment_root, args.query_embeddings), 'r') as f_query:
        query_embs = np.array(f_query['emb'])
    with h5py.File(os.path.join(args.experiment_root, args.gallery_embeddings), 'r') as f_gallery:
        gallery_embs = np.array(f_gallery['emb'])

    [mAP, rank1] = evaluate_embs(args, query_pids, query_fids, query_embs,
                                 gallery_pids, gallery_fids, gallery_embs)
    return [mAP, rank1]
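# A hedged usage sketch (not from the source): run_evaluation_with_args only
# reads attributes off `args`, so a types.SimpleNamespace with the same field
# names can stand in for an argparse result. All paths are placeholders, and
# evaluate_embs may read further fields not listed here.
def _example_run_evaluation():
    from types import SimpleNamespace
    args = SimpleNamespace(
        query_dataset='data/query.csv',        # placeholder path
        gallery_dataset='data/gallery.csv',    # placeholder path
        image_root='data/images',              # placeholder path
        experiment_root='experiments/run0',    # placeholder path
        query_embeddings='query_embeddings.h5',
        gallery_embeddings='gallery_embeddings.h5')
    mAP, rank1 = run_evaluation_with_args(args)
    print('mAP: {:.4f}, rank-1: {:.4f}'.format(mAP, rank1))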
def main():
    t = Timer()
    t.reset_cpu_time()

    logging.info("PREPARE DATASET")
    train_X1, train_Y1, test_X1, test_Y1 = load_light_dataset(
        images_root_dir, training_set_part=0.8, extension='jpeg')
    train_X2, _, test_X2, _ = load_dataset(options.dataset_bow)
    train_X3, _, test_X3, _ = load_dataset(options.dataset_entropy)
    train_X4, _, test_X4, _ = load_dataset_properties(options.dataset_androdet)
    nlp_X = process_trainset(train_X2, test_X2, 'int')
    entropy_X = process_trainset(train_X3, test_X3, 'float')
    androdet_X = process_trainset(train_X4, test_X4, 'float')

    logging.info("CREATE MODEL")
    model = create_model(activation=network['activation'],
                         optimizer=network['optimizer'],
                         learning_rate=network['learning_rate'],
                         output_size=train_Y1.shape[1],
                         merged_layers=network['merged_layers'])
    t.get_cpu_time("PREPARATION")

    logging.info("TRAIN")
    try:
        if options.train == 'true':
            raise Exception('Force train model')
        model = models.load_model(model_name)
    except:
        fit_one_at_time(model, train_X1, train_Y1, nlp_X, entropy_X, androdet_X,
                        epochs=network['epochs'])
        model.save(model_name)
    t.get_cpu_time("TRAIN")

    logging.info("TEST on TRAIN")
    score_one_at_time(model, train_X1, train_Y1, nlp_X, entropy_X, androdet_X)
    t.get_cpu_time("TEST on TRAIN")

    logging.info("TEST")
    score_one_at_time(model, test_X1, test_Y1, nlp_X, entropy_X, androdet_X)
    t.get_cpu_time("TEST")
def load_dataset(config_name, exp_uid):
    """Load a dataset from a config's name.

    The loaded dataset consists of:
        - original data (dataset, train_data, train_label),
        - encoded data from a pretrained model (train_mu, train_sigma),
        - index grouped by label (index_grouped_by_label), and
        - path of saving (save_path) for restoring pre-trained models.

    Args:
        config_name: A string indicating the name of config to parameterize the
            model that associates with the dataset.
        exp_uid: A string representing the unique id of experiment to be used in
            model that associates with the dataset.

    Returns:
        A DatasetBlob of above-mentioned components in the dataset.
    """
    config = common.load_config(config_name)
    this_config_is_wavegan = common.config_is_wavegan(config)
    if this_config_is_wavegan:
        return load_dataset_wavegan()

    model_uid = common.get_model_uid(config_name, exp_uid)

    dataset = common.load_dataset(config)
    train_data = dataset.train_data
    attr_train = dataset.attr_train
    save_path = dataset.save_path
    path_train = join(dataset.basepath, 'encoded', model_uid, 'encoded_train_data.npz')
    train = np.load(path_train)
    train_mu = train['mu']
    train_sigma = train['sigma']
    train_label = np.argmax(attr_train, axis=-1)  # from one-hot to label
    index_grouped_by_label = common.get_index_grouped_by_label(train_label)

    tf.logging.info('index_grouped_by_label size: %s',
                    [len(_) for _ in index_grouped_by_label])
    tf.logging.info('train loaded from %s', path_train)
    tf.logging.info('train shapes: mu = %s, sigma = %s',
                    train_mu.shape, train_sigma.shape)
    return DatasetBlob(
        train_data=train_data,
        train_label=train_label,
        train_mu=train_mu,
        train_sigma=train_sigma,
        index_grouped_by_label=index_grouped_by_label,
        save_path=save_path,
    )
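# A minimal usage sketch (assumptions: the config name and exp uid below are
# placeholders, and DatasetBlob is a namedtuple-style container whose fields
# match the keyword arguments used above):
def _example_load_dataset():
    blob = load_dataset('some_config_name', 'some_exp_uid')  # placeholder ids
    # The encoded means/sigmas line up row-wise with the original training data.
    assert blob.train_mu.shape[0] == blob.train_data.shape[0]
    # index_grouped_by_label[c] lists the row indices belonging to class c.
    first_class_rows = blob.index_grouped_by_label[0]
    print('class 0 has %d examples' % len(first_class_rows))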
def main():
    # Verify that parameters are set correctly.
    args = parser.parse_args([])

    # Possibly auto-generate the output filename.
    if args.filename is None:
        basename = os.path.basename(args.dataset)
        args.filename = os.path.splitext(basename)[0] + '_embeddings.h5'
    args.filename = os.path.join(args.experiment_root, args.filename)

    _, data_fids = common.load_dataset(args.dataset, args.image_root)
    net_input_size = (args.net_input_height, args.net_input_width)
    # pre_crop_size = (args.pre_crop_height, args.pre_crop_width)

    # Setup a tf Dataset containing all images.
    dataset = tf.data.Dataset.from_tensor_slices(data_fids)

    # Convert filenames to actual image tensors.
    dataset = dataset.map(
        lambda fid: common.fid_to_image(fid, tf.constant('dummy'),
                                        image_root=args.image_root,
                                        image_size=net_input_size),
        num_parallel_calls=args.loading_threads)
    dataset = dataset.batch(args.batch_size)

    # Overlap producing and consuming.
    dataset = dataset.prefetch(1)

    model = Trinet(args.embedding_dim)
    # lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    #     args.learning_rate, args.train_iterations - args.decay_start_iteration, 0.001)
    optimizer = tf.keras.optimizers.Adam()
    ckpt = tf.train.Checkpoint(step=tf.Variable(0), optimizer=optimizer, net=model)
    manager = tf.train.CheckpointManager(ckpt, args.experiment_root, max_to_keep=10)
    ckpt.restore(manager.latest_checkpoint)

    with h5py.File(args.filename, 'w') as f_out:
        emb_storage = np.zeros((len(data_fids), args.embedding_dim), np.float32)
        start_idx = 0
        for images, fids, pids in dataset:
            emb = model(images, training=False)
            emb_storage[start_idx:start_idx + len(emb)] = emb
            # Advance by the actual batch length, so a smaller final batch
            # does not leave a gap.
            start_idx += len(emb)
        emb_dataset = f_out.create_dataset('emb', data=emb_storage)
def load_embed_files(self):
    print("Load: ", self.query_dataset)
    self.query_pids, self.query_fids, self.query_views = common.load_dataset(self.query_dataset, None)
    print("Load: ", self.gallery_dataset)
    self.gallery_pids, self.gallery_fids, self.gallery_views = common.load_dataset(self.gallery_dataset, None)
    self.gallery_views = self.gallery_views.astype(int)
    self.query_views = self.query_views.astype(int)

    print("Load: ", self.query_embeddings)
    with h5py.File(self.query_embeddings, 'r') as f_query:
        self.query_embs = np.array(f_query['emb'])
    print("Load: ", self.gallery_embeddings)
    with h5py.File(self.gallery_embeddings, 'r') as f_gallery:
        self.gallery_embs = np.array(f_gallery['emb'])

    query_dim = self.query_embs.shape[1]
    gallery_dim = self.gallery_embs.shape[1]
    if query_dim != gallery_dim:
        raise ValueError('Shape mismatch between query ({}) and gallery ({}) '
                         'dimension'.format(query_dim, gallery_dim))
    print("==========================")
def main():
    t = Timer()
    t.reset_cpu_time()

    # Create training and test set.
    train_X, train_Y, test_X, test_Y = load_dataset(dataset, target=target)
    # The dataset begins with the file name, so drop the first column.
    train_X = train_X[:, 1:]
    test_X = test_X[:, 1:]
    train_size = train_X.shape[0]
    test_size = test_X.shape[0]
    input_size = train_X.shape[1]
    output_size = train_Y.shape[1]

    try:
        if options.train == 'true':
            raise Exception('Force train model')
        model = models.load_model(model_name)
    except:
        # Create the model.
        model = create_model(input_size=input_size,
                             output_size=output_size,
                             n_layers=network['n_layers'],
                             n_neurons=network['n_neurons'],
                             activation_function=network['activation'],
                             learning_rate=network['learning_rate'],
                             dropout_rate=network['dropout_rate'],
                             optimizer=network['optimizer'])
        t.get_cpu_time("PREPARATION")

        # Train the model.
        results = model.fit(
            x=train_X,
            y=train_Y,
            epochs=network['epochs'],
            # validation_data=(test_X, test_Y)
        )
        model.save(model_name)

    logging.info("TEST on TRAIN")
    t.reset_cpu_time()
    test(model, np.asarray(train_X).astype(np.float32), train_Y)
    t.get_cpu_time("TEST on TRAIN")

    logging.info("TEST")
    test(model, np.asarray(test_X).astype(np.float32), test_Y)
    t.get_cpu_time("TEST")
def testElementSerialize(self):
    """Element.serialize()"""
    a = Element(id='')

    from Tuke.geometry import Circle, Hole, Line
    from Tuke.pcb import Pin, Pad
    # a.add(Element(Id('asdf')))
    # a.add(Circle(1,'foo',id=rndId()))
    # a.add(Line((0.1,-0.1),(2,3),0.05,'foo',id=rndId()))
    # a.add(Hole(3,id=rndId()))
    # a.add(Pin(1,0.1,0.1,1,id=rndId()))
    # a.add(Pin(1,0.1,0.1,1,square=True,id=rndId()))
    # a.add(Pad((0,0),(0,1),0.5,0.1,0.6,id=rndId()))
    # a.subs[0].add(Element())

    from Tuke.geda import Footprint
    common.load_dataset('geda_footprints')
    f1 = Footprint(file=common.tmpd + '/plcc4-rgb-led')
    f2 = Footprint(file=common.tmpd + '/supercap_20mm')

    print f1.serialize(sys.stdout, full=True)
    print f2.serialize(sys.stdout, full=True)
def main():
    # Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder_inputs')
    parser.add_argument('--folder_outputs')
    parser.add_argument('--name')
    parser.add_argument('--dependence', type=int)
    parser.add_argument('--only_object', type=int)
    parser.add_argument('--num_objects_all', type=int, nargs='+')
    parser.add_argument('--color_divs', type=int, default=2)
    parser.add_argument('--seed', type=int, default=265076)
    args = parser.parse_args()

    # Load previous dataset
    name = '{}_{}'.format(args.name, '_'.join([str(n) for n in args.num_objects_all]))
    images, labels_ami, labels_mse = load_dataset(args.folder_inputs, name)
    images_new = {
        key: np.empty((val.shape[0], 3, *val.shape[2:]), dtype=val.dtype)
        for key, val in images.items()
    }
    labels_mse_new = {
        key: np.empty((*val.shape[:2], 3, *val.shape[3:]), dtype=val.dtype)
        for key, val in labels_mse.items()
    }

    # Create new dataset
    colors = [convert_color(idx, args.color_divs)
              for idx in range(pow(args.color_divs, 3))]
    colors_compatible = [compute_colors_compatible(color_ref, colors)
                         for color_ref in colors]
    np.random.seed(args.seed)
    for key in labels_mse_new:
        for idx in range(labels_mse_new[key].shape[0]):
            if args.dependence:
                images_new[key][idx], labels_mse_new[key][idx] = add_color_dep(
                    images[key][idx], labels_mse[key][idx], colors,
                    colors_compatible, args.only_object)
            else:
                images_new[key][idx], labels_mse_new[key][idx] = add_color_ind(
                    images[key][idx], labels_mse[key][idx], colors,
                    colors_compatible, args.only_object)
    create_dataset(os.path.join(args.folder_outputs, name), images_new,
                   labels_ami, labels_mse_new)
def main(dataset_path: str):
    dataset: COCO = load_dataset(dataset_path)
    image_dir = os.path.join(dataset_path, "images")
    imgs = random.choices(dataset.loadImgs(dataset.imgs.keys()), k=50)
    for img in imgs:
        print(img["file_name"])
        file_name = os.path.basename(img["file_name"])
        file_path = os.path.join(image_dir, file_name)
        im = Image.open(file_path)
        fig, ax = plt.subplots(1, figsize=(12, 8))
        ax.imshow(im)
        corresponding_anns = dataset.imgToAnns[img["id"]]
        print("Corresponding annotations =", corresponding_anns)
        for annotation in corresponding_anns:
            readable_name = dataset.loadCats(ids=annotation["category_id"])[0]["name"]
            # annotation["category_id"] = readable_name
            print(annotation["category_id"], "=", readable_name)
            add_annotation(ax, annotation)
        fig.show()
        plt.waitforbuttonpress()
        plt.close(fig)
def main():
    args = parser.parse_args()

    # Data augmentation
    global seq_geo
    global seq_img
    # Geometric transformations.
    seq_geo = iaa.SomeOf(
        (0, 5),
        [
            iaa.Fliplr(0.5),  # horizontally flip 50% of the images
            iaa.PerspectiveTransform(scale=(0, 0.075)),
            iaa.Affine(
                scale={"x": (0.8, 1.0), "y": (0.8, 1.0)},
                rotate=(-5, 5),
                translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)},
            ),  # rotate by -5 to +5 degrees
            iaa.Crop(pc=(0, 0.125)),  # crop images from each side by 0 to 12.5% (randomly chosen)
            iaa.CoarsePepper(p=0.01, size_percent=0.1)
        ],
        random_order=False)
    # Content transformations.
    seq_img = iaa.SomeOf(
        (0, 3),
        [
            iaa.GaussianBlur(sigma=(0, 1.0)),  # blur images with a sigma of 0 to 1.0
            iaa.ContrastNormalization(alpha=(0.9, 1.1)),
            iaa.Grayscale(alpha=(0, 0.2)),
            iaa.Multiply((0.9, 1.1))
        ])

    # We store all arguments in a json file. This has two advantages:
    # 1. We can always get back and see what exactly that experiment was
    # 2. We can resume an experiment as-is without needing to remember all flags.
    args_file = os.path.join(args.experiment_root, 'args.json')
    if args.resume:
        if not os.path.isfile(args_file):
            raise IOError('`args.json` not found in {}'.format(args_file))

        print('Loading args from {}.'.format(args_file))
        with open(args_file, 'r') as f:
            args_resumed = json.load(f)
        args_resumed['resume'] = True  # This would be overwritten.

        # When resuming, we not only want to populate the args object with the
        # values from the file, but we also want to check for some possible
        # conflicts between loaded and given arguments.
        for key, value in args.__dict__.items():
            if key in args_resumed:
                resumed_value = args_resumed[key]
                if resumed_value != value:
                    print('Warning: For the argument `{}` we are using the'
                          ' loaded value `{}`. The provided value was `{}`'
                          '.'.format(key, resumed_value, value))
                    args.__dict__[key] = resumed_value
            else:
                print('Warning: A new argument was added since the last run:'
                      ' `{}`. Using the new value: `{}`.'.format(key, value))
    else:
        # If the experiment directory exists already, we bail in fear.
        if os.path.exists(args.experiment_root):
            if os.listdir(args.experiment_root):
                print('The directory {} already exists and is not empty.'
                      ' If you want to resume training, append --resume to'
                      ' your call.'.format(args.experiment_root))
                exit(1)
        else:
            os.makedirs(args.experiment_root)

        # Store the passed arguments for later resuming and grepping in a nice
        # and readable format.
        with open(args_file, 'w') as f:
            json.dump(vars(args), f, ensure_ascii=False, indent=2, sort_keys=True)

    log_file = os.path.join(args.experiment_root, "train")
    logging.config.dictConfig(common.get_logging_dict(log_file))
    log = logging.getLogger('train')

    # Also show all parameter values at the start, for ease of reading logs.
    log.info('Training using the following parameters:')
    for key, value in sorted(vars(args).items()):
        log.info('{}: {}'.format(key, value))

    # Check them here, so they are not required when --resume-ing.
    if not args.train_set:
        parser.print_help()
        log.error("You did not specify the `train_set` argument!")
        sys.exit(1)
    if not args.image_root:
        parser.print_help()
        log.error("You did not specify the required `image_root` argument!")
        sys.exit(1)

    # Load the data from the CSV file.
    pids, fids = common.load_dataset(args.train_set, args.image_root)
    max_fid_len = max(map(len, fids))  # We'll need this later for logfiles.
    # Load feature embeddings.
    if args.hard_pool_size > 0:
        with h5py.File(args.train_embeddings, 'r') as f_train:
            train_embs = np.array(f_train['emb'])
            f_dists = scipy.spatial.distance.cdist(train_embs, train_embs)
            hard_ids = get_hard_id_pool(pids, f_dists, args.hard_pool_size)

    # Setup a tf.Dataset where one "epoch" loops over all PIDS.
    # PIDS are shuffled after every epoch and continue indefinitely.
    unique_pids = np.unique(pids)
    dataset = tf.data.Dataset.from_tensor_slices(unique_pids)
    dataset = dataset.shuffle(len(unique_pids))

    # Constrain the dataset size to a multiple of the batch-size, so that
    # we don't get overlap at the end of each epoch.
    if args.hard_pool_size == 0:
        dataset = dataset.take((len(unique_pids) // args.batch_p) * args.batch_p)
        dataset = dataset.repeat(None)  # Repeat forever. Funny way of stating it.
    else:
        dataset = dataset.repeat(None)  # Repeat forever. Funny way of stating it.
        dataset = dataset.map(lambda pid: sample_batch_ids_for_pid(
            pid, all_pids=pids, batch_p=args.batch_p, all_hard_pids=hard_ids))
        # Unbatch the P PIDs.
        dataset = dataset.apply(tf.contrib.data.unbatch())

    # For every PID, get K images.
    dataset = dataset.map(lambda pid: sample_k_fids_for_pid(
        pid, all_fids=fids, all_pids=pids, batch_k=args.batch_k))

    # Ungroup/flatten the batches for easy loading of the files.
    dataset = dataset.apply(tf.contrib.data.unbatch())

    # Convert filenames to actual image tensors.
    net_input_size = (args.net_input_height, args.net_input_width)
    pre_crop_size = (args.pre_crop_height, args.pre_crop_width)
    dataset = dataset.map(
        lambda fid, pid: common.fid_to_image(
            fid, pid, image_root=args.image_root,
            image_size=pre_crop_size if args.crop_augment else net_input_size),
        num_parallel_calls=args.loading_threads)

    # Augment the data if specified by the arguments.
    if not args.augment:
        # Re-load the images at the same size (the `im` element is replaced).
        dataset = dataset.map(
            lambda im, fid, pid: common.fid_to_image(
                fid, pid, image_root=args.image_root,
                image_size=pre_crop_size if args.crop_augment else net_input_size),  # Ergys
            num_parallel_calls=args.loading_threads)
        if args.flip_augment:
            dataset = dataset.map(lambda im, fid, pid: (
                tf.image.random_flip_left_right(im), fid, pid))
        if args.crop_augment:
            dataset = dataset.map(lambda im, fid, pid: (
                tf.random_crop(im, net_input_size + (3,)), fid, pid))
    else:
        dataset = dataset.map(
            lambda im, fid, pid: common.fid_to_image(
                fid, pid, image_root=args.image_root, image_size=net_input_size),
            num_parallel_calls=args.loading_threads)
        dataset = dataset.map(lambda im, fid, pid: (
            tf.py_func(augment_images, [im], [tf.float32]), fid, pid))
        dataset = dataset.map(lambda im, fid, pid: (
            tf.reshape(im[0], (args.net_input_height, args.net_input_width, 3)),
            fid, pid))

    # Group it back into PK batches.
    batch_size = args.batch_p * args.batch_k
    dataset = dataset.batch(batch_size)

    # Overlap producing and consuming for parallelism.
    dataset = dataset.prefetch(batch_size * 2)

    # Since we repeat the data infinitely, we only need a one-shot iterator.
    images, fids, pids = dataset.make_one_shot_iterator().get_next()

    # Create the model and an embedding head.
    model = import_module('nets.' + args.model_name)
    head = import_module('heads.' + args.head_name)

    # Feed the image through the model. The returned `body_prefix` will be used
    # further down to load the pre-trained weights for all variables with this
    # prefix.
    endpoints, body_prefix = model.endpoints(images, is_training=True)
    with tf.name_scope('head'):
        endpoints = head.head(endpoints, args.embedding_dim, is_training=True)

    # Create the loss in two steps:
    # 1. Compute all pairwise distances according to the specified metric.
    # 2. For each anchor along the first dimension, compute its loss.
    dists = loss.cdist(endpoints['emb'], endpoints['emb'], metric=args.metric)
    losses, train_top1, prec_at_k, _, neg_dists, pos_dists = loss.LOSS_CHOICES[args.loss](
        dists, pids, args.margin, batch_precision_at_k=args.batch_k - 1)

    # Count the number of active entries, and compute the total batch loss.
    num_active = tf.reduce_sum(tf.cast(tf.greater(losses, 1e-5), tf.float32))
    loss_mean = tf.reduce_mean(losses)

    # Some logging for tensorboard.
    tf.summary.histogram('loss_distribution', losses)
    tf.summary.scalar('loss', loss_mean)
    tf.summary.scalar('batch_top1', train_top1)
    tf.summary.scalar('batch_prec_at_{}'.format(args.batch_k - 1), prec_at_k)
    tf.summary.scalar('active_count', num_active)
    tf.summary.histogram('embedding_dists', dists)
    tf.summary.histogram('embedding_pos_dists', pos_dists)
    tf.summary.histogram('embedding_neg_dists', neg_dists)
    tf.summary.histogram('embedding_lengths',
                         tf.norm(endpoints['emb_raw'], axis=1))

    # Create the mem-mapped arrays in which we'll log all training detail in
    # addition to tensorboard, because tensorboard is annoying for detailed
    # inspection and actually discards data in histogram summaries.
    if args.detailed_logs:
        log_embs = lb.create_or_resize_dat(
            os.path.join(args.experiment_root, 'embeddings'),
            dtype=np.float32,
            shape=(args.train_iterations, batch_size, args.embedding_dim))
        log_loss = lb.create_or_resize_dat(
            os.path.join(args.experiment_root, 'losses'),
            dtype=np.float32,
            shape=(args.train_iterations, batch_size))
        log_fids = lb.create_or_resize_dat(
            os.path.join(args.experiment_root, 'fids'),
            dtype='S' + str(max_fid_len),
            shape=(args.train_iterations, batch_size))

    # These are collected here before we add the optimizer, because depending
    # on the optimizer, it might add extra slots, which are also global
    # variables, with the exact same prefix.
    model_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, body_prefix)

    # Define the optimizer and the learning-rate schedule.
    # Unfortunately, we get NaNs if we don't handle no-decay separately.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    if 0 <= args.decay_start_iteration < args.train_iterations:
        learning_rate = tf.train.exponential_decay(
            args.learning_rate,
            tf.maximum(0, global_step - args.decay_start_iteration),
            args.train_iterations - args.decay_start_iteration, 0.001)
    else:
        learning_rate = args.learning_rate
    tf.summary.scalar('learning_rate', learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    # Feel free to try others!
    # optimizer = tf.train.AdadeltaOptimizer(learning_rate)

    # Update_ops are used to update batchnorm stats.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.minimize(loss_mean, global_step=global_step)

    # Define a saver for the complete model.
    checkpoint_saver = tf.train.Saver(max_to_keep=0)

    with tf.Session() as sess:
        if args.resume:
            # In case we're resuming, simply load the full checkpoint to init.
            last_checkpoint = tf.train.latest_checkpoint(args.experiment_root)
            log.info('Restoring from checkpoint: {}'.format(last_checkpoint))
            checkpoint_saver.restore(sess, last_checkpoint)
        else:
            # But if we're starting from scratch, we may need to load some
            # variables from the pre-trained weights, and random init others.
            sess.run(tf.global_variables_initializer())
            if args.initial_checkpoint is not None:
                saver = tf.train.Saver(model_variables)
                saver.restore(sess, args.initial_checkpoint)

            # In any case, we also store this initialization as a checkpoint,
            # such that we could run exactly reproducible experiments.
            checkpoint_saver.save(sess,
                                  os.path.join(args.experiment_root, 'checkpoint'),
                                  global_step=0)

        merged_summary = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(args.experiment_root, sess.graph)

        start_step = sess.run(global_step)
        log.info('Starting training from iteration {}.'.format(start_step))

        # Finally, here comes the main-loop. This `Uninterrupt` is a handy
        # utility such that an iteration still finishes on Ctrl+C and we can
        # stop the training cleanly.
        with lb.Uninterrupt(sigs=[SIGINT, SIGTERM], verbose=True) as u:
            for i in range(start_step, args.train_iterations):

                # Compute gradients, update weights, store logs!
                start_time = time.time()
                _, summary, step, b_prec_at_k, b_embs, b_loss, b_fids = \
                    sess.run([train_op, merged_summary, global_step,
                              prec_at_k, endpoints['emb'], losses, fids])
                elapsed_time = time.time() - start_time

                # Compute the iteration speed and add it to the summary.
                # We did observe some weird spikes that we couldn't track down.
                summary2 = tf.Summary()
                summary2.value.add(tag='secs_per_iter', simple_value=elapsed_time)
                summary_writer.add_summary(summary2, step)
                summary_writer.add_summary(summary, step)

                if args.detailed_logs:
                    log_embs[i], log_loss[i], log_fids[i] = b_embs, b_loss, b_fids

                # Do a huge print out of the current progress.
                seconds_todo = (args.train_iterations - step) * elapsed_time
                log.info('iter:{:6d}, loss min|avg|max: {:.3f}|{:.3f}|{:6.3f}, '
                         'batch-p@{}: {:.2%}, ETA: {} ({:.2f}s/it)'.format(
                             step,
                             float(np.min(b_loss)),
                             float(np.mean(b_loss)),
                             float(np.max(b_loss)),
                             args.batch_k - 1,
                             float(b_prec_at_k),
                             timedelta(seconds=int(seconds_todo)),
                             elapsed_time))
                sys.stdout.flush()
                sys.stderr.flush()

                # Save a checkpoint of training every so often.
                if (args.checkpoint_frequency > 0 and
                        step % args.checkpoint_frequency == 0):
                    checkpoint_saver.save(sess, os.path.join(
                        args.experiment_root, 'checkpoint'), global_step=step)

                # Stop the main-loop at the end of the step, if requested.
                if u.interrupted:
                    log.info("Interrupted on request!")
                    break

        # Store one final checkpoint. This might be redundant, but it is crucial
        # in case intermediate storing was disabled and it saves a checkpoint
        # when the process was interrupted.
        checkpoint_saver.save(sess,
                              os.path.join(args.experiment_root, 'checkpoint'),
                              global_step=step)
def draw_feature(path='train_log/models/best-accuracy', scale=1, data_set='fashion_mnist'):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    checkpoint = torch.load(path, map_location=device)
    data_loader_train, data_loader_test, data_train, data_test = load_dataset(
        data_set=data_set)
    net = Model(n_feature)
    net.load_state_dict(checkpoint['model_state_dict'])
    exact_list = ['feature']
    feature_extractor = FeatureExtractor(net, exact_list)
    feature_extractor.to(device)

    # Get the classifier weights and their row-normalized versions.
    weight = checkpoint['model_state_dict']['pred.weight'].to('cpu').data
    weight_norm = weight / torch.norm(weight, dim=1, keepdim=True)
    print("weight_norm: ", torch.norm(weight, dim=1))

    # Extract features for the training set.
    features = []
    labels = []
    for data in data_loader_train:
        X_train, y_train = data
        X_train = Variable(X_train).to(device)
        outputs = feature_extractor(X_train)['feature'].data
        features.append(outputs)
        labels.append(y_train)
    features = torch.cat(features, dim=0).to('cpu').data
    features_norm = features / torch.norm(features, dim=1, keepdim=True)
    features = features.numpy()
    features_norm = features_norm.numpy()
    labels = torch.cat(labels, dim=0).to('cpu').data.numpy()

    # Draw the training features.
    label_list = get_label_list(data_set)
    plt.figure(1, figsize=(20, 20))
    plt.subplot(221)
    for i in range(10):
        plt.plot([0, scale * weight[i, 0]], [0, scale * weight[i, 1]],
                 color=color_list[i])
        feature = features[labels == i]
        plt.scatter(feature[:, 0], feature[:, 1], c=color_list[i], marker='.',
                    label=label_list[i], s=1)
    plt.legend()
    plt.subplot(223)
    for i in range(10):
        plt.plot([0, weight_norm[i, 0]], [0, weight_norm[i, 1]],
                 color=color_list[i])
        feature = features_norm[labels == i]
        plt.scatter(feature[:, 0], feature[:, 1], c=color_list[i], marker='.',
                    label=label_list[i], s=1)
    plt.legend()

    # Extract features for the test set.
    features = []
    labels = []
    for data in data_loader_test:
        X_test, y_test = data
        X_test = Variable(X_test).to(device)
        outputs = feature_extractor(X_test)['feature'].data
        features.append(outputs)
        labels.append(y_test)
    features = torch.cat(features, dim=0).to('cpu').data
    features_norm = features / torch.norm(features, dim=1, keepdim=True)
    features = features.numpy()
    features_norm = features_norm.numpy()
    labels = torch.cat(labels, dim=0).to('cpu').data.numpy()

    # Draw the test features.
    plt.subplot(222)
    for i in range(10):
        plt.plot([0, scale * weight[i, 0]], [0, scale * weight[i, 1]],
                 color=color_list[i])
        feature = features[labels == i]
        plt.scatter(feature[:, 0], feature[:, 1], c=color_list[i], marker='.',
                    label=label_list[i], s=1)
    plt.legend()
    plt.subplot(224)
    for i in range(10):
        plt.plot([0, weight_norm[i, 0]], [0, weight_norm[i, 1]],
                 color=color_list[i])
        feature = features_norm[labels == i]
        plt.scatter(feature[:, 0], feature[:, 1], c=color_list[i], marker='.',
                    label=label_list[i], s=1)
    plt.legend()

    title = os.path.basename(os.getcwd()) + '-' + os.path.basename(path)
    plt.suptitle(title)
    fname = 'train_log/feature-{}'.format(os.path.basename(path))
    figname = '{}.png'.format(fname)  # fname already contains 'train_log/'
    if os.path.exists(figname):
        os.remove(figname)
    plt.savefig(fname)
    plt.close('all')
# -*- coding: utf-8 -*-
from common import load_dataset
import numpy as np

if __name__ == '__main__':
    features, labels = load_dataset('seeds.tsv')
    print labels
    print len(labels)
    print features.shape

    name = sorted(set(labels))
    labels = np.array([name.index(ell) for ell in labels])
    print labels

    # Data processing: z-score normalization (zero mean, unit variance).
    features -= features.mean(0)
    features /= features.std(0)
    print features

    # figure4_5.py uses a plot to describe the classification;
    # here the computation is simply done in two-dimensional space.
    train_plot(features, labels)
def main():
    args = parser.parse_args()

    # We store all arguments in a json file. This has two advantages:
    # 1. We can always get back and see what exactly that experiment was
    # 2. We can resume an experiment as-is without needing to remember all flags.
    args_file = os.path.join(args.experiment_root, 'args.json')
    if args.resume:
        if not os.path.isfile(args_file):
            raise IOError('`args.json` not found in {}'.format(args_file))

        print('Loading args from {}.'.format(args_file))
        with open(args_file, 'r') as f:
            args_resumed = json.load(f)
        args_resumed['resume'] = True  # This would be overwritten.

        # When resuming, we not only want to populate the args object with the
        # values from the file, but we also want to check for some possible
        # conflicts between loaded and given arguments.
        for key, value in args.__dict__.items():
            if key in args_resumed:
                resumed_value = args_resumed[key]
                if resumed_value != value:
                    print('Warning: For the argument `{}` we are using the'
                          ' loaded value `{}`. The provided value was `{}`'
                          '.'.format(key, resumed_value, value))
                    command = input('Would you like to restore it? (yes/no)')
                    if command == 'yes':
                        args.__dict__[key] = resumed_value
                        print('For the argument `{}` we are using the loaded'
                              ' value `{}`.'.format(key, args.__dict__[key]))
                    else:
                        print('For the argument `{}` we are using the provided'
                              ' value `{}`.'.format(key, args.__dict__[key]))
            else:
                print('Warning: A new argument was added since the last run:'
                      ' `{}`. Using the new value: `{}`.'.format(key, value))
        os.remove(args_file)
        with open(args_file, 'w') as f:
            json.dump(vars(args), f, ensure_ascii=False, indent=2, sort_keys=True)
    else:
        # If the experiment directory exists already, we bail in fear.
        if os.path.exists(args.experiment_root):
            if os.listdir(args.experiment_root):
                print('The directory {} already exists and is not empty.'
                      ' If you want to resume training, append --resume to'
                      ' your call.'.format(args.experiment_root))
                exit(1)
        else:
            os.makedirs(args.experiment_root)

        # Store the passed arguments for later resuming and grepping in a nice
        # and readable format.
        with open(args_file, 'w') as f:
            json.dump(vars(args), f, ensure_ascii=False, indent=2, sort_keys=True)

    log_file = os.path.join(args.experiment_root, "train")
    logging.config.dictConfig(common.get_logging_dict(log_file))
    log = logging.getLogger('train')

    # Also show all parameter values at the start, for ease of reading logs.
    log.info('Training using the following parameters:')
    for key, value in sorted(vars(args).items()):
        log.info('{}: {}'.format(key, value))

    # Check them here, so they are not required when --resume-ing.
    if not args.train_set:
        parser.print_help()
        log.error("You did not specify the `train_set` argument!")
        sys.exit(1)
    if not args.image_root:
        parser.print_help()
        log.error("You did not specify the required `image_root` argument!")
        sys.exit(1)

    ##########################################################################
    # Prepare the training dataset.
    # Load the data from the TXT file; see common.load_dataset for details.
    pids_train, fids_train = common.load_dataset(args.train_set, args.image_root)
    max_fid_len = max(map(len, fids_train))  # We'll need this later for logfiles.

    # Setup a tf.Dataset where one "epoch" loops over all PIDS.
    # PIDS are shuffled after every epoch and continue indefinitely.
    unique_pids = np.unique(pids_train)
    dataset = tf.data.Dataset.from_tensor_slices(unique_pids)
    dataset = dataset.shuffle(len(unique_pids))

    # Constrain the dataset size to a multiple of the batch-size, so that
    # we don't get overlap at the end of each epoch.
    dataset = dataset.take((len(unique_pids) // args.batch_p) * args.batch_p)
    dataset = dataset.repeat(None)  # Repeat forever. Funny way of stating it.

    # For every PID, get K images. The dataset elements are now
    # [selected_fids, pid], as returned by 'sample_k_fids_for_pid'.
    dataset = dataset.map(lambda pid: sample_k_fids_for_pid(
        pid, all_fids=fids_train, all_pids=pids_train, batch_k=args.batch_k))

    # Ungroup/flatten the batches for easy loading of the files.
    dataset = dataset.apply(tf.contrib.data.unbatch())

    # Convert filenames to actual image tensors. The dataset elements are now
    # [selected_images, fid, pid], as returned by 'fid_to_image'.
    net_input_size = (args.net_input_height, args.net_input_width)
    pre_crop_size = (args.pre_crop_height, args.pre_crop_width)
    dataset = dataset.map(
        lambda fid, pid: common.fid_to_image(
            fid, pid, image_root=args.image_root,
            image_size=pre_crop_size if args.crop_augment else net_input_size),
        num_parallel_calls=args.loading_threads)

    # Augment the data if specified by the arguments.
    if args.flip_augment:
        dataset = dataset.map(lambda im, fid, pid: (
            tf.image.random_flip_left_right(im), fid, pid))
    if args.crop_augment:
        dataset = dataset.map(lambda im, fid, pid: (
            tf.random_crop(im, net_input_size + (3,)), fid, pid))

    # Group it back into PK batches.
    batch_size = args.batch_p * args.batch_k
    dataset = dataset.batch(batch_size)

    # Overlap producing and consuming for parallelism.
    dataset = dataset.prefetch(1)

    # Since we repeat the data infinitely, we only need a one-shot iterator.
    images_train, fids_train, pids_train = dataset.make_one_shot_iterator().get_next()

    ##########################################################################
    # Prepare the validation set.
    pids_val, fids_val = common.load_dataset(args.validation_set,
                                             args.validation_image_root)

    # Setup a tf.Dataset where one "epoch" loops over all PIDS.
    # PIDS are shuffled after every epoch and continue indefinitely.
    unique_pids_val = np.unique(pids_val)
    dataset_val = tf.data.Dataset.from_tensor_slices(unique_pids_val)
    dataset_val = dataset_val.shuffle(len(unique_pids_val))

    # Constrain the dataset size to a multiple of the batch-size, so that
    # we don't get overlap at the end of each epoch.
    dataset_val = dataset_val.take(
        (len(unique_pids_val) // args.batch_p) * args.batch_p)
    dataset_val = dataset_val.repeat(None)  # Repeat forever. Funny way of stating it.

    # For every PID, get K images. The dataset elements are now
    # [selected_fids, pid], as returned by 'sample_k_fids_for_pid'.
    dataset_val = dataset_val.map(lambda pid: sample_k_fids_for_pid(
        pid, all_fids=fids_val, all_pids=pids_val, batch_k=args.batch_k))

    # Ungroup/flatten the batches for easy loading of the files.
    dataset_val = dataset_val.apply(tf.contrib.data.unbatch())

    # Convert filenames to actual image tensors.
    net_input_size = (args.net_input_height, args.net_input_width)
    pre_crop_size = (args.pre_crop_height, args.pre_crop_width)
    dataset_val = dataset_val.map(
        lambda fid, pid: common.fid_to_image(
            fid, pid, image_root=args.validation_image_root,
            image_size=pre_crop_size if args.crop_augment else net_input_size),
        num_parallel_calls=args.loading_threads)
    # The dataset elements are now [selected_images, fid, pid], as returned
    # by 'fid_to_image'.

    # Augment the data if specified by the arguments.
    if args.flip_augment:
        dataset_val = dataset_val.map(lambda im, fid, pid: (
            tf.image.random_flip_left_right(im), fid, pid))
    if args.crop_augment:
        dataset_val = dataset_val.map(lambda im, fid, pid: (
            tf.random_crop(im, net_input_size + (3,)), fid, pid))

    # Group it back into PK batches.
    dataset_val = dataset_val.batch(batch_size)

    # Overlap producing and consuming for parallelism.
    dataset_val = dataset_val.prefetch(1)

    # Since we repeat the data infinitely, we only need a one-shot iterator.
    images_val, fids_val, pids_val = dataset_val.make_one_shot_iterator().get_next()

    ##########################################################################
    # Create the model and an embedding head.
    model = import_module('nets.' + args.model_name)
    head = import_module('heads.' + args.head_name)

    # Feed the image through the model. The returned `body_prefix` will be used
    # further down to load the pre-trained weights for all variables with this
    # prefix.
    input_images = tf.placeholder(
        dtype=tf.float32,
        shape=[None, args.net_input_height, args.net_input_width, 3],
        name='input')
    pids = tf.placeholder(dtype=tf.string, shape=[None], name='pids')
    fids = tf.placeholder(dtype=tf.string, shape=[None], name='fids')

    endpoints, body_prefix = model.endpoints(input_images, is_training=True)
    with tf.name_scope('head'):
        endpoints = head.head(endpoints, args.embedding_dim, is_training=True)

    # Create the loss in two steps:
    # 1. Compute all pairwise distances according to the specified metric.
    # 2. For each anchor along the first dimension, compute its loss.
    # dists = loss.cdist(endpoints['emb'], endpoints['emb'], metric=args.metric)
    # losses, train_top1, prec_at_k, _, neg_dists, pos_dists = loss.LOSS_CHOICES[args.loss](
    #     dists, pids, args.margin, batch_precision_at_k=args.batch_k-1)
    # '_' stands for the boolean matrix showing the top-K entries where the
    # correct match of the identities occurs; shape=(batch_size, K).

    # Changed: compute one triplet loss per feature branch plus the fusion layer.
    # Loss 1
    dists1 = loss.cdist(endpoints['feature1'], endpoints['feature1'], metric=args.metric)
    losses1, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists1, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists2 = loss.cdist(endpoints['feature2'], endpoints['feature2'], metric=args.metric)
    losses2, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists2, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists3 = loss.cdist(endpoints['feature3'], endpoints['feature3'], metric=args.metric)
    losses3, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists3, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists4 = loss.cdist(endpoints['feature4'], endpoints['feature4'], metric=args.metric)
    losses4, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists4, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists_fu = loss.cdist(endpoints['fusion_layer'], endpoints['fusion_layer'],
                          metric=args.metric)
    losses_fu, train_top1, prec_at_k, _, neg_dists, pos_dists = loss.LOSS_CHOICES[
        args.loss](dists_fu, pids, args.margin,
                   batch_precision_at_k=args.batch_k - 1)

    losses = losses1 + losses2 + losses3 + losses4 + losses_fu

    # Changed: previous single-loss variant.
    # losses_fu, train_top1, prec_at_k, _, neg_dists, pos_dists = loss.LOSS_CHOICES[args.loss](
    #     endpoints, pids, model_type=args.model_name, metric=args.metric,
    #     batch_precision_at_k=args.batch_k - 1)

    # Count the number of active entries, and compute the total batch loss.
    # Active entries are those where the positive-pair distance still exceeds
    # the negative-pair distance plus the margin.
    num_active = tf.reduce_sum(tf.cast(tf.greater(losses, 1e-5), tf.float32))
    loss_mean = tf.reduce_mean(losses)

    # Some logging for tensorboard.
    tf.summary.histogram('loss_distribution', losses)
    tf.summary.scalar('loss', loss_mean)
    tf.summary.scalar('batch_top1', train_top1)
    tf.summary.scalar('batch_prec_at_{}'.format(args.batch_k - 1), prec_at_k)
    tf.summary.scalar('active_count', num_active)
    # tf.summary.histogram('embedding_dists', dists)
    tf.summary.histogram('embedding_pos_dists', pos_dists)
    tf.summary.histogram('embedding_neg_dists', neg_dists)
    tf.summary.histogram('embedding_lengths',
                         tf.norm(endpoints['emb_raw'], axis=1))

    # Create the mem-mapped arrays in which we'll log all training detail in
    # addition to tensorboard, because tensorboard is annoying for detailed
    # inspection and actually discards data in histogram summaries.
    if args.detailed_logs:
        log_embs = lb.create_or_resize_dat(
            os.path.join(args.experiment_root, 'embeddings'),
            dtype=np.float32,
            shape=(args.train_iterations, batch_size, args.embedding_dim))
        log_loss = lb.create_or_resize_dat(
            os.path.join(args.experiment_root, 'losses'),
            dtype=np.float32,
            shape=(args.train_iterations, batch_size))
        log_fids = lb.create_or_resize_dat(
            os.path.join(args.experiment_root, 'fids'),
            dtype='S' + str(max_fid_len),
            shape=(args.train_iterations, batch_size))

    # These are collected here before we add the optimizer, because depending
    # on the optimizer, it might add extra slots, which are also global
    # variables, with the exact same prefix.
    model_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, body_prefix)

    # Define the optimizer and the learning-rate schedule.
    # Unfortunately, we get NaNs if we don't handle no-decay separately.
    # 'global_step' means the number of batches seen by the graph.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    if 0 <= args.decay_start_iteration < args.train_iterations:
        # Decay every 'lr_decay_steps' steps after 'decay_start_iteration'.
        learning_rate = tf.train.exponential_decay(
            args.learning_rate,
            tf.maximum(0, global_step - args.decay_start_iteration),
            # args.train_iterations - args.decay_start_iteration, args.weight_decay_factor)
            args.lr_decay_steps, args.lr_decay_factor, staircase=True)
    else:
        # The case when we set 'decay_start_iteration' to -1.
        learning_rate = args.learning_rate
    tf.summary.scalar('learning_rate', learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=1e-3)
    # Feel free to try others!
    # optimizer = tf.train.AdadeltaOptimizer(learning_rate)

    # Update_ops are used to update batchnorm stats.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.minimize(loss_mean, global_step=global_step)

    # Define a saver for the complete model.
    checkpoint_saver = tf.train.Saver(max_to_keep=0)

    with tf.Session(config=config) as sess:
        if args.resume:
            # In case we're resuming, simply load the full checkpoint to init.
            if args.checkpoint is None:
                last_checkpoint = tf.train.latest_checkpoint(args.experiment_root)
                log.info('Restoring from checkpoint: {}'.format(last_checkpoint))
                checkpoint_saver.restore(sess, last_checkpoint)
            else:
                ckpt_path = os.path.join(args.experiment_root, args.checkpoint)
                log.info('Restoring from checkpoint: {}'.format(args.checkpoint))
                checkpoint_saver.restore(sess, ckpt_path)
        else:
            # But if we're starting from scratch, we may need to load some
            # variables from the pre-trained weights, and random init others.
            sess.run(tf.global_variables_initializer())
            if args.initial_checkpoint is not None:
                saver = tf.train.Saver(model_variables)
                # Restore the pre-trained parameters from the online model.
                saver.restore(sess, args.initial_checkpoint)

            # In any case, we also store this initialization as a checkpoint,
            # such that we could run exactly reproducible experiments.
            checkpoint_saver.save(sess,
                                  os.path.join(args.experiment_root, 'checkpoint'),
                                  global_step=0)

        merged_summary = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(args.experiment_root, sess.graph)

        start_step = sess.run(global_step)
        log.info('Starting training from iteration {}.'.format(start_step))

        # Finally, here comes the main-loop. This `Uninterrupt` is a handy
        # utility such that an iteration still finishes on Ctrl+C and we can
        # stop the training cleanly.
        with lb.Uninterrupt(sigs=[SIGINT, SIGTERM], verbose=True) as u:
            for i in range(start_step, args.train_iterations):

                # Compute gradients, update weights, store logs!
                start_time = time.time()
                _, summary, step, b_prec_at_k, b_embs, b_loss, b_fids = \
                    sess.run([train_op, merged_summary, global_step,
                              prec_at_k, endpoints['emb'], losses, fids],
                             feed_dict={input_images: images_train.eval(),
                                        pids: pids_train.eval(),
                                        fids: fids_train.eval()})
                elapsed_time = time.time() - start_time

                # Compute the iteration speed and add it to the summary.
                # We did observe some weird spikes that we couldn't track down.
                summary2 = tf.Summary()
                summary2.value.add(tag='secs_per_iter', simple_value=elapsed_time)
                summary_writer.add_summary(summary2, step)
                summary_writer.add_summary(summary, step)

                if args.detailed_logs:
                    log_embs[i], log_loss[i], log_fids[i] = b_embs, b_loss, b_fids

                # Do a huge print out of the current progress.
                seconds_todo = (args.train_iterations - step) * elapsed_time
                log.info('iter:{:6d}, loss min|avg|max: {:.3f}|{:.3f}|{:6.3f}, '
                         'batch-p@{}: {:.2%}, ETA: {} ({:.2f}s/it)'.format(
                             step,
                             float(np.min(b_loss)),
                             float(np.mean(b_loss)),
                             float(np.max(b_loss)),
                             args.batch_k - 1,
                             float(b_prec_at_k),
                             timedelta(seconds=int(seconds_todo)),
                             elapsed_time))
                sys.stdout.flush()
                sys.stderr.flush()

                # Save a checkpoint of training every so often.
                if (args.checkpoint_frequency > 0 and
                        step % args.checkpoint_frequency == 0):
                    checkpoint_saver.save(sess, os.path.join(
                        args.experiment_root, 'checkpoint'), global_step=step)

                # Get validation results.
                if (args.validation_frequency > 0 and
                        step % args.validation_frequency == 0):
                    b_prec_at_k_val, b_loss, b_fids = \
                        sess.run([prec_at_k, losses, fids],
                                 feed_dict={input_images: images_val.eval(),
                                            pids: pids_val.eval(),
                                            fids: fids_val.eval()})
                    log.info('Validation @:{:6d} iteration, '
                             'loss min|avg|max: {:.3f}|{:.3f}|{:6.3f}, '
                             'batch-p@{}: {:.2%}'.format(
                                 step,
                                 float(np.min(b_loss)),
                                 float(np.mean(b_loss)),
                                 float(np.max(b_loss)),
                                 args.batch_k - 1,
                                 float(b_prec_at_k_val)))
                    sys.stdout.flush()
                    sys.stderr.flush()

                    summary3 = tf.Summary()
                    # Tag the mean validation loss under its own name (the
                    # original reused the 'secs_per_iter' tag here, which
                    # would mix the two curves).
                    summary3.value.add(tag='validation_loss',
                                       simple_value=float(np.mean(b_loss)))
                    summary_writer.add_summary(summary3, step)

                # Stop the main-loop at the end of the step, if requested.
                if u.interrupted:
                    log.info("Interrupted on request!")
                    break

        # Store one final checkpoint. This might be redundant, but it is crucial
        # in case intermediate storing was disabled and it saves a checkpoint
        # when the process was interrupted.
        checkpoint_saver.save(sess,
                              os.path.join(args.experiment_root, 'checkpoint'),
                              global_step=step)
def testMain(self):
    """main() accepts -m"""
    common.load_dataset("check_dataset_not_empty1")
    main(["-m", "test message"])
def main():
    # Verify that parameters are set correctly.
    args = parser.parse_args()

    # Possibly auto-generate the output filename.
    if args.filename is None:
        basename = os.path.basename(args.dataset)
        args.filename = os.path.splitext(basename)[0] + '_embeddings_viz.h5'
    args.filename = os.path.join(args.experiment_root, args.filename)

    # Load the args from the original experiment.
    args_file = os.path.join(args.experiment_root, 'args.json')
    if os.path.isfile(args_file):
        if not args.quiet:
            print('Loading args from {}.'.format(args_file))
        with open(args_file, 'r') as f:
            args_resumed = json.load(f)

        # Add arguments from training.
        for key, value in args_resumed.items():
            args.__dict__.setdefault(key, value)

        # A couple special-cases and sanity checks.
        if (args_resumed['crop_augment']) == (args.crop_augment is None):
            print('WARNING: crop augmentation differs between training and '
                  'evaluation.')
        args.image_root = args.image_root or args_resumed['image_root']
    else:
        raise IOError('`args.json` could not be found in: {}'.format(args_file))

    # Check a proper aggregator is provided if augmentation is used.
    if args.flip_augment or args.crop_augment == 'five':
        if args.aggregator is None:
            print('ERROR: Test time augmentation is performed but no aggregator'
                  ' was specified.')
            exit(1)
    else:
        if args.aggregator is not None:
            print('ERROR: No test time augmentation that needs aggregating is '
                  'performed but an aggregator was specified.')
            exit(1)

    if not args.quiet:
        print('Evaluating using the following parameters:')
        for key, value in sorted(vars(args).items()):
            print('{}: {}'.format(key, value))

    # Load the data from the CSV file.
    _, data_fids = common.load_dataset(args.dataset, args.image_root)
    net_input_size = (args.net_input_height, args.net_input_width)
    pre_crop_size = (args.pre_crop_height, args.pre_crop_width)

    # Setup a tf Dataset containing all images.
    dataset = tf.data.Dataset.from_tensor_slices(data_fids)

    # Convert filenames to actual image tensors.
    dataset = dataset.map(
        lambda fid: common.fid_to_image(
            fid, 'dummy', image_root=args.image_root,
            image_size=pre_crop_size if args.crop_augment else net_input_size),
        num_parallel_calls=args.loading_threads)

    # Augment the data if specified by the arguments.
    # `modifiers` is a list of strings that keeps track of which augmentations
    # have been applied, so that a human can understand it later on.
    modifiers = ['original']
    if args.flip_augment:
        dataset = dataset.map(flip_augment)
        dataset = dataset.apply(tf.contrib.data.unbatch())
        modifiers = [o + m for m in ['', '_flip'] for o in modifiers]

    if args.crop_augment == 'center':
        dataset = dataset.map(lambda im, fid, pid: (
            five_crops(im, net_input_size)[0], fid, pid))
        modifiers = [o + '_center' for o in modifiers]
    elif args.crop_augment == 'five':
        dataset = dataset.map(lambda im, fid, pid: (
            tf.stack(five_crops(im, net_input_size)), [fid] * 5, [pid] * 5))
        dataset = dataset.apply(tf.contrib.data.unbatch())
        modifiers = [o + m for o in modifiers for m in
                     ['_center', '_top_left', '_top_right',
                      '_bottom_left', '_bottom_right']]
    elif args.crop_augment == 'avgpool':
        modifiers = [o + '_avgpool' for o in modifiers]
    else:
        modifiers = [o + '_resize' for o in modifiers]

    # Group it back into PK batches.
    dataset = dataset.batch(args.batch_size)

    # Overlap producing and consuming.
    dataset = dataset.prefetch(1)

    images, fids, pids = dataset.make_one_shot_iterator().get_next()

    # Create the model and an embedding head.
    model = import_module('nets.' + args.model_name)
    head = import_module('heads.' + args.head_name)
    endpoints, body_prefix = model.endpoints(images, is_training=False)
    with tf.name_scope('head'):
        endpoints = head.head(endpoints, args.embedding_dim, is_training=False)

    with h5py.File(args.filename, 'w') as f_out, tf.Session(config=config) as sess:
        # Initialize the network/load the checkpoint.
        if args.checkpoint is None:
            checkpoint = tf.train.latest_checkpoint(args.experiment_root)
        else:
            checkpoint = os.path.join(args.experiment_root, args.checkpoint)
        if not args.quiet:
            print('Restoring from checkpoint: {}'.format(checkpoint))
        tf.train.Saver().restore(sess, checkpoint)

        # Go ahead and embed the whole dataset, with all augmented versions too.
        emb_storage = np.zeros(
            (len(data_fids) * len(modifiers), args.embedding_dim), np.float32)
        for start_idx in count(step=args.batch_size):
            try:
                emb, _attmaps, _images, _fids, _pids = sess.run([
                    endpoints['emb'], endpoints['attention_masks'],
                    images, fids, pids])
                print('\rEmbedded batch {}-{}/{}'.format(
                    start_idx, start_idx + len(emb), len(emb_storage)),
                    flush=True, end='')
                emb_storage[start_idx:start_idx + len(emb)] = emb

                if not os.path.exists('attention_maps'):
                    os.mkdir('attention_maps')
                _fids = [x.decode().split('/')[-1].split('.')[0] for x in _fids]
                _pids = [x.decode() for x in _pids]
                for batch_idx in range(len(_attmaps[0])):
                    print('process image {}'.format(_fids[batch_idx]))
                    cv2.imwrite(
                        os.path.join('attention_maps',
                                     '{}_origin.jpg'.format(_fids[batch_idx])),
                        _images[batch_idx].astype(np.uint8))
                    for att_idx in range(len(_attmaps)):
                        # normed_map = im_norm(_attmaps[att_idx][batch_idx])
                        # _enlarged = np.expand_dims(cv2.resize(_attmaps[att_idx][batch_idx], (224, 224), interpolation=cv2.INTER_CUBIC), 2)
                        # norm_enlarged = im_norm(np.expand_dims(cv2.resize(normed_map, (224, 224), interpolation=cv2.INTER_CUBIC), 2))
                        norm_enlarged = np.expand_dims(
                            cv2.resize(_attmaps[att_idx][batch_idx], (224, 224),
                                       interpolation=cv2.INTER_CUBIC), 2)
                        tmp_enlarged = norm_enlarged * 255
                        pseudo_enlarged = cv2.applyColorMap(
                            tmp_enlarged.astype(np.uint8), cv2.COLORMAP_JET)
                        norm_masked = norm_enlarged * _images[batch_idx]
                        pseudo_masked = pseudo_enlarged * 0.5 + _images[batch_idx] * 0.5
                        norm_enlarged = norm_enlarged * 255
                        cv2.imwrite(
                            os.path.join('attention_maps',
                                         '{}_norm_masked_b{}.jpg'.format(
                                             _fids[batch_idx], att_idx)),
                            norm_masked.astype(np.uint8))
                        cv2.imwrite(
                            os.path.join('attention_maps',
                                         '{}_pseudo_masked_b{}.jpg'.format(
                                             _fids[batch_idx], att_idx)),
                            pseudo_masked.astype(np.uint8))
                        # cv2.imwrite(os.path.join('attention_maps', '{}_gray_mask_b{}.jpg'.format(_fids[batch_idx], att_idx)), norm_enlarged.astype(np.uint8))
                        # cv2.imwrite(os.path.join('attention_maps', '{}_pseudo_mask_b{}.jpg'.format(_fids[batch_idx], att_idx)), pseudo_enlarged.astype(np.uint8))
            except tf.errors.OutOfRangeError:
                break  # This just indicates the end of the dataset.

        print()
        if not args.quiet:
            print("Done with embedding, aggregating augmentations...", flush=True)

        if len(modifiers) > 1:
            # Pull out the augmentations into a separate first dimension.
            emb_storage = emb_storage.reshape(len(data_fids), len(modifiers), -1)
            emb_storage = emb_storage.transpose((1, 0, 2))  # (Aug, FID, 128D)

            # Store the embedding of all individual variants too.
            emb_dataset = f_out.create_dataset('emb_aug', data=emb_storage)

            # Aggregate according to the specified parameter.
            emb_storage = AGGREGATORS[args.aggregator](emb_storage)

        # Store the final embeddings.
emb_dataset = f_out.create_dataset('emb', data=emb_storage) # Store information about the produced augmentation and in case no crop # augmentation was used, if the images are resized or avg pooled. f_out.create_dataset('augmentation_types', data=np.asarray(modifiers, dtype='|S'))
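# --- Illustrative sketch (not part of the original scripts) ---------------
# The AGGREGATORS lookup used by the embedding mains above is not defined in
# this excerpt. A minimal sketch of what it is assumed to look like: a map
# from the --aggregator flag to a callable that collapses the augmentation
# axis (axis 0 of the (n_augmentations, n_images, embedding_dim) array).
# The exact set of names is an assumption for illustration.
import numpy as np

AGGREGATORS = {
    'mean': lambda embs: np.mean(embs, axis=0),
    'max': lambda embs: np.max(embs, axis=0),
    # Re-normalizing after the mean keeps aggregated embeddings on the unit
    # sphere when the individual variants were L2-normalized.
    'normalized_mean': lambda embs: (
        np.mean(embs, axis=0) /
        np.linalg.norm(np.mean(embs, axis=0), axis=-1, keepdims=True)),
}

# Usage sketch: two augmented variants of three 4-dimensional embeddings.
_demo = np.random.rand(2, 3, 4).astype(np.float32)
assert AGGREGATORS['mean'](_demo).shape == (3, 4)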
def main(unused_argv): del unused_argv # Load Config config_name = FLAGS.config config_module = importlib.import_module(configs_module_prefix + '.%s' % config_name) config = config_module.config model_uid = common.get_model_uid(config_name, FLAGS.exp_uid) batch_size = config['batch_size'] # Load dataset dataset = common.load_dataset(config) save_path = dataset.save_path train_data = dataset.train_data attr_train = dataset.attr_train eval_data = dataset.eval_data attr_eval = dataset.attr_eval # Make the directory save_dir = os.path.join(save_path, model_uid) best_dir = os.path.join(save_dir, 'best') tf.gfile.MakeDirs(save_dir) tf.gfile.MakeDirs(best_dir) tf.logging.info('Save Dir: %s', save_dir) np.random.seed(10003) N_train = train_data.shape[0] N_eval = eval_data.shape[0] # Load Model tf.reset_default_graph() sess = tf.Session() m = model_dataspace.Model(config, name=model_uid) _ = m() # noqa # Create summaries y_true = m.labels y_pred = tf.cast(tf.greater(m.pred_classifier, 0.5), tf.int32) accuracy = tf.reduce_mean(tf.cast(tf.equal(y_true, y_pred), tf.float32)) tf.summary.scalar('Loss', m.classifier_loss) tf.summary.scalar('Accuracy', accuracy) scalar_summaries = tf.summary.merge_all() # Summary writers train_writer = tf.summary.FileWriter(save_dir + '/train', sess.graph) eval_writer = tf.summary.FileWriter(save_dir + '/eval', sess.graph) # Initialize sess.run(tf.global_variables_initializer()) i_start = 0 running_N_eval = 30 traces = { 'i': [], 'i_pred': [], 'loss': [], 'loss_eval': [], } best_eval_loss = np.inf classifier_lr_ = np.logspace(np.log10(FLAGS.lr), np.log10(1e-6), FLAGS.n_iters) # Train the Classifier for i in range(i_start, FLAGS.n_iters): start = (i * batch_size) % N_train end = start + batch_size batch = train_data[start:end] labels = attr_train[start:end] # train op res = sess.run( [m.train_classifier, m.classifier_loss, scalar_summaries], { m.x: batch, m.labels: labels, m.classifier_lr: classifier_lr_[i] }) tf.logging.info('Iter: %d, Loss: %.2e', i, res[1]) train_writer.add_summary(res[-1], i) if i % 10 == 0: # write training reconstructions if batch.shape[0] == batch_size: # write eval summaries start = (i * batch_size) % N_eval end = start + batch_size batch = eval_data[start:end] labels = attr_eval[start:end] if batch.shape[0] == batch_size: res_eval = sess.run([m.classifier_loss, scalar_summaries], { m.x: batch, m.labels: labels, }) traces['loss_eval'].append(res_eval[0]) eval_writer.add_summary(res_eval[-1], i) if i % FLAGS.n_iters_per_save == 0: smoothed_eval_loss = np.mean(traces['loss_eval'][-running_N_eval:]) if smoothed_eval_loss < best_eval_loss: # Save the best model best_eval_loss = smoothed_eval_loss save_name = os.path.join(best_dir, 'classifier_best_%s.ckpt' % model_uid) tf.logging.info('SAVING BEST! %s Iter: %d', save_name, i) m.classifier_saver.save(sess, save_name) with tf.gfile.Open( os.path.join(best_dir, 'best_ckpt_iters.txt'), 'w') as f: f.write('%d' % i)
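# --- Illustrative sketch (not part of the original scripts) ---------------
# The classifier above anneals its learning rate via np.logspace, i.e. a
# geometric decay from FLAGS.lr down to 1e-6 over n_iters steps, indexed by
# the iteration counter. A self-contained sketch of that schedule; the
# endpoint 1e-6 mirrors the code above, the other numbers are illustrative.
import numpy as np

def logspace_lr_schedule(lr_start, lr_end=1e-6, n_iters=1000):
    """One learning rate per iteration, decaying by a constant factor."""
    return np.logspace(np.log10(lr_start), np.log10(lr_end), n_iters)

lrs = logspace_lr_schedule(1e-3, n_iters=5)
ratios = lrs[1:] / lrs[:-1]
assert np.allclose(ratios, ratios[0])  # geometric: constant step ratio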
def main(argv): args = parser.parse_args(argv) if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu # We store all arguments in a json file. This has two advantages: # 1. We can always get back and see what exactly that experiment was # 2. We can resume an experiment as-is without needing to remember all flags. args_file = os.path.join(args.experiment_root, 'args.json') if args.resume: if not os.path.isfile(args_file): raise IOError('`args.json` not found in {}'.format(args_file)) print('Loading args from {}.'.format(args_file)) with open(args_file, 'r') as f: args_resumed = json.load(f) args_resumed['resume'] = True # This would be overwritten. # When resuming, we not only want to populate the args object with the # values from the file, but we also want to check for some possible # conflicts between loaded and given arguments. for key, value in args.__dict__.items(): if key in args_resumed: resumed_value = args_resumed[key] if resumed_value != value: print('Warning: For the argument `{}` we are using the' ' loaded value `{}`. The provided value was `{}`' '.'.format(key, resumed_value, value)) args.__dict__[key] = resumed_value else: print('Warning: A new argument was added since the last run:' ' `{}`. Using the new value: `{}`.'.format(key, value)) else: # If the experiment directory exists already, we bail in fear. if os.path.exists(args.experiment_root): if os.listdir(args.experiment_root): print('The directory {} already exists and is not empty.' ' If you want to resume training, append --resume to' ' your call.'.format(args.experiment_root)) exit(1) else: os.makedirs(args.experiment_root) # Store the passed arguments for later resuming and grepping in a nice # and readable format. with open(args_file, 'w') as f: json.dump(vars(args), f, ensure_ascii=False, indent=2, sort_keys=True) log_file = os.path.join(args.experiment_root, "train") logging.config.dictConfig(common.get_logging_dict(log_file)) log = logging.getLogger('train') # Also show all parameter values at the start, for ease of reading logs. log.info('Training using the following parameters:') for key, value in sorted(vars(args).items()): log.info('{}: {}'.format(key, value)) # Check them here, so they are not required when --resume-ing. if not args.train_set: parser.print_help() log.error("You did not specify the `train_set` argument!") sys.exit(1) if not args.image_root: parser.print_help() log.error("You did not specify the required `image_root` argument!") sys.exit(1) # Load the data from the CSV file. pids, fids = common.load_dataset(args.train_set, args.image_root) max_fid_len = max(map(len, fids)) # We'll need this later for logfiles. # Setup a tf.Dataset where one "epoch" loops over all PIDS. # PIDS are shuffled after every epoch and continue indefinitely. unique_pids = np.unique(pids) if len(unique_pids) < args.batch_p: unique_pids = np.tile(unique_pids, int(np.ceil(args.batch_p / len(unique_pids)))) dataset = tf.data.Dataset.from_tensor_slices(unique_pids) dataset = dataset.shuffle(len(unique_pids)) # Constrain the dataset size to a multiple of the batch-size, so that # we don't get overlap at the end of each epoch. dataset = dataset.take((len(unique_pids) // args.batch_p) * args.batch_p) dataset = dataset.repeat(None) # Repeat forever. Funny way of stating it. # For every PID, get K images. dataset = dataset.map(lambda pid: sample_k_fids_for_pid( pid, all_fids=fids, all_pids=pids, batch_k=args.batch_k)) # Ungroup/flatten the batches for easy loading of the files. 
dataset = dataset.apply(tf.contrib.data.unbatch()) # Convert filenames to actual image tensors. net_input_size = (args.net_input_height, args.net_input_width) pre_crop_size = (args.pre_crop_height, args.pre_crop_width) dataset = dataset.map(lambda fid, pid: common.fid_to_image( fid, pid, image_root=args.image_root, image_size=pre_crop_size if args.crop_augment else net_input_size), num_parallel_calls=args.loading_threads) # Augment the data if specified by the arguments. if args.flip_augment: dataset = dataset.map(lambda im, fid, pid: (tf.image.random_flip_left_right(im), fid, pid)) if args.crop_augment: dataset = dataset.map(lambda im, fid, pid: (tf.random_crop( im, net_input_size + (3, )), fid, pid)) # Group it back into PK batches. batch_size = args.batch_p * args.batch_k dataset = dataset.batch(batch_size) # Overlap producing and consuming for parallelism. dataset = dataset.prefetch(1) # Since we repeat the data infinitely, we only need a one-shot iterator. images, fids, pids = dataset.make_one_shot_iterator().get_next() # Create the model and an embedding head. model = import_module('nets.' + args.model_name) head = import_module('heads.' + args.head_name) # Feed the image through the model. The returned `body_prefix` will be used # further down to load the pre-trained weights for all variables with this # prefix. weight_decay = 10e-4 weights_regularizer = tf.contrib.layers.l2_regularizer(scale=weight_decay) endpoints, body_prefix = model.endpoints(images, is_training=True) with tf.name_scope('head'): endpoints = head.head(endpoints, args.embedding_dim, is_training=True, weights_regularizer=weights_regularizer) # Create the loss in two steps: # 1. Compute all pairwise distances according to the specified metric. # 2. For each anchor along the first dimension, compute its loss.
# batch_embedding = endpoints['emb'] batch_embedding = endpoints['emb'] if args.loss == 'semi_hard_triplet': triplet_loss = triplet_semihard_loss(batch_embedding, pids, args.margin) elif args.loss == 'hard_triplet': triplet_loss = batch_hard(batch_embedding, pids, args.margin, args.metric) elif args.loss == 'lifted_loss': triplet_loss = lifted_loss(pids, batch_embedding, margin=args.margin) elif args.loss == 'contrastive_loss': assert batch_size % 2 == 0 assert args.batch_k == 4 ## Can work with other number but will need tuning contrastive_idx = np.tile([0, 1, 4, 3, 2, 5, 6, 7], args.batch_p // 2) for i in range(args.batch_p // 2): contrastive_idx[i * 8:i * 8 + 8] += i * 8 contrastive_idx = np.expand_dims(contrastive_idx, 1) batch_embedding_ordered = tf.gather_nd(batch_embedding, contrastive_idx) pids_ordered = tf.gather_nd(pids, contrastive_idx) # batch_embedding_ordered = tf.Print(batch_embedding_ordered,[pids_ordered],'pids_ordered :: ',summarize=1000) embeddings_anchor, embeddings_positive = tf.unstack( tf.reshape(batch_embedding_ordered, [-1, 2, args.embedding_dim]), 2, 1) # embeddings_anchor = tf.Print(embeddings_anchor,[pids_ordered,embeddings_anchor,embeddings_positive,batch_embedding,batch_embedding_ordered],"Tensors ", summarize=1000) fixed_labels = np.tile([1, 0, 0, 1], args.batch_p // 2) # fixed_labels = np.reshape(fixed_labels,(len(fixed_labels),1)) # print(fixed_labels) labels = tf.constant(fixed_labels) # labels = tf.Print(labels,[labels],'labels ',summarize=1000) triplet_loss = contrastive_loss(labels, embeddings_anchor, embeddings_positive, margin=args.margin) elif args.loss == 'angular_loss': embeddings_anchor, embeddings_positive = tf.unstack( tf.reshape(batch_embedding, [-1, 2, args.embedding_dim]), 2, 1) # pids = tf.Print(pids, [pids], 'pids:: ', summarize=100) pids, _ = tf.unstack(tf.reshape(pids, [-1, 2, 1]), 2, 1) # pids = tf.Print(pids,[pids],'pids:: ',summarize=100) triplet_loss = angular_loss(pids, embeddings_anchor, embeddings_positive, batch_size=args.batch_p, with_l2reg=True) elif args.loss == 'npairs_loss': assert args.batch_k == 2 ## Single positive pair per class embeddings_anchor, embeddings_positive = tf.unstack( tf.reshape(batch_embedding, [-1, 2, args.embedding_dim]), 2, 1) pids, _ = tf.unstack(tf.reshape(pids, [-1, 2, 1]), 2, 1) pids = tf.reshape(pids, [-1]) triplet_loss = npairs_loss(pids, embeddings_anchor, embeddings_positive) else: raise NotImplementedError('loss function {} NotImplemented'.format( args.loss)) loss_mean = tf.reduce_mean(triplet_loss) # These are collected here before we add the optimizer, because depending # on the optimizer, it might add extra slots, which are also global # variables, with the exact same prefix. model_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, body_prefix) # Define the optimizer and the learning-rate schedule. # Unfortunately, we get NaNs if we don't handle no-decay separately. 
global_step = tf.Variable(0, name='global_step', trainable=False) if 0 <= args.decay_start_iteration < args.train_iterations: learning_rate = tf.train.polynomial_decay(args.learning_rate, global_step, args.train_iterations, end_learning_rate=1e-7, power=1) # note: this decay runs from step 0; decay_start_iteration only gates whether decay is used at all. else: learning_rate = args.learning_rate if args.optimizer == 'adam': optimizer = tf.train.AdamOptimizer(learning_rate) elif args.optimizer == 'momentum': optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9) else: raise NotImplementedError('Invalid optimizer {}'.format( args.optimizer)) # # learning_rate = tf.train.polynomial_decay(args.learning_rate, global_step, # args.train_iterations, end_learning_rate= 1e-7, # power=1) # # Feel free to try others! # optimizer = tf.train.AdadeltaOptimizer(learning_rate) # Update_ops are used to update batchnorm stats. with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)): train_op = optimizer.minimize(loss_mean, global_step=global_step) # Define a saver for the complete model. checkpoint_saver = tf.train.Saver(max_to_keep=2) gpu_options = tf.GPUOptions(allow_growth=True) gpu_config = tf.ConfigProto(gpu_options=gpu_options) with tf.Session(config=gpu_config) as sess: if args.resume: # In case we're resuming, simply load the full checkpoint to init. last_checkpoint = tf.train.latest_checkpoint(args.experiment_root) if last_checkpoint is None: print('Resume requested, but no previous checkpoint was found; initializing from scratch.') # But if we're starting from scratch, we may need to load some # variables from the pre-trained weights, and random init others. sess.run(tf.global_variables_initializer()) if args.initial_checkpoint is not None: saver = tf.train.Saver(model_variables) saver.restore(sess, args.initial_checkpoint) # In any case, we also store this initialization as a checkpoint, # such that we could run exactly reproducible experiments. checkpoint_saver.save(sess, os.path.join(args.experiment_root, 'checkpoint'), global_step=0) else: log.info( 'Restoring from checkpoint: {}'.format(last_checkpoint)) checkpoint_saver.restore(sess, last_checkpoint) else: # But if we're starting from scratch, we may need to load some # variables from the pre-trained weights, and random init others. sess.run(tf.global_variables_initializer()) if args.initial_checkpoint is not None: saver = tf.train.Saver(model_variables) saver.restore(sess, args.initial_checkpoint) # In any case, we also store this initialization as a checkpoint, # such that we could run exactly reproducible experiments. checkpoint_saver.save(sess, os.path.join(args.experiment_root, 'checkpoint'), global_step=0) start_step = sess.run(global_step) log.info('Starting training from iteration {}.'.format(start_step)) # Finally, here comes the main-loop. This `Uninterrupt` is a handy # utility such that an iteration still finishes on Ctrl+C and we can # stop the training cleanly. with lb.Uninterrupt(sigs=[SIGINT, SIGTERM], verbose=True) as u: for i in range(start_step, args.train_iterations): # Compute gradients, update weights, store logs! start_time = time.time() _, step, b_embs, b_loss, b_fids = \ sess.run([train_op, global_step, endpoints['emb'], triplet_loss, fids]) elapsed_time = time.time() - start_time # Do a huge print out of the current progress.
seconds_todo = (args.train_iterations - step) * elapsed_time log.info( 'iter:{:6d}, loss min|avg|max: {:.3f}|{:.3f}|{:6.3f}, ETA: {} ({:.2f}s/it)' .format( step, float(np.min(b_loss)), float(np.mean(b_loss)), float(np.max(b_loss)), # args.batch_k - 1, float(b_prec_at_k), timedelta(seconds=int(seconds_todo)), elapsed_time)) sys.stdout.flush() sys.stderr.flush() # Save a checkpoint of training every so often. if (args.checkpoint_frequency > 0 and step % args.checkpoint_frequency == 0): checkpoint_saver.save(sess, os.path.join(args.experiment_root, 'checkpoint'), global_step=step) # Stop the main-loop at the end of the step, if requested. if u.interrupted: log.info("Interrupted on request!") break # Store one final checkpoint. This might be redundant, but it is crucial # in case intermediate storing was disabled and it saves a checkpoint # when the process was interrupted. checkpoint_saver.save(sess, os.path.join(args.experiment_root, 'checkpoint'), global_step=step)
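# --- Illustrative sketch (not part of the original scripts) ---------------
# `batch_hard` (selected via --loss above) mines, for each anchor in the PK
# batch, the farthest same-PID sample (hardest positive) and the closest
# different-PID sample (hardest negative). A minimal NumPy sketch of that
# mining with a plain margin hinge; the project's own loss.py version may
# differ in details such as soft-margin handling.
import numpy as np

def batch_hard_np(dists, pids, margin=0.2):
    """dists: (B, B) pairwise distances; pids: (B,) identity labels."""
    pids = np.asarray(pids)
    same = pids[:, None] == pids[None, :]
    hardest_pos = np.where(same, dists, -np.inf).max(axis=1)
    hardest_neg = np.where(~same, dists, np.inf).min(axis=1)
    return np.maximum(hardest_pos - hardest_neg + margin, 0.0)

# Toy batch: P=2 identities with K=2 images each.
d = np.array([[0.0, 0.1, 0.9, 0.8],
              [0.1, 0.0, 0.7, 0.9],
              [0.9, 0.7, 0.0, 0.2],
              [0.8, 0.9, 0.2, 0.0]])
print(batch_hard_np(d, [0, 0, 1, 1]))  # zero loss for well-separated pairs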
def embed_csv_file(self, image_root, csv_file, emb_file, loading_threads=8, flip=False, crop=False, aggregator='mean'): data_ids, data_fids, data_fols = common.load_dataset( csv_file, image_root) data_ids = data_ids.astype(np.int32) data_fols = data_fols.astype(np.int32) dataset = tf.data.Dataset.from_tensor_slices(data_fids) dataset = dataset.map(lambda fid: common.fid_to_image( fid, tf.constant('dummy'), image_root=image_root, image_size=self.pre_crop_size if self.crop_augment else self.net_input_size), num_parallel_calls=loading_threads) # Augment the data if specified by the arguments. # `modifiers` is a list of strings that keeps track of which augmentations # have been applied, so that a human can understand it later on. modifiers = ['original'] if flip: dataset = dataset.map(Embedder.flip_augment) dataset = dataset.apply(tf.contrib.data.unbatch()) modifiers = [o + m for m in ['', '_flip'] for o in modifiers] if crop == 'center': dataset = dataset.map(lambda im, fid, pid: (Embedder.five_crops( im, self.net_input_size)[0], fid, pid)) modifiers = [o + '_center' for o in modifiers] elif crop == 'five': dataset = dataset.map(lambda im, fid, pid: ( tf.stack(Embedder.five_crops(im, self.net_input_size)), tf.stack([fid] * 5), tf.stack([pid] * 5))) dataset = dataset.apply(tf.contrib.data.unbatch()) modifiers = [ o + m for o in modifiers for m in [ '_center', '_top_left', '_top_right', '_bottom_left', '_bottom_right' ] ] elif crop == 'avgpool': modifiers = [o + '_avgpool' for o in modifiers] else: modifiers = [o + '_resize' for o in modifiers] # Group it back into PK batches. dataset = dataset.batch(self.batch_size) # Overlap producing and consuming. dataset = dataset.prefetch(1) images, _, _ = dataset.make_one_shot_iterator().get_next() endpoints, body_prefix = self.model.endpoints(images, is_training=False) with tf.name_scope('head'): endpoints = self.head.head(endpoints, self.embedding_dim, is_training=False) # emb_file = os.path.join(self.exp_root, emb_file) print("Save h5 file to: ", emb_file) with h5py.File(emb_file, 'w') as f_out, tf.Session() as sess: # Initialize the network/load the checkpoint. checkpoint = tf.train.latest_checkpoint(self.exp_root) print('Restoring from checkpoint: {}'.format(checkpoint)) tf.train.Saver().restore(sess, checkpoint) # Go ahead and embed the whole dataset, with all augmented versions too. emb_storage = np.zeros( (len(data_fids) * len(modifiers), self.embedding_dim), np.float32) for start_idx in count(step=self.batch_size): try: emb = sess.run(endpoints['emb']) print('\rEmbedded batch {}-{}/{}'.format( start_idx, start_idx + len(emb), len(emb_storage)), flush=True, end='') emb_storage[start_idx:start_idx + len(emb)] = emb except tf.errors.OutOfRangeError: break # This just indicates the end of the dataset. print() print("Done with embedding, aggregating augmentations...", flush=True) print(emb_storage.shape) if len(modifiers) > 1: # Pull out the augmentations into a separate first dimension. emb_storage = emb_storage.reshape(len(data_fids), len(modifiers), -1) emb_storage = emb_storage.transpose( (1, 0, 2)) # (Aug,FID,128D) # Store the embedding of all individual variants too. emb_dataset = f_out.create_dataset('emb_aug', data=emb_storage) # Aggregate according to the specified parameter. emb_storage = AGGREGATORS[aggregator](emb_storage) print(emb_storage.shape) # Store the final embeddings.
f_out.create_dataset('emb', data=emb_storage) f_out.create_dataset('id', data=data_ids) f_out.create_dataset('fol_id', data=data_fols) # Store information about the produced augmentation and in case no crop # augmentation was used, if the images are resized or avg pooled. f_out.create_dataset('augmentation_types', data=np.asarray(modifiers, dtype='|S')) tf.reset_default_graph()
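# --- Illustrative sketch (not part of the original scripts) ---------------
# Reading back the file written by embed_csv_file above: it stores the
# datasets 'emb', 'id', 'fol_id' and 'augmentation_types' (plus 'emb_aug'
# when test-time augmentation was used). The file name is illustrative.
import h5py
import numpy as np

with h5py.File('gallery_embeddings.h5', 'r') as f:
    embs = np.array(f['emb'])          # (n_images, embedding_dim)
    ids = np.array(f['id'])            # int32 label per image
    aug_types = [s.decode() for s in f['augmentation_types']]
print(embs.shape, ids.shape, aug_types)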
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1) client = None logger = None try: client = CrayonClient(args.host, args.port) client.remove_experiment("pytorch_logging") logger = client.create_experiment("pytorch_logging") except ValueError: logger = client.create_experiment("pytorch_logging") except Exception: print("Cannot create logger") datasets, dataloaders = load_dataset(args.arch, args.data, args.batch_size, args.workers) for param in model.named_parameters(): if param[0] not in unfreeze: param[1].requires_grad = False exp = Experiment(model, criterion, optimizer, scheduler, log_client=logger) exp.train(args.epochs, dataloaders, args.resume) try: fname = logger.to_zip() client.remove_experiment("pytorch_logging") safe_mkdir('logs/') shutil.move(fname, 'logs/') print("Log stored in file: {}".format(os.path.join('logs', fname))) except Exception: print("Could not export or store the training log.")
def main(argv): # Verify that parameters are set correctly. args = parser.parse_args(argv) if not os.path.exists(args.dataset): return # Possibly auto-generate the output filename. if args.filename is None: basename = os.path.basename(args.dataset) args.filename = os.path.splitext(basename)[0] + '_embeddings.h5' os_utils.touch_dir(os.path.join(args.experiment_root, args.foldername)) log_file = os.path.join(args.experiment_root, args.foldername, "embed") logging.config.dictConfig(common.get_logging_dict(log_file)) log = logging.getLogger('embed') args.filename = os.path.join(args.experiment_root, args.foldername, args.filename) var_filepath = os.path.join(args.experiment_root, args.foldername, args.filename[:-3] + '_var.txt') # Load the args from the original experiment. args_file = os.path.join(args.experiment_root, 'args.json') if os.path.isfile(args_file): if not args.quiet: print('Loading args from {}.'.format(args_file)) with open(args_file, 'r') as f: args_resumed = json.load(f) # Add arguments from training. for key, value in args_resumed.items(): args.__dict__.setdefault(key, value) # A couple special-cases and sanity checks if (args_resumed['crop_augment']) == (args.crop_augment is None): print('WARNING: crop augmentation differs between training and ' 'evaluation.') args.image_root = args.image_root or args_resumed['image_root'] else: raise IOError( '`args.json` could not be found in: {}'.format(args_file)) # Check a proper aggregator is provided if augmentation is used. if args.flip_augment or args.crop_augment == 'five': if args.aggregator is None: print( 'ERROR: Test time augmentation is performed but no aggregator ' 'was specified.') exit(1) else: if args.aggregator is not None: print('ERROR: No test time augmentation that needs aggregating is ' 'performed but an aggregator was specified.') exit(1) if not args.quiet: print('Evaluating using the following parameters:') for key, value in sorted(vars(args).items()): print('{}: {}'.format(key, value)) # Load the data from the CSV file. _, data_fids = common.load_dataset(args.dataset, args.image_root) net_input_size = (args.net_input_height, args.net_input_width) pre_crop_size = (args.pre_crop_height, args.pre_crop_width) # Setup a tf Dataset containing all images. dataset = tf.data.Dataset.from_tensor_slices(data_fids) # Convert filenames to actual image tensors. dataset = dataset.map(lambda fid: common.fid_to_image( fid, tf.constant('dummy'), image_root=args.image_root, image_size=pre_crop_size if args.crop_augment else net_input_size), num_parallel_calls=args.loading_threads) # Augment the data if specified by the arguments. # `modifiers` is a list of strings that keeps track of which augmentations # have been applied, so that a human can understand it later on.
modifiers = ['original'] if args.flip_augment: dataset = dataset.map(flip_augment) dataset = dataset.apply(tf.contrib.data.unbatch()) modifiers = [o + m for m in ['', '_flip'] for o in modifiers] if args.crop_augment == 'center': dataset = dataset.map(lambda im, fid, pid: (five_crops(im, net_input_size)[0], fid, pid)) modifiers = [o + '_center' for o in modifiers] elif args.crop_augment == 'five': dataset = dataset.map(lambda im, fid, pid: (tf.stack(five_crops(im, net_input_size)), tf.stack([fid] * 5), tf.stack([pid] * 5))) dataset = dataset.apply(tf.contrib.data.unbatch()) modifiers = [ o + m for o in modifiers for m in [ '_center', '_top_left', '_top_right', '_bottom_left', '_bottom_right' ] ] elif args.crop_augment == 'avgpool': modifiers = [o + '_avgpool' for o in modifiers] else: modifiers = [o + '_resize' for o in modifiers] # Group it back into PK batches. dataset = dataset.batch(args.batch_size) # Overlap producing and consuming. dataset = dataset.prefetch(1) #images, _, _ = dataset.make_one_shot_iterator().get_next() #init_iter = dataset.make_initializable_iterator() init_iter = tf.data.Iterator.from_structure(dataset.output_types, dataset.output_shapes) images, _, _ = init_iter.get_next() iter_init_op = init_iter.make_initializer(dataset) # Create the model and an embedding head. model = import_module('nets.' + args.model_name) head = import_module('heads.' + args.head_name) images_ph = tf.placeholder(dataset.output_types[0], dataset.output_shapes[0]) endpoints, body_prefix = model.endpoints(images_ph, is_training=False) with tf.name_scope('head'): endpoints = head.head(endpoints, args.embedding_dim, is_training=False) gpu_options = tf.GPUOptions(allow_growth=True) gpu_config = tf.ConfigProto(gpu_options=gpu_options) with h5py.File(args.filename, 'w') as f_out, tf.Session(config=gpu_config) as sess: # Initialize the network/load the checkpoint. if args.checkpoint is None: checkpoint = tf.train.latest_checkpoint(args.experiment_root) else: checkpoint = os.path.join(args.experiment_root, args.checkpoint) if not args.quiet: print('Restoring from checkpoint: {}'.format(checkpoint)) tf.train.Saver().restore(sess, checkpoint) # Go ahead and embed the whole dataset, with all augmented versions too. emb_storage = np.zeros( (len(data_fids) * len(modifiers), args.embedding_dim), np.float32) ##sess.run(init_iter.initializer) sess.run(iter_init_op) for start_idx in count(step=args.batch_size): try: current_imgs = sess.run(images) batch_embedding = endpoints['emb'] emb = sess.run(batch_embedding, feed_dict={images_ph: current_imgs}) emb_storage[start_idx:start_idx + len(emb)] = emb print('\rEmbedded batch {}-{}/{}'.format( start_idx, start_idx + len(emb), len(emb_storage)), flush=True, end='') except tf.errors.OutOfRangeError: break # This just indicates the end of the dataset. if not args.quiet: print("Done with embedding, aggregating augmentations...", flush=True) if len(modifiers) > 1: # Pull out the augmentations into a separate first dimension. emb_storage = emb_storage.reshape(len(data_fids), len(modifiers), -1) emb_storage = emb_storage.transpose((1, 0, 2)) # (Aug,FID,128D) # Store the embedding of all individual variants too. emb_dataset = f_out.create_dataset('emb_aug', data=emb_storage) # Aggregate according to the specified parameter. emb_storage = AGGREGATORS[args.aggregator](emb_storage) # Store the final embeddings.
emb_dataset = f_out.create_dataset('emb', data=emb_storage) # Store information about the produced augmentation and in case no crop # augmentation was used, if the images are resized or avg pooled. f_out.create_dataset('augmentation_types', data=np.asarray(modifiers, dtype='|S'))
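# --- Illustrative sketch (not part of the original scripts) ---------------
# `flip_augment` is used by the embedding mains above but is not defined in
# this excerpt. A plausible sketch that is consistent with the unbatch()
# call and the ['', '_flip'] modifier pair that follow it: every element is
# duplicated into (original, mirrored), with fid and pid repeated so the
# unbatched stream stays aligned with the modifiers.
import tensorflow as tf

def flip_augment(image, fid, pid):
    """Returns the original and the horizontally flipped image as a pair."""
    images = tf.stack([image, tf.reverse(image, [1])])  # axis 1 == width
    return images, tf.stack([fid] * 2), tf.stack([pid] * 2)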
def main(): args = parser.parse_args() # We store all arguments in a json file. This has two advantages: # 1. We can always get back and see what exactly that experiment was # 2. We can resume an experiment as-is without needing to remember all flags. args_file = os.path.join(args.experiment_root, 'args.json') if args.resume: if not os.path.isfile(args_file): raise IOError('`args.json` not found in {}'.format(args_file)) print('Loading args from {}.'.format(args_file)) with open(args_file, 'r') as f: args_resumed = json.load(f) args_resumed['resume'] = True # This would be overwritten. # When resuming, we not only want to populate the args object with the # values from the file, but we also want to check for some possible # conflicts between loaded and given arguments. for key, value in args.__dict__.items(): if key in args_resumed: resumed_value = args_resumed[key] if resumed_value != value: print('Warning: For the argument `{}` we are using the' ' loaded value `{}`. The provided value was `{}`' '.'.format(key, resumed_value, value)) args.__dict__[key] = resumed_value else: print('Warning: A new argument was added since the last run:' ' `{}`. Using the new value: `{}`.'.format(key, value)) else: # If the experiment directory exists already, we bail in fear. if os.path.exists(args.experiment_root): if os.listdir(args.experiment_root): print('The directory {} already exists and is not empty.' ' If you want to resume training, append --resume to' ' your call.'.format(args.experiment_root)) exit(1) else: os.makedirs(args.experiment_root) # Store the passed arguments for later resuming and grepping in a nice # and readable format. with open(args_file, 'w') as f: json.dump(vars(args), f, ensure_ascii=False, indent=2, sort_keys=True) log_file = os.path.join(args.experiment_root, "train") logging.config.dictConfig(common.get_logging_dict(log_file)) log = logging.getLogger('train') # Also show all parameter values at the start, for ease of reading logs. log.info('Training using the following parameters:') for key, value in sorted(vars(args).items()): log.info('{}: {}'.format(key, value)) # Check them here, so they are not required when --resume-ing. if not args.train_set: parser.print_help() log.error("You did not specify the `train_set` argument!") sys.exit(1) if not args.image_root: parser.print_help() log.error("You did not specify the required `image_root` argument!") sys.exit(1) # Load the data from the CSV file. pids, fids = common.load_dataset(args.train_set, args.image_root) max_fid_len = max(map(len, fids)) # We'll need this later for logfiles. # Setup a tf.Dataset where one "epoch" loops over all PIDS. # PIDS are shuffled after every epoch and continue indefinitely. unique_pids = np.unique(pids) dataset = tf.data.Dataset.from_tensor_slices(unique_pids) dataset = dataset.shuffle(len(unique_pids)) # Constrain the dataset size to a multiple of the batch-size, so that # we don't get overlap at the end of each epoch. dataset = dataset.take((len(unique_pids) // args.batch_p) * args.batch_p) dataset = dataset.repeat(None) # Repeat forever. Funny way of stating it. # For every PID, get K images. dataset = dataset.map(lambda pid: sample_k_fids_for_pid( pid, all_fids=fids, all_pids=pids, batch_k=args.batch_k)) # Ungroup/flatten the batches for easy loading of the files. dataset = dataset.apply(tf.contrib.data.unbatch()) # Convert filenames to actual image tensors. 
net_input_size = (args.net_input_height, args.net_input_width) pre_crop_size = (args.pre_crop_height, args.pre_crop_width) dataset = dataset.map(lambda fid, pid: common.fid_to_image( fid, pid, image_root=args.image_root, image_size=pre_crop_size if args.crop_augment else net_input_size), num_parallel_calls=args.loading_threads) # Augment the data if specified by the arguments. if args.flip_augment: dataset = dataset.map(lambda im, fid, pid: (tf.image.random_flip_left_right(im), fid, pid)) if args.crop_augment: dataset = dataset.map(lambda im, fid, pid: (tf.random_crop( im, net_input_size + (3, )), fid, pid)) # Group it back into PK batches. batch_size = args.batch_p * args.batch_k dataset = dataset.batch(batch_size) # Overlap producing and consuming for parallelism. dataset = dataset.prefetch(1) # Since we repeat the data infinitely, we only need a one-shot iterator. images, fids, pids = dataset.make_one_shot_iterator().get_next() # Create the model and an embedding head. model = import_module('nets.' + args.model_name) head = import_module('heads.' + args.head_name) # Feed the image through the model. The returned `body_prefix` will be used # further down to load the pre-trained weights for all variables with this # prefix. endpoints, body_prefix = model.endpoints(images, is_training=True) with tf.name_scope('head'): endpoints = head.head(endpoints, args.embedding_dim, is_training=True) # Create the loss in two steps: # 1. Compute all pairwise distances according to the specified metric. # 2. For each anchor along the first dimension, compute its loss. dists = loss.cdist(endpoints['emb'], endpoints['emb'], metric=args.metric) losses, train_top1, prec_at_k, _, neg_dists, pos_dists = loss.LOSS_CHOICES[ args.loss](dists, pids, args.margin, batch_precision_at_k=args.batch_k - 1) decDense = tf.layers.dense( inputs=endpoints['emb'], units=5120, name='decDense') # ,activation = tf.nn.relu unflat = tf.reshape(decDense, shape=[tf.shape(decDense)[0], 32, 16, 10]) unp3shape = tf.TensorShape( [2 * di for di in unflat.get_shape().as_list()[1:-1]]) unPool3 = tf.image.resize_nearest_neighbor(unflat, unp3shape, name='unpool3') deConv3 = tf.layers.conv2d(inputs=unPool3, filters=64, kernel_size=[5, 5], strides=(1, 1), padding='same', activation=tf.nn.relu, name='deConv3') unp2shape = tf.TensorShape( [2 * di for di in deConv3.get_shape().as_list()[1:-1]]) unPool2 = tf.image.resize_nearest_neighbor(deConv3, unp2shape, name='unpool2') deConv2 = tf.layers.conv2d(inputs=unPool2, filters=32, kernel_size=[5, 5], strides=(1, 1), padding='same', activation=tf.nn.relu, name='deConv2') unp1shape = tf.TensorShape( [2 * di for di in deConv2.get_shape().as_list()[1:-1]]) unPool1 = tf.image.resize_nearest_neighbor(deConv2, unp1shape, name='unpool1') deConv1 = tf.layers.conv2d(inputs=unPool1, filters=3, kernel_size=[5, 5], strides=(1, 1), padding='same', activation=None, name='deConv1') imClip = deConv1 #tf.clip_by_value(t = deConv1,clip_value_min = -1.0,clip_value_max = 1.0,name='clipRelu') print('ReconstructedImage : ', imClip.name) recLoss = tf.multiply( 0.01, tf.losses.mean_squared_error( labels=images, predictions=imClip, )) print('recLoss : ', recLoss.name) decDense1 = tf.layers.dense( inputs=endpoints['emb'], units=5120, name='decDense1') # ,activation = tf.nn.relu unflat1 = tf.reshape(decDense1, shape=[tf.shape(decDense1)[0], 32, 16, 10]) unp3shape1 = tf.TensorShape( [2 * di for di in unflat1.get_shape().as_list()[1:-1]]) unPool3_new = tf.image.resize_nearest_neighbor(unflat1,
unp3shape1, name='unpool3_new') deConv3_new = tf.layers.conv2d(inputs=unPool3_new, filters=64, kernel_size=[5, 5], strides=(1, 1), padding='same', activation=tf.nn.relu, name='deConv3_new') unp2shape_new = tf.TensorShape( [2 * di for di in deConv3_new.get_shape().as_list()[1:-1]]) unPool2_new = tf.image.resize_nearest_neighbor(deConv3_new, unp2shape_new, name='unpool2_new') deConv2_new = tf.layers.conv2d(inputs=unPool2_new, filters=3, kernel_size=[5, 5], strides=(1, 1), padding='same', activation=tf.nn.relu, name='deConv2_new') unp1shape_new = tf.TensorShape( [2 * di for di in deConv2_new.get_shape().as_list()[1:-1]]) unPool1_new = tf.image.resize_nearest_neighbor(deConv2_new, unp1shape_new, name='unpool1_new') deConv1_new = tf.layers.conv2d(inputs=unPool1_new, filters=3, kernel_size=[5, 5], strides=(1, 1), padding='same', activation=None, name='deConv1_new') imClip1 = deConv2_new print('ReconstructedImage : ', imClip1.name) print(imClip1.shape) images2 = tf.image.resize_images(images, [128, 64]) print(images2.shape) recLoss1 = tf.multiply( 0.01, tf.losses.mean_squared_error( labels=images2, predictions=imClip1, )) print('recLoss_new : ', recLoss1.name) decDense2 = tf.layers.dense( inputs=endpoints['emb'], units=5120, name='decDense2') # ,activation = tf.nn.relu unflat12 = tf.reshape(decDense2, shape=[tf.shape(decDense2)[0], 32, 16, 10]) unp3shape12 = tf.TensorShape( [2 * di for di in unflat12.get_shape().as_list()[1:-1]]) unPool3_new2 = tf.image.resize_nearest_neighbor(unflat12, unp3shape12, name='unpool3_new2') deConv3_new2 = tf.layers.conv2d(inputs=unPool3_new2, filters=3, kernel_size=[5, 5], strides=(1, 1), padding='same', activation=tf.nn.relu, name='deConv3_new2') unp2shape_new2 = tf.TensorShape( [2 * di for di in deConv3_new2.get_shape().as_list()[1:-1]]) unPool2_new2 = tf.image.resize_nearest_neighbor(deConv3_new2, unp2shape_new2, name='unpool2_new2') imClip11 = deConv3_new2 images21 = tf.image.resize_images(images, [64, 32]) recLoss2 = tf.multiply( 0.01, tf.losses.mean_squared_error( labels=images21, predictions=imClip11, )) print('recLoss_new : ', recLoss2.name) decDensel = tf.layers.dense( inputs=endpoints['emb'], units=5120, name='decDensel') # ,activation = tf.nn.relu unflatl = tf.reshape(decDensel, shape=[tf.shape(decDensel)[0], 32, 16, 10]) unp3shapel = tf.TensorShape( [2 * di for di in unflatl.get_shape().as_list()[1:-1]]) unPool3l = tf.image.resize_nearest_neighbor(unflatl, unp3shapel, name='unpool3l') deConv3l = tf.layers.conv2d(inputs=unPool3l, filters=64, kernel_size=[5, 5], strides=(1, 1), padding='same', activation=tf.nn.relu, name='deConv3l') unp2shapel = tf.TensorShape( [2 * di for di in deConv3l.get_shape().as_list()[1:-1]]) unPool2l = tf.image.resize_nearest_neighbor(deConv3l, unp2shapel, name='unpool2l') deConv2l = tf.layers.conv2d(inputs=unPool2l, filters=32, kernel_size=[5, 5], strides=(1, 1), padding='same', activation=tf.nn.relu, name='deConv2l') unp1shapel = tf.TensorShape( [2 * di for di in deConv2l.get_shape().as_list()[1:-1]]) unPool1l = tf.image.resize_nearest_neighbor(deConv2l, unp1shapel, name='unpool1l') deConv1l = tf.layers.conv2d(inputs=unPool1l, filters=3, kernel_size=[5, 5], strides=(1, 1), padding='same', activation=None, name='deConv1l') imClipl = deConv1l #tf.clip_by_value(t = deConv1,clip_value_min = -1.0,clip_value_max = 1.0,name='clipRelu') print('ReconstructedImage : ', imClipl.name) recLossl = tf.multiply( 0.01, tf.losses.mean_squared_error( labels=images, predictions=imClipl, )) print('recLoss : ',
recLossl.name) # Count the number of active entries, and compute the total batch loss. num_active = tf.reduce_sum(tf.cast(tf.greater(losses, 1e-5), tf.float32)) loss_mean = tf.reduce_mean(losses) # Some logging for tensorboard. tf.summary.histogram('loss_distribution', losses) tf.summary.scalar('loss', loss_mean) tf.summary.scalar('batch_top1', train_top1) tf.summary.scalar('batch_prec_at_{}'.format(args.batch_k - 1), prec_at_k) tf.summary.scalar('active_count', num_active) tf.summary.histogram('embedding_dists', dists) tf.summary.histogram('embedding_pos_dists', pos_dists) tf.summary.histogram('embedding_neg_dists', neg_dists) tf.summary.histogram('embedding_lengths', tf.norm(endpoints['emb_raw'], axis=1)) # Create the mem-mapped arrays in which we'll log all training detail in # addition to tensorboard, because tensorboard is annoying for detailed # inspection and actually discards data in histogram summaries. if args.detailed_logs: log_embs = lb.create_or_resize_dat( os.path.join(args.experiment_root, 'embeddings'), dtype=np.float32, shape=(args.train_iterations, batch_size, args.embedding_dim)) log_loss = lb.create_or_resize_dat( os.path.join(args.experiment_root, 'losses'), dtype=np.float32, shape=(args.train_iterations, batch_size)) log_fids = lb.create_or_resize_dat( os.path.join(args.experiment_root, 'fids'), dtype='S' + str(max_fid_len), shape=(args.train_iterations, batch_size)) # These are collected here before we add the optimizer, because depending # on the optimizer, it might add extra slots, which are also global # variables, with the exact same prefix. model_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, body_prefix) # Define the optimizer and the learning-rate schedule. # Unfortunately, we get NaNs if we don't handle no-decay separately. global_step = tf.Variable(0, name='global_step', trainable=False) if 0 <= args.decay_start_iteration < args.train_iterations: learning_rate = tf.train.exponential_decay( args.learning_rate, tf.maximum(0, global_step - args.decay_start_iteration), args.train_iterations - args.decay_start_iteration, 0.001) else: learning_rate = args.learning_rate tf.summary.scalar('learning_rate', learning_rate) optimizer = tf.train.AdamOptimizer(learning_rate) # Feel free to try others! # optimizer = tf.train.AdadeltaOptimizer(learning_rate) # Update_ops are used to update batchnorm stats. with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)): train_op = optimizer.minimize(tf.add( loss_mean, tf.add(recLoss, tf.add(recLoss1, tf.add(recLoss2, recLossl)))), global_step=global_step) # Define a saver for the complete model. checkpoint_saver = tf.train.Saver(max_to_keep=0) with tf.Session() as sess: if args.resume: # In case we're resuming, simply load the full checkpoint to init. last_checkpoint = tf.train.latest_checkpoint(args.experiment_root) log.info('Restoring from checkpoint: {}'.format(last_checkpoint)) checkpoint_saver.restore(sess, last_checkpoint) else: # But if we're starting from scratch, we may need to load some # variables from the pre-trained weights, and random init others. sess.run(tf.global_variables_initializer()) if args.initial_checkpoint is not None: saver = tf.train.Saver(model_variables) saver.restore(sess, args.initial_checkpoint) # In any case, we also store this initialization as a checkpoint, # such that we could run exactly reproducible experiments.
checkpoint_saver.save(sess, os.path.join(args.experiment_root, 'checkpoint'), global_step=0) merged_summary = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(args.experiment_root, sess.graph) start_step = sess.run(global_step) log.info('Starting training from iteration {}.'.format(start_step)) # Finally, here comes the main-loop. This `Uninterrupt` is a handy # utility such that an iteration still finishes on Ctrl+C and we can # stop the training cleanly. with lb.Uninterrupt(sigs=[SIGINT, SIGTERM], verbose=True) as u: for i in range(start_step, args.train_iterations): # Compute gradients, update weights, store logs! start_time = time.time() _, summary, step, b_prec_at_k, b_embs, b_loss, b_fids ,b_rec, b_rec1= \ sess.run([train_op, merged_summary, global_step, prec_at_k, endpoints['emb'], losses, fids,recLoss, recLoss1]) elapsed_time = time.time() - start_time # Compute the iteration speed and add it to the summary. # We did observe some weird spikes that we couldn't track down. summary2 = tf.Summary() summary2.value.add(tag='secs_per_iter', simple_value=elapsed_time) summary_writer.add_summary(summary2, step) summary_writer.add_summary(summary, step) if args.detailed_logs: log_embs[i], log_loss[i], log_fids[ i] = b_embs, b_loss, b_fids # Do a huge print out of the current progress. seconds_todo = (args.train_iterations - step) * elapsed_time log.info( 'iter:{:6d}, loss min|avg|max: {:.3f}|{:.3f}|{:6.3f}, ' 'recLoss: {:.3f} batch-p@{}: {:.2%}, ETA: {} ({:.2f}s/it)'. format(step, float(np.min(b_loss)), float(np.mean(b_loss)), float(np.max(b_loss)), b_rec, args.batch_k - 1, float(b_prec_at_k), timedelta(seconds=int(seconds_todo)), elapsed_time)) sys.stdout.flush() sys.stderr.flush() # Save a checkpoint of training every so often. if (args.checkpoint_frequency > 0 and step % args.checkpoint_frequency == 0): checkpoint_saver.save(sess, os.path.join(args.experiment_root, 'checkpoint'), global_step=step) # Stop the main-loop at the end of the step, if requested. if u.interrupted: log.info("Interrupted on request!") break # Store one final checkpoint. This might be redundant, but it is crucial # in case intermediate storing was disabled and it saves a checkpoint # when the process was interrupted. checkpoint_saver.save(sess, os.path.join(args.experiment_root, 'checkpoint'), global_step=step)
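# --- Illustrative sketch (not part of the original scripts) ---------------
# The decoder branches above repeat one pattern several times: double the
# spatial size with nearest-neighbour resizing, then apply a 5x5 'same'
# convolution. A compact helper capturing that block in the same TF1-style
# tf.layers API; this is a sketch of the pattern, not a refactor of the
# exact graph built above.
import tensorflow as tf

def upsample_conv(x, filters, name, activation=tf.nn.relu):
    """Nearest-neighbour 2x upsampling followed by a 5x5 convolution."""
    target = tf.TensorShape([2 * d for d in x.get_shape().as_list()[1:-1]])
    up = tf.image.resize_nearest_neighbor(x, target, name=name + '_up')
    return tf.layers.conv2d(up, filters=filters, kernel_size=[5, 5],
                            strides=(1, 1), padding='same',
                            activation=activation, name=name + '_conv')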
def main(): net = Model(n_feature) optimizer = torch.optim.Adam(net.parameters(), weight_decay=weight_decay) session = Session(net, optimizer) device = session.device clock = session.clock logger = session.logger net.to(device) logger.info(net) # prepare data data_loader_train, data_loader_test, data_train, data_test = load_dataset(data_set=data_set) cost = torch.nn.CrossEntropyLoss() while True: clock.tock() if clock.epoch > n_epochs: break logger.info("Epoch {}/{}".format(clock.epoch, n_epochs)) logger.info("-" * 10) train_loss_arcface, train_loss_ce, train_correct = 0.0, 0.0, 0.0 train_correct_cls, train_num_cls = [0] * 10, [0] * 10 for idx, data in enumerate(data_loader_train): X_train, y_train = data X_train, y_train = Variable(X_train).to(device), Variable(y_train).to(device) net.train() outputs = net(X_train) _, pred = torch.max(outputs.data, 1) for i in range(10): index = y_train == i pred_i = pred[index] label_i = y_train[index].data train_num_cls[i] += len(pred_i) train_correct_cls[i] += torch.sum(pred_i == label_i).item() optimizer.zero_grad() outputs_2 = am_softmax(outputs, y_train, scale, margin) loss_arcface = cost(outputs_2, y_train) loss_ce = cost(scale * outputs, y_train) loss_arcface.backward() optimizer.step() step_correct = torch.sum(pred == y_train.data).item() train_loss_arcface += loss_arcface.item() train_loss_ce += loss_ce.item() train_correct += step_correct if idx % 10 == 0: # update for every 10 step session.update_curv_state('train_step', step_correct/len(y_train), loss_arcface.item()) if idx % 100 == 0: # print train info logger.info("step: {}, train arcface loss: {:.4f}, ce loss: {:.4f}, train acc: {:.4f}".format(idx, loss_arcface.item(), loss_ce.item(), step_correct / len(y_train))) clock.tick() test_loss, test_correct = 0.0, 0.0 test_correct_cls, test_num_cls = [0] * 10, [0] * 10 for data in data_loader_test: X_test, y_test = data X_test, y_test = Variable(X_test).to(device), Variable(y_test).to(device) net.eval() outputs = net(X_test) _, pred = torch.max(outputs.data, 1) for i in range(10): idx = y_test == i pred_i = pred[idx] label_i = y_test[idx].data test_num_cls[i] += len(pred_i) test_correct_cls[i] += torch.sum(pred_i == label_i).item() test_correct += torch.sum(pred == y_test.data).item() test_loss += cost(scale*outputs, y_test).item() train_acc = train_correct / len(data_train) train_loss_arcface = 64 * train_loss_arcface / len(data_train) train_acc_cls = np.array(train_correct_cls) / np.array(train_num_cls) assert np.sum(np.array(train_num_cls)) == len(data_train) assert np.sum(np.array(train_correct_cls)) == train_correct test_acc = test_correct /len(data_test) test_loss = 64 * test_loss / len(data_test) test_acc_cls = np.array(test_correct_cls) / np.array(test_num_cls) assert np.sum(np.array(test_num_cls)) == len(data_test) assert np.sum(np.array(test_correct_cls)) == test_correct session.update_best_state(test_acc) session.update_curv_state('train_epoch', train_acc, train_loss_arcface, train_acc_cls) session.update_curv_state('val_epoch', test_acc, test_loss, test_acc_cls) logger.info("Loss is:{:.4f}, Train Accuracy is:{:.2f}%, Test Accuracy is:{:.2f}%, {}".format( train_loss_arcface, 100 * train_acc, 100 * test_acc, session.best_state)) logger.info(', '.join([ '{:.4f}'.format(x) for x in train_acc_cls])) logger.info(', '.join(['{:.4f}'.format(x) for x in test_acc_cls])) if clock.epoch in [5, 20, 50, 100]: session.save_checkpoint('epoch-{}'.format(clock.epoch)) session.save_checkpoint('latest') session.end() print('drawing curve') 
draw_curve(session.curv_stat, session.log_curv_dir, data_set) if n_feature == 2: print('drawing feature') draw_feature(os.path.join(session.log_model_dir, 'best-accuracy'), 1, data_set)
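# --- Illustrative sketch (not part of the original scripts) ---------------
# `am_softmax` above rescales the (cosine) logits before the cross-entropy.
# A minimal PyTorch sketch of the additive-margin adjustment it is assumed
# to perform: subtract the margin from the target-class logit, then scale
# everything. The project's own helper may differ in detail.
import torch

def am_softmax(logits, targets, scale, margin):
    """Additive-margin logits; `logits` are assumed to be cosines."""
    onehot = torch.zeros_like(logits)
    onehot.scatter_(1, targets.unsqueeze(1), 1.0)
    return scale * (logits - margin * onehot)

# Usage sketch: 2 samples, 3 classes.
z = torch.tensor([[0.9, 0.1, 0.0], [0.2, 0.8, 0.1]])
print(am_softmax(z, torch.tensor([0, 1]), scale=30.0, margin=0.35))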
def main(): # my_devices = tf.config.experimental.list_physical_devices(device_type='CPU') # tf.config.experimental.set_visible_devices(devices= my_devices, device_type='CPU') # # To find out which devices your operations and tensors are assigned to # tf.debugging.set_log_device_placement(True) args = parser.parse_args(args=[]) show_all_parameters(args) if not args.train_set: parser.print_help() print("You didn't specify the 'train_set' argument!") sys.exit(1) if not args.image_root: parser.print_help() print("You didn't specify the 'image_root' argument!") sys.exit(1) pids, fids = common.load_dataset(args.train_set, args.image_root) unique_pids = np.unique(pids) dataset = tf.data.Dataset.from_tensor_slices(unique_pids) dataset = dataset.shuffle(len(unique_pids)) # Take the dataset size equal to a multiple of the batch-size, so that # we don't get overlap at the end of each epoch. dataset = dataset.take((len(unique_pids) // args.batch_p) * args.batch_p) dataset = dataset.repeat(None) # Repeat indefinitely. # For every PID, get K images. dataset = dataset.map(lambda pid: sample_k_fids_for_pid( pid, all_fids=fids, all_pids=pids, batch_k=args.batch_k)) # Ungroup/flatten the batches dataset = dataset.unbatch() # Convert filenames to actual image tensors. net_input_size = (args.net_input_height, args.net_input_width) pre_crop_size = (args.pre_crop_height, args.pre_crop_width) dataset = dataset.map(lambda fid, pid: common.fid_to_image( fid, pid, image_root=args.image_root, image_size=pre_crop_size if args.crop_augment else net_input_size)) if args.flip_augment: dataset = dataset.map(lambda im, fid, pid: (tf.image.random_flip_left_right(im), fid, pid)) if args.crop_augment: dataset = dataset.map(lambda im, fid, pid: (tf.image.random_crop( im, net_input_size + (3, )), fid, pid)) # Group the data into PK batches. 
batch_size = args.batch_p * args.batch_k dataset = dataset.batch(batch_size) dataset = dataset.prefetch(1) dataiter = iter(dataset) model = Trinet(args.embedding_dim) lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay( args.learning_rate, args.train_iterations - args.decay_start_iteration, 0.001) optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule) writer = tf.summary.create_file_writer(args.experiment_root) ckpt = tf.train.Checkpoint(step=tf.Variable(0), optimizer=optimizer, net=model) manager = tf.train.CheckpointManager(ckpt, args.experiment_root, max_to_keep=10) if args.resume: ckpt.restore(manager.latest_checkpoint) for epoch in range(args.train_iterations): # for images,fids,pids in dataset: images, fids, pids = next(dataiter) with tf.GradientTape() as tape: emb = model(images) dists = loss.cdist(emb, emb) losses, top1, prec, topksame, negdist, posdist = loss.batch_hard( dists, pids, args.margin, args.batch_k) lossavg = tf.reduce_mean(losses) lossnp = losses.numpy() with writer.as_default(): tf.summary.scalar("loss", lossavg, step=epoch) tf.summary.scalar('batch_top1', top1, step=epoch) tf.summary.scalar('batch_prec_at_{}'.format(args.batch_k - 1), prec, step=epoch) tf.summary.histogram('losses', losses, step=epoch) tf.summary.histogram('embedding_dists', dists, step=epoch) tf.summary.histogram('embedding_pos_dists', posdist, step=epoch) tf.summary.histogram('embedding_neg_dists', negdist, step=epoch) print('iter:{:6d}, loss min|avg|max: {:.3f}|{:.3f}|{:6.3f}, ' ' batch-p@{}: {:.2%}'.format(epoch, float(np.min(lossnp)), float(np.mean(lossnp)), float(np.max(lossnp)), args.batch_k - 1, float(prec))) grad = tape.gradient(lossavg, model.trainable_variables) optimizer.apply_gradients(zip(grad, model.trainable_variables)) ckpt.step.assign_add(1) if epoch % args.checkpoint_frequency == 0: manager.save()
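# --- Illustrative sketch (not part of the original scripts) ---------------
# `loss.cdist` above computes all pairwise embedding distances inside the
# batch. A minimal TF2 sketch of a Euclidean cdist, with a small epsilon
# under the square root for a stable gradient at zero distance; the
# project's version may also support other metrics.
import tensorflow as tf

def cdist(a, b, epsilon=1e-12):
    """Pairwise Euclidean distances between rows of a (N, D) and b (M, D)."""
    diff = tf.expand_dims(a, 1) - tf.expand_dims(b, 0)  # (N, M, D)
    return tf.sqrt(tf.reduce_sum(tf.square(diff), axis=-1) + epsilon)

x = tf.constant([[0.0, 0.0], [3.0, 4.0]])
print(cdist(x, x).numpy())  # off-diagonal entries are ~5.0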
def main(unused_argv): del unused_argv # Load Config config_name = FLAGS.config config_module = importlib.import_module(configs_module_prefix + '.%s' % config_name) config = config_module.config model_uid = common.get_model_uid(config_name, FLAGS.exp_uid) batch_size = config['batch_size'] # Load dataset dataset = common.load_dataset(config) save_path = dataset.save_path train_data = dataset.train_data attr_train = dataset.attr_train eval_data = dataset.eval_data attr_eval = dataset.attr_eval # Make the directory save_dir = os.path.join(save_path, model_uid) best_dir = os.path.join(save_dir, 'best') tf.gfile.MakeDirs(save_dir) tf.gfile.MakeDirs(best_dir) tf.logging.info('Save Dir: %s', save_dir) np.random.seed(FLAGS.random_seed) # We use `N` in variable names to emphasize that they count the Number of something. N_train = train_data.shape[0] # pylint:disable=invalid-name N_eval = eval_data.shape[0] # pylint:disable=invalid-name # Load Model tf.reset_default_graph() sess = tf.Session() m = model_dataspace.Model(config, name=model_uid) _ = m() # noqa # Create summaries tf.summary.scalar('Train_Loss', m.vae_loss) tf.summary.scalar('Mean_Recon_LL', m.mean_recons) tf.summary.scalar('Mean_KL', m.mean_KL) scalar_summaries = tf.summary.merge_all() x_mean_, x_ = m.x_mean, m.x if common.dataset_is_mnist_family(config['dataset']): x_mean_ = tf.reshape(x_mean_, [-1, MNIST_SIZE, MNIST_SIZE, 1]) x_ = tf.reshape(x_, [-1, MNIST_SIZE, MNIST_SIZE, 1]) x_mean_summary = tf.summary.image( 'Reconstruction', nn.tf_batch_image(x_mean_), max_outputs=1) x_summary = tf.summary.image('Original', nn.tf_batch_image(x_), max_outputs=1) sample_summary = tf.summary.image( 'Sample', nn.tf_batch_image(x_mean_), max_outputs=1) # Summary writers train_writer = tf.summary.FileWriter(save_dir + '/vae_train', sess.graph) eval_writer = tf.summary.FileWriter(save_dir + '/vae_eval', sess.graph) # Initialize sess.run(tf.global_variables_initializer()) i_start = 0 running_N_eval = 30 # pylint:disable=invalid-name traces = { 'i': [], 'i_pred': [], 'loss': [], 'loss_eval': [], } best_eval_loss = np.inf vae_lr_ = np.logspace(np.log10(FLAGS.lr), np.log10(1e-6), FLAGS.n_iters) # Train the VAE for i in range(i_start, FLAGS.n_iters): start = (i * batch_size) % N_train end = start + batch_size batch = train_data[start:end] labels = attr_train[start:end] # train op res = sess.run( [m.train_vae, m.vae_loss, m.mean_recons, m.mean_KL, scalar_summaries], { m.x: batch, m.vae_lr: vae_lr_[i], m.labels: labels, }) tf.logging.info('Iter: %d, Loss: %.4e', i, res[1]) train_writer.add_summary(res[-1], i) if i % FLAGS.n_iters_per_eval == 0: # write training reconstructions if batch.shape[0] == batch_size: res = sess.run([x_summary, x_mean_summary], { m.x: batch, m.labels: labels, }) train_writer.add_summary(res[0], i) train_writer.add_summary(res[1], i) # write sample reconstructions prior_sample = sess.run(m.prior_sample) res = sess.run([sample_summary], { m.q_z_sample: prior_sample, m.labels: labels, }) train_writer.add_summary(res[0], i) # write eval summaries start = (i * batch_size) % N_eval end = start + batch_size batch = eval_data[start:end] labels = attr_eval[start:end] if batch.shape[0] == batch_size: res_eval = sess.run([ m.vae_loss, m.mean_recons, m.mean_KL, scalar_summaries, x_summary, x_mean_summary ], { m.x: batch, m.labels: labels, }) traces['loss_eval'].append(res_eval[0]) eval_writer.add_summary(res_eval[-3], i) eval_writer.add_summary(res_eval[-2], i) eval_writer.add_summary(res_eval[-1], i) if i % FLAGS.n_iters_per_save == 0: smoothed_eval_loss =
np.mean(traces['loss_eval'][-running_N_eval:]) if smoothed_eval_loss < best_eval_loss: # Save the best model best_eval_loss = smoothed_eval_loss save_name = os.path.join(best_dir, 'vae_best_%s.ckpt' % model_uid) tf.logging.info('SAVING BEST! %s Iter: %d', save_name, i) m.vae_saver.save(sess, save_name) with tf.gfile.Open(os.path.join(best_dir, 'best_ckpt_iters.txt'), 'w') as f: f.write('%d' % i)
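# --- Illustrative sketch (not part of the original scripts) ---------------
# Both dataspace training loops above pick the "best" checkpoint by
# smoothing the last `running_N_eval` eval losses before comparing against
# the incumbent, which makes the decision robust to a single lucky eval
# batch. The decision in isolation:
import numpy as np

def should_save(loss_eval_trace, best_so_far, window=30):
    smoothed = np.mean(loss_eval_trace[-window:])
    return smoothed < best_so_far, smoothed

save, smoothed = should_save([0.9, 0.5, 0.4], best_so_far=np.inf)
assert save  # the first smoothed value always beats +inf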
# If arch is None and not holovae, we assume we want # imagenet scaling. Otherwise, the argument # --imagenet_scaling will define this. if args['arch'] is None and not args['use_holovae']: # Assume we're using imagenet pretrained print("arch==None and use_holovae==False, so use imagenet scaling...") imagenet_scaling = True else: if not args['imagenet_scaling']: imagenet_scaling = False else: print("imagenet_scaling==True, so use imagenet scaling...") imagenet_scaling = True if args['mode'] == 'eval_test': ds_train, ds_valid = load_dataset('blank', args['img_size']) else: ds_train, ds_valid = load_dataset(name=args['dataset'], img_size=args['img_size'], imagenet_scaling=imagenet_scaling) if args['subset_train'] is not None: # The subset is randomly sampled from the # training data, and changes depending on # the seed. indices = np.arange(0, len(ds_train)) rs = np.random.RandomState(args['seed']) rs.shuffle(indices) indices = indices[0:args['subset_train']] old_ds_train = ds_train ds_train = Subset(old_ds_train, indices=indices)
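# --- Illustrative sketch (not part of the original scripts) ---------------
# The random-subset pattern above, self-contained: draw `subset_train`
# indices over the full training set with a seeded RandomState, then wrap
# the dataset in torch.utils.data.Subset. Dataset and sizes are
# illustrative.
import numpy as np
import torch
from torch.utils.data import Subset, TensorDataset

full = TensorDataset(torch.arange(100).float())
rs = np.random.RandomState(0)
indices = np.arange(0, len(full))
rs.shuffle(indices)
small = Subset(full, indices=indices[:10].tolist())
print(len(small))  # 10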
def main(): # Verify that parameters are set correctly. args = parser.parse_args() # Possibly auto-generate the output filename. if args.filename is None: basename = os.path.basename(args.dataset) args.filename = os.path.splitext(basename)[0] + '_embeddings.h5' args.filename = os.path.join(args.experiment_root, args.filename) # Load the args from the original experiment. args_file = os.path.join(args.experiment_root, 'args.json') if os.path.isfile(args_file): if not args.quiet: print('Loading args from {}.'.format(args_file)) with open(args_file, 'r') as f: args_resumed = json.load(f) # Add arguments from training. for key, value in args_resumed.items(): args.__dict__.setdefault(key, value) # A couple special-cases and sanity checks if (args_resumed['crop_augment']) == (args.crop_augment is None): print('WARNING: crop augmentation differs between training and ' 'evaluation.') args.image_root = args.image_root or args_resumed['image_root'] else: raise IOError('`args.json` could not be found in: {}'.format(args_file)) # Check a proper aggregator is provided if augmentation is used. if args.flip_augment or args.crop_augment == 'five': if args.aggregator is None: print('ERROR: Test time augmentation is performed but no aggregator ' 'was specified.') exit(1) else: if args.aggregator is not None: print('ERROR: No test time augmentation that needs aggregating is ' 'performed but an aggregator was specified.') exit(1) if not args.quiet: print('Evaluating using the following parameters:') for key, value in sorted(vars(args).items()): print('{}: {}'.format(key, value)) # Load the data from the CSV file. _, data_fids = common.load_dataset(args.dataset, args.image_root) net_input_size = (args.net_input_height, args.net_input_width) pre_crop_size = (args.pre_crop_height, args.pre_crop_width) data_fid = data_fids[10] # Setup a tf Dataset containing all images. dataset = tf.data.Dataset.from_tensor_slices(data_fids) image = common.fid_to_image(data_fid,'dummy',image_root=args.image_root, image_size=pre_crop_size if args.crop_augment else net_input_size) # Convert filenames to actual image tensors. # dataset = dataset.map( # lambda fid: common.fid_to_image( # fid, 'dummy', image_root=args.image_root, # image_size=pre_crop_size if args.crop_augment else net_input_size), # num_parallel_calls=args.loading_threads) # Augment the data if specified by the arguments. # `modifiers` is a list of strings that keeps track of which augmentations # have been applied, so that a human can understand it later on. # modifiers = ['original'] # if args.flip_augment: # dataset = dataset.map(flip_augment) # dataset = dataset.apply(tf.contrib.data.unbatch()) # modifiers = [o + m for m in ['', '_flip'] for o in modifiers] # # if args.crop_augment == 'center': # dataset = dataset.map(lambda im, fid, pid: # (five_crops(im, net_input_size)[0], fid, pid)) # modifiers = [o + '_center' for o in modifiers] # elif args.crop_augment == 'five': # dataset = dataset.map(lambda im, fid, pid: # (tf.stack(five_crops(im, net_input_size)), [fid]*5, [pid]*5)) # dataset = dataset.apply(tf.contrib.data.unbatch()) # modifiers = [o + m for o in modifiers for m in [ # '_center', '_top_left', '_top_right', '_bottom_left', '_bottom_right']] # elif args.crop_augment == 'avgpool': # modifiers = [o + '_avgpool' for o in modifiers] # else: # modifiers = [o + '_resize' for o in modifiers] # Group it back into PK batches. # dataset = dataset.batch(args.batch_size) # # # Overlap producing and consuming.
# dataset = dataset.prefetch(1) # # images, _, _ = dataset.make_one_shot_iterator().get_next() # Create the model and an embedding head. model = import_module('nets.' + args.model_name) head = import_module('heads.' + args.head_name) image = tf.reshape(image[0],[1,224,224,3]) endpoints, body_prefix = model.endpoints(image, is_training=False) with tf.name_scope('head'): endpoints = head.head(endpoints, args.embedding_dim, is_training=False) with tf.Session() as sess: checkpoint = os.path.join(args.experiment_root, args.checkpoint) tf.train.Saver().restore(sess,checkpoint) layer_name = ['Conv2d_1_pointwise', 'Conv2d_3_pointwise', 'Conv2d_5_pointwise', 'Conv2d_11_pointwise','Conv2d_13_pointwise'] feature1,feature2,feature3,feature4,feature5 = sess.run([endpoints[layer_name[0]], endpoints[layer_name[1]], endpoints[layer_name[2]], endpoints[layer_name[3]], endpoints[layer_name[4]], ]) features = [feature1,feature2,feature3,feature4,feature5] cols = 5 rows = 1 for layer,feature in zip(layer_name,features): # for feature in features: h = feature.shape[1] w = feature.shape[2] filter_show = cols img_grid = np.zeros((h*rows,w*cols)) for c in range(filter_show): f_r = math.ceil((c + 1) / cols) f_c = (c + 1) if f_r == 1 else (c + 1 - (f_r - 1) * cols) img_grid[(f_r - 1) * h:f_r * h, (f_c - 1) * w:f_c * w] = feature[0, :, :, c] plt.figure() plt.imshow(img_grid,aspect='equal',cmap='viridis') plt.grid(False) plt.title(layer, fontsize=16) plt.show()
from importlib import import_module from itertools import count import os import h5py import json import numpy as np from sklearn.metrics import average_precision_score import tensorflow as tf import common import loss EXP_DIR = "/home/hthieu/AICityChallenge2019/track2_experiments/260218_triplet-reid_pre-trained_resnet50_veri+small_training_set/" query_dataset = "data/track2_validate_query_v3.csv" gallery_dataset = "data/track2_validate_v3.csv" # The *_query_* embedding file pairs with the query CSV; the earlier revision had these two paths swapped. query_embeddings = os.path.join(EXP_DIR, "track2_validate_query_embedding.h5") gallery_embeddings = os.path.join(EXP_DIR, "track2_validate_embedding.h5") batch_size = 256 query_pids, query_fids, query_views = common.load_dataset(query_dataset, None) gallery_pids, gallery_fids, gallery_views = common.load_dataset( gallery_dataset, None) with h5py.File(query_embeddings, 'r') as f_query: query_embs = np.array(f_query['emb']) with h5py.File(gallery_embeddings, 'r') as f_gallery: gallery_embs = np.array(f_gallery['emb'])
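# --- Illustrative sketch (not part of the original scripts) ---------------
# One way the loaded query/gallery embeddings can be turned into a retrieval
# score with the imported average_precision_score: compute pairwise
# distances, then score each query against the binary relevance of the
# gallery PIDs. This is an illustrative skeleton, not the project's own
# evaluation code (which also reports rank-1).
import numpy as np
from sklearn.metrics import average_precision_score

def mean_average_precision(q_embs, q_pids, g_embs, g_pids):
    dists = np.linalg.norm(q_embs[:, None] - g_embs[None, :], axis=-1)
    g_pids = np.asarray(g_pids)
    aps = []
    for d, pid in zip(dists, q_pids):
        relevant = (g_pids == pid).astype(np.int32)
        if 0 < relevant.sum() < len(relevant):  # AP needs both classes
            aps.append(average_precision_score(relevant, -d))
    return float(np.mean(aps))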