def train(output_dir,
          mnist_location=constants.MNIST_LOCATION,
          training_len=constants.TRAINING_LEN,
          masks=None,
          presets=None,
          train_order_seed=None):
    """Train the MNIST model, possibly with presets and masks.

    Args:
      output_dir: The directory to which to write model logs and output.
      mnist_location: The location of the MNIST numpy npz file.
      training_len: How long to run the model. A tuple of two values. The
        first value is the unit of measure (either "epochs" or "iterations")
        and the second is the number of units for which to train.
      masks: The masks, if any, used to prune weights. Masks can come in one
        of four forms:
        * A dictionary of numpy arrays. Each dictionary key is the name of the
          corresponding tensor that is to be masked out. Each value is a numpy
          array containing the masks (1 for including a weight, 0 for
          excluding).
        * The string name of a directory containing one file for each mask
          (in the form of bedrock.save_restore).
        * A list of string paths and dictionaries representing several masks.
          The mask used for training is the union of the pruned networks
          represented by these masks.
        * None, meaning the network should not be pruned.
      presets: The initial weights for the network, if any. Presets can come
        in any of the non-list forms mentioned for masks; each numpy array
        stores the desired initializations.
      train_order_seed: The random seed, if any, to be used to determine the
        order in which training examples are shuffled before being presented
        to the network.
    """
    # Retrieve previous information, if any. A list of masks is combined with
    # union.union.
    masks = save_restore.standardize(masks, union.union)
    presets = save_restore.standardize(presets)

    # Create the dataset and model.
    dataset = dataset_mnist.Dataset(
        mnist_location, train_order_seed=train_order_seed)
    inputs, labels = dataset.placeholders
    model = model_fc.ModelFc(
        constants.HYPERPARAMETERS,
        inputs,
        labels,
        presets=presets,
        masks=masks)

    # Train.
    params = {
        'test_interval': 100,
        'save_summaries': True,
        'save_network': True,
    }
    # Use the session as a context manager so its resources are released once
    # training finishes (or fails); otherwise every call leaks a session.
    with tf.Session() as sess:
        trainer.train(sess, dataset, model, constants.OPTIMIZER_FN,
                      training_len, output_dir, **params)
def main():
    """Run the pruning experiment once for each of trials 1 through 20."""
    first_trial, last_trial = 1, 20
    for trial_id in range(first_trial, last_trial + 1):
        current = Experiment(trial_id)
        # Passing None through standardize mirrors how presets are normalized
        # elsewhere; no preset weights are supplied here.
        experiment.run_experiment(
            current,
            max_prune_iterations=30,
            presets=save_restore.standardize(None))
def main():
    """Dispatch one experiment per (training length, trial) pair.

    Training lengths sweep 500..2500 in steps of 250 and trials run 1..9;
    each combination is handed to the TaskRunner.
    """
    runner = TaskRunner()

    def run_one(train_len, trial):
        # A single experiment run with no preset weights.
        return experiment.run_experiment(
            Experiment(train_len, trial),
            max_prune_iterations=30,
            presets=save_restore.standardize(None))

    training_lengths = range(500, 2501, 250)
    trials = range(1, 10)
    for train_len in training_lengths:
        for trial in trials:
            runner.do_task(run_one, train_len, trial)
def train(
        output_dir,
        mnist_location=constants.MNIST_LOCATION,
        training_len=constants.TRAINING_LEN,
        iterations=5,
        experiment_name='wgan_generator',
        presets=None,
        permute_labels=False,
        train_order_seed=None):
    """Run the iterative-pruning (lottery ticket) experiment.

    Each pruning level writes its output under the directory returned by
    paths.run(output_dir, level, experiment_name), i.e.
    {output_dir}/{pruning level}/{experiment_name} as defined in the
    foundations.paths module.

    Args:
      output_dir: Parent directory for all output files.
      mnist_location: The path to the NPZ file containing MNIST.
      training_len: How long to train on each iteration.
      iterations: How many iterative pruning steps to perform.
      experiment_name: The name of this specific experiment.
      presets: The initial weights for the network, if any. Accepted forms:
        * A dictionary of numpy arrays keyed by the name of the tensor to
          initialize; each value holds that tensor's initial weights.
        * The string name of a directory containing one file per set of
          weights to initialize (in the form of foundations.save_restore).
        * None, meaning the network should be randomly initialized.
      permute_labels: Whether to permute the labels on the dataset.
      train_order_seed: The random seed, if any, used to determine the order
        in which training examples are shuffled before being presented to the
        network.
    """
    # Pruning step: constants.GENERATOR_PRUNE_PERCENTS is bound as the first
    # argument of pruning.prune_by_percent.
    prune_masks = functools.partial(
        pruning.prune_by_percent, constants.GENERATOR_PRUNE_PERCENTS)

    # Model factory with the hyperparameters pre-bound.
    make_model = functools.partial(
        model_wgan.ModelWgan, constants.HYPERPARAMETERS)

    def make_dataset():
        # Build the dataset with the same location, label permutation and
        # shuffle seed on every call.
        return dataset_mnist.DatasetMnist(
            mnist_location,
            permute_labels=permute_labels,
            train_order_seed=train_order_seed)

    def train_model(sess, level, dataset, model):
        # Train one pruning level, writing to that level's own run directory.
        return trainer_wgan.train(
            sess,
            dataset,
            model,
            constants.OPTIMIZER_FN,
            training_len,
            output_dir=paths.run(output_dir, level, experiment_name),
            test_interval=100,
            save_summaries=True,
            save_network=True)

    # Run the experiment.
    experiment.experiment(
        make_dataset,
        make_model,
        train_model,
        prune_masks,
        iterations,
        presets=save_restore.standardize(presets))
def train(output_dir,
          iterations,
          conv_layers,
          experiment_name,
          training_len=constants.TRAINING_LEN,
          location=constants.DATASET_LOCATION,
          presets=None,
          permute_labels=False,
          train_order_seed=None):
    """Perform the lottery ticket experiment and plot the returned accuracies.

    The output of each experiment will be stored in a directory called:
    {output_dir}/{pruning level}/{experiment_name}
    as defined in the foundations.paths module. After the experiment, one
    curve per entry of the returned accuracy dictionary is plotted and the
    figure is saved to "experminets_graphs/results.png" (relative to the
    working directory; the directory is not created here).

    Args:
      output_dir: Parent directory for all output files.
      iterations: How many iterative pruning steps to perform.
      conv_layers: Passed through unchanged to experiment.experiment.
      experiment_name: The name of this specific experiment.
      training_len: How long to train on each iteration. The second element
        is used as the extent of the plot's x-axis.
      location: The path to the NPZ file containing the dataset.
      presets: The initial weights for the network, if any. Presets can come
        in one of three forms:
        * A dictionary of numpy arrays. Each dictionary key is the name of the
          corresponding tensor that is to be initialized. Each value is a
          numpy array containing the initializations.
        * The string name of a directory containing one file for each set of
          weights that is to be initialized (in the form of
          foundations.save_restore).
        * None, meaning the network should be randomly initialized.
      permute_labels: Whether to permute the labels on the dataset.
      train_order_seed: The random seed, if any, to be used to determine the
        order in which training examples are shuffled before being presented
        to the network.
    """
    # Define model and dataset functions.
    def make_dataset():
        return dataset.Dataset(
            location,
            permute_labels=permute_labels,
            train_order_seed=train_order_seed)

    make_model = functools.partial(model.Model, constants.HYPERPARAMETERS)

    # Define a training function.
    def train_model(_sess, _level, _dataset, _model):
        params = {
            'test_interval': 100,
            'save_summaries': True,
            'save_network': True,
        }
        return trainer.train(
            _sess,
            _dataset,
            _model,
            constants.OPTIMIZER_FN,
            training_len,
            output_dir=paths.run(output_dir, _level, experiment_name),
            **params)

    # Define a pruning function.
    prune_masks = functools.partial(pruning.prune_by_percent,
                                    constants.PRUNE_PERCENTS)

    # Run the experiment. The second returned value (losses) is not plotted.
    t_accuracy, _ = experiment.experiment(
        make_dataset,
        make_model,
        train_model,
        prune_masks,
        iterations,
        conv_layers,
        presets=save_restore.standardize(presets))

    # Use the training length actually requested, not the module default.
    total_len = training_len[1]
    for label, values in t_accuracy.items():
        # Spread the points evenly over [0, total_len) so the x-axis always
        # has exactly as many entries as the series. The previous step of
        # `total_len % 100` was 0 for any multiple of 100.
        # NOTE(review): assumes each series is sampled at a regular interval
        # starting at 0 -- confirm against experiment.experiment.
        steps = np.linspace(0, total_len, num=len(values), endpoint=False)
        plt.plot(steps, values, linewidth=0.8, label=label)
    plt.legend()
    plt.xticks(np.arange(0, total_len, 500))
    plt.grid()
    plt.savefig("experminets_graphs/results.png")
def train(output_dir,
          mnist_location=constants.MNIST_LOCATION,
          training_len=constants.TRAINING_LEN,
          masks=None,
          initialization_distribution=None,
          same_sign=None):
    """Perform the reinitialization experiment.

    Using the masks from a previous run of the lottery ticket experiment,
    train a new, randomly reinitialized network.

    Args:
      output_dir: The directory to which the output should be written.
      mnist_location: The path to the NPZ file containing MNIST.
      training_len: How long to train the network.
      masks: The masks, if any, used to prune weights. Masks can come in one
        of three forms:
        * A dictionary of numpy arrays. Each dictionary key is the name of the
          corresponding tensor that is to be masked out. Each value is a numpy
          array containing the masks (1 for including a weight, 0 for
          excluding).
        * The string name of a directory containing one file for each mask
          (in the form of bedrock.save_restore).
        * None, meaning the network should not be pruned.
        When initialization_distribution is provided, masks are required:
        the layer names and the shape of each sampled preset are taken from
        the masks.
      initialization_distribution: The distribution from which weights are
        sampled. If the argument is None, the weights are sampled from the
        default distribution. If the argument is a string, it is treated as
        the name of a directory whose filenames are layer names and whose
        entries are one-dimensional numpy arrays of weights. The weights for
        each layer are randomly sampled from these arrays. If the argument is
        anything else, it is treated as a dictionary whose keys are layer
        names and whose values are numpy arrays as described above.
      same_sign: Whether to ensure each weight is initialized to the same sign
        as the weight in the original network. Only applies when
        initialization_distribution is not None. If this argument is not None,
        then it contains the previous network weights that are used to
        determine the signs to which the new network should be initialized.
        This argument can be provided as a dictionary or string path in the
        same fashion as masks.
    """
    masks = save_restore.standardize(masks)
    prev_weights = save_restore.standardize(same_sign)

    if initialization_distribution is None:
        presets = None
    else:
        initialization_distribution = save_restore.maybe_restore(
            initialization_distribution)

        # The preset weights should be randomly sampled from the values of
        # the initialization distribution. They should be the same shape as
        # the masks.
        presets = {}
        for k, mask in masks.items():
            init = initialization_distribution[k]

            # Weights have the same sign as those in the original network.
            if prev_weights:
                positive = np.random.choice(init[init > 0], mask.shape)
                negative = np.random.choice(init[init < 0], mask.shape)
                # Compare this layer's previous weights elementwise; the
                # container of all layers cannot be compared against 0.
                presets[k] = np.where(prev_weights[k] > 0, positive, negative)

            # Weights are randomly sampled.
            else:
                presets[k] = np.random.choice(init, mask.shape)

    train_mnist.train(
        output_dir=output_dir,
        mnist_location=mnist_location,
        training_len=training_len,
        presets=presets,
        masks=masks)