def params_v8(verbose=True, path_to_checkpoints=""):
    """
    Returns params dict for v8 architecture

    :param verbose: Outputs information on model configuration
    :param path_to_checkpoints: Path to parent of checkpoints directory (include '/'!)
    :return: Params dict for DeepSphere model
    """
    params = dict()
    params['dir_name'] = path_to_checkpoints + "flask101-101-v8"

    # Types of layers.
    params['conv'] = 'chebyshev5'  # Graph convolution: chebyshev5 or monomials.
    params['pool'] = 'max'  # Pooling: max or average.
    params['activation'] = 'relu'  # Non-linearity: relu, elu, leaky_relu, softmax, tanh, etc.
    params['statistics'] = 'mean'  # Statistics (for invariance): None, mean, var, meanvar, hist.

    # Architecture.
    params['F'] = [32, 32, 64, 64, 64, 64, 32, 32]  # Graph convolutional layers: number of feature maps.
    params['K'] = [5, 5, 5, 5, 5, 5, 5, 5]  # Polynomial orders.
    params['batch_norm'] = [False] * 8  # Batch normalization.
    params['M'] = [2]  # Fully connected layers: output dimensionalities.
    params['input_channel'] = 1  # One channel (spherical map) per sample.

    # Pooling.
    nsides = [
        NSIDE, NSIDE // 2, NSIDE // 4, NSIDE // 8, NSIDE // 16, NSIDE // 32,
        NSIDE // 64, NSIDE // 128, NSIDE // 256
    ]
    params['nsides'] = nsides
    params['indexes'] = utils.nside2indexes(nsides, ORDER)
    # params['batch_norm_full'] = []

    # Regularization (to prevent over-fitting).
    params['regularization'] = 0  # Amount of L2 regularization over the weights
    # (will be divided by the number of weights).
    params['dropout'] = 1  # Percentage of neurons to keep.

    # Training.
    params['num_epochs'] = 20  # Number of passes through the training data.
    params['batch_size'] = 32  # Constant quantity of information (#pixels) per step (invariant to sample size).

    # Optimization: learning rate schedule and optimizer.
    params['scheduler'] = lambda step: 1e-4
    params['optimizer'] = lambda lr: tf.train.AdamOptimizer(
        lr, beta1=0.9, beta2=0.999, epsilon=1e-8)
    params['loss'] = 'custom2'  # Regression loss.

    # Number of model evaluations during training (influences training time).
    params['eval_frequency'] = 60

    if verbose:
        print('#sides: {}'.format(nsides))
        print('#pixels: {}'.format([(nside // ORDER)**2 for nside in nsides]))
        # Number of pixels on the full sphere: 12 * nsides**2.
        print('#samples per batch: {}'.format(params['batch_size']))
        print('=> #pixels per batch (input): {:,}'.format(
            params['batch_size'] * (NSIDE // ORDER)**2))
        print('=> #pixels for training (input): {:,}'.format(
            params['num_epochs'] * 422 * (NSIDE // ORDER)**2))

    return params


def params_vdata1(exp_name,
                  input_channels,
                  nmaps,
                  nfilters,
                  verbose=True,
                  num_epochs=20,
                  learning_rate=1e-4,
                  decay_factor=0.999,
                  order=ORDER,
                  batch_size=16):
    """
    Returns params dict for vdata1 type architectures

    :param exp_name: Experiment ID to define and track directories
    :param input_channels: Number of input partial-sky maps per sample
    :param nmaps: Number of full-sky maps from which the training data is generated
    :param nfilters: Constant number of feature maps for each graph convolutional layer
    :param verbose: Outputs information on model configuration
    :param num_epochs: Number of epochs for training the model
    :param learning_rate: Initial learning rate, decayed by decay_factor every step
    :param decay_factor: Per-step multiplicative decay of the learning rate
    :param order: HEALPix order for partial-sky maps
    :param batch_size: Batch size for training the network
    :return: Params dict for DeepSphere model
    """
    params = dict()
    params['dir_name'] = "flaskv2-vdata1-{}".format(exp_name)

    # Types of layers.
    params['conv'] = 'chebyshev5'  # Graph convolution: chebyshev5 or monomials.
    params['pool'] = 'max'  # Pooling: max or average.
    params['activation'] = 'relu'  # Non-linearity: relu, elu, leaky_relu, softmax, tanh, etc.
    params['statistics'] = None  # Statistics (for invariance): None, mean, var, meanvar, hist.

    # Architecture.
    params['F'] = [nfilters] * 8  # Graph convolutional layers: number of feature maps.
    params['K'] = [5, 5, 5, 5, 5, 5, 5, 5]  # Polynomial orders.
    params['batch_norm'] = [True] * 7 + [False]  # Batch normalization.
    params['M'] = [1]  # Fully connected layers: output dimensionalities.
    params['input_channel'] = input_channels  # Number of channels (spherical maps) per sample.

    # Pooling.
    nsides = [
        NSIDE, NSIDE // 2, NSIDE // 4, NSIDE // 8, NSIDE // 16, NSIDE // 32,
        NSIDE // 64, NSIDE // 128, NSIDE // 256
    ]
    params['nsides'] = nsides
    params['indexes'] = utils.nside2indexes(nsides, order)
    # params['batch_norm_full'] = []

    # Regularization (to prevent over-fitting).
    params['regularization'] = 0  # Amount of L2 regularization over the weights
    # (will be divided by the number of weights).
    params['dropout'] = 0.8  # Percentage of neurons to keep.

    # Training.
    params['num_epochs'] = num_epochs  # Number of passes through the training data.
    params['batch_size'] = batch_size  # Constant quantity of information (#pixels) per step (invariant to sample size).

    # Optimization: learning rate schedule and optimizer.
    params['scheduler'] = lambda step: tf.train.exponential_decay(
        learning_rate, step, decay_steps=1, decay_rate=decay_factor)
    # params['scheduler'] = lambda step: learning_rate
    params['optimizer'] = lambda lr: tf.train.AdamOptimizer(
        lr, beta1=0.9, beta2=0.999, epsilon=1e-8)
    params['loss'] = 'l1'  # Regression loss.

    # Number of model evaluations during training (influences training time).
    params['eval_frequency'] = (12 * order * order * nmaps /
                                batch_size) / 3  # Thrice per epoch.

    if verbose:
        print('#sides: {}'.format(nsides))
        print('#pixels: {}'.format([(nside // order)**2 for nside in nsides]))
        # Number of pixels on the full sphere: 12 * nsides**2.
        print('#samples per batch: {}'.format(params['batch_size']))
        print('=> #pixels per batch (input): {:,}'.format(
            params['batch_size'] * (NSIDE // order)**2))
        print('=> #pixels for training (input): {:,}'.format(
            params['num_epochs'] * 12 * order * order * nmaps *
            (NSIDE // order)**2))

    return params
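

# Worked example of the eval_frequency arithmetic above (illustrative numbers
# only, not taken from any particular run): each full-sky map yields
# 12 * order**2 partial-sky samples, so with order=2, nmaps=16 and batch_size=16
# one epoch is 12 * 4 * 16 / 16 = 48 steps, and 48 / 3 = 16 means the model is
# evaluated every 16 steps, i.e. three times per epoch.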


def params_v3(exp_name,
              convtype='chebyshev5',
              pooltype='max',
              nmaps=16,
              activation_func='relu',
              stat_layer=None,
              input_channels=1,
              gc_depth=8,
              nfilters=64,
              const_k=5,
              var_k=None,
              filters=None,
              batch_norm_output=False,
              var_batch_norm=None,
              fc_layers=[],
              num_outputs=1,
              reg_factor=0,
              dropout_rate=0.8,
              verbose=True,
              num_epochs=1,
              learning_rate=1e-4,
              decay_factor=0.999,
              decay_freq=1,
              decay_staircase=False,
              loss_func="l1",
              nside=NSIDE,
              nsides=None,
              order=ORDER,
              batch_size=16):
    """
    Returns params dict for v3 architectures

    :param exp_name: Experiment ID to define and track directories.
    :param convtype: Type of graph convolution performed ("chebyshev5" or "monomials").
    :param pooltype: Type of pooling used for GC layers (max or average).
    :param nmaps: Number of full-sky maps from which the training data is generated.
    :param activation_func: Activation function applied in all GC and FC layers (relu, leaky_relu, elu, etc.).
    :param stat_layer: Type of statistical layer applied for invariance. Can be None, mean, var, meanvar, or hist.
    :param input_channels: Number of input partial-sky maps. 1 for convergence, 2 for shear, +1 for counts-in-cells.
    :param gc_depth: Number of GC layers in the network. Fixed at eight if NSIDE=1024 and pooling by two every layer.
    :param nfilters: Constant number of filters for each GC layer. Unused if filters is not None.
    :param const_k: Constant K value for each GC layer. Unused if var_k is not None.
    :param var_k: List of GC orders K for each layer. Length = gc_depth.
    :param filters: List of numbers of filters for each GC layer. Length = gc_depth.
    :param batch_norm_output: Batch normalization for the output layer (True/False). Unused if var_batch_norm is not None.
    :param var_batch_norm: List of True/False values turning batch normalization on/off for each GC layer.
    :param fc_layers: List of sizes of hidden fully connected layers (excluding the output layer).
    :param num_outputs: 1 for just sigma_8, 2 for sigma_8 and predicted log-variance q.
    :param reg_factor: Multiplier for the L2 norm of the weights.
    :param dropout_rate: Percentage of neurons kept.
    :param verbose: Outputs information on model configuration.
    :param num_epochs: Number of epochs for training the model.
    :param learning_rate: Initial learning rate to use during training.
    :param decay_factor: Decay factor by which the learning rate gets multiplied every decay_freq steps, depending on decay_staircase.
    :param decay_freq: If decay_staircase=True, acts to stagger decays. Otherwise, spreads the decay over decay_freq steps, slowing it down.
    :param decay_staircase: If True, performs integer division in the lr decay, decaying every decay_freq steps.
    :param loss_func: Choice of loss function ("l1", "l2", "custom1", "custom2"). Must be implemented in the DeepSphere codebase.
    :param nside: NSIDE of the input maps. Should be 1024.
    :param nsides: List of NSIDEs for the graph convolutional layers. Length = gc_depth + 1.
    :param order: HEALPix order for partial-sky maps. Fixed at 2.
    :param batch_size: Batch size for training the network. Ideally a power of two.
    :return: Params dict for DeepSphere model trained on FLASK v2 data without galaxy-matter bias.
             Doesn't allow for sophisticated regularization or loss functions.
    """
    params = dict()
    params['dir_name'] = "flaskv3-{}".format(exp_name)

    # Types of layers.
    params['conv'] = convtype  # Graph convolution: chebyshev5 or monomials.
    params['pool'] = pooltype  # Pooling: max or average.
    params['activation'] = activation_func  # Non-linearity: relu, elu, leaky_relu, softmax, tanh, etc.
    params['statistics'] = stat_layer  # Statistics (for invariance): None, mean, var, meanvar, hist.

    # Architecture.
    if filters is None:
        filters = [nfilters] * gc_depth
    if var_k is None:
        var_k = [const_k] * gc_depth
    if var_batch_norm is None:
        var_batch_norm = [True] * (gc_depth - 1) + [batch_norm_output]
    if nsides is None:
        nsides = [nside // (2**i) for i in range(gc_depth + 1)]

    params['F'] = filters  # Graph convolutional layers: number of feature maps.
    params['K'] = var_k  # Polynomial orders.
    params['batch_norm'] = var_batch_norm  # Batch normalization.
    params['M'] = fc_layers + [num_outputs]  # Fully connected layers: output dimensionalities.
    params['input_channel'] = input_channels  # Number of channels (spherical maps) per sample.

    # Pooling.
    params['nsides'] = nsides
    params['indexes'] = utils.nside2indexes(nsides, order)
    # params['batch_norm_full'] = []

    # Regularization (to prevent over-fitting).
    params['regularization'] = reg_factor  # Amount of L2 regularization over the weights
    # (will be divided by the number of weights).
    params['dropout'] = dropout_rate  # Percentage of neurons to keep.

    # Training.
    params['num_epochs'] = num_epochs  # Number of passes through the training data.
    params['batch_size'] = batch_size  # Constant quantity of information (#pixels) per step (invariant to sample size).

    # Optimization: learning rate schedule and optimizer.
    params['scheduler'] = lambda step: tf.train.exponential_decay(
        learning_rate,
        step,
        decay_steps=decay_freq,
        decay_rate=decay_factor,
        staircase=decay_staircase)
    # params['scheduler'] = lambda step: learning_rate
    params['optimizer'] = lambda lr: tf.train.AdamOptimizer(
        lr, beta1=0.9, beta2=0.999, epsilon=1e-8)
    params['loss'] = loss_func  # Regression loss.

    # Number of model evaluations during training (influences training time).
    params['eval_frequency'] = (12 * order * order * nmaps /
                                batch_size) / 3  # Thrice per epoch.

    if verbose:
        print('#sides: {}'.format(nsides))
        print('#pixels: {}'.format([(nside // order)**2 for nside in nsides]))
        # Number of pixels on the full sphere: 12 * nsides**2.
        print('#samples per batch: {}'.format(params['batch_size']))
        print('=> #pixels per batch (input): {:,}'.format(
            params['batch_size'] * (NSIDE // order)**2))
        print('=> #pixels for training (input): {:,}'.format(
            params['num_epochs'] * 12 * order * order * nmaps *
            (NSIDE // order)**2))

    return params
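

# Example usage of params_v3 (a minimal sketch, kept commented out so the module
# stays import-safe). It assumes NSIDE, ORDER, `utils`, and `tf` (TensorFlow 1.x)
# are defined at module level as above, and that the DeepSphere model class is
# exposed as `models.deepsphere`, as in the DeepSphere demo notebooks; the
# experiment name and hyperparameter values are purely illustrative.
#
#     from deepsphere import models
#
#     params = params_v3('kappa-baseline', nmaps=16, input_channels=1,
#                        num_epochs=20, loss_func='l1', batch_size=16)
#     model = models.deepsphere(**params)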


def get_params(ntrain, EXP_NAME, order, Nside, architecture="FCN", verbose=True):
    """Parameters for the cgcnn and cnn2d defined in deepsphere/models.py"""

    n_classes = 2

    params = dict()
    params['dir_name'] = EXP_NAME

    # Types of layers.
    params['conv'] = 'chebyshev5'  # Graph convolution: chebyshev5 or monomials.
    params['pool'] = 'max'  # Pooling: max or average.
    params['activation'] = 'relu'  # Non-linearity: relu, elu, leaky_relu, softmax, tanh, etc.
    params['statistics'] = 'mean'  # Statistics (for invariance): None, mean, var, meanvar, hist.

    # Architecture.
    params['F'] = [16, 32, 64, 64, 64, n_classes]  # Graph convolutional layers: number of feature maps.
    params['K'] = [5] * 6  # Polynomial orders.
    params['batch_norm'] = [True] * 6  # Batch normalization.
    params['M'] = []  # Fully connected layers: output dimensionalities.

    # Pooling.
    nsides = [
        Nside, Nside // 2, Nside // 4, Nside // 8, Nside // 16, Nside // 32,
        Nside // 32
    ]
    params['nsides'] = nsides
    params['indexes'] = utils.nside2indexes(nsides, order)
    # params['batch_norm_full'] = []

    if architecture == "CNN":
        # Classical convolutional neural network.
        # Replace the last graph convolution and global average pooling by a fully connected layer.
        # That is, change the classifier while keeping the feature extractor.
        params['F'] = params['F'][:-1]
        params['K'] = params['K'][:-1]
        params['batch_norm'] = params['batch_norm'][:-1]
        params['nsides'] = params['nsides'][:-1]
        params['indexes'] = params['indexes'][:-1]
        params['statistics'] = None
        params['M'] = [n_classes]

    elif architecture == "FCN":
        # Fully convolutional neural network.
        pass

    elif architecture == 'FNN':
        # Fully connected neural network.
        raise NotImplementedError('This is not working!')
        params['F'] = []
        params['K'] = []
        params['batch_norm'] = []
        params['indexes'] = []
        params['statistics'] = None
        params['M'] = [128 * order * order, 1024 * order, 1024 * order, n_classes]
        params['batch_norm_full'] = [True] * 3
        params['input_shape'] = (Nside // order)**2

    elif architecture == 'CNN-2d-big':
        params['F'] = params['F'][:-1]
        params['K'] = [[5, 5]] * 5
        params['p'] = [2, 2, 2, 2, 2]
        params['input_shape'] = [1024 // order, 1024 // order]
        params['batch_norm'] = params['batch_norm'][:-1]
        params['statistics'] = None
        params['M'] = [n_classes]
        del params['indexes']
        del params['nsides']
        del params['conv']

    elif architecture == 'FCN-2d-big':
        params['K'] = [[5, 5]] * 6
        params['p'] = [2, 2, 2, 2, 2, 1]
        params['input_shape'] = [1024 // order, 1024 // order]
        del params['indexes']
        del params['nsides']
        del params['conv']

    elif architecture == 'CNN-2d':
        params['F'] = [8, 16, 32, 32, 16]
        params['K'] = [[5, 5]] * 5
        params['p'] = [2, 2, 2, 2, 2]
        params['input_shape'] = [1024 // order, 1024 // order]
        params['batch_norm'] = params['batch_norm'][:-1]
        params['statistics'] = None
        params['M'] = [n_classes]
        del params['indexes']
        del params['nsides']
        del params['conv']

    elif architecture == 'FCN-2d':
        params['F'] = [8, 16, 32, 32, 16, 2]
        params['K'] = [[5, 5]] * 6
        params['p'] = [2, 2, 2, 2, 2, 1]
        params['input_shape'] = [1024 // order, 1024 // order]
        del params['indexes']
        del params['nsides']
        del params['conv']

    else:
        raise ValueError('Unknown architecture {}.'.format(architecture))

    # Regularization (to prevent over-fitting).
    params['regularization'] = 0  # Amount of L2 regularization over the weights (will be divided by the number of weights).
    if '2d' in architecture:
        params['regularization'] = 3
    # elif architecture == 'FNN':
    #     print('Use regularization new')
    #     params['regularization'] = 10  # Amount of L2 regularization over the weights (will be divided by the number of weights).
    #     params['dropout'] = 1  # Percentage of neurons to keep.
    params['dropout'] = 1  # Percentage of neurons to keep.

    # Training.
    params['num_epochs'] = 80  # Number of passes through the training data.
    params['batch_size'] = 16 * order**2  # Constant quantity of information (#pixels) per step (invariant to sample size).

    # Optimization: learning rate schedule and optimizer.
    params['scheduler'] = lambda step: tf.train.exponential_decay(
        2e-4, step, decay_steps=1, decay_rate=0.999)
    params['optimizer'] = lambda lr: tf.train.AdamOptimizer(
        lr, beta1=0.9, beta2=0.999, epsilon=1e-8)

    # Number of model evaluations during training (influences training time).
    n_evaluations = 200
    params['eval_frequency'] = int(params['num_epochs'] * ntrain /
                                   params['batch_size'] / n_evaluations)

    if verbose:
        print('#sides: {}'.format(nsides))
        print('#pixels: {}'.format([(nside // order)**2 for nside in nsides]))
        # Number of pixels on the full sphere: 12 * nsides**2.
        print('#samples per batch: {}'.format(params['batch_size']))
        print('=> #pixels per batch (input): {:,}'.format(
            params['batch_size'] * (Nside // order)**2))
        print('=> #pixels for training (input): {:,}'.format(
            params['num_epochs'] * ntrain * (Nside // order)**2))
        n_steps = params['num_epochs'] * ntrain // params['batch_size']
        lr = [
            params['scheduler'](step).eval(session=tf.Session())
            for step in [0, n_steps]
        ]
        print('Learning rate will start at {:.1e} and finish at {:.1e}.'.format(*lr))

    return params
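

# Worked example of the bookkeeping in get_params (illustrative numbers only,
# not taken from any particular run):
#
#     order = 2
#     ntrain = 9600                        # partial-sky training samples (hypothetical)
#     batch_size = 16 * order**2           # = 64
#     n_steps = 80 * ntrain // batch_size  # = 12000 optimisation steps over 80 epochs
#     eval_frequency = int(80 * ntrain / batch_size / 200)  # = 60 steps between evaluations
#
# The learning rate decays from 2e-4 by a factor of 0.999 per step, ending near
# 2e-4 * 0.999**12000 ~ 1.2e-9, which is what the final verbose print reports.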