class LocalDensity(flowws.Stage):
    """Compute the local density of particles in the system"""

    ARGS = [
        Arg('histogram_bins', '-b', int, default=64,
            help='Number of bins to use in the histogram plot'),
        Arg('r_max', '-r', float, required=True,
            help='Maximum radial distance'),
        Arg('diameter', '-d', float, default=0.,
            help='Smoothing diameter to use in the density calculation'),
    ]

    def run(self, scope, storage):
        """Compute and provide the local density"""
        compute = freud.density.LocalDensity(
            self.arguments['r_max'], self.arguments['diameter'])
        box = freud.box.Box.from_box(scope['box'], scope.get('dimensions', 3))

        compute.compute((box, scope['position']), scope['position'])

        self.density = scope['local_density'] = compute.density
        scope.setdefault('color_scalars', []).append('local_density')
        scope.setdefault('visuals', []).append(self)

    def draw_matplotlib(self, figure):
        ax = figure.add_subplot(111)
        ax.hist(self.density, bins=self.arguments['histogram_bins'],
                density=True)
        ax.set_xlabel('Density')
        ax.set_ylabel('Probability')

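# A minimal usage sketch of the freud calls the stage above makes (toy data
# assumed, not from the original source): one density value comes back per
# query point.
import numpy as np
import freud

_box = freud.box.Box.cube(10)
_points = np.random.uniform(-5, 5, (100, 3)).astype(np.float32)
_ld = freud.density.LocalDensity(r_max=2.5, diameter=1.0)
_ld.compute((_box, _points), _points)
assert _ld.density.shape == (100,)
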
class InitializeTF(flowws.Stage):
    """Initialize tensorflow, enabling memory growth for GPUs."""

    ARGS = [
        Arg('jit', '-j', bool, True,
            help='If True, enable JIT compilation'),
        Arg('gpu', '-g', bool, True,
            help='If False, disable GPUs'),
        Arg('memory_growth', '-m', bool, True,
            help='If True, enable gradual memory growth'),
    ]

    def run(self, scope, storage):
        tf.config.optimizer.set_jit(self.arguments['jit'])

        if not self.arguments['gpu']:
            tf.config.set_visible_devices([], 'GPU')

        gpus = tf.config.experimental.list_physical_devices('GPU')
        if gpus:
            try:
                if self.arguments['memory_growth']:
                    # Currently, memory growth needs to be the same across GPUs
                    for gpu in gpus:
                        tf.config.experimental.set_memory_growth(gpu, True)
                logical_gpus = tf.config.experimental.list_logical_devices('GPU')
                print(len(gpus), "Physical GPUs,", len(logical_gpus),
                      "Logical GPUs")
            except RuntimeError as e:
                # Memory growth must be set before GPUs have been initialized
                print(e)

class WikiText2(flowws.Stage):
    """Load and encode the wikitext-2 dataset for language modeling"""

    ARGS = [
        Arg('sequence_length', '-l', int, 64,
            help='Maximum sequence length of the network'),
        Arg('batch_size', '-b', int, 32,
            help='Batch size for training'),
    ]

    def run(self, scope, storage):
        encoder = wikitext.build_wikitext_bpe_encoder()
        scope['vocabulary_size'] = encoder.vocabulary_size()
        sequence_length = self.arguments['sequence_length']

        def x_y_for_dataset(dataset_name):
            fat_sample = training_data_to_dense_samples(
                dataset_name, encoder, sequence_length)
            _x = fat_sample[:, :sequence_length]
            _y = np.expand_dims(fat_sample[:, 1:], axis=-1)
            return _x, _y

        scope['training_data'] = x_y_for_dataset(wikitext.TRAINING_SET_NAME)
        (scope['x_train'], scope['y_train']) = scope['training_data']
        scope['validation_data'] = x_y_for_dataset(wikitext.VALIDATION_SET_NAME)
        scope['test_data'] = x_y_for_dataset(wikitext.TEST_SET_NAME)

        scope['loss'] = 'sparse_categorical_crossentropy'
        scope['sequence_length'] = sequence_length
        scope['batch_size'] = self.arguments['batch_size']
        scope['encoder'] = lambda x: [n for (n, token) in encoder(x)]
        scope['decoder'] = lambda x: encoder.decode(x)

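# An illustrative sketch (toy tokens, not from the source) of the target
# construction in x_y_for_dataset above: each "fat" sample holds
# sequence_length + 1 tokens, and the labels are the inputs shifted one step,
# so y[t] is the token that follows x[t].
import numpy as np

_fat_sample = np.arange(6)[np.newaxis, :]          # one sample of length 5 + 1
_x = _fat_sample[:, :5]                            # tokens 0..4
_y = np.expand_dims(_fat_sample[:, 1:], axis=-1)   # tokens 1..5, shape (1, 5, 1)
assert np.all(_y[0, :-1, 0] == _x[0, 1:])
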
class NeuralPotentialDropout(flowws.Stage):
    """Register learned (neural-potential) dropout classes in the scope."""

    ARGS = [
        Arg('reset', '-r', bool, False,
            help='If given, first reset (clear) dropout configuration'),
        Arg('mu', '-m', float,
            help='Neural potential to use for layers'),
        Arg('spatial_mask', '-s', bool, False,
            help='If True, learn a spatial mask for spatial dropout'),
    ]

    def run(self, scope, storage):
        mu = self.arguments['mu']

        if self.arguments['reset']:
            # clear all three dropout classes this stage registers
            scope.pop('dropout_class', None)
            scope.pop('dropout_sequence_class', None)
            scope.pop('dropout_spatial2d_class', None)

        layer_dropout = functools.partial(LearnedDropout, mu)
        sequence_dropout = functools.partial(
            LearnedSequenceDropout, mu,
            spatial_mask=self.arguments['spatial_mask'])
        spatial_dropout = functools.partial(
            LearnedSpatialDropout2D, mu,
            spatial_mask=self.arguments['spatial_mask'])

        scope['dropout_class'] = layer_dropout
        scope['dropout_sequence_class'] = sequence_dropout
        scope['dropout_spatial2d_class'] = spatial_dropout

class Tensorboard(flowws.Stage):
    """Specify a tensorboard dump callback."""

    ARGS = [
        Arg('histogram_period', None, int, 0,
            help='Frequency to dump histogram data'),
        Arg('write_graph', '-g', bool, True,
            help='Write the computational graph'),
        Arg('profile_batch', '-b', int, 2,
            help='Batch index to profile (0 to disable)'),
    ]

    def run(self, scope, storage):
        callback = callbacks.TensorBoard(
            histogram_freq=self.arguments['histogram_period'],
            write_graph=self.arguments['write_graph'],
            profile_batch=self.arguments['profile_batch'])
        scope.setdefault('callbacks', []).append(callback)

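# An equivalent direct construction of the Keras callback registered above
# (a sketch; the log_dir value is assumed, not from the source). The callback
# is simply handed to Model.fit() by a later training stage.
from tensorflow.keras import callbacks as _callbacks

_cb = _callbacks.TensorBoard(log_dir='logs', histogram_freq=1,
                             write_graph=True, profile_batch=0)
# model.fit(x, y, callbacks=[_cb])
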
class DEMInteraction(flowws.Stage):
    """Specify that DEM interactions should be included in future MD stages"""

    ARGS = [
        Arg('reset', '-r', bool, False,
            help='Clear previously-defined DEM interactions beforehand'),
        Arg('type', '-t', str, required=True,
            help='Interaction class name'),
    ]

    def run(self, scope, storage):
        """Registers this object to provide a DEM force compute in future MD stages"""
        callbacks = scope.setdefault('callbacks', collections.defaultdict(list))

        if self.arguments['reset']:
            pre_run_callbacks = [c for c in callbacks['pre_run']
                                 if not isinstance(c, DEMInteraction)]
            callbacks['pre_run'] = pre_run_callbacks

        callbacks['pre_run'].append(self)

    def __call__(self, scope, storage, context):
        """Callback to be performed before each run command.

        Initializes a DEM pair potential interaction based on per-type
        shape information.
        """
        interaction_type = self.arguments['type']
        nlist = hoomd.md.nlist.tree()
        system = scope['system']
        dimensions = scope.get('dimensions', 3)

        try:
            type_shapes = scope['type_shapes']
        except KeyError:
            msg = ('Shape information has not been set for DEM interactions. '
                   'Use a ShapeDefinition or similar step beforehand.')
            raise WorkflowError(msg)

        if interaction_type == 'wca':
            radii = [shape.get('rounding_radius', 0) for shape in type_shapes]
            assert np.isclose(min(radii), max(radii)), \
                'WCA requires identical rounding radii for all shapes'
            radius = radii[0]

            if radius <= 0:
                logger.warning(
                    'Non-rounded shapes given, using a rounding radius of 0.5')
                radius = .5

            potential = hoomd.dem.pair.WCA(nlist, radius)

            for (name, shape) in zip(system.particles.types, type_shapes):
                vertices = shape['vertices']
                if dimensions == 2:
                    potential.setParams(name, vertices)
                else:
                    (vertices, faces) = hoomd.dem.utils.convexHull(
                        shape['vertices'])
                    potential.setParams(name, vertices, faces)
        else:
            raise NotImplementedError(
                'Unknown interaction type {}'.format(interaction_type))

class CenterSpaceGroup(flowws.Stage):
    """Attempt to automatically detect the space group of the system and center it accordingly."""

    ARGS = [
        Arg('minimum_distance', '-d', float, .9,
            help='Precision with which to merge points transformed by '
                 'space group transformations'),
        Arg('use_types', '-t', bool, True,
            help='Use type information when centering the unit cell'),
    ]

    def run(self, scope, storage):
        """Detect the space group and center the system."""
        box = scope['box']
        boxmat = plato.math.box_to_matrix(box)
        fractions = plato.math.make_fractions(box, scope['position'])
        types = scope['type']

        if not self.arguments['use_types']:
            types = np.zeros_like(types)

        spglib_cell = (boxmat, fractions, types)
        try:
            self.spg_info = identify_spacegroup(
                spglib_cell, max_iterations=64,
                minimum_distance=self.arguments['minimum_distance'])
        except ValueError:
            # didn't work; don't display things
            return

        (box, fractions, types) = standardize_cell(
            spglib_cell, self.spg_info['common'][1])

        scope['box'] = box
        scope['position'] = plato.math.fractions_to_coordinates(box, fractions)
        scope['type'] = types
        scope.setdefault('visuals', []).append(self)

    def draw_matplotlib(self, figure):
        ax = figure.add_subplot()
        histogram = self.spg_info['histogram']
        keys = list(histogram)
        counts = [histogram[k] for k in keys]
        xs = np.arange(len(keys))
        ax.bar(xs, counts)
        ax.set_title('Space group prevalence')
        ax.set_xticks(xs)
        ax.set_xticklabels(keys)
        ax.set_ylabel('Count')

class Colormap(flowws.Stage):
    """Access and use matplotlib colormaps on scalar quantities.

    This module emits a `color` value, calculated using a given scalar
    argument and matplotlib colormap name. Valid scalar quantities can be
    provided to this module by saving them in the scope and adding their
    name to the `color_scalars` list.
    """

    ARGS = [
        Arg('colormap_name', '-c', str, 'viridis',
            help='Name of the matplotlib colormap to use'),
        Arg('argument', '-a', str,
            help='Name of the value to map to colors'),
        Arg('range', '-r', (float, float),
            help='Minimum and maximum values of the scalar to be mapped'),
    ]

    def run(self, scope, storage):
        """Generate an array of colors using the given color scalars."""
        color_scalars = scope.setdefault('color_scalars', [])
        if 'type' not in color_scalars and 'type' in scope:
            color_scalars.append('type')

        argument = self.arguments.get('argument', None)
        if (argument is not None and argument not in color_scalars
                and argument in scope):
            color_scalars.append(argument)

        self.arg_specifications['argument'].valid_values = color_scalars
        if argument is None:
            self.arguments['argument'] = color_scalars[0]

        self.arg_specifications['colormap_name'].valid_values = \
            sorted(matplotlib.cm.cmap_d.keys())

        N = len(scope['position'])
        try:
            values = scope[self.arguments['argument']].copy()
        except KeyError:
            values = np.full(N, 0.5)

        normalize = None
        if self.arguments.get('range', None):
            (vmin, vmax) = self.arguments['range']
            normalize = matplotlib.colors.Normalize(vmin=vmin, vmax=vmax)

        cmap = matplotlib.cm.get_cmap(self.arguments['colormap_name'])
        smap = matplotlib.cm.ScalarMappable(normalize, cmap)
        scope['color'] = smap.to_rgba(values)

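# A standalone sketch of the matplotlib mapping performed above (toy values
# assumed, not from the source): scalars are normalized and mapped to RGBA rows.
import numpy as np
import matplotlib.cm
import matplotlib.colors

_values = np.linspace(0, 1, 5)
_norm = matplotlib.colors.Normalize(vmin=0, vmax=1)
_cmap = matplotlib.cm.get_cmap('viridis')
_colors = matplotlib.cm.ScalarMappable(_norm, _cmap).to_rgba(_values)
assert _colors.shape == (5, 4)  # one (r, g, b, a) row per input value
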
class Diffraction(flowws.Stage):
    """Compute a 3D diffraction pattern of the system and display its slice or projection.

    This stage computes a 3D histogram of the system based on the given
    periodic system box and particle coordinates and performs the FFT in 3D.
    Either a slice or full projection through the Fourier space is displayed
    with the current system orientation.

    **Note:** This module should be considered experimental in terms of
    stability for the time being; the inputs and outputs may change
    drastically in the future, or the module may be removed entirely.
    """

    ARGS = [
        Arg('bin_count', '-b', int, default=128,
            help='Number of bins to use in the x, y, and z directions'),
        Arg('projection', '-p', bool, False,
            help='If True, project the diffraction pattern all the way '
                 'through Fourier space'),
        Arg('min_value', None, float, 0,
            help='Minimum value of intensity to clip to'),
        Arg('max_value', None, float, 1,
            help='Maximum value of intensity to clip to'),
        Arg('sigma', None, float, 0,
            help='Lengthscale of blurring the FFT'),
    ]

    def run(self, scope, storage):
        """Prepare to display the diffraction pattern"""
        self.positions = scope['position']
        self.box = scope['box']
        scope.setdefault('visuals', []).append(self)
        scope.setdefault('visual_link_rotation', []).append(self)

    def draw_plato(self):
        prim = DiffractionPrimitive(positions=self.positions, box=self.box)
        prim.size = self.arguments['bin_count']
        prim.sigma = self.arguments['sigma']
        prim.vmin = self.arguments['min_value']
        prim.vmax = self.arguments['max_value']
        prim.projection = self.arguments['projection']
        prim.cmap = DEFAULT_COLORMAP
        return plato.draw.Scene(prim, size=(1, 1), pixel_scale=800)

class LSTMLM(flowws.Stage):
    """Specify an LSTM-based language model."""

    ARGS = [
        Arg('embedding_dimensions', '-e', int, 32,
            help='Embedding dimensionality for inputs'),
        Arg('layer_widths', '-w', [int],
            help='Number of units for each LSTM layer'),
        Arg('inner_dropout', None, float,
            help='Dropout rate to use inside LSTM module'),
        Arg('inter_dropout', None, float,
            help='Dropout rate to use after each LSTM module'),
        Arg('sequence_dropout', None, bool, False,
            help='If True, use sequence-element dropout by default'),
    ]

    def run(self, scope, storage):
        vocabulary_size = scope['vocabulary_size']
        sequence_length = scope['sequence_length']

        default_dropout = (keras.layers.SpatialDropout1D
                           if self.arguments['sequence_dropout']
                           else keras.layers.Dropout)
        dropout_cls = scope.get('dropout_sequence_class', default_dropout)

        inputs = keras.layers.Input((sequence_length,), dtype=tf.int32)
        last = keras.layers.Embedding(
            vocabulary_size, self.arguments['embedding_dimensions'])(inputs)

        for w in self.arguments['layer_widths']:
            layer = keras.layers.LSTM(
                w, return_sequences=True,
                dropout=self.arguments.get('inner_dropout', 0))
            last = layer(last)
            last = keras.layers.BatchNormalization()(last)
            if 'inter_dropout' in self.arguments:
                last = dropout_cls(self.arguments['inter_dropout'])(last)

        last = keras.layers.Dense(vocabulary_size, activation='softmax')(last)

        scope['input_symbol'] = inputs
        scope['output'] = last
        scope['loss'] = 'sparse_categorical_crossentropy'

class Dshemuchadse2021OPPInteraction(Dshemuchadse2021InteractionBase):
    """Specify a new interaction potential from the paper "Moving beyond the
    constraints of chemistry via crystal structure discovery with isotropic
    multiwell pair potentials" to include in future MD stages.

    These interactions are taken from the methods description in the paper
    (Proceedings of the National Academy of Sciences May 2021, 118 (21);
    DOI 10.1073/pnas.2024034118). This module implements the oscillatory pair
    potential, consisting of a short-range repulsion and a cosine term that
    scales with r^-3. The potential is rescaled such that its global minimum
    is -1 (in units of epsilon_0).
    """

    ARGS = [
        Arg('reset', '-r', bool, False,
            help='Disable previously-defined interactions'),
        Arg('k', '-k', float,
            help='Interaction parameter k'),
        Arg('phi', '-p', float,
            help='Interaction parameter phi'),
        Arg('width', '-w', int, 1000,
            help='Number of points at which to evaluate the tabulated potential'),
        Arg('r_min', None, float, .5,
            help='Minimum distance at which to evaluate the tabulated potential'),
    ]

    def run(self, scope, storage):
        """Registers this object to provide a force compute in future MD stages"""
        callbacks = scope.setdefault('callbacks', collections.defaultdict(list))

        if self.arguments['reset']:
            pre_run_callbacks = [
                c for c in callbacks['pre_run']
                if not isinstance(c, Dshemuchadse2021OPPInteraction)]
            callbacks['pre_run'] = pre_run_callbacks
            return

        self.potential_kwargs = dict(
            k=self.arguments['k'], phi=self.arguments['phi'])
        self.rmax, self.potential_kwargs['scale'] = \
            self.find_potential_parameters()

        callbacks['pre_run'].append(self)
        scope.setdefault('visuals', []).append(self)

    @staticmethod
    def force(r, k, phi, scale=1.):
        # F(r) = -dV/dr for the potential below
        arg = k*(r - 1.) + phi
        result = 15*r**(-16) + (k*r*np.sin(arg) + 3*np.cos(arg))*r**(-4)
        return result*scale

    @staticmethod
    def potential(r, k, phi, scale=1.):
        # V(r) = r^-15 + cos(k*(r - 1) + phi)*r^-3
        result = r**(-15) + np.cos(k*(r - 1.) + phi)*r**(-3)
        return result*scale

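# A quick consistency sketch (parameter values assumed, not from the source):
# the analytic force above should match -dV/dr of the oscillatory potential,
# which a finite-difference gradient confirms away from the grid endpoints.
import numpy as np

_r = np.linspace(0.8, 3.0, 10001)
_k, _phi = 7.0, 0.2
_V = _r**(-15) + np.cos(_k*(_r - 1.) + _phi)*_r**(-3)
_arg = _k*(_r - 1.) + _phi
_F = 15*_r**(-16) + (_k*_r*np.sin(_arg) + 3*np.cos(_arg))*_r**(-4)
assert np.allclose(-np.gradient(_V, _r)[1:-1], _F[1:-1], rtol=1e-3, atol=1e-2)
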
class CIFAR100(flowws.Stage):
    """Use the CIFAR100 dataset from keras."""

    ARGS = [
        Arg('coarse_labels', '-c', bool, False,
            help='If True, use 20 coarse-grained labels instead of '
                 '100 fine-grained labels'),
    ]

    def run(self, scope, storage):
        label_mode = 'coarse' if self.arguments['coarse_labels'] else 'fine'
        (x_train, y_train), (x_test, y_test) = \
            keras.datasets.cifar100.load_data(label_mode=label_mode)
        num_classes = len(np.unique(y_train))

        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train /= 255
        x_test /= 255

        input_shape = x_train[0].shape

        scope['x_train'] = x_train
        scope['x_test'] = x_test
        scope['y_train'] = y_train
        scope['y_test'] = y_test
        scope['num_classes'] = num_classes

class MobileNetV2(flowws.Stage):
    """Use the MobileNetV2 architecture as provided by keras."""

    ARGS = [
        Arg('dropout', '-d', float, 0,
            help='Dropout probability to use (if any)'),
    ]

    def run(self, scope, storage):
        try:
            input_shape = scope['x_train'][0].shape
        except KeyError:
            input_shape = next(scope['train_generator'])[0][0].shape
        num_classes = scope['num_classes']

        Dropout = scope.get('dropout_spatial2d_class', keras.layers.Dropout)

        model = MobileNetModel(classes=num_classes, weights=None,
                               input_shape=input_shape)

        if self.arguments['dropout']:
            clonefun_ = functools.partial(
                clonefun, Dropout=Dropout, rate=self.arguments['dropout'])
            model = keras.models.clone_model(model, clone_function=clonefun_)

        scope['model'] = model
        scope['loss'] = 'sparse_categorical_crossentropy'
        scope.setdefault('metrics', []).append('accuracy')

class Pyriodic(flowws.Stage):
    """Browse structures available in :std:doc:`pyriodic<pyriodic:index>`.

    This module provides all the structures available in the pyriodic
    default database (which uses all available pyriodic libraries installed
    on the system). Systems are resized to a minimum of the given size and
    noise may be added before the rest of the workflow is run.
    """

    ARGS = [
        Arg('structure', '-s', str, required=True,
            valid_values=VALID_STRUCTURES,
            help='Structure to display'),
        Arg('size', '-n', int, default=1,
            help='Minimum size of the system'),
        Arg('noise', None, float, default=0,
            help='Gaussian noise to apply to each position'),
    ]

    def run(self, scope, storage):
        """Load the given structure into the scope"""
        structure = self._get_structure(self.arguments['structure'],
                                        self.arguments['size'],
                                        self.arguments['noise'])
        scope['position'] = structure.positions
        scope['type'] = structure.types
        scope['box'] = structure.box

    @functools.lru_cache(maxsize=1)
    def _get_structure(self, name, size, noise):
        query = 'select structure from unit_cells where name = ? limit 1'
        for (structure,) in pyriodic.db.query(query, (name,)):
            pass
        structure = structure.rescale_shortest_distance(1)
        structure = structure.replicate_upto(size)
        if noise:
            structure = structure.add_gaussian_noise(noise)
        return structure

class ImagenetDirectory(flowws.Stage):
    """Load ImageNet images from a specified directory."""

    ARGS = [
        Arg('base', '-b', str,
            help='Base directory storing images'),
        Arg('validation_fraction', '-v', float, .3,
            help='Fraction of files to be used in validation set'),
        Arg('augmentations', '-a', [str],
            help='Names of augmentations to perform on each image '
                 '(use "null" for none)'),
        Arg('batch_size', None, int, 32,
            help='Batch size for training and validation'),
        Arg('train_epoch_scaling', None, float, 1.,
            help='Factor by which to scale the number of batches considered '
                 'to be part of an epoch (train set)'),
        Arg('val_epoch_scaling', None, float, 1.,
            help='Factor by which to scale the number of batches considered '
                 'to be part of an epoch (validation set)'),
        Arg('test_epoch_scaling', None, float, 1.,
            help='Factor by which to scale the number of batches considered '
                 'to be part of an epoch (test set)'),
    ]

    def run(self, scope, storage):
        train_dir = os.path.join(self.arguments['base'], 'train')
        test_dir = os.path.join(self.arguments['base'], 'val')

        label_names = list(sorted(os.listdir(train_dir)))
        label_map = {label: i for (i, label) in enumerate(label_names)}

        train_files, val_files = split_filenames(
            train_dir, label_names, self.arguments['validation_fraction'])
        test_files, _ = split_filenames(test_dir, label_names, -1)

        augmentation_names = (self.arguments['augmentations'] or
                              ['scale', 'crop', 'maybe_flip', 'keras_preprocess'])
        augmentations = [AUGMENTATIONS[name] for name in augmentation_names]

        train_generator = batch_generator(
            train_dir, train_files, label_map, self.arguments['batch_size'],
            augmentations)
        val_generator = batch_generator(
            train_dir, val_files, label_map, self.arguments['batch_size'],
            augmentations)
        test_generator = batch_generator(
            test_dir, test_files, label_map, self.arguments['batch_size'],
            augmentations)

        steps_per_epoch = int(len(train_files)//self.arguments['batch_size']*
                              self.arguments['train_epoch_scaling'])
        validation_steps = int(len(val_files)//self.arguments['batch_size']*
                               self.arguments['val_epoch_scaling'])
        test_steps = int(len(test_files)//self.arguments['batch_size']*
                         self.arguments['test_epoch_scaling'])

        scope['label_names'] = label_names
        scope['label_map'] = label_map
        scope['train_generator'] = train_generator
        scope['generator_train_steps'] = steps_per_epoch
        scope['validation_generator'] = val_generator
        scope['generator_val_steps'] = validation_steps
        scope['test_generator'] = test_generator
        scope['generator_test_steps'] = test_steps
        scope['loss'] = 'sparse_categorical_crossentropy'
        scope['num_classes'] = len(label_names)

class TextMetrics(flowws.Stage):
    """Add named metrics to be computed during training."""

    ARGS = [
        Arg('metrics', '-m', [str], [],
            help='Metrics to compute and save'),
    ]

    def run(self, scope, storage):
        metrics = scope.setdefault('metrics', [])
        for m in self.arguments['metrics']:
            metrics.append(METRIC_MAP.get(m, m))

class Steinhardt(flowws.Stage):
    """Compute the Steinhardt order parameter of particles in the system"""

    ARGS = [
        Arg('histogram_bins', '-b', int, default=64,
            help='Number of bins to use in the histogram plot'),
        Arg('l', '-l', int, default=6,
            help='Spherical harmonic degree of the order parameter'),
        Arg('r_max', '-r', float,
            help='Maximum radial distance to consider for neighbors (if given)'),
        Arg('num_neighbors', '-n', int,
            help='Number of neighbors to use; overrules r_max if given'),
        Arg('r_guess', None, float, 2,
            help='Characteristic distance for finding num_neighbors '
                 'neighboring particles'),
    ]

    def run(self, scope, storage):
        """Compute and provide the Steinhardt order parameter"""
        compute = freud.order.Steinhardt(self.arguments['l'])
        box = freud.box.Box.from_box(scope['box'], scope.get('dimensions', 3))

        query_options = dict(exclude_ii=True)
        if 'num_neighbors' in self.arguments:
            query_options['num_neighbors'] = self.arguments['num_neighbors']
            query_options['r_guess'] = self.arguments['r_guess']
        else:
            query_options['r_max'] = self.arguments.get('r_max', None)

        compute.compute((box, scope['position']), query_options)

        self.steinhardt = compute.ql
        name = 'steinhardt_q{}'.format(self.arguments['l'])
        scope[name] = self.steinhardt
        scope.setdefault('color_scalars', []).append(name)
        scope.setdefault('visuals', []).append(self)

    def draw_matplotlib(self, figure):
        ax = figure.add_subplot(111)
        ax.hist(self.steinhardt, bins=self.arguments['histogram_bins'],
                density=True)
        ax.set_xlabel('$Q_{{{}}}$'.format(self.arguments['l']))
        ax.set_ylabel('Probability')

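# A minimal usage sketch of the freud calls above (toy configuration assumed,
# not from the source): q_6 computed with a fixed neighbor count per particle.
import numpy as np
import freud

_box = freud.box.Box.cube(10)
_points = np.random.uniform(-5, 5, (100, 3)).astype(np.float32)
_st = freud.order.Steinhardt(6)
_st.compute((_box, _points),
            dict(num_neighbors=12, exclude_ii=True, r_guess=2.0))
assert _st.ql.shape == (100,)  # one q_6 value per particle
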
class ViewQt(flowws.Stage):
    """Provide an interactive view of the entire workflow using Qt.

    An interactive display window will be opened that displays visual
    results while allowing the arguments of all stages in the workflow to
    be modified.
    """

    ARGS = [
        Arg('controls', '-c', bool, True,
            help='Display controls'),
    ]

    def __init__(self, *args, **kwargs):
        self.workflow = None
        self._running_threads = None
        self._rerun_event = threading.Event()
        self._stage_event = threading.Event()
        self._exit_event = threading.Event()
        self._visual_queue = queue.Queue()
        self._scope_queue = queue.Queue()
        super().__init__(*args, **kwargs)

    def run(self, scope, storage):
        """Displays parameters and outputs for the workflow in a Qt window."""
        self.workflow = scope['workflow']
        scope['rerun_callback'] = self.rerun
        self._scope_queue.put((scope, storage))

        if self._running_threads is None:
            our_sigint_handler = functools.partial(
                sigint_handler, self._exit_event)
            signal.signal(signal.SIGINT, our_sigint_handler)

            args = (scope, self.workflow, self._rerun_event,
                    self._stage_event, self._exit_event, self._visual_queue,
                    self.arguments['controls'])
            self._visual_queue.put(scope.get('visuals', []))
            self._running_threads = rerun_thread = RerunThread(args=args)
            rerun_thread.start()

            app = ViewQtApp(self.workflow, self._rerun_event,
                            self._stage_event, self._exit_event,
                            self._visual_queue, self._scope_queue,
                            self.arguments['controls'], [])
            app.exec_()
            rerun_thread.join()

    def rerun(self):
        self._rerun_event.set()

class MLP(flowws.Stage):
    """Specify a multilayer perceptron model."""

    ARGS = [
        Arg('hidden_widths', '-w', [int], [32],
            help='Number of nodes for each hidden layer'),
        Arg('activation', '-a', str, 'relu',
            help='Activation function to use for each hidden layer'),
        Arg('batch_norm', '-b', bool, False,
            help='Apply batch normalization before all hidden layers'),
        Arg('output_batch_norm', None, bool, False,
            help='Apply batch normalization after each hidden layer'),
        Arg('flatten', '-f', bool, False,
            help='If True, flatten the input before the hidden layers'),
        Arg('dropout', '-d', float, 0,
            help='Apply a dropout layer with the given dropout rate '
                 'after each hidden layer'),
    ]

    def run(self, scope, storage):
        input_shape = scope['x_train'][0].shape
        input_symbol = keras.layers.Input(shape=input_shape)
        Dropout = scope.get('dropout_class', keras.layers.Dropout)

        layers = []
        if self.arguments['batch_norm']:
            layers.append(keras.layers.BatchNormalization())
        if self.arguments['flatten']:
            layers.append(keras.layers.Flatten())

        for w in self.arguments['hidden_widths']:
            layers.append(
                keras.layers.Dense(w, activation=self.arguments['activation']))
            if self.arguments.get('output_batch_norm', False):
                layers.append(keras.layers.BatchNormalization())
            if self.arguments['dropout']:
                layers.append(Dropout(self.arguments['dropout']))

        scope['input_symbol'] = input_symbol
        scope['output'] = sequence(input_symbol, layers)

class Classifier(flowws.Stage):
    """Specify a simple classifier output."""

    ARGS = [Arg('activation', default='softmax')]

    def run(self, scope, storage):
        layers = []
        layers.append(
            keras.layers.Dense(scope['num_classes'],
                               activation=self.arguments['activation']))

        scope['output'] = sequence(scope['output'], layers)
        scope['loss'] = 'sparse_categorical_crossentropy'
        scope.setdefault('metrics', []).append('accuracy')

class SaveGarnett(flowws.Stage):
    """Save trajectory quantities using Garnett.

    This stage currently only saves an individual frame, but saving an
    entire trajectory is intended to work in the future.
    """

    ARGS = [
        Arg('filename', '-f', str, 'trajectory.sqlite',
            help='Name of file to save trajectory to'),
    ]

    def run(self, scope, storage):
        """Save trajectory quantities."""
        filename = self.arguments['filename']

        box = scope['box']
        box = garnett.trajectory.Box(Lx=box[0], Ly=box[1], Lz=box[2],
                                     xy=box[3], xz=box[4], yz=box[5])

        positions = scope['position']
        if 'orientation' in scope:
            orientations = scope['orientation']
        else:
            orientations = np.tile([(1, 0, 0, 0)], (len(positions), 1))

        types = scope['type']
        if 'type_names' not in scope:
            type_names = [chr(ord('A') + t) for t in sorted(set(types))]
        else:
            type_names = scope['type_names']

        the_frame = FakeFrame(box, positions, orientations, type_names,
                              types, None, None, len(positions))

        mode = ('w' if any(filename.endswith(suf) for suf in ['.pos', '.cif'])
                else 'wb')
        with storage.open(filename, mode=mode, on_filesystem=True) as f:
            garnett.write([the_frame], f.name)

class Center(flowws.Stage):
    """Center the system through periodic boundary conditions.

    This module modifies the positions of the system to have either the
    center of mass of the system or a single indicated particle at
    (0, 0, 0).
    """

    ARGS = [
        Arg('particle', '-p', int, -1,
            help='Particle index to center with '
                 '(default: use center of mass of the system)'),
    ]

    def run(self, scope, storage):
        """Center the system."""
        box = scope.get('box', None)
        positions = scope['position']

        self.arg_specifications['particle'].valid_values = flowws.Range(
            -1, len(positions), (True, False))

        index = self.arguments['particle']
        index = index if index >= 0 else None

        if index is not None:
            center_point = positions[index]
            positions -= center_point[np.newaxis]
            if box is not None:
                positions = wrap(box, positions)
        elif box is None:
            center_point = np.mean(positions, axis=0)
            positions -= center_point[np.newaxis]
        else:
            positions = center(box, positions)

        scope['position'] = positions

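# An illustrative sketch (orthorhombic box and toy data assumed, not from the
# source) of the recenter-and-wrap operation the stage delegates to its
# wrap/center helpers, written with plain numpy:
import numpy as np

_L = np.array([10., 10., 10.])          # box edge lengths
_pos = np.random.uniform(0, 10, (100, 3))
_pos -= _pos.mean(axis=0)               # move the center of mass to the origin
_pos -= _L * np.round(_pos / _L)        # minimum-image wrap back into the box
assert np.all(np.abs(_pos) <= _L / 2)
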
class NeuralPotentialController(flowws.Stage):
    """Add a control callback that adjusts the neural dropout potential toward
    a target mean passthrough probability."""

    ARGS = [
        Arg('k_p', '-k', float,
            help='Proportional constant (in log-space)'),
        Arg('tau', '-t', float,
            help='Time constant for I-control (in log-space)'),
        Arg('setpoint', '-s', float, .5,
            help='Mean passthrough probability setpoint'),
        Arg('log_history', None, bool, False,
            help='If True, retain a history in-memory'),
        Arg('period', '-p', int, 1,
            help='Period (in batches) for the controller to run'),
        Arg('disable', '-d', bool, False,
            help='If given, disable this callback for future Train invocations'),
    ]

    def run(self, scope, storage):
        if self.arguments['disable']:
            callbacks = scope.setdefault('callbacks', [])
            new_callbacks = [
                c for c in callbacks
                if not isinstance(c, NeuralPotentialControlCallback)]
            scope['callbacks'] = new_callbacks
            return

        callback = NeuralPotentialControlCallback(
            self.arguments['k_p'], self.arguments['tau'],
            self.arguments['setpoint'], self.arguments['period'],
            self.arguments['log_history'])
        scope.setdefault('callbacks', []).append(callback)

class GPTModel(flowws.Stage):
    """Specify a GPT-style transformer language model."""

    ARGS = [
        Arg('width', '-w', int, 64,
            help='Working width of the deep network'),
        Arg('depth', '-d', int, 6,
            help='Number of transformer blocks to use'),
        Arg('use_convolutions', '-c', bool, False,
            help='Use causal convolutions instead of position embeddings'),
        Arg('use_agglomeration', '-a', bool, False,
            help='Use agglomerative instead of full attention'),
        Arg('use_adaptive_computation', None, bool, False,
            help='Use adaptive computation time'),
        Arg('convolution_width', None, int, 8,
            help='Width of causal convolutions to use'),
        Arg('num_heads', '-n', int, 8,
            help='Number of attention/agglomerative heads to use'),
        Arg('print_summary', '-p', bool, False,
            help='Print a summary of the model before continuing'),
        Arg('dropout', None, float, .5,
            help='Dropout to use in transformer layers'),
    ]

    def run(self, scope, storage):
        vocabulary_size = scope['vocabulary_size']
        sequence_length = scope['sequence_length']

        kwargs = {}
        if self.arguments['use_adaptive_computation']:
            model_fn = universal_transformer_gpt_model
        else:
            model_fn = vanilla_transformer_gpt_model
            if 'dropout_sequence_class' in scope:
                kwargs['dropout_cls'] = scope['dropout_sequence_class']

        model = model_fn(
            sequence_length, vocabulary_size,
            self.arguments['width'], self.arguments['depth'],
            self.arguments['num_heads'],
            agglomerative_attention=self.arguments['use_agglomeration'],
            use_convolutions=self.arguments['use_convolutions'],
            use_coordinate_embeddings=(not self.arguments['use_convolutions']),
            convolution_width=self.arguments['convolution_width'],
            transformer_dropout=self.arguments['dropout'],
            **kwargs)

        if self.arguments['print_summary']:
            model.summary()

        scope['model'] = model

class StageForTesting(flowws.Stage):
    """Minimal stage used to exercise argument handling in tests."""

    ARGS = [
        Arg('required_value', required=True),
        Arg('defaulted_value', default='default'),
    ]

class GalileanModel(flowws.Stage):
    """Use a custom model class performing galilean-style update steps during
    training."""

    ARGS = [
        Arg('steps', '-s', int, 10,
            help='Number of galilean steps to perform for each batch'),
        Arg('move_distance', '-m', float, 1e-3,
            help='Distance to move for each step'),
        Arg('log_move_distance', '-d', bool, False,
            help='If True, log the move distance'),
        Arg('tune_distance', '-t', bool, False,
            help='Auto-tune the move distance based on loss surface '
                 'reflection rates'),
        Arg('gradient_descent_rate', '-g', float, 0,
            help='Fraction of steps to use normal gradient descent on'),
        Arg('reduce_distance_period', None, int, 0,
            help='Patience (in epochs) for a distance reduction method '
                 'like ReduceLROnPlateau'),
        Arg('clear', '-c', bool, False,
            help='If given, remove the usage of a previous GalileanModel'),
    ]

    def run(self, scope, storage):
        if self.arguments['clear']:
            scope.pop('custom_model_class', None)
            keep = lambda x: not isinstance(
                x, (DistanceLogger, ReduceStepSizeOnPlateau))
            scope['callbacks'] = list(filter(keep, scope.get('callbacks', [])))
            return

        timescale = 32 if self.arguments['tune_distance'] else 0
        ModelFun = functools.partial(
            Model,
            galilean_steps=self.arguments['steps'],
            galilean_distance=self.arguments['move_distance'],
            galilean_batch_timescale=timescale,
            gradient_descent_rate=self.arguments['gradient_descent_rate'],
        )
        scope['custom_model_class'] = ModelFun

        if self.arguments['log_move_distance']:
            scope.setdefault('callbacks', []).append(DistanceLogger())

        if self.arguments['reduce_distance_period']:
            callback = ReduceStepSizeOnPlateau(
                patience=self.arguments['reduce_distance_period'],
                monitor='val_loss', factor=.75, verbose=True)
            scope.setdefault('callbacks', []).append(callback)

class PruneNeuralPotentialLayers(flowws.Stage):
    """Prune learnable dropout layers from a model by sampling their learned
    keep probabilities."""

    ARGS = [
        Arg('summarize', None, bool, False,
            help='If True, print the model summary after pruning'),
    ]

    def run(self, scope, storage):
        model = scope['model']
        parent_map = get_input_parent_map(model)

        to_skip, new_descriptions = [], {}
        for layer in find_learnable_dropout_layers(model):
            weights = layer.get_weights()[0]
            probas = tf.math.sigmoid(weights).numpy()
            sampled_mask = np.random.uniform(0, 1, size=probas.shape) < probas
            mask = np.where(sampled_mask)[0]

            to_skip.append(layer)
            Pruner.mask_children_downward(layer, mask, new_descriptions)
            Pruner.mask_parents_upward(layer, mask, new_descriptions,
                                       parent_map)

        new_layer_weights = {}
        nullified_indices = {layer: i for (i, layer) in enumerate(to_skip)}

        def clonefun(layer):
            result = None
            if layer in to_skip:
                name = 'nullified_learnable_dropout_{}'.format(
                    nullified_indices[layer])
                result = keras.layers.Lambda(identity, name=name)

            if layer in new_descriptions:
                desc = new_descriptions[layer]
                result = layer.__class__.from_config(desc.json)
                new_layer_weights[result] = desc.weights
            elif isinstance(layer, keras.models.Model):
                old_input_layer = keras.models.Model.layers.fget(layer)[0]
                new_input_tensor = None
                if old_input_layer in new_descriptions:
                    new_input_desc = new_descriptions[old_input_layer]
                    new_input_layer = old_input_layer.__class__.from_config(
                        new_input_desc.json)
                    new_input_tensor = new_input_layer.output
                result = keras.models.clone_model(layer, new_input_tensor,
                                                  clone_function=clonefun)

            if result is not None:
                return result
            return layer.__class__.from_config(layer.get_config())

        new_model = keras.models.clone_model(model, clone_function=clonefun)
        for (layer, weights) in new_layer_weights.items():
            layer.set_weights(weights)

        if self.arguments['summarize']:
            new_model.summary()

        scope['model'] = new_model

class BasisSelection(flowws.Stage):
    """Select directions and distances to form the basis vectors for a unit cell.

    This stage produces two visuals: a bond orientational order diagram that
    can be used to select symmetric directions, and a "cylindrical RDF" that
    measures bonds along each given direction. Together, these can be used
    to select the direction and length of the three basis vectors for the
    unit cell.
    """

    ARGS = [
        Arg('orientations', '-d', [(float, float, float, float)], [],
            help='Quaternions specifying orientations for basis vectors '
                 '(in the (0, 0, 1) direction)'),
        Arg('r_max', '-r', float, 3,
            help='Maximum distance to consider for bonds to select for the '
                 'unit cell basis vectors'),
        Arg('angle_tolerance', '-a', float, 5,
            help='Angle tolerance for selecting bonds (in degrees)'),
        Arg('rdf_bins', None, int, 128,
            help='Number of bins to use for cylindrical RDF'),
        Arg('x_direction', '-x', int, 0,
            help='Candidate direction to take as the x direction in the '
                 'final unit cell'),
        Arg('x_min', None, float, 0,
            help='Minimum distance to take bonds from the cylindrical RDF '
                 'for the x basis vector'),
        Arg('x_max', None, float, 1,
            help='Maximum distance to take bonds from the cylindrical RDF '
                 'for the x basis vector'),
        Arg('y_direction', '-y', int, 1,
            help='Candidate direction to take as the y direction in the '
                 'final unit cell'),
        Arg('y_min', None, float, 0,
            help='Minimum distance to take bonds from the cylindrical RDF '
                 'for the y basis vector'),
        Arg('y_max', None, float, 1,
            help='Maximum distance to take bonds from the cylindrical RDF '
                 'for the y basis vector'),
        Arg('z_direction', '-z', int, 2,
            help='Candidate direction to take as the z direction in the '
                 'final unit cell'),
        Arg('z_min', None, float, 0,
            help='Minimum distance to take bonds from the cylindrical RDF '
                 'for the z basis vector'),
        Arg('z_max', None, float, 1,
            help='Maximum distance to take bonds from the cylindrical RDF '
                 'for the z basis vector'),
    ]

    def __init__(self, *args, **kwargs):
        self.direction_visual = DirectionVisual()
        super().__init__(*args, **kwargs)

    def run(self, scope, storage):
        """Display the interactive direction- and distance-selection visuals."""
        positions = scope['position']
        types = scope['type']
        box = freud.box.Box.from_box(scope['box'])

        aq = freud.AABBQuery(box, positions)
        args = dict(exclude_ii=True, mode='ball',
                    r_max=self.arguments['r_max'])
        nlist = aq.query(positions, args).toNeighborList()
        bonds = positions[nlist.point_indices] - \
            positions[nlist.query_point_indices]
        bonds = box.wrap(bonds)

        angle_tol = self.arguments['angle_tolerance'] * np.pi / 180
        R = np.sin(angle_tol)
        normalized_bonds = bonds / nlist.distances[:, np.newaxis]

        orientations = np.array(self.arguments['orientations'])
        orientations /= np.linalg.norm(orientations, axis=-1, keepdims=True)
        distances = np.linalg.norm(bonds, axis=-1)
        basis = np.eye(3)
        bond_filter = np.ones((bonds.shape[0],), dtype=bool)
        target_quat = np.array((1., 0, 0, 0))

        optimized_orientations = []
        found_vectors = []
        found_bonds = []
        circle_positions = []
        for q in orientations:
            qconj = plato.math.quatconj(q)
            rotated_bonds = plato.math.quatrot(q[np.newaxis, :], bonds)
            filt_thetas = np.arctan2(
                np.sqrt(rotated_bonds[:, 0]**2 + rotated_bonds[:, 1]**2),
                rotated_bonds[:, 2])
            filt_thetas[filt_thetas > np.pi / 2] -= np.pi
            filt = np.abs(filt_thetas) < angle_tol
            filtered_bonds = rotated_bonds[filt]
            found_bonds.append(filtered_bonds)

            if filtered_bonds.shape[0]:
                (target_quat, mean) = find_best_orientation(filtered_bonds, q)
            else:
                target_quat = q
            qconj = plato.math.quatconj(target_quat)

            found_vectors.append(plato.math.quatrot(qconj, [0, 0, 1]))
            optimized_orientations.append(target_quat)

            circle = circle_patterns([(0, 0, 0)], [R], z=-1)
            circle = plato.math.quatrot(qconj, circle)
            circle_positions.append(circle)

            bond_filter *= np.logical_not(filt)

        self.direction_radii = [
            np.linalg.norm(b, axis=-1) for b in found_bonds]

        for i, key in enumerate('xyz'):
            direction_name = key + '_direction'
            min_name, max_name = key + '_min', key + '_max'

            direction_index = self.arguments[direction_name]
            r_min, r_max = self.arguments[min_name], self.arguments[max_name]
            r_min, r_max = sorted([r_min, r_max])

            try:
                direction = found_vectors[direction_index]
            except IndexError:
                break

            candidate_distances = self.direction_radii[direction_index]
            filt = np.logical_and(candidate_distances >= r_min,
                                  candidate_distances < r_max)
            if np.any(filt):
                candidate_distances = candidate_distances[filt]
                distance = np.mean(candidate_distances)
                basis[:, i] = direction * distance

        if np.linalg.det(basis) < 0:
            basis[:, 0] *= -1

        self.direction_visual.update(self.arguments['angle_tolerance'],
                                     circle_positions, bonds[bond_filter],
                                     target_quat.copy())

        self.basis = scope['basis_vectors'] = basis
        scope.setdefault('visuals', []).append(self.direction_visual)
        scope['visuals'].append(self)
        scope.setdefault('visual_link_rotation', []).append(
            self.direction_visual)

        self.gui_actions = [
            ('Select bonds', self._select_current_position),
            ('Undo last selection', self._undo_selection),
        ]

    def draw_matplotlib(self, figure):
        import matplotlib
        ax = figure.add_subplot()
        colors = matplotlib.rcParams['axes.prop_cycle'].by_key()['color']
        for i, (rs, color) in enumerate(zip(self.direction_radii, colors)):
            (hist, _) = np.histogram(rs, bins=self.arguments['rdf_bins'])
            for key in 'xyz':
                direction_name = key + '_direction'
                if self.arguments[direction_name] != i:
                    continue
                min_name, max_name = key + '_min', key + '_max'
                r_min, r_max = self.arguments[min_name], \
                    self.arguments[max_name]
                r_min, r_max = sorted([r_min, r_max])
                r_taken = np.linalg.norm(self.basis[:, 'xyz'.index(key)])
                ax.vlines([r_min, r_max], 0, np.max(hist) * 1.05,
                          linestyles='dashed', color=color)
                ax.vlines([r_taken], 0, np.max(hist) * 1.05,
                          linestyles='solid', color=color)
            ax.hist(rs, bins=self.arguments['rdf_bins'], alpha=.5,
                    label='Direction {}'.format(i), color=color)
        ax.legend()
        ax.set_xlabel('$R$')
        ax.set_ylabel('$Count(R)$')

    def _select_current_position(self, scope, storage):
        plato_scene = scope['visual_objects'][self.direction_visual]
        self.arguments['orientations'].append(plato_scene.rotation.copy())
        if scope.get('rerun_callback', None) is not None:
            scope['rerun_callback']()

    def _undo_selection(self, scope, storage):
        if self.arguments['orientations']:
            self.arguments['orientations'].pop()
        if scope.get('rerun_callback', None) is not None:
            scope['rerun_callback']()

class GTAR(flowws.Stage):
    """Emit the contents of a :std:doc:`libgetar<libgetar:index>`-format file into the scope.

    The GTAR module outputs the records found in a getar-format file
    directly into the scope. It provides a notion of frames in a trajectory
    using the discretely-varying record with the most indices as the basis.
    """

    ARGS = [
        Arg('filename', '-i', str, required=True,
            help='Getar-format filename to open'),
        Arg('frame', '-f', int, 0,
            help='Frame to load'),
        Arg('loop_frames', type=bool, default=False,
            help='If True, loop the workflow over frames found in the '
                 'trajectory file, beginning at the given frame'),
        Arg('group', '-g', str, '',
            help='GTAR group to restrict results to'),
    ]

    def __init__(self, *args, **kwargs):
        self._cached_record_frames = {}
        self._gtar_traj = self._gtar_file = None
        self._looping = False
        super().__init__(*args, **kwargs)

    @functools.lru_cache(maxsize=1)
    def _get_traj(self, filename, storage):
        for handle in (self._gtar_traj, self._gtar_file):
            if handle is not None:
                handle.close()

        try:
            gtar_file = gtar_traj = None
            gtar_file = storage.open(self.arguments['filename'], 'rb',
                                     on_filesystem=True)
            gtar_traj = gtar.GTAR(gtar_file.name, 'r')
            self._gtar_file, self._gtar_traj = gtar_file, gtar_traj
            gtar_file = gtar_traj = None
        finally:
            for handle in (gtar_traj, gtar_file):
                if handle is not None:
                    handle.close()

        return self._gtar_traj

    def run(self, scope, storage):
        """Load records found in a getar file into the scope."""
        scope['filename'] = self.arguments['filename']
        scope['frame'] = self.arguments['frame']
        scope['cache_key'] = scope['filename'], scope['frame']
        gtar_traj = self._get_traj(self.arguments['filename'], storage)

        self._cache_record_frames(gtar_traj, scope, storage,
                                  self.arguments['group'])

        if self.arguments['loop_frames'] and not self._looping:
            self._looping, self.arguments['loop_frames'] = True, False
            try:
                # loop over all frames except the last one
                frame_max = len(self._cached_frame_indices)
                frames = range(self.arguments['frame'], frame_max - 1)
                for frame in frames:
                    self.arguments['frame'] = frame
                    scope['workflow'].run()
            finally:
                self._looping, self.arguments['loop_frames'] = False, True
            # now run the last frame
            self.arguments['frame'] = max(
                0, len(self._cached_frame_indices) - 1)

        recs = self._set_record_frames()
        for rec in recs:
            callback = functools.partial(gtar_traj.getRecord, rec,
                                         rec.getIndex())
            scope.set_call(rec.getName(), callback)

    def _cache_record_frames(self, traj, scope, storage, group):
        self._cached_record_frames = {}
        for rec in traj.getRecordTypes():
            # ignore per-bond records and so on, for now
            if rec.getGroup() != group:
                continue
            self._cached_record_frames[rec] = list(
                map(index_sort_key, traj.queryFrames(rec)))

        (_, self._cached_frame_indices) = max(
            (len(indices), indices)
            for (rec, indices) in self._cached_record_frames.items()
            if rec.getBehavior() == gtar.Behavior.Discrete)

    def _set_record_frames(self):
        frame = self.arguments['frame']
        index_to_find = index_sort_key(self._cached_frame_indices[frame][1])
        self.arg_specifications['frame'].valid_values = flowws.Range(
            0, len(self._cached_frame_indices), (True, False))

        for (rec, indices) in self._cached_record_frames.items():
            try:
                index = find_le(indices, index_to_find)[1]
                rec.setIndex(index)
            except ValueError:
                pass

        return self._cached_record_frames.keys()

class Train(flowws.Stage):
    """Compile and train the model currently in the scope."""

    ARGS = [
        Arg('optimizer', '-o', str, 'adam',
            help='Optimizer to use'),
        Arg('epochs', '-e', int, 2000,
            help='Maximum number of epochs'),
        Arg('batch_size', '-b', int, 256,
            help='Batch size'),
        Arg('validation_split', '-v', float, .3),
        Arg('early_stopping', type=int),
        Arg('reduce_lr', type=int),
        Arg('ring_count', type=int),
        Arg('ring_k', type=float, default=1),
        Arg('ring_eps', type=float),
        Arg('dump_filename', '-f', default='dump.tar'),
        Arg('dump_period', '-d', int),
        Arg('seed', '-s', int),
        Arg('verbose', None, bool, True,
            help='If True, print the training progress'),
    ]

    @staticmethod
    def ring_name_updater(layer, i):
        cfg = layer.get_config()
        cfg['name'] = cfg['name'] + '_ring{}'.format(i)
        return layer.__class__.from_config(cfg)

    def run(self, scope, storage):
        if 'seed' in self.arguments:
            s = self.arguments['seed']
            random.seed(s)
            random.seed(random.randrange(2**32))
            np.random.seed(random.randrange(2**32))
            tf.random.set_seed(random.randrange(2**32))

        model = keras.models.Model(scope['input_symbol'], scope['output'])

        if self.arguments.get('ring_count', None):
            models = []
            for i in range(self.arguments['ring_count']):
                clone = functools.partial(self.ring_name_updater, i=i)
                models.append(
                    keras.models.clone_model(model, scope['input_symbol'],
                                             clone))

            final_output = K.sum([m.output for m in models], axis=0)
            final_output = K.softmax(final_output)
            model = keras.models.Model(scope['input_symbol'], final_output)

            for (left, right) in zip(models, np.roll(models, -1)):
                harmonic = lambda left=left, right=right: (
                    .5 * self.arguments['ring_k'] * sum(
                        K.sum(K.square(l - r)) for (l, r) in zip(
                            left.trainable_weights, right.trainable_weights)))
                model.add_loss(harmonic)

        scope['model'] = model

        for term in scope.get('extra_losses', []):
            model.add_loss(term)

        metrics = scope.get('metrics', [])
        model.compile(self.arguments['optimizer'], loss=scope['loss'],
                      metrics=metrics)

        if self.arguments.get('ring_count', None) and self.arguments.get(
                'ring_eps', None):
            print('randomizing ring weights')
            eps = self.arguments['ring_eps']
            names = [l.name for l in model.layers if 'ring' in l.name]
            base_values = {}
            for name in names:
                base = re.sub(r'_ring\d+$', '', name)
                layer = model.get_layer(name)
                if base in base_values:
                    for (value, tensor) in zip(base_values[base],
                                               layer.trainable_weights):
                        new_value = value * np.random.normal(
                            loc=1, scale=eps, size=value.shape)
                        tensor.assign(new_value)
                else:
                    base_values[base] = [
                        w.numpy() for w in layer.trainable_weights]
        else:
            print('not randomizing ring weights')

        callbacks = scope.get('callbacks', [])

        verbose = self.arguments['verbose']
        if tfa is not None and verbose:
            callbacks.append(
                tfa.callbacks.TQDMProgressBar(show_epoch_progress=False,
                                              update_per_second=1))
            verbose = False

        if 'early_stopping' in self.arguments:
            callbacks.append(
                keras.callbacks.EarlyStopping(
                    patience=self.arguments['early_stopping'],
                    monitor='val_loss'))

        if 'reduce_lr' in self.arguments:
            callbacks.append(
                keras.callbacks.ReduceLROnPlateau(
                    patience=self.arguments['reduce_lr'],
                    monitor='val_loss', factor=.5, verbose=True))

        with contextlib.ExitStack() as context_stack:
            if self.arguments.get('dump_period', None):
                modifiers = [
                    hashlib.sha1(
                        json.dumps(scope['workflow'].to_JSON()).encode())
                    .hexdigest()[:32]]
                handle = context_stack.enter_context(
                    storage.open(scope.get('dump_filename', 'dump.tar'), 'a',
                                 modifiers, on_filesystem=True))
                cbk = keras_gtar.GTARLogger(handle.name,
                                            self.arguments['dump_period'],
                                            append=True, when='pre_epoch')
                callbacks.append(cbk)

            model.fit(scope['x_train'], scope['y_train'],
                      verbose=verbose,
                      epochs=self.arguments['epochs'],
                      batch_size=self.arguments['batch_size'],
                      validation_split=self.arguments['validation_split'],
                      callbacks=callbacks)