# NOTE: these snippets assume the surrounding module imports, roughly:
#   import numpy as np
#   import theano, theano.sandbox.rng_mrg
#   import bricks, masonry, initialization, pretrained   # repo-local modules

def construct_merger(self, n_spatial_dims, n_channels, patch_shape,
                     patch_cnn_spec, patch_mlp_spec, merge_mlp_spec,
                     response_mlp_spec, batch_normalize,
                     batch_normalize_patch, task_name, hyperparameters,
                     **kwargs):
    # construct patch interpretation network
    patch_transforms = []
    if task_name == "featurelevel_ucf101":
        # features come precomputed from a pretrained network
        n_channels = 512 + 4096
        shape = self.cropper.output_shape
    else:
        if patch_cnn_spec == "pretrained":
            import pretrained
            patch_transforms.append(
                pretrained.get_patch_transform(**hyperparameters))
            shape = patch_transforms[-1].get_dim("output")
        elif patch_cnn_spec:
            patch_transforms.append(
                masonry.construct_cnn(
                    name="patch_cnn",
                    layer_specs=patch_cnn_spec,
                    input_shape=patch_shape,
                    n_channels=n_channels,
                    batch_normalize=batch_normalize_patch))
            shape = patch_transforms[-1].get_dim("output")
        else:
            # no patch CNN: flatten the raw patch directly (without this
            # fallback, `shape` would be undefined below; mirrors the
            # variant of construct_merger further down)
            shape = (n_channels,) + tuple(patch_shape)
    patch_transforms.append(bricks.FeedforwardFlattener(input_shape=shape))
    if patch_mlp_spec:
        patch_transforms.append(
            masonry.construct_mlp(
                name="patch_mlp",
                hidden_dims=patch_mlp_spec,
                input_dim=patch_transforms[-1].output_dim,
                weights_init=initialization.Orthogonal(),
                biases_init=initialization.Constant(0),
                batch_normalize=batch_normalize_patch))
    self.patch_transform = bricks.FeedforwardSequence(
        [brick.apply for brick in patch_transforms], name="ffs")

    # construct theta interpretation network
    self.merge_mlp = masonry.construct_mlp(
        name="merge_mlp",
        input_dim=2 * n_spatial_dims,
        hidden_dims=merge_mlp_spec,
        weights_init=initialization.Orthogonal(),
        biases_init=initialization.Constant(0),
        batch_normalize=batch_normalize)

    self.response_mlp = masonry.construct_mlp(
        name="response_mlp",
        hidden_dims=response_mlp_spec,
        input_dim=self.patch_transform.output_dim + self.merge_mlp.output_dim,
        weights_init=initialization.Orthogonal(),
        biases_init=initialization.Constant(0),
        batch_normalize=batch_normalize)

    self.children.extend(
        [self.patch_transform, self.merge_mlp, self.response_mlp])

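def _merge_sketch(self, patch, theta):
    # Hypothetical sketch, not from the source: response_mlp's input_dim is
    # patch_transform.output_dim + merge_mlp.output_dim, which implies the
    # patch ("what") and theta ("where") codes are concatenated feature-wise
    # before the response MLP.
    patch_code = self.patch_transform.apply(patch)   # (batch, patch_dim)
    theta_code = self.merge_mlp.apply(theta)         # (batch, merge_dim)
    return self.response_mlp.apply(
        theano.tensor.concatenate([patch_code, theta_code], axis=1))
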
def construct_model(task, patch_shape, initargs,
                    n_channels, n_spatial_dims, hidden_dim,
                    batch_normalize, hyperparameters,
                    patch_cnn_spec=None, patch_mlp_spec=None,
                    prefork_area_mlp_spec=[], postmerge_area_mlp_spec=[],
                    response_mlp_spec=[], **kwargs):
    patch_transforms = []
    if patch_cnn_spec:
        patch_transforms.append(masonry.construct_cnn(
            name="patch_cnn",
            layer_specs=patch_cnn_spec,
            input_shape=patch_shape,
            n_channels=n_channels,
            batch_normalize=batch_normalize).apply)
        shape = patch_transforms[-1].brick.get_dim("output")
    else:
        shape = (n_channels,) + tuple(patch_shape)
    patch_transforms.append(
        masonry.FeedforwardFlattener(input_shape=shape).apply)
    if patch_mlp_spec:
        patch_transforms.append(masonry.construct_mlp(
            name="patch_mlp",
            hidden_dims=patch_mlp_spec,
            input_dim=patch_transforms[-1].brick.output_dim,
            batch_normalize=batch_normalize,
            initargs=initargs).apply)
    patch_transform = FeedforwardSequence(patch_transforms, name="ffs")

    prefork_area_transform = masonry.construct_mlp(
        name="prefork_area_mlp",
        input_dim=hidden_dim,
        hidden_dims=prefork_area_mlp_spec,
        batch_normalize=batch_normalize,
        initargs=initargs)
    postmerge_area_transform = masonry.construct_mlp(
        name="postmerge_area_mlp",
        input_dim=2 * n_spatial_dims,
        hidden_dims=postmerge_area_mlp_spec,
        batch_normalize=batch_normalize,
        initargs=initargs)

    # the LSTM gates require an input of dim 4*hidden_dim; copy the spec
    # rather than appending in place, which would mutate the (mutable
    # default) argument across calls
    response_mlp_spec = list(response_mlp_spec) + [4 * hidden_dim]
    response_transform = masonry.construct_mlp(
        name="response_mlp",
        hidden_dims=response_mlp_spec[1:],
        input_dim=response_mlp_spec[0],
        batch_normalize=batch_normalize,
        initargs=initargs)

    emitter = task.get_emitter(**hyperparameters)

    return Ram(patch_transform=patch_transform.apply,
               prefork_area_transform=prefork_area_transform.apply,
               postmerge_area_transform=postmerge_area_transform.apply,
               response_transform=response_transform.apply,
               emitter=emitter,
               **hyperparameters)

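def _construct_model_example(task):
    # Hypothetical invocation sketch; every value below is illustrative and
    # the convention of driving everything off one hyperparameters dict is
    # assumed, not taken from the source.
    hyperparameters = dict(
        patch_shape=(16, 16), n_channels=3, n_spatial_dims=2,
        hidden_dim=256, batch_normalize=True,
        initargs=dict(weights_init=initialization.Orthogonal(),
                      biases_init=initialization.Constant(0)),
        response_mlp_spec=[256, 512])
    # pass the dict both whole (for task.get_emitter and Ram) and unpacked
    return construct_model(task=task, hyperparameters=hyperparameters,
                           **hyperparameters)
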
def construct_locator(self, locate_mlp_spec, n_spatial_dims, location_std,
                      scale_std, batch_normalize, **kwargs):
    self.n_spatial_dims = n_spatial_dims
    self.locate_mlp = masonry.construct_mlp(
        name="locate_mlp",
        input_dim=self.get_dim(self.attention_state_name),
        hidden_dims=locate_mlp_spec,
        weights_init=initialization.Orthogonal(),
        biases_init=initialization.Constant(0),
        batch_normalize=batch_normalize)
    self.theta_from_area = bricks.Linear(
        input_dim=self.locate_mlp.output_dim,
        output_dim=2 * n_spatial_dims,
        name="theta_from_area",
        # normalize columns because the fan-in is large
        weights_init=initialization.NormalizedInitialization(
            initialization.IsotropicGaussian()),
        # initialize location biases to zero and scale biases to one
        # so the model will zoom in by default
        biases_init=initialization.Constant(np.array(
            [0.] * n_spatial_dims + [1.] * n_spatial_dims)))
    self.T_rng = theano.sandbox.rng_mrg.MRG_RandomStreams(12345)
    self.location_std = location_std
    self.scale_std = scale_std
    self.children.extend([self.locate_mlp, self.theta_from_area])

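def _locate_sketch(self, h):
    # Hypothetical sketch, not from the source, of how the stored noise
    # parameters would be used when emitting an attention window: theta
    # splits into location and scale halves, each perturbed with Gaussian
    # exploration noise of the corresponding std.
    theta = self.theta_from_area.apply(self.locate_mlp.apply(h))
    location = theta[:, :self.n_spatial_dims]
    scale = theta[:, self.n_spatial_dims:]
    location += self.T_rng.normal(location.shape, std=self.location_std)
    scale += self.T_rng.normal(scale.shape, std=self.scale_std)
    return location, scale
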
def __init__(self, input_dim, n_classes, batch_normalize):
    self.input_dim = input_dim
    self.n_classes = n_classes
    self.mlp = masonry.construct_mlp(
        name="mlp",
        activations=[None, bricks.Identity()],
        input_dim=input_dim,
        # integer division: layer dims must be ints
        hidden_dims=[input_dim // 2, self.n_classes],
        batch_normalize=batch_normalize,
        weights_init=initialization.Orthogonal(),
        biases_init=initialization.Constant(0))
    self.softmax = bricks.Softmax()
    self.children = [self.mlp, self.softmax]

def __init__(self, hidden_dim, n_classes, batch_normalize, **kwargs):
    super(SingleSoftmax, self).__init__(**kwargs)
    self.hidden_dim = hidden_dim
    self.n_classes = n_classes
    self.mlp = masonry.construct_mlp(
        activations=[None, Identity()],
        input_dim=hidden_dim,
        # integer division: layer dims must be ints
        hidden_dims=[hidden_dim // 2, self.n_classes],
        batch_normalize=batch_normalize,
        weights_init=Orthogonal(),
        biases_init=Constant(0))
    self.softmax = Softmax()
    self.children = [self.mlp, self.softmax]

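def _single_softmax_cost_sketch(self, h, targets):
    # Hypothetical sketch, not from the source: the emitter maps the hidden
    # state to class energies and scores them with a categorical
    # cross-entropy (blocks' Softmax takes targets first, energies second).
    energies = self.mlp.apply(h)   # (batch, n_classes)
    return self.softmax.categorical_cross_entropy(targets, energies).mean()
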
def __init__(self, input_dim, n_classes, batch_normalize):
    self.input_dim = input_dim
    self.n_classes = n_classes
    # TODO: use TensorLinear or some such
    self.emitters = [
        masonry.construct_mlp(
            activations=[None, bricks.Identity()],
            input_dim=input_dim,
            # integer division: layer dims must be ints
            hidden_dims=[input_dim // 2, n],
            name="mlp_%i" % i,
            batch_normalize=batch_normalize,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0))
        for i, n in enumerate(self.n_classes)]
    self.softmax = bricks.Softmax()
    self.children = self.emitters + [self.softmax]

def __init__(self, hidden_dim, n_classes, batch_normalize, **kwargs):
    super(Emitter, self).__init__(**kwargs)
    self.hidden_dim = hidden_dim
    self.n_classes = n_classes
    # TODO: use TensorLinear or some such
    self.emitters = [
        masonry.construct_mlp(
            activations=[None, Identity()],
            input_dim=hidden_dim,
            # integer division: layer dims must be ints
            hidden_dims=[hidden_dim // 2, n],
            name="mlp_%i" % i,
            batch_normalize=batch_normalize,
            initargs=dict(weights_init=Orthogonal(),
                          biases_init=Constant(0)))
        for i, n in enumerate(self.n_classes)]
    self.softmax = Softmax()
    self.children = self.emitters + [self.softmax]

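def _multi_softmax_cost_sketch(self, h, targets_list):
    # Hypothetical sketch, not from the source: with one MLP per output
    # head, a natural cost is the sum of per-head cross-entropies.
    return sum(
        self.softmax.categorical_cross_entropy(
            targets, emitter.apply(h)).mean()
        for emitter, targets in zip(self.emitters, targets_list))
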
def construct_merger(self, n_spatial_dims, n_channels, patch_shape,
                     response_dim, patch_cnn_spec, patch_mlp_spec,
                     merge_mlp_spec, response_mlp_spec, batch_normalize,
                     batch_normalize_patch, **kwargs):
    # construct patch interpretation network
    patch_transforms = []
    if patch_cnn_spec:
        patch_transforms.append(masonry.construct_cnn(
            name="patch_cnn",
            layer_specs=patch_cnn_spec,
            input_shape=patch_shape,
            n_channels=n_channels,
            batch_normalize=batch_normalize_patch))
        shape = patch_transforms[-1].get_dim("output")
    else:
        shape = (n_channels,) + tuple(patch_shape)
    patch_transforms.append(bricks.FeedforwardFlattener(input_shape=shape))
    if patch_mlp_spec:
        patch_transforms.append(masonry.construct_mlp(
            name="patch_mlp",
            hidden_dims=patch_mlp_spec,
            input_dim=patch_transforms[-1].output_dim,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0),
            batch_normalize=batch_normalize_patch))
    self.patch_transform = bricks.FeedforwardSequence(
        [brick.apply for brick in patch_transforms], name="ffs")

    # construct theta interpretation network
    self.merge_mlp = masonry.construct_mlp(
        name="merge_mlp",
        input_dim=2 * n_spatial_dims,
        hidden_dims=merge_mlp_spec,
        weights_init=initialization.Orthogonal(),
        biases_init=initialization.Constant(0),
        batch_normalize=batch_normalize)

    # construct what-where merger network
    self.response_merge = bricks.Merge(
        input_names="area patch".split(),
        input_dims=[self.merge_mlp.output_dim,
                    self.patch_transform.output_dim],
        output_dim=response_dim,
        prototype=bricks.Linear(
            use_bias=False,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0)),
        child_prefix="response_merge")
    self.response_merge_activation = bricks.NormalizedActivation(
        shape=[response_dim],
        name="response_merge_activation",
        batch_normalize=batch_normalize)
    self.response_mlp = masonry.construct_mlp(
        name="response_mlp",
        hidden_dims=response_mlp_spec,
        input_dim=response_dim,
        weights_init=initialization.Orthogonal(),
        biases_init=initialization.Constant(0),
        batch_normalize=batch_normalize)

    self.children.extend([
        self.response_merge_activation,
        self.response_merge,
        self.patch_transform,
        self.merge_mlp,
        self.response_mlp])

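def _response_merge_sketch(self, patch, theta):
    # Hypothetical sketch, not from the source, of the what-where merge:
    # Merge projects the area and patch codes (in input_names order) to a
    # common response_dim and combines them, followed by the normalized
    # activation and the response MLP.
    area_code = self.merge_mlp.apply(theta)
    patch_code = self.patch_transform.apply(patch)
    response = self.response_merge.apply(area_code, patch_code)
    response = self.response_merge_activation.apply(response)
    return self.response_mlp.apply(response)
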