def __call__(self, in_obj, *args, **kwargs):
    """
    Apply the layer, accepting a "time" axis in place of a recurrent axis.

    Arguments:
        in_obj: input op exposing ``axes.recurrent_axis()``
        *args, **kwargs: forwarded unchanged to the parent class ``__call__``

    Returns:
        The result of the parent class ``__call__`` on the (possibly
        role-mapped) input.
    """
    missing_recurrent = in_obj.axes.recurrent_axis() is None
    if missing_recurrent:
        # Fall back to an axis named "time" by mapping it onto "REC".
        in_obj = ng.map_roles(in_obj, {"time": "REC"})
        assert in_obj.axes.recurrent_axis() is not None, \
            "in_obj has no recurrent or time axis"

    parent_call = super(DeepBiRNN, self).__call__
    return parent_call(in_obj, *args, **kwargs)
def __call__(self, in_obj):
    """
    Project ``in_obj`` through the weight ``self.W``, creating it on first use.

    Arguments:
        in_obj: input op exposing ``axes.feature_axes()``

    Returns:
        ``ng.dot(self.W, in_obj)`` with ``self.axes_map`` applied through
        ``ng.map_roles``.
    """
    if self.W is None:
        # Weight axes are the keys of axes_map followed by the input's
        # feature axes.
        renamed_out_axes = ng.make_axes(self.axes_map.keys())
        weight_axes = renamed_out_axes + in_obj.axes.feature_axes()
        weight = ng.variable(
            axes=weight_axes,
            initial_value=self.init,
            scope=self.scope,
        )
        self.W = weight.named('LinW')

    projected = ng.dot(self.W, in_obj)
    # If the input feature axes and the output feature axes share names,
    # self.W would carry duplicate axes, which are not allowed. The output
    # feature axes are therefore temporarily renamed to something unique;
    # this map_roles undoes that temporary rename after the dot.
    return ng.map_roles(projected, self.axes_map)
def __call__(self, in_obj, **kwargs):
    """
    Look up rows of the embedding table for the indices in ``in_obj``.

    Arguments:
        in_obj (Tensor): object that provides the lookup indices
        **kwargs: accepted but not used by this method

    Returns:
        The unflattened lookup result with ``self.axes_map`` applied and
        its axes ordered as ``self.o_axes``.
    """
    label_names = {"weight": "weight", "bias": "bias"}

    # Put the recurrent axis before the batch axis, then flatten.
    rec_axis = in_obj.axes.recurrent_axis()
    batch_axis = in_obj.axes.batch_axis()
    ordered = ng.axes_with_order(in_obj, ng.make_axes([rec_axis, batch_axis]))
    indices = ng.flatten(ordered)
    flat_axes = indices.axes

    # label lut_v_axis as shadow axis for initializers ... once #1158 is
    # in, shadow axis will do more than just determine fan in/out for
    # initializers.
    self.lut_v_axis = ng.make_axis(self.vocab_size).named('V')
    self.axes_map = shadow_axes_map([self.lut_v_axis])
    self.lut_v_axis = list(self.axes_map.values())[0]

    self.lut_f_axis = ng.make_axis(self.embed_dim).named('F')

    self.w_axes = ng.make_axes([self.lut_v_axis, self.lut_f_axis])
    self.lut_o_axes = flat_axes | ng.make_axes([self.lut_f_axis])
    self.o_axes = ng.make_axes([self.lut_f_axis]) | flat_axes[0].axes

    if not self.initialized:
        table_init = self.lut_init(self.w_axes, self.lut_v_axis, self.pad_idx)
        table = ng.variable(
            axes=self.w_axes,
            initial_value=table_init,
            metadata={"label": label_names["weight"]},
        )
        self.W = table.named('LutW')

    lut_result = ng.lookuptable(
        self.W,
        indices,
        self.lut_o_axes,
        update=self.update,
        pad_idx=self.pad_idx,
    )
    restored = ng.map_roles(ng.unflatten(lut_result), self.axes_map)
    return ng.axes_with_order(restored, self.o_axes)
def make_layers(use_large, vocab_size):
    """
    Build the ordered list of layers for the model.

    The list contains, in order: the embedding layer, a role-mapping lambda
    ('REC' -> 'W', 'F' -> 'C'), one Convolution per entry of the kernel-size
    table (followed by a Pooling layer after convolutions 0, 1 and 5), two
    Affine(Rectlin) + Dropout pairs, and a final Affine(Softmax) over ax.Y.

    Arguments:
        use_large (bool): selects the larger configuration (1024 conv
            outputs / 2048 fully connected outputs, Gaussian init std 0.02)
            instead of the smaller one (256 / 1024, std 0.05)
        vocab_size: forwarded to ``make_embedding_layer``

    Returns:
        list: the layers (and one lambda) in application order
    """
    init = GaussianInit(0., 0.02) if use_large else GaussianInit(0., 0.05)
    conv_nout = 1024 if use_large else 256
    fc_nout = 2048 if use_large else 1024

    kernel_sizes = [7, 7, 3, 3, 3, 3]
    # Indices of the convolutions that are followed by a pooling layer.
    pool_layer_idxs = {0, 1, 5}

    layers = [
        make_embedding_layer(vocab_size),
        lambda op: ng.map_roles(op, {'REC': 'W', 'F': 'C'}),
    ]

    # Iterate the kernel-size table directly so the number of convolutions
    # always matches the table instead of a separately hard-coded count.
    for idx, kernel_size in enumerate(kernel_sizes):
        layers.append(Convolution(**conv_params(kernel_size, conv_nout, init)))
        if idx in pool_layer_idxs:
            layers.append(Pooling(pool_shape=(3, ), strides=3))

    # Two identical fully connected stages, each followed by dropout.
    for _ in range(2):
        layers.append(
            Affine(nout=fc_nout,
                   weight_init=init,
                   bias_init=ConstantInit(0.),
                   activation=Rectlin()))
        layers.append(Dropout(keep=0.5))

    layers.append(
        Affine(axes=(ax.Y, ),
               weight_init=init,
               bias_init=ConstantInit(0.),
               activation=Softmax()))
    return layers
def __call__(self, in_obj, **kwargs):
    """
    Look up rows of the embedding table for the indices in ``in_obj``.

    Arguments:
        in_obj (Tensor): object that provides the lookup indices
        **kwargs: accepted but not used by this method

    Returns:
        The unflattened lookup result with ``self.axes_map`` applied
        through ``ng.map_roles``.
    """
    indices = ng.flatten(in_obj)
    flat_axes = indices.axes

    # label lut_v_axis as shadow axis for initializers ... once #1158 is
    # in, shadow axis will do more than just determine fan in/out for
    # initializers.
    self.lut_v_axis = ng.make_axis(self.vocab_size).named('V')
    self.axes_map = shadow_axes_map([self.lut_v_axis])
    self.lut_v_axis = list(self.axes_map.values())[0]

    self.lut_f_axis = ng.make_axis(self.embed_dim).named('F')

    self.w_axes = ng.make_axes([self.lut_v_axis, self.lut_f_axis])
    self.lut_o_axes = flat_axes | ng.make_axes([self.lut_f_axis])
    # Stored on the instance; not used for the return value here.
    self.o_axes = ng.make_axes([self.lut_f_axis]) | flat_axes[0].axes

    if not self.initialized:
        table_init = self.lut_init(self.w_axes, self.lut_v_axis, self.pad_idx)
        table = ng.variable(
            axes=self.w_axes,
            initial_value=table_init,
            metadata={"label": LABELS["weight"]},
        )
        self.W = table.named('LutW')

    lut_result = ng.lookuptable(
        self.W,
        indices,
        self.lut_o_axes,
        update=self.update,
        pad_idx=self.pad_idx,
    )
    unflattened = ng.unflatten(lut_result)
    return ng.map_roles(unflattened, self.axes_map)
def __call__(self, in_obj):
    """
    Convolve ``in_obj`` with this layer's filter.

    The filter axes / weight variable and the output axes are created the
    first time they are found to be ``None`` and reused afterwards.

    Arguments:
        in_obj: input op; it is first passed through
            ``reorder_spatial_axes``

    Returns:
        The ``ng.convolution`` result with ``self.axes_map`` applied through
        ``ng.map_roles``.
    """
    params = self.convparams.copy()
    in_obj = reorder_spatial_axes(in_obj)
    in_axes = in_obj.axes

    if self.f_axes is None:
        # Filter axes: the leading input axis followed by T, R, S and K,
        # with lengths taken from the convolution parameters.
        self.f_axes = ng.make_axes([in_axes[0]])
        for axis_name in 'TRSK':
            self.f_axes |= ng.make_axis(length=params[axis_name],
                                        name=axis_name)

        # mark 'K' as a shadow axis for the initializers.
        self.axes_map = shadow_axes_map(self.f_axes.find_by_name('K'))
        self.f_axes = ng.make_axes([
            list(self.axes_map.keys())[0] if f_axis.name == 'K' else f_axis
            for f_axis in self.f_axes
        ])

        weight = ng.variable(axes=self.f_axes,
                             initial_value=self.init,
                             scope=self.scope)
        self.W = weight.named('convwt')

    if self.o_axes is None:
        self.o_axes = ng.make_axes([
            ng.make_axis(name=in_axis.name)
            for in_axis in in_axes
            if not in_axis.is_batch
        ])

        # set lengths: first the last filter axis, then one computed
        # output size per input axis at positions 1, 2 and 3.
        spatial_specs = (
            (1, 'T', 'pad_d', 'str_d', 'dil_d'),
            (2, 'R', 'pad_h', 'str_h', 'dil_h'),
            (3, 'S', 'pad_w', 'str_w', 'dil_w'),
        )
        out_shape = [self.f_axes[-1].length]
        for position, size_key, pad_key, stride_key, dilation_key in spatial_specs:
            out_shape.append(
                output_dim(in_axes[position].length,
                           params[size_key],
                           params[pad_key],
                           params[stride_key],
                           False,
                           params[dilation_key]))
        self.o_axes.set_shape(out_shape)
        self.o_axes |= in_axes.batch_axis()

    conv_out = ng.convolution(params, in_obj, self.W, axes=self.o_axes)
    return ng.map_roles(conv_out, self.axes_map)
def __call__(self,
             in_obj,
             channel_axes="C",
             spatial_axes=("D", "H", "W"),
             **kwargs):
    """
    Apply the convolution to ``in_obj`` and return the result with its axes
    ordered like the input's.

    Arguments:
        in_obj (Op): Input op
        channel_axes (str): name of the expected channel axis type - defaults to "C"
        spatial_axes (tuple): names of expected depth, height and width axis types - defaults
                              to "D", "H", and "W". A dict keyed by "D"/"H"/"W" is also
                              accepted; missing keys fall back to the key itself. In a
                              tuple, falsy entries are replaced by the matching default.

    Returns:
        The convolution output with ``axes_map`` applied, introduced length-1 axes
        sliced away, and axes reordered to follow the input order (any remaining
        new axes are appended at the end).

    Raises:
        ValueError: if ``spatial_axes`` is a tuple with fewer than 3 entries, or if the
                    layer is already initialized and the filter axes implied by this
                    input differ from ``self.W.axes``.
    """
    # Normalize spatial_axes to a 3-tuple of names. Values that are neither
    # dict nor tuple are passed through untouched.
    if isinstance(spatial_axes, dict):
        spatial_axes = tuple(
            spatial_axes.get(name, name) for name in ("D", "H", "W"))
    elif isinstance(spatial_axes, tuple):
        if len(spatial_axes) < 3:
            raise ValueError(
                "spatial_axes must have length 3 (e.g. ('D', 'H', 'W'))")
        # NOTE(review): zip stops at three entries, so any extra tuple
        # elements are ignored rather than rejected.
        spatial_axes = tuple(
            name if name else default
            for name, default in zip(spatial_axes, ("D", "H", "W")))

    # Remember the caller's axes so the output can be put back in that order.
    orig_axes = in_obj.axes
    in_obj = reorder_spatial_axes(in_obj, channel_axes, spatial_axes)
    # From here on channel_axes / spatial_axes hold axis objects looked up on
    # the reordered input, not the names that were passed in.
    channel_axes = in_obj.axes.get_by_names(channel_axes)
    spatial_axes = in_obj.axes.get_by_names(*spatial_axes)

    filter_axes = self._filter_axes(channel_axes, spatial_axes)

    # mark 'K' as a shadow axis for the initializers.
    axes_map = shadow_axes_map(filter_axes.find_by_name('K'))
    filter_axes = ng.make_axes([
        axis if axis.name != 'K' else list(axes_map.keys())[0]
        for axis in filter_axes
    ])

    if not self.initialized:
        if not self.weight_norm:
            self.W = ng.variable(axes=filter_axes,
                                 initial_value=self.init,
                                 metadata={
                                     "label": LABELS["weight"]
                                 }).named("W")
        else:
            # Weight-normalized form: W is built from a direction variable v
            # and a scale variable g defined over the shadowed K axis.
            self.v = ng.variable(axes=filter_axes,
                                 initial_value=self.init,
                                 metadata={
                                     "label": LABELS["weight"]
                                 }).named("v")
            out_axes = ng.make_axes(
                [filter_axes.get_by_names("K__NG_SHADOW")])
            v_norm = ng.mean(ng.square(self.v), out_axes=out_axes)
            self.g = ng.variable(axes=out_axes,
                                 initial_value=self.init,
                                 metadata={
                                     "label": LABELS["weight"]
                                 }).named("g")
            # 1e-3 is added under the square root before taking the reciprocal.
            self.W = self.g * self.v * ng.reciprocal(
                ng.sqrt(v_norm + 1e-3))
    else:
        # Already initialized: the existing weights can only be reused if
        # this input implies the same filter axes.
        if filter_axes != self.W.axes:
            raise ValueError(
                ("{layer_name} layer has already been initialized with an "
                 "input object which has resulted in filter axes: "
                 "{existing_filter_axes}. This new input object has axes: "
                 "{input_axes}, which implies the need for filter axes: "
                 "{new_filter_axes} which are different than the existing "
                 "filter axes.").format(
                     layer_name=self.name,
                     existing_filter_axes=self.W.axes,
                     input_axes=in_obj.axes,
                     new_filter_axes=filter_axes,
                 ))

    # map_roles applies axes_map to the convolution output (presumably
    # undoing the shadow-axis rename of 'K' - confirm against shadow_axes_map).
    output = ng.map_roles(
        self._conv_op(in_obj, channel_axes, spatial_axes), axes_map)
    # Reorder the output to match the input order
    output_axis_order = ng.make_axes(
        [output.axes.find_by_name(ax.name)[0] for ax in orig_axes])
    # Remove introduced axes. If their length is > 1, then perhaps they should be kept
    slices = [
        0 if (ax not in orig_axes) and ax.length == 1 else slice(None)
        for ax in output.axes
    ]
    output = ng.tensor_slice(output, slices)
    # New axes with length > 1 may have been introduced. Add them to the end.
    output_axis_order = output_axis_order | output.axes
    return ng.axes_with_order(output, output_axis_order)
'gamma': 0.94, 'base_lr': 0.01 } optimizer = RMSProp(learning_rate=learning_rate_policy, wdecay=4e-5, decay_rate=0.9, momentum_coef=0.9, epsilon=1., iteration=inputs['iteration']) else: raise NotImplementedError("Unrecognized Optimizer") # Build the main and auxiliary loss functions y_onehot = ng.one_hot(inputs['label'], axis=ax.Y) train_prob_main = inception.seq2(inception.seq1(inputs['image'])) train_prob_main = ng.map_roles(train_prob_main, {"C": ax.Y.name}) train_loss_main = ng.cross_entropy_multi(train_prob_main, y_onehot, enable_softmax_opt=False) train_prob_aux = inception.seq_aux(inception.seq1(inputs['image'])) train_prob_aux = ng.map_roles(train_prob_aux, {"C": ax.Y.name}) train_loss_aux = ng.cross_entropy_multi(train_prob_aux, y_onehot, enable_softmax_opt=False) batch_cost = ng.sequential([ optimizer(train_loss_main + 0.4 * train_loss_aux), ng.mean(train_loss_main, out_axes=()) ])
# Input axes are (batch, time, feature); the target keeps the time axis and
# replaces the feature axis with out_axis.
in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
out_axes = ng.make_axes([batch_axis, time_axis, out_axis])

# Build placeholders for the created axes
inputs = dict(X=ng.placeholder(in_axes),
              y=ng.placeholder(out_axes),
              iteration=ng.placeholder(axes=()))

# define model
if args.modeltype == "TCN":
    affine_layer = Affine(axes=out_axis,
                          weight_init=GaussianInit(0, 0.01),
                          activation=Logistic())
    # The first lambda maps 'F' -> 'C' and 'REC' -> 'W' before the TCN layers;
    # the second lambda maps them back before the final Affine layer.
    # NOTE(review): presumably the TCN layers expect channel/width axis
    # names - confirm against tcn().
    model = Sequential(
        [lambda op: ng.map_roles(op, {
            'F': 'C',
            'REC': 'W'
        })] + tcn(n_features,
                  hidden_sizes,
                  kernel_size=kernel_size,
                  dropout=dropout).layers +
        [lambda op: ng.map_roles(op, {
            'C': 'F',
            'W': 'REC'
        })] + [affine_layer])
elif args.modeltype == "LSTM":
    # Recurrent model returning the full sequence, followed by a Logistic
    # activation.
    model = Sequential(
        recurrent_model.define_model(out_axis,
                                     celltype=args.modeltype,
                                     recurrent_units=hidden_sizes,
                                     return_sequence=True).layers +
        [Logistic()])
# define model if args.modeltype == "TCN": # take only the last timepoint of output sequence to predict sum last_timepoint = [ lambda op: ng.tensor_slice(op, [ slice(seq_len - 1, seq_len, 1) if ax.name == "W" else slice(None) for ax in op.axes ]) ] affine_layer = Affine(axes=out_axis, weight_init=GaussianInit(0, 0.01), activation=Identity()) model = Sequential( [lambda op: ng.map_roles(op, { 'REC': 'W', 'F': 'C' })] + tcn(n_features, hidden_sizes, kernel_size=kernel_size, dropout=dropout).layers + last_timepoint + [affine_layer]) elif args.modeltype == "LSTM": model = recurrent_model.define_model(out_axis, celltype=args.modeltype, recurrent_units=hidden_sizes, return_sequence=False) # Optimizer if args.modeltype == "TCN": optimizer = Adam(learning_rate=args.lr, gradient_clip_value=args.grad_clip_value) else: optimizer = GradientDescentMomentum(
# Input axes are (batch, time, feature); the target has only batch and
# out_axis.
in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
out_axes = ng.make_axes([batch_axis, out_axis])

# Build placeholders for the created axes
inputs = dict(X=ng.placeholder(in_axes),
              y=ng.placeholder(out_axes),
              iteration=ng.placeholder(axes=()))
# Inputs for prediction: only X, sharing the training placeholder.
preds_inputs = dict(X=inputs['X'])

# define model
# Both command-line options are comma-separated lists of integers.
n_hidden = list(map(int, args.n_hidden.split(",")))
filter_shape = list(map(int, args.filter_shape.split(",")))
if args.modeltype in ["RNN", "LSTM"]:
    # Recurrent model followed by a Rectlin activation.
    seq1 = Sequential(recurrent_model.define_model(out_axis,
                                                   celltype=args.modeltype,
                                                   recurrent_units=n_hidden,
                                                   return_sequence=args.predict_seq).layers +
                      [Rectlin()])
elif args.modeltype == "CNN":
    seq1 = convolutional_model.define_model(out_axis,
                                            filter_shapes=filter_shape,
                                            n_filters=n_hidden)
    # Prepend a role mapping ('REC' -> 'W', 'F' -> 'C') and append a Rectlin
    # activation around the convolutional layers.
    layers_modified = [lambda op: ng.map_roles(op, {'REC': 'W', 'F': 'C'})] + seq1.layers + [Rectlin()]
    seq1 = Sequential(layers_modified)

# Optimizer
optimizer = RMSProp(learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)

# Define the loss function (squared L2 loss)
fwd_prop = seq1(inputs['X'])
train_loss = ng.squared_L2(fwd_prop - inputs['y'])

# Cost calculation
# The sequential op applies the optimizer update and then yields the mean
# training loss.
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

# Forward prop of test set
# Required for correct functioning of batch norm and dropout layers during inference mode
inputs = dict(X=ng.placeholder(in_axes), y=ng.placeholder(out_axes), iteration=ng.placeholder(axes=())) # take only the last timepoint of output sequence to predict RUL last_timepoint = [ lambda op: ng.tensor_slice(op, [ slice(seq_len - 1, seq_len, 1) if ax.name == "W" else slice(None) for ax in op.axes ]) ] affine_layer = Affine(axes=out_axis, weight_init=GaussianInit(0, 0.01), activation=Rectlin()) model = Sequential([lambda op: ng.map_roles(op, { 'F': 'C', 'REC': 'W' })] + tcn(n_features, hidden_sizes, kernel_size=kernel_size, dropout=dropout).layers + last_timepoint + [affine_layer]) # Optimizer optimizer = Adam(learning_rate=args.lr, gradient_clip_value=args.grad_clip_value) # Define the loss function (categorical cross entropy, since each musical key on the piano is encoded as a binary value) fwd_prop = model(inputs['X']) fwd_prop = ng.axes_with_order(fwd_prop, out_axes) train_loss = ng.squared_L2(fwd_prop - inputs['y']) with Layer.inference_mode_on(): preds = model(inputs['X']) preds = ng.axes_with_order(preds, out_axes) eval_loss = ng.mean(ng.squared_L2(preds - inputs['y']), out_axes=())