def interpret_axes(self, in_obj, init_state):
    """
    Work out and store the axes used by the input and recurrent weights.

    Nothing is done when ``self.w_in_axes`` has already been set.

    Arguments:
        in_obj: input op; only its ``axes`` attribute is read
        init_state: optional initial state. When truthy, its sample axes
            minus its recurrent axis are used as the output feature axes.

    Raises:
        ValueError: if the summed full lengths of the axes taken from
            ``init_state`` differ from ``self.nout``
    """
    # axes were already interpreted by an earlier call
    if self.w_in_axes is not None:
        return

    input_axes = in_obj.axes
    self.in_axes = input_axes
    self.recurrent_axis = input_axes.recurrent_axis()
    self.in_feature_axes = input_axes.sample_axes() - self.recurrent_axis

    # if init state is given, use that as hidden axes
    if init_state:
        state_axes = init_state.axes
        self.out_feature_axes = (state_axes.sample_axes() -
                                 state_axes.recurrent_axis())
        hidden_length = sum(self.out_feature_axes.full_lengths)
        if hidden_length != self.nout:
            raise ValueError("Length of init_state must be the same as nout: " +
                             "{} != {}".format(hidden_length, self.nout))
    else:
        self.out_feature_axes = ng.make_axes([ng.make_axis(self.nout)])
        # with a single input feature axis, call named() on the new output
        # axis with that input axis' name
        if len(self.in_feature_axes) == 1:
            self.out_feature_axes[0].named(self.in_feature_axes[0].name)

    self.out_axes = self.out_feature_axes + input_axes.batch_axis()
    self.recurrent_axis_idx = len(self.out_feature_axes)

    # The dot ops output onto temporary axes which are cast to
    # self.out_axes right afterwards. Going straight from the DotOp to
    # self.out_axes is not possible: self.out_axes sometimes intersect
    # with self.in_axes, and the weight matrix would then contain a
    # duplicate Axis, which isn't allowed.
    shadow_map = shadow_axes_map(self.out_feature_axes)
    temp_out_axes = ng.make_axes(shadow_map.keys())

    # determine the shape of the weight matrices
    self.w_in_axes = temp_out_axes + self.in_feature_axes
    self.w_re_axes = temp_out_axes + self.out_feature_axes
def __call__(self, in_obj, **kwargs):
    """
    Build the lookup-table op for the indices held in ``in_obj``.

    Arguments:
        in_obj (Tensor): object that provides the lookup indices
        **kwargs: accepted but not used by this method

    Returns:
        The ``ng.lookuptable`` result after ``ng.unflatten``, with roles
        mapped through ``self.axes_map`` and axes ordered as
        ``self.o_axes``.
    """
    LABELS = {"weight": "weight", "bias": "bias"}

    # order the input as (recurrent axis, batch axis) before flattening
    index_order = ng.make_axes([in_obj.axes.recurrent_axis(),
                                in_obj.axes.batch_axis()])
    in_obj = ng.flatten(ng.axes_with_order(in_obj, index_order))
    in_axes = in_obj.axes

    # label lut_v_axis as shadow axis for initializers ... once #1158 is
    # in, shadow axis will do more than just determine fan in/out for
    # initializers.
    self.lut_v_axis = ng.make_axis(self.vocab_size).named('V')
    self.axes_map = shadow_axes_map([self.lut_v_axis])
    mapped_axes = list(self.axes_map.values())
    self.lut_v_axis = mapped_axes[0]

    self.lut_f_axis = ng.make_axis(self.embed_dim).named('F')

    self.w_axes = ng.make_axes([self.lut_v_axis, self.lut_f_axis])
    self.lut_o_axes = in_axes | ng.make_axes([self.lut_f_axis])
    self.o_axes = ng.make_axes([self.lut_f_axis]) | in_axes[0].axes

    # the weight variable is only created while the layer is uninitialized
    if not self.initialized:
        weight = ng.variable(
            axes=self.w_axes,
            initial_value=self.lut_init(self.w_axes,
                                        self.lut_v_axis,
                                        self.pad_idx),
            metadata={"label": LABELS["weight"]},
        )
        self.W = weight.named('LutW')

    lut_result = ng.lookuptable(self.W,
                                in_obj,
                                self.lut_o_axes,
                                update=self.update,
                                pad_idx=self.pad_idx)
    unflattened = ng.map_roles(ng.unflatten(lut_result), self.axes_map)
    return ng.axes_with_order(unflattened, self.o_axes)
def __call__(self, in_obj, **kwargs):
    """
    Build the lookup-table op for the indices held in ``in_obj``.

    Arguments:
        in_obj (Tensor): object that provides the lookup indices
        **kwargs: accepted but not used by this method

    Returns:
        The ``ng.lookuptable`` result after ``ng.unflatten``, with roles
        mapped through ``self.axes_map``.
    """
    in_obj = ng.flatten(in_obj)
    in_axes = in_obj.axes

    # label lut_v_axis as shadow axis for initializers ... once #1158 is
    # in, shadow axis will do more than just determine fan in/out for
    # initializers.
    self.lut_v_axis = ng.make_axis(self.vocab_size).named('V')
    self.axes_map = shadow_axes_map([self.lut_v_axis])
    mapped_axes = list(self.axes_map.values())
    self.lut_v_axis = mapped_axes[0]

    self.lut_f_axis = ng.make_axis(self.embed_dim).named('F')

    self.w_axes = ng.make_axes([self.lut_v_axis, self.lut_f_axis])
    self.lut_o_axes = in_axes | ng.make_axes([self.lut_f_axis])
    # stored on the instance; not used for the value returned below
    self.o_axes = ng.make_axes([self.lut_f_axis]) | in_axes[0].axes

    # the weight variable is only created while the layer is uninitialized
    if not self.initialized:
        weight = ng.variable(
            axes=self.w_axes,
            initial_value=self.lut_init(self.w_axes,
                                        self.lut_v_axis,
                                        self.pad_idx),
            metadata={"label": LABELS["weight"]},
        )
        self.W = weight.named('LutW')

    lut_result = ng.lookuptable(self.W,
                                in_obj,
                                self.lut_o_axes,
                                update=self.update,
                                pad_idx=self.pad_idx)
    unflattened = ng.unflatten(lut_result)
    return ng.map_roles(unflattened, self.axes_map)
def __call__(self, in_obj):
    """
    Build the convolution op for ``in_obj``.

    The filter axes and weight variable are created while ``self.f_axes``
    is ``None``, and the output axes while ``self.o_axes`` is ``None``;
    later calls reuse the stored values.

    Arguments:
        in_obj: input op; it is first passed through
            ``reorder_spatial_axes``

    Returns:
        The ``ng.convolution`` op with roles mapped through
        ``self.axes_map``.
    """
    cpm = self.convparams.copy()
    in_obj = reorder_spatial_axes(in_obj)
    in_axes = in_obj.axes

    if self.f_axes is None:
        # first input axis followed by the T, R, S and K filter axes
        self.f_axes = ng.make_axes([in_axes[0]])
        for filter_name in 'TRSK':
            self.f_axes |= ng.make_axis(length=cpm[filter_name],
                                        name=filter_name)

        # mark 'K' as a shadow axis for the initializers.
        self.axes_map = shadow_axes_map(self.f_axes.find_by_name('K'))
        shadow_k = list(self.axes_map.keys())[0]
        self.f_axes = ng.make_axes([
            shadow_k if axis.name == 'K' else axis
            for axis in self.f_axes
        ])

        weight = ng.variable(axes=self.f_axes,
                             initial_value=self.init,
                             scope=self.scope)
        self.W = weight.named('convwt')

    if self.o_axes is None:
        # one new axis per non-batch input axis, carrying the same name
        self.o_axes = ng.make_axes([
            ng.make_axis(name=axis.name)
            for axis in in_axes
            if not axis.is_batch
        ])

        # set lengths: last filter axis first, then one output length per
        # spatial position computed from the matching conv parameters
        out_shape = [self.f_axes[-1].length]
        spatial_keys = (
            (1, 'T', 'pad_d', 'str_d', 'dil_d'),
            (2, 'R', 'pad_h', 'str_h', 'dil_h'),
            (3, 'S', 'pad_w', 'str_w', 'dil_w'),
        )
        for position, size, pad, stride, dilation in spatial_keys:
            out_shape.append(output_dim(in_axes[position].length,
                                        cpm[size],
                                        cpm[pad],
                                        cpm[stride],
                                        False,
                                        cpm[dilation]))
        self.o_axes.set_shape(out_shape)
        self.o_axes |= in_axes.batch_axis()

    conv = ng.convolution(cpm, in_obj, self.W, axes=self.o_axes)
    return ng.map_roles(conv, self.axes_map)
def __init__(self, init, nout=None, axes=None, **kwargs):
    """
    Validate the requested output axes and store the layer configuration.

    Args:
        init: value stored as ``self.init`` (presumably the weight
            initializer; confirm against where ``self.init`` is used).
        nout (int or iterable of ints, optional): length or lengths of
            feature axes the Linear layer should output. Must not be
            provided in combination with axes.
        axes (Axes, optional): axes of feature axes the Linear layer
            should output. Must not be provided in combination with nout.
            Axes should not include recurrent or batch axes.
        **kwargs: forwarded to the parent class constructor.

    Raises:
        ValueError: if ``axes`` contains a batch axis, a recurrent axis or
            a shadow axis.
    """
    super(Linear, self).__init__(**kwargs)

    # axes should not include recurrent or batch axes
    if axes is not None:
        axes = ng.make_axes(axes)

        batch_axis = axes.batch_axis()
        if batch_axis is not None:
            raise ValueError((
                'Axes passed to Linear layer should only be the output '
                'feature axis. A batch axis {} was included.'
            ).format(batch_axis))

        recurrent_axis = axes.recurrent_axis()
        if recurrent_axis is not None:
            raise ValueError((
                'Axes passed to Linear layer should only be the output '
                'feature axis. A recurrent axis {} was included.'
            ).format(recurrent_axis))

        # report the offending axes themselves, not a list of booleans
        shadow_axes = [axis for axis in axes if is_shadow_axis(axis)]
        if shadow_axes:
            raise ValueError((
                "Shadow Axes are not allowed in the output axes passed to "
                "Linear. Found {}."
            ).format(shadow_axes))

    self.axes = infer_axes(nout, axes)
    self.axes_map = shadow_axes_map(self.axes)

    self.init = init
    # no weight is created in the constructor
    self.W = None
def __call__(self, in_obj, channel_axes="C", spatial_axes=("D", "H", "W"), **kwargs):
    """
    Build the layer's convolution op for ``in_obj``.

    Arguments:
        in_obj (Op): Input op
        channel_axes (str): name of the expected channel axis type -
            defaults to "C"
        spatial_axes (tuple): names of expected depth, height and width
            axis types - defaults to "D", "H", and "W". A dict keyed by
            "D"/"H"/"W" is also accepted; missing keys keep their default
            name. Falsy entries of a tuple are replaced by the default name
            at the same position.
        **kwargs: accepted but not used by this method

    Returns:
        The convolution output with roles mapped through the shadow-axes
        map, ordered like the input axes, with any remaining output axes
        appended at the end.

    Raises:
        ValueError: if a tuple ``spatial_axes`` has fewer than 3 entries,
            or if the layer is already initialized and the filter axes
            implied by this input differ from ``self.W.axes``
    """
    default_names = ("D", "H", "W")
    if isinstance(spatial_axes, dict):
        spatial_axes = tuple(spatial_axes.get(name, name)
                             for name in default_names)
    elif isinstance(spatial_axes, tuple):
        if len(spatial_axes) < 3:
            raise ValueError(
                "spatial_axes must have length 3 (e.g. ('D', 'H', 'W'))")
        spatial_axes = tuple(given or fallback
                             for given, fallback in zip(spatial_axes,
                                                        default_names))

    orig_axes = in_obj.axes
    in_obj = reorder_spatial_axes(in_obj, channel_axes, spatial_axes)
    # from here on the two arguments hold axes objects rather than names
    channel_axes = in_obj.axes.get_by_names(channel_axes)
    spatial_axes = in_obj.axes.get_by_names(*spatial_axes)

    filter_axes = self._filter_axes(channel_axes, spatial_axes)

    # mark 'K' as a shadow axis for the initializers.
    axes_map = shadow_axes_map(filter_axes.find_by_name('K'))
    filter_axes = ng.make_axes([
        list(axes_map.keys())[0] if axis.name == 'K' else axis
        for axis in filter_axes
    ])

    if self.initialized:
        # a later call must imply the same filter axes as the existing W
        if filter_axes != self.W.axes:
            raise ValueError(
                ("{layer_name} layer has already been initialized with an "
                 "input object which has resulted in filter axes: "
                 "{existing_filter_axes}. This new input object has axes: "
                 "{input_axes}, which implies the need for filter axes: "
                 "{new_filter_axes} which are different than the existing "
                 "filter axes.").format(
                     layer_name=self.name,
                     existing_filter_axes=self.W.axes,
                     input_axes=in_obj.axes,
                     new_filter_axes=filter_axes,
                 ))
    elif self.weight_norm:
        # W = g * v / sqrt(mean(v ** 2) + 1e-3), with the mean taken onto
        # the shadow 'K' axis
        self.v = ng.variable(axes=filter_axes,
                             initial_value=self.init,
                             metadata={"label": LABELS["weight"]}).named("v")
        out_axes = ng.make_axes(
            [filter_axes.get_by_names("K__NG_SHADOW")])
        v_norm = ng.mean(ng.square(self.v), out_axes=out_axes)
        self.g = ng.variable(axes=out_axes,
                             initial_value=self.init,
                             metadata={"label": LABELS["weight"]}).named("g")
        self.W = self.g * self.v * ng.reciprocal(
            ng.sqrt(v_norm + 1e-3))
    else:
        self.W = ng.variable(axes=filter_axes,
                             initial_value=self.init,
                             metadata={"label": LABELS["weight"]}).named("W")

    conv = self._conv_op(in_obj, channel_axes, spatial_axes)
    output = ng.map_roles(conv, axes_map)

    # Reorder the output to match the input order
    output_axis_order = ng.make_axes(
        [output.axes.find_by_name(ax.name)[0] for ax in orig_axes])

    # Remove introduced axes. If their length is > 1, then perhaps they
    # should be kept
    slices = []
    for ax in output.axes:
        introduced_singleton = (ax not in orig_axes) and ax.length == 1
        slices.append(0 if introduced_singleton else slice(None))
    output = ng.tensor_slice(output, slices)

    # New axes with length > 1 may have been introduced. Add them to the
    # end.
    output_axis_order = output_axis_order | output.axes
    return ng.axes_with_order(output, output_axis_order)