Example #1
def local_abstract_batch_norm_train(fgraph, node):
    if not isinstance(node.op, AbstractBatchNormTrain):
        return None

    x, scale, bias, epsilon, running_average_factor = node.inputs[:5]
    axes = node.op.axes
    if min(axes) < 0 or max(axes) > x.ndim:
        return None
    if (
        not isinstance(x.type, TensorType)
        or not isinstance(scale.type, TensorType)
        or not isinstance(bias.type, TensorType)
        or not isinstance(epsilon.type, TensorType)
        or not isinstance(running_average_factor.type, TensorType)
    ):
        return None
    # optional running_mean and running_var
    if len(node.inputs) > 5 and not isinstance(node.inputs[5].type, TensorType):
        return None
    if len(node.inputs) > 6 and not isinstance(node.inputs[6].type, TensorType):
        return None

    mean = x.mean(axes, keepdims=True)
    var = x.var(axes, keepdims=True)
    # The epsilon should not upcast the dtype.
    if var.dtype == "float32" and epsilon.dtype == "float64":
        epsilon = epsilon.astype("float32")
    invstd = tt.inv(tt.sqrt(var + epsilon))
    out = (x - mean) * (scale * invstd) + bias
    results = [out, mean, invstd]

    if len(node.inputs) > 5:
        running_mean = node.inputs[5]
        running_mean = (
            running_mean * (1.0 - running_average_factor)
            + mean * running_average_factor
        )
        results.append(running_mean)
    if len(node.inputs) > 6:
        m = tt.cast(tt.prod(x.shape) / tt.prod(scale.shape), theano.config.floatX)
        running_var = node.inputs[6]
        running_var = (
            running_var * (1.0 - running_average_factor)
            + (m / (m - 1)) * var * running_average_factor
        )
        results.append(running_var)

    results = [
        tt.patternbroadcast(r, r_orig.broadcastable)
        for (r, r_orig) in zip(results, node.outputs)
    ]

    for var in theano.gof.graph.variables(node.inputs, results):
        if var not in node.inputs:
            copy_stack_trace(node.outputs[0], var)
    return results
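
Note: this rewrite expands the abstract op into explicit batch-norm arithmetic. Below is a minimal NumPy sketch of the same computation (a hypothetical reference helper, not part of Theano), useful for checking the formulas, in particular the m / (m - 1) Bessel factor that makes the running variance an unbiased estimate:

import numpy as np

def batch_norm_train_reference(x, scale, bias, epsilon, factor,
                               running_mean, running_var, axes=(0,)):
    # Batch statistics over the normalized axes (biased variance, like x.var()).
    mean = x.mean(axis=axes, keepdims=True)
    var = x.var(axis=axes, keepdims=True)
    invstd = 1.0 / np.sqrt(var + epsilon)
    out = (x - mean) * (scale * invstd) + bias
    # Exponential moving averages; m is the number of elements each statistic
    # averages over, and m / (m - 1) debiases the batch variance.
    m = x.size / scale.size
    new_mean = running_mean * (1.0 - factor) + mean * factor
    new_var = running_var * (1.0 - factor) + (m / (m - 1)) * var * factor
    return out, mean, invstd, new_mean, new_var
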
Example #2
def local_abstract_batch_norm_train(node):
    if not isinstance(node.op, AbstractBatchNormTrain):
        return None

    x, scale, bias, epsilon, running_average_factor = node.inputs[:5]
    axes = node.op.axes
    if min(axes) < 0 or max(axes) > x.ndim:
        return None
    if not isinstance(x.type, TensorType) or \
       not isinstance(scale.type, TensorType) or \
       not isinstance(bias.type, TensorType) or \
       not isinstance(epsilon.type, TensorType) or \
       not isinstance(running_average_factor.type, TensorType):
        return None
    # optional running_mean and running_var
    if len(node.inputs) > 5 and not isinstance(node.inputs[5].type, TensorType):
        return None
    if len(node.inputs) > 6 and not isinstance(node.inputs[6].type, TensorType):
        return None

    mean = x.mean(axes, keepdims=True)
    var = x.var(axes, keepdims=True)
    # The epsilon should not upcast the dtype.
    if var.dtype == 'float32' and epsilon.dtype == 'float64':
        epsilon = epsilon.astype('float32')
    invstd = T.inv(T.sqrt(var + epsilon))
    out = (x - mean) * (scale * invstd) + bias
    results = [out, mean, invstd]

    if len(node.inputs) > 5:
        running_mean = node.inputs[5]
        running_mean = running_mean * (1.0 - running_average_factor) + \
            mean * running_average_factor
        results.append(running_mean)
    if len(node.inputs) > 6:
        m = T.cast(T.prod(x.shape) / T.prod(scale.shape), theano.config.floatX)
        running_var = node.inputs[6]
        running_var = running_var * (1.0 - running_average_factor) + \
            (m / (m - 1)) * var * running_average_factor
        results.append(running_var)

    results = [T.patternbroadcast(r, r_orig.broadcastable)
               for (r, r_orig) in zip(results, node.outputs)]

    for var in theano.gof.graph.variables(node.inputs, results):
        if var not in node.inputs:
            copy_stack_trace(node.outputs[0], var)
    return results
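
Note: the epsilon.astype('float32') guard in both versions exists because mixed float32/float64 arithmetic promotes the result to float64, which would silently upcast the whole normalization graph. The same promotion rule is easy to demonstrate in NumPy:

import numpy as np

var32 = np.float32(1.0)
eps64 = np.float64(1e-5)
print((var32 + eps64).dtype)                    # float64: epsilon upcasts the sum
print((var32 + eps64.astype("float32")).dtype)  # float32: the explicit cast prevents it
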
Example #3
def infer_shape(self, fgraph, node, i0_shapes):
    ret = fgraph.shape_feature.default_infer_shape(fgraph, node, i0_shapes)
    if self.axis is not None:
        self_axis = self.axis
        ndim = len(i0_shapes[0])
        if self_axis < 0:
            self_axis += ndim
        if self_axis < 0 or self_axis >= ndim:
            raise RuntimeError(
                f"Unique axis `{self.axis}` is outside of input ndim = {ndim}."
            )
        ret[0] = tuple([
            fgraph.shape_feature.shape_ir(i, node.outputs[0])
            for i in range(ndim)
        ])
    if self.return_inverse:
        if self.axis is None:
            shape = (basic.prod(i0_shapes[0]),)
        else:
            shape = (i0_shapes[0][self_axis],)
        if self.return_index:
            ret[2] = shape
            return ret
        ret[1] = shape
        return ret
    return ret
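
Note: the output layout mirrors np.unique: position 0 holds the unique values, and the inverse-index output sits at position 2 when return_index is also requested, otherwise at position 1. With an explicit axis, the inverse has one entry per slice along that axis, while the unique values keep the remaining dimensions. A quick NumPy check of the shapes:

import numpy as np

x = np.array([[1, 2], [1, 2], [3, 4]])
uniq, index, inverse = np.unique(x, axis=0, return_index=True, return_inverse=True)
print(uniq.shape)     # (2, 2): two unique rows, non-axis dimensions preserved
print(index.shape)    # (2,): one index per unique row
print(inverse.shape)  # (3,): one entry per row of the input
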
Example #4
def infer_shape(self, node, i0_shapes):
    ret = node.fgraph.shape_feature.default_infer_shape(node, i0_shapes)
    if self.axis is not None:
        self_axis = self.axis
        ndim = len(i0_shapes[0])
        if self_axis < 0:
            self_axis += ndim
        if self_axis < 0 or self_axis >= ndim:
            raise RuntimeError(
                "Unique axis `{}` is outside of input ndim = "
                "{}.".format(self.axis, ndim)
            )
        # `xrange` comes from the module's Python 2 compatibility import
        # (`from six.moves import xrange` in Theano of this era).
        ret[0] = tuple([node.fgraph.shape_feature.shape_ir(i, node.outputs[0])
                        for i in xrange(ndim)])
    if self.return_inverse:
        if self.axis is None:
            shape = (basic.prod(i0_shapes[0]),)
        else:
            shape = (i0_shapes[0][self_axis],)
        if self.return_index:
            ret[2] = shape
            return ret
        ret[1] = shape
        return ret
    return ret
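
Note: when axis is None, np.unique flattens its input first, which is why the inverse-index shape is the product of the input shape. A 1-D example showing the output order the shape logic relies on:

import numpy as np

x = np.array([2, 1, 1, 3, 2])
uniq, idx, inv = np.unique(x, return_index=True, return_inverse=True)
print(uniq.shape)  # (3,): unique values of the flattened input
print(idx.shape)   # (3,): only present because return_index=True
print(inv.shape)   # (5,): prod of the input shape, one entry per element
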
Example #5
def infer_shape(self, node, i0_shapes):
    ret = node.fgraph.shape_feature.default_infer_shape(node, i0_shapes)
    if self.return_inverse:
        shape = (basic.prod(i0_shapes[0]),)
        if self.return_index:
            ret[2] = shape
            return ret
        ret[1] = shape
        return ret
    return ret
Example #6
def infer_shape(self, fgraph, node, shapes):
    if self.axis is None:
        return [(basic.prod(shapes[0]),)]  # Flatten

    return shapes
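
Note: this last variant follows the NumPy convention that axis=None means "operate on the flattened array", so the output collapses to a vector of length prod(input_shape) and is otherwise shape-preserving. For example:

import numpy as np

x = np.arange(6).reshape(2, 3)
print(np.sort(x, axis=None).shape)  # (6,): flattened when axis is None
print(np.sort(x, axis=0).shape)     # (2, 3): shape preserved otherwise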