def local_exp_over_1_plus_exp(node):
    """
    exp(x)/(1+exp(x)) -> sigm(x)
    c/(1+exp(x)) -> c*sigm(-x)
    """
    # This optimization should be done for numerical stability,
    # so we don't care to check client counts.
    if node.op == tensor.true_div:
        # Find all the exp() terms in the numerator.
        num, denom = node.inputs
        num_exp_x, num_rest, num_neg = partition_num_or_denom(num, is_exp)
        denom_1pexp, denom_rest, denom_neg = partition_num_or_denom(
            denom, is_1pexp)

        sigmoids = []
        for t in denom_1pexp:
            if t in num_exp_x:
                # Case: exp(x) / (1 + exp(x))
                sigmoids.append(sigmoid(t))
                del num_exp_x[num_exp_x.index(t)]
            else:
                # Case: 1 / (1 + exp(x))
                sigmoids.append(sigmoid(-t))
            copy_stack_trace(node.outputs[0], sigmoids[-1])

        if not sigmoids:  # We didn't find any; abort.
            return

        # Put the new numerator together.
        new_num = sigmoids + [tensor.exp(t) for t in num_exp_x] + num_rest
        if len(new_num) == 1:
            new_num = new_num[0]
        else:
            new_num = tensor.mul(*new_num)

        if num_neg ^ denom_neg:
            new_num = -new_num

        copy_stack_trace(num, new_num)

        if len(denom_rest) == 0:
            return [new_num]
        elif len(denom_rest) == 1:
            out = new_num / denom_rest[0]
        else:
            out = new_num / tensor.mul(*denom_rest)

        copy_stack_trace(node.outputs[0], out)
        return [out]
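# Hedged usage sketch (not from the original source): it assumes a working
# Theano install and that this optimizer is registered with the default
# stabilization pass, as it is in theano/tensor/nnet/sigm.py. Compiling
# exp(x) / (1 + exp(x)) should then leave a sigmoid node in the optimized
# graph in place of the exp/true_div pair.
def _sketch_exp_over_1_plus_exp():
    import theano
    from theano import tensor

    x = tensor.vector('x')
    y = tensor.exp(x) / (1 + tensor.exp(x))
    f = theano.function([x], y)    # default mode runs the stabilize pass
    theano.printing.debugprint(f)  # expect sigmoid(x) in the printout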
def compute_mul(tree):
    """
    Compute the Variable that is the output of a multiplication tree.

    This is the inverse of the operation performed by `parse_mul_tree`, i.e.
    compute_mul(parse_mul_tree(tree)) == tree.

    Parameters
    ----------
    tree
        A multiplication tree (as output by `parse_mul_tree`).

    Returns
    -------
    object
        A Variable that computes the multiplication represented by the tree.

    """
    neg, inputs = tree
    if inputs is None:
        raise AssertionError(
            "Function `compute_mul` found a missing leaf, did you forget to "
            "call `simplify_mul` on the tree first?")
    elif isinstance(inputs, list):
        # Recurse through inputs.
        rval = tensor.mul(*list(map(compute_mul, inputs)))
    else:
        rval = inputs
    if neg:
        rval = -rval
    return rval
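# Hedged round-trip sketch: `parse_mul_tree` is the companion parser named in
# the docstring above (defined alongside this function in Theano). Per its
# documented behavior, parse_mul_tree(-(a * b)) yields
# [True, [[False, a], [False, b]]], and compute_mul rebuilds the Variable.
def _sketch_compute_mul_round_trip():
    from theano import tensor

    a = tensor.scalar('a')
    b = tensor.scalar('b')
    tree = parse_mul_tree(-(a * b))  # [True, [[False, a], [False, b]]]
    v = compute_mul(tree)            # a Variable computing -(a * b)
    return v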
def is_neg(var):
    """
    Match a variable with the `-x` pattern.

    :param var: The Variable to analyze.

    :return: `x` if `var` is of the form `-x`, or None otherwise.
    """
    apply = var.owner
    if not apply:
        return None
    # First match against `tensor.neg`.
    if apply.op == tensor.neg:
        return apply.inputs[0]
    # Then match against a multiplication by -1.
    if apply.op == tensor.mul and len(apply.inputs) >= 2:
        for idx, mul_input in enumerate(apply.inputs):
            try:
                constant = opt.get_scalar_constant_value(mul_input)
                is_minus_1 = numpy.allclose(constant, -1)
            except NotScalarConstantError:
                is_minus_1 = False
            if is_minus_1:
                # Found a multiplication by -1.
                if len(apply.inputs) == 2:
                    # Only return the other input.
                    return apply.inputs[1 - idx]
                else:
                    # Return the multiplication of all other inputs.
                    return tensor.mul(*(apply.inputs[0:idx] +
                                        apply.inputs[idx + 1:]))
    # No match.
    return None
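# Illustrative check (a sketch, assuming standard Theano graph construction):
# both spellings of negation should be recognized, and a plain variable not.
def _sketch_is_neg():
    from theano import tensor

    x = tensor.scalar('x')
    assert is_neg(-x) is x      # the tensor.neg form
    assert is_neg(-1 * x) is x  # multiplication by a -1 constant
    assert is_neg(x) is None    # nothing to strip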
def infer_shape(self, node, inputs_shapes):
    if (isinstance(node.inputs[1], theano.Constant) and
            node.inputs[1].data is None):
        # That means axis = None,
        # so the array is flattened before being sorted.
        return [(mul(*inputs_shapes[0]),)]
    # axis should not be None, so there should be the same number of
    # dimensions in the input and output.
    assert node.inputs[0].ndim == node.outputs[0].ndim
    assert inputs_shapes[1] == ()
    return [inputs_shapes[0]]
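# Hedged illustration of the two branches using NumPy, whose sort semantics
# the shape-inferred Op mirrors: with axis=None the input is flattened, so
# the output shape is the product of the input dims; with a concrete axis
# the shape is unchanged.
def _sketch_sort_shapes():
    import numpy

    a = numpy.arange(6).reshape(2, 3)
    assert numpy.sort(a, axis=None).shape == (6,)  # 2 * 3: flattened branch
    assert numpy.sort(a, axis=0).shape == (2, 3)   # same shape as the input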
def local_sigm_times_exp(node):
    """
    exp(x)*sigm(-x) -> sigm(x)
    exp(-x)*sigm(x) -> sigm(-x)
    """
    # This is a numerical stability optimization, so we don't check clients.
    if node.op == tensor.mul:
        exp_x = []
        exp_minus_x = []
        sigm_x = []
        sigm_minus_x = []
        other = []
        neg = False
        for i in node.inputs:
            # Strip negations off each factor, tracking the overall sign.
            while i.owner and i.owner.op == tensor.neg:
                neg ^= True
                i = i.owner.inputs[0]
            if i.owner and i.owner.op == tensor.exp:
                exp_arg = i.owner.inputs[0]
                if exp_arg.owner and exp_arg.owner.op == tensor.neg:
                    exp_minus_x.append(exp_arg.owner.inputs[0])
                else:
                    exp_x.append(exp_arg)
            elif i.owner and i.owner.op == sigmoid:
                sigm_arg = i.owner.inputs[0]
                if sigm_arg.owner and sigm_arg.owner.op == tensor.neg:
                    sigm_minus_x.append(sigm_arg.owner.inputs[0])
                else:
                    sigm_x.append(sigm_arg)
            else:
                other.append(i)

        did_something = False
        # Remove matched pairs in exp_x and sigm_minus_x:
        # exp(x) * sigm(-x) -> sigm(x).
        for i in exp_x:
            if i in sigm_minus_x:
                del sigm_minus_x[sigm_minus_x.index(i)]
                other.append(sigmoid(i))
                did_something = True
            else:
                # Unmatched factor: rebuild the original exp(x) term.
                other.append(tensor.exp(i))
        # Remove matched pairs in exp_minus_x and sigm_x:
        # exp(-x) * sigm(x) -> sigm(-x).
        for i in exp_minus_x:
            if i in sigm_x:
                del sigm_x[sigm_x.index(i)]
                other.append(sigmoid(-i))
                did_something = True
            else:
                # Unmatched factor: rebuild the original exp(-x) term.
                other.append(tensor.exp(-i))

        if did_something:
            terms = (other + [sigmoid(x) for x in sigm_x] +
                     [sigmoid(-x) for x in sigm_minus_x])
            if len(terms) > 1:
                rval = tensor.mul(*terms)
            else:
                rval = terms[0]
            if neg:
                return [-rval]
            else:
                return [rval]
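# Companion sketch to the optimizer above (hedged: assumes Theano's default
# stabilization pass picks it up). exp(x) * sigm(-x) equals sigm(x) exactly,
# and the rewrite avoids overflow in exp(x) for large x.
def _sketch_sigm_times_exp():
    import theano
    from theano import tensor
    from theano.tensor.nnet import sigmoid

    x = tensor.vector('x')
    y = tensor.exp(x) * sigmoid(-x)
    f = theano.function([x], y)    # stabilize should substitute sigmoid(x)
    theano.printing.debugprint(f)  # expect a single sigmoid node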