Exemplo n.º 1
0
def Recurrence(over, go_backwards=False, initial_state=initial_state_default_or_None):
    """Build a recurrent block that feeds the outputs of ``over`` back in as its state.

    Args:
        over: step function. It is called as ``over(x, prev_state)`` and must
            expose ``create_placeholder()``, which yields a placeholder or a
            tuple of placeholders standing in for the state.
        go_backwards: when false the state is delayed with ``past_value``,
            otherwise with ``future_value``.
        initial_state: second argument handed to ``past_value`` /
            ``future_value``. When not given (as decided by ``_is_given``),
            ``_current_default_options.initial_state`` is used. A value for
            which ``np.isscalar`` is true is wrapped into a ``Constant``.

    Returns:
        ``Block(apply_x, 'Recurrence', Record(over=over))`` where ``apply_x``
        is ``combine`` over the first output of the applied step function.
    """
    # caller did not pass an initial state: use the currently active default
    if not _is_given(initial_state):
        initial_state = _current_default_options.initial_state
    # a numeric constant is turned into a Constant() object
    if np.isscalar(initial_state):
        initial_state = Constant(initial_state, shape=(1)) # TODO: This should be automatically done inside the API.

    def previous_hook(state):
        # a tuple of state elements is delayed element by element
        if isinstance(state, tuple):
            return tuple(previous_hook(element) for element in state)
        # single element: choose the delay direction, then apply it
        if go_backwards:
            return future_value(state, initial_state)
        return past_value(state, initial_state)

    step_input = Placeholder(name='recurrence_arg')
    forward_state = over.create_placeholder()     # placeholder(s) for the not-yet-known state
    delayed_state = previous_hook(forward_state)  # delay (h, c)
    step_fn = over(step_input, delayed_state)     # Function (x, (h_prev, c_prev)) -> (h, c)
    new_state = step_fn.outputs
    # close the loop: every forward placeholder is resolved to the matching output
    replacements = dict(zip(_as_tuple(forward_state), new_state))
    step_fn.replace_placeholders(replacements)
    # re-read the outputs after the replacement; the first one is 'h'
    h = step_fn.outputs[0]
    if _trace_layers:
        _log_node(h)
        _log_node(combine([h.owner]))
    # wrap 'h' so that the result is a Function x -> h
    apply_x = combine([h])
    return Block(apply_x, 'Recurrence', Record(over=over))
Exemplo n.º 2
0
def Recurrence(over, go_backwards=False, initial_state=initial_state_default_or_None):
    """Turn the step function ``over`` into a recurrence over its own outputs.

    Args:
        over: step function, invoked as ``over(x, prev_state)``. Its
            ``create_placeholder()`` method supplies a placeholder (or a
            tuple of placeholders) for the state.
        go_backwards: selects ``future_value`` instead of ``past_value`` as
            the delay operation applied to the state.
        initial_state: forwarded to the delay operation. Falls back to
            ``_current_default_options.initial_state`` when ``_is_given``
            reports it as not given; values accepted by ``np.isscalar`` are
            converted to a ``Constant``.

    Returns:
        A ``Block`` named ``'Recurrence'`` wrapping ``combine([h])``, where
        ``h`` is the first output of the applied step function, with
        ``Record(over=over)`` attached.
    """
    # resolve the default if nothing was passed explicitly
    if not _is_given(initial_state):
        initial_state = _current_default_options.initial_state
    # numeric constants become Constant() objects
    if np.isscalar(initial_state):
        initial_state = Constant(initial_state, shape=(1)) # TODO: This should be automatically done inside the API.

    def previous_hook(state):
        # multiple state elements: recurse into each one
        if isinstance(state, tuple):
            return tuple(previous_hook(item) for item in state)
        # otherwise a single element; delay it in the requested direction
        if go_backwards:
            return future_value(state, initial_state)
        return past_value(state, initial_state)

    arg = Placeholder(name='recurrence_arg')
    state_placeholders = over.create_placeholder()   # one placeholder or a tuple of them
    previous = previous_hook(state_placeholders)      # delay (h, c)
    applied = over(arg, previous)                     # Function (x, (h_prev, c_prev)) -> (h, c)
    outputs = applied.outputs
    # map each state placeholder to the output that defines it
    replacements = dict(zip(_as_tuple(state_placeholders), outputs))
    applied.replace_placeholders(replacements)
    # fetch 'h' only after the placeholders have been replaced
    h = applied.outputs[0]
    if _trace_layers:
        _log_node(h)
        _log_node(combine([h.owner]))
    # the Function that yields 'h', i.e. a Function x -> h
    apply_x = combine([h])
    return Block(apply_x, 'Recurrence', Record(over=over))
Exemplo n.º 3
0
def Recurrence(over, _inf=None, go_backwards=False, initial_state=None):
    """Apply the step function ``over`` recurrently and return the Function x -> h.

    Args:
        over: step function, invoked as ``over(x, prev_state)``; its
            ``create_placeholder()`` provides the state placeholder(s).
        _inf: forwarded unchanged to ``Placeholder`` for the input argument.
        go_backwards: delay the state with ``future_value`` instead of
            ``past_value``. ``UntestedBranchError`` is invoked first when set
            (NOTE(review): it is called, not raised, here -- presumably it
            raises internally; confirm).
        initial_state: second argument handed to the delay operation.

    Returns:
        ``combine([h.owner])`` for the first output ``h`` of the applied step
        function, after ``_name_and_extend_Function(..., 'Recurrence')``.
    """
    if go_backwards:
        UntestedBranchError("Recurrence, go_backwards option")

    def previous_hook(state):
        # something with more than one output is delayed output by output
        if hasattr(state, 'outputs'):
            members = state.outputs
            if len(members) > 1:
                return tuple(previous_hook(member) for member in members)
        # a single element: delay it in the requested direction
        if go_backwards:
            return future_value(state, initial_state)
        return past_value(state, initial_state)

    step_input = Placeholder(_inf=_inf, name='recurrence_arg')
    state_placeholders = over.create_placeholder()  # a placeholder or a tuple of placeholders
    step_fn = over(step_input, state_placeholders)  # Function (x, (h_prev, c_prev)) -> (h, c)
    h = step_fn.outputs[0]                          # 'h' is the first output Variable
    if _trace_layers:
        _log_node(h)
        _log_node(combine([h.owner]))
    delayed = previous_hook(step_fn)                # delay (h, c)
    # bind each state placeholder to the output of its delayed counterpart
    replacements = {}
    for placeholder, delayed_value in zip(state_placeholders, delayed):
        replacements[placeholder] = delayed_value.output
    step_fn.replace_placeholders(replacements)
    # the Function that yielded 'h': a Function x -> h
    apply_x = combine([h.owner])
    _name_and_extend_Function(apply_x, 'Recurrence')
    if _trace_layers:
        _log_node(apply_x)
    return apply_x
Exemplo n.º 4
0
def LSTM(shape, cell_shape=None, use_peepholes=use_peepholes_default_or_False,
         init=init_default_or_glorot_uniform, init_bias=init_bias_default_or_0,
         enable_self_stabilization=enable_self_stabilization_default_or_False): # (x, (h, c))
    """Create an LSTM step function mapping (x, (h_prev, c_prev)) to (h, c).

    Args:
        shape: shape of the output ``h``; must be rank 1 after ``_as_tuple``.
        cell_shape: shape of the cell ``c``; defaults to ``shape``. Giving it
            explicitly also adds a final projection ``Wmr`` applied to ``h``.
        use_peepholes: add the elementwise peephole terms ``Ci``, ``Cf``,
            ``Co`` to the input, forget and output gates. Falls back to
            ``_current_default_options.use_peepholes`` when not given.
        init: initializer passed to all weight parameters.
        init_bias: initializer passed to the bias parameter ``b``.
        enable_self_stabilization: use ``Stabilizer()`` instead of
            ``identity`` on the previous state and on ``c(t)`` / ``h(t)``.
            Falls back to the current default options when not given.

    Returns:
        ``combine([h, c])`` with an extra ``create_placeholder`` attribute
        that produces a fresh ``(h, c)`` placeholder pair for recurrence.

    Raises:
        ValueError: if ``shape`` or ``cell_shape`` is not rank 1.
    """
    # options not passed explicitly are taken from the current defaults
    if not _is_given(use_peepholes):
        use_peepholes = _current_default_options.use_peepholes
    if not _is_given(enable_self_stabilization):
        enable_self_stabilization = _current_default_options.enable_self_stabilization

    has_projection = cell_shape is not None
    has_aux = False  # aux input is switched off; the branches below are kept for later use
    if has_aux:
        UntestedBranchError("LSTM, has_aux option")

    shape = _as_tuple(shape)
    cell_shape = shape if cell_shape is None else _as_tuple(cell_shape)
    if not (len(shape) == 1 and len(cell_shape) == 1):
        # otherwise we'd need to fix slicing and Param initializers
        raise ValueError("LSTM: shape and cell_shape must be vectors (rank-1 tensors)")

    # the four gate projections are stacked along the fastest-changing axis, to match BS
    stack_axis = -1
    stacked_dim = cell_shape[0]
    stacked_dims = list(cell_shape)
    stacked_dims[stack_axis] = stacked_dim * 4
    cell_shape_stacked = tuple(stacked_dims)  # cell_shape with stack_axis made 4 times as large

    # parameters (created in a fixed order: b, W, A, H, Ci, Cf, Co, Wmr)
    b = Parameter(cell_shape_stacked, init=init_bias, name='b')               # a bias
    W = Parameter(_INFERRED + cell_shape_stacked, init=init, name='W')        # input
    A = None                                                                  # aux input (optional)
    if has_aux:
        A = Parameter(_INFERRED + cell_shape_stacked, init=init, name='A')
    H = Parameter(shape + cell_shape_stacked, init=init, name='H')            # hidden-to-hidden
    # cell-to-hidden peephole weights {note: applied elementwise}
    Ci, Cf, Co = [Parameter(cell_shape, init=init, name=peep_name) if use_peepholes else None
                  for peep_name in ('Ci', 'Cf', 'Co')]
    Wmr = None                                                                # final projection
    if has_projection:
        Wmr = Parameter(cell_shape + shape, init=init)

    # one stabilizer each for h(t-1), c(t-1), c(t) and h(t)
    Sdh, Sdc, Sct, Sht = [Stabilizer() if enable_self_stabilization else identity
                          for _ in range(4)]

    def create_hc_placeholder():
        # we pass the known dimensions here, which makes dimension inference easier
        h_placeholder = Placeholder(shape=shape, name='hPh')
        c_placeholder = Placeholder(shape=cell_shape, name='cPh')
        return (h_placeholder, c_placeholder) # (h, c)

    # parameters to model function
    x = Placeholder(name='lstm_block_arg')
    dh, dc = create_hc_placeholder()

    # previous values, stabilized
    # note: input does not get a stabilizer here, user is meant to do that outside
    dhs = Sdh(dh)
    dcs = Sdc(dc)

    # projected contribution from input(s), hidden, and bias
    proj4 = b + times(x, W) + times(dhs, H)
    if has_aux:
        proj4 = proj4 + times(aux, A)

    # split the stacked projection into four equal parts along stack_axis
    it_proj, bit_proj, ft_proj, ot_proj = [
        slice(proj4, stack_axis, k * stacked_dim, (k + 1) * stacked_dim)
        for k in range(4)]

    # add peephole connection if requested
    def peep(x, c, C):
        if not use_peepholes:
            return x
        return x + C * c

    it = sigmoid(peep(it_proj, dcs, Ci))        # input gate(t)
    bit = it * tanh(bit_proj)                   # applied to tanh of input network

    ft = sigmoid(peep(ft_proj, dcs, Cf))        # forget-me-not gate(t)
    bft = ft * dc                               # applied to cell(t-1)

    ct = bft + bit                              # c(t) is sum of both

    ot = sigmoid(peep(ot_proj, Sct(ct), Co))    # output gate(t)
    ht = ot * tanh(ct)                          # applied to tanh(cell(t))

    c = ct                                      # cell value
    if has_projection:
        h = times(Sht(ht), Wmr)
    else:
        h = ht

    _name_node(h, 'h')
    if _trace_layers:
        _log_node(h)  # this looks right
    _name_node(c, 'c')

    # TODO: figure out how to do scoping, and also rename all the apply... to expression
    lstm_fn = combine([h, c])
    # Hand the caller a helper to create placeholders for recurrence.
    # Note that this attribute only exists on the object returned here, not on any cloned version of it.
    lstm_fn.create_placeholder = create_hc_placeholder
    # BUGBUG: returning Block(lstm_fn, 'LSTM') instead fails with "RuntimeError: A Function
    # instance with more than one output cannot be implicitly converted to a Variable"
    return lstm_fn
Exemplo n.º 5
0
def LSTM(shape,
         _inf,
         cell_shape=None,
         use_peepholes=False,
         init=_default_initializer,
         init_bias=0,
         enable_self_stabilization=False):  # (x, (h, c))
    """Create an LSTM step function mapping (x, (h_prev, c_prev)) to (h, c).

    Args:
        shape: shape of the output ``h`` (converted with ``_as_tuple``).
        _inf: input descriptor; its ``.shape`` sizes the input weights and
            its ``.with_shape(...)`` is used for placeholders, stabilizers
            and identities.
        cell_shape: shape of the cell ``c``; defaults to ``shape``. Giving it
            explicitly also adds a final projection ``Wmr`` applied to ``h``.
        use_peepholes: add the elementwise peephole terms ``Ci``, ``Cf``,
            ``Co`` to the input, forget and output gates.
        init: initializer passed to all weight parameters.
        init_bias: initializer passed to the bias parameter ``b``.
        enable_self_stabilization: use ``Stabilizer`` instead of ``Identity``
            on the previous state and on ``c(t)`` / ``h(t)``.
            ``UntestedBranchError`` is invoked first when set.

    Returns:
        ``combine([h, c])``, extended via ``_name_and_extend_Function`` with
        the name ``'LSTM'`` and carrying a ``create_placeholder`` attribute
        that produces a fresh ``(h, c)`` placeholder pair for recurrence.
    """
    has_projection = cell_shape is not None
    has_aux = False  # aux input is switched off; the branches below are kept for later use

    if has_aux:
        UntestedBranchError("LSTM, has_aux option")
    if enable_self_stabilization:
        UntestedBranchError("LSTM, enable_self_stabilization option")

    shape = _as_tuple(shape)

    cell_shape = _as_tuple(cell_shape) if cell_shape is not None else shape

    stack_axis = 0  # BUGBUG: should be -1, i.e. the fastest-changing one, to match BS
    # determine stacking dimensions
    cell_shape_list = list(cell_shape)
    stacked_dim = cell_shape_list[0]
    cell_shape_list[stack_axis] = stacked_dim * 4
    cell_shape_stacked = tuple(cell_shape_list)  # patched dims with stack_axis made 4 times as large

    # parameters
    b = Parameter(cell_shape_stacked, init=init_bias, name='b')  # a bias
    W = Parameter(_inf.shape + cell_shape_stacked, init=init, name='W')  # input
    A = Parameter(_inf.shape + cell_shape_stacked, init=init, name='A') if has_aux else None  # aux input (optional)
    H = Parameter(shape + cell_shape_stacked, init=init, name='H')  # hidden-to-hidden
    # cell-to-hidden peephole weights {note: applied elementwise}
    Ci = Parameter(cell_shape, init=init, name='Ci') if use_peepholes else None
    Cf = Parameter(cell_shape, init=init, name='Cf') if use_peepholes else None
    Co = Parameter(cell_shape, init=init, name='Co') if use_peepholes else None

    # Final projection. Built with Parameter like every other weight here; the
    # earlier ParameterTensor(..., init_value_scale=init_value_scale) form
    # referenced a name that is not defined in this function.
    Wmr = Parameter(cell_shape + shape, init=init) if has_projection else None

    # one stabilizer (or identity) each for h(t-1), c(t-1), c(t) and h(t)
    Sdh = Stabilizer(_inf=_inf.with_shape(shape)) if enable_self_stabilization else \
          Identity(_inf=_inf.with_shape(shape))
    Sdc = Stabilizer(_inf=_inf.with_shape(cell_shape)) if enable_self_stabilization else \
          Identity(_inf=_inf.with_shape(cell_shape))
    Sct = Stabilizer(_inf=_inf.with_shape(cell_shape)) if enable_self_stabilization else \
          Identity(_inf=_inf.with_shape(cell_shape))
    Sht = Stabilizer(_inf=_inf.with_shape(shape)) if enable_self_stabilization else \
          Identity(_inf=_inf.with_shape(shape))

    def create_hc_placeholder():
        # fresh placeholders for the previous (h, c), with their shapes already known
        return (Placeholder(_inf=_inf.with_shape(shape), name='hPh'),
                Placeholder(_inf=_inf.with_shape(cell_shape), name='cPh'))  # (h, c)

    # parameters to model function
    x = Placeholder(_inf=_inf, name='lstm_block_arg')
    prev_state = create_hc_placeholder()

    # formula of model function
    dh, dc = prev_state

    dhs = Sdh(dh)  # previous values, stabilized
    dcs = Sdc(dc)
    # note: input does not get a stabilizer here, user is meant to do that outside

    # projected contribution from input(s), hidden, and bias
    # NOTE(review): 'aux' is not defined in this function; only reachable if has_aux is enabled
    proj4 = b + times(x, W) + times(dhs, H) + times(aux, A) if has_aux else \
            b + times(x, W) + times(dhs, H)

    # split the stacked projection into four equal parts along stack_axis
    it_proj = slice(proj4, stack_axis, 0 * stacked_dim, 1 * stacked_dim)
    bit_proj = slice(proj4, stack_axis, 1 * stacked_dim, 2 * stacked_dim)
    ft_proj = slice(proj4, stack_axis, 2 * stacked_dim, 3 * stacked_dim)
    ot_proj = slice(proj4, stack_axis, 3 * stacked_dim, 4 * stacked_dim)

    # add peephole connection if requested
    def peep(x, c, C):
        return x + C * c if use_peepholes else x

    it = sigmoid(peep(it_proj, dcs, Ci))  # input gate(t)
    bit = it * tanh(bit_proj)  # applied to tanh of input network

    ft = sigmoid(peep(ft_proj, dcs, Cf))  # forget-me-not gate(t)
    bft = ft * dc  # applied to cell(t-1)

    ct = bft + bit  # c(t) is sum of both

    ot = sigmoid(peep(ot_proj, Sct(ct), Co))  # output gate(t)
    ht = ot * tanh(ct)  # applied to tanh(cell(t))

    c = ct  # cell value
    h = times(Sht(ht), Wmr) if has_projection else \
        ht

    _name_node(h, 'h')
    if _trace_layers:
        _log_node(h)  # this looks right
    _name_node(c, 'c')

    # TODO: figure out how to do scoping, and also rename all the apply... to expression
    apply_x_h_c = combine([h, c])
    # return to caller a helper function to create placeholders for recurrence
    apply_x_h_c.create_placeholder = create_hc_placeholder
    _name_and_extend_Function(apply_x_h_c, 'LSTM')
    return apply_x_h_c