Example #1
def Stabilizer(
        steepness=4,
        enable_self_stabilization=enable_self_stabilization_default_or_False):
    if _is_given(enable_self_stabilization):
        raise NotImplementedError(
            'Stabilizer: enable_self_stabilization flag not implemented yet')
    #enable_self_stabilization = enable_self_stabilization if _is_given(enable_self_stabilization) else _current_default_options.enable_self_stabilization
    #if not enable_self_stabilization: # disabled (typically through global option)
    #    return identity

    # parameters bound to this Function
    param = Parameter((1), init=0.99537863, name='stabilizer_param')  # 1/steepness*ln (e^steepness-1) for steepness==4
    #param = Parameter((1), init=1, name='stabilizer_param')  # 1/steepness*ln (e^steepness-1) for steepness==4
    # TODO: compute this strange value directly in Python

    # expression
    x = Placeholder(name='stabilizer_arg')

    # sharpened Softplus: 1/steepness ln(1+e^{steepness*beta})
    # this behaves linear for weights around 1, yet guarantees positiveness
    # TODO: risk of confusion; can these functions be namespaced?
    beta = log(1 + exp(steepness * param)) * (1 / steepness)  # perf BUGBUG: "log() / steepness" should optimize to the same thing
    apply_x = beta * x
    return Block(apply_x, 'Stabilizer', Record(beta=beta))
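
The TODO above asks for the "strange" init value to be computed directly in Python. A minimal standard-library sketch of that computation (not part of the original code) is shown below: choosing param = 1/steepness * ln(e^steepness - 1) makes the sharpened Softplus evaluate to exactly 1, so the Stabilizer starts out as an identity scale.

import math

steepness = 4
# solve 1/steepness * ln(1 + e^(steepness * param)) == 1 for param
init_value = math.log(math.exp(steepness) - 1) / steepness
print(init_value)   # ~0.9953786, the hard-coded 0.99537863 above

# sanity check: beta is exactly 1 at initialization
beta = math.log(1 + math.exp(steepness * init_value)) / steepness
print(beta)         # 1.0
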
Example #2
def attention_model(context_memory,
                    query_memory,
                    init_status,
                    hidden_dim,
                    att_dim,
                    max_steps=5,
                    init=glorot_uniform()):
    """
  Create the attention model for reasonet
  Args:
    context_memory: Context memory
    query_memory: Query memory
    init_status: Intialize status
    hidden_dim: The dimention of hidden state
    att_dim: The dimention of attention
    max_step: Maxuim number of step to revisit the context memory
  """
    gru = gru_cell((hidden_dim * 2, ), name='control_status')
    status = init_status
    output = [None] * max_steps * 2
    sum_prob = None
    context_cos_sim = project_cosine_sim(att_dim, name='context_attention')
    query_cos_sim = project_cosine_sim(att_dim, name='query_attention')
    ans_cos_sim = project_cosine_sim(att_dim, name='candidate_attention')
    stop_gate = termination_gate(name='terminate_prob')
    prev_stop = 0
    for step in range(max_steps):
        context_attention_weight = context_cos_sim(status, context_memory)
        query_attention_weight = query_cos_sim(status, query_memory)
        context_attention = sequence.reduce_sum(times(context_attention_weight,
                                                      context_memory),
                                                name='C-Att')
        query_attention = sequence.reduce_sum(times(query_attention_weight,
                                                    query_memory),
                                              name='Q-Att')
        attention = ops.splice(query_attention,
                               context_attention,
                               name='att-sp')
        status = gru(attention, status).output
        termination_prob = stop_gate(status)
        ans_attention = ans_cos_sim(status, context_memory)
        output[step * 2] = ans_attention
        if step < max_steps - 1:
            stop_prob = prev_stop + ops.log(termination_prob, name='log_stop')
        else:
            stop_prob = prev_stop
        output[step * 2 + 1] = sequence.broadcast_as(
            ops.exp(stop_prob, name='exp_log_stop'),
            output[step * 2],
            name='Stop_{0}'.format(step))
        prev_stop += ops.log(1 - termination_prob, name='log_non_stop')

    final_ans = None
    for step in range(max_steps):
        if final_ans is None:
            final_ans = output[step * 2] * output[step * 2 + 1]
        else:
            final_ans += output[step * 2] * output[step * 2 + 1]
    combine_func = combine(output + [final_ans], name='Attention_func')
    return combine_func
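
To see what the log-space bookkeeping above computes, here is a minimal NumPy sketch (with hypothetical per-step termination probabilities, not taken from the original code): step t is weighted by p_t * prod_{i<t}(1 - p_i), the last step absorbs the remaining probability mass, and the weights therefore sum to 1, which is what the final weighted sum over output[step * 2] relies on.

import numpy as np

max_steps = 5
term_prob = np.array([0.2, 0.3, 0.5, 0.4, 0.9])  # hypothetical termination probabilities

prev_stop = 0.0   # running sum of log(1 - p_i), as in the loop above
weights = []
for step in range(max_steps):
    if step < max_steps - 1:
        stop_prob = prev_stop + np.log(term_prob[step])
    else:
        stop_prob = prev_stop   # last step keeps whatever mass is left
    weights.append(np.exp(stop_prob))
    prev_stop += np.log(1 - term_prob[step])

print(weights)       # approximately [0.2, 0.24, 0.28, 0.112, 0.168]
print(sum(weights))  # sums to 1 (up to rounding)
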
Example #3
def seq_softmax(x, name=''):
    """
  Compute softmax along with a squence values
  """
    x_exp = exp((x - seq_max(x)) * 10)
    x_softmax = element_divide(x_exp,
                               sequence.broadcast_as(
                                   sequence.reduce_sum(x_exp), x),
                               name=name)
    return x_softmax
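
For reference, the same computation on a plain array can be written with NumPy; the sketch below (the function name and sample input are illustrative, not from the original code) shows the two ingredients: subtracting the maximum for numerical stability and the factor 10 that sharpens the distribution before normalizing.

import numpy as np

def seq_softmax_ref(x, sharpening=10):
    # shift by the max for numerical stability, sharpen, then normalize
    x_exp = np.exp((x - x.max()) * sharpening)
    return x_exp / x_exp.sum()

x = np.array([0.1, 0.2, 0.4])   # illustrative sequence values
print(seq_softmax_ref(x))       # heavily peaked at the largest element
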
Example #4
def Stabilizer(steepness=4, enable_self_stabilization=enable_self_stabilization_default_or_False, name=''):
    if _is_given(enable_self_stabilization):
        raise NotImplementedError('Stabilizer: enable_self_stabilization flag not implemented yet')
    #enable_self_stabilization = enable_self_stabilization if _is_given(enable_self_stabilization) else _current_default_options.enable_self_stabilization
    #if not enable_self_stabilization: # disabled (typically through global option)
    #    return identity

    # parameters bound to this Function
    param = Parameter((1), init=0.99537863, name='stabilizer_param')  # 1/steepness*ln (e^steepness-1) for steepness==4
    #param = Parameter((1), init=1, name='stabilizer_param')  # 1/steepness*ln (e^steepness-1) for steepness==4
    # TODO: compute this strange value directly in Python

    # expression
    x = Placeholder(name='stabilizer_arg')

    # sharpened Softplus: 1/steepness ln(1+e^{steepness*beta})
    # this behaves linear for weights around 1, yet guarantees positiveness
    # TODO: risk of confusion; can these functions be namespaced?
    beta = log(1 + exp(steepness * param)) * (1 / steepness)   # perf BUGBUG: "log() / steepness" should optimize to the same thing
    apply_x = beta * x
    return Block(apply_x, 'Stabilizer', name, Record(beta=beta), make_block=True)