def Stabilizer(steepness=4, enable_self_stabilization=enable_self_stabilization_default_or_False):
    if _is_given(enable_self_stabilization):
        raise NotImplementedError('Stabilizer: enable_self_stabilization flag not implemented yet')
    #enable_self_stabilization = enable_self_stabilization if _is_given(enable_self_stabilization) else _current_default_options.enable_self_stabilization
    #if not enable_self_stabilization: # disabled (typically through global option)
    #    return identity

    # parameters bound to this Function
    param = Parameter((1), init=0.99537863, name='stabilizer_param')  # 1/steepness * ln(e^steepness - 1) for steepness==4
    #param = Parameter((1), init=1, name='stabilizer_param')          # 1/steepness * ln(e^steepness - 1) for steepness==4
    # TODO: compute this strange value directly in Python

    # expression
    x = Placeholder(name='stabilizer_arg')

    # sharpened Softplus: 1/steepness * ln(1 + e^(steepness * beta))
    # this behaves linearly for weights around 1, yet guarantees positiveness
    # TODO: risk of confusion; can these functions be namespaced?
    beta = log(1 + exp(steepness * param)) * (1 / steepness)  # perf BUGBUG: "log() / steepness" should optimize to the same thing

    apply_x = beta * x
    return Block(apply_x, 'Stabilizer', Record(beta=beta))
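
# The hard-coded init value above (see the TODO) is the pre-image of 1 under the
# sharpened Softplus, i.e. the param value for which beta == 1. A quick standalone
# sanity check (helper name is just for illustration, standard library only):
import math

def stabilizer_init(steepness=4):
    # solve 1/steepness * ln(1 + e^(steepness * param)) == 1 for param
    return math.log(math.exp(steepness) - 1) / steepness

assert abs(stabilizer_init(4) - 0.99537863) < 1e-6
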
def attention_model(context_memory, query_memory, init_status, hidden_dim, att_dim, max_steps=5, init=glorot_uniform()):
    """
    Create the attention model for ReasoNet

    Args:
        context_memory: Context memory
        query_memory: Query memory
        init_status: Initial status
        hidden_dim: The dimension of the hidden state
        att_dim: The dimension of the attention
        max_steps: Maximum number of steps to revisit the context memory
    """
    gru = gru_cell((hidden_dim * 2, ), name='control_status')
    status = init_status
    output = [None] * max_steps * 2
    sum_prob = None
    context_cos_sim = project_cosine_sim(att_dim, name='context_attention')
    query_cos_sim = project_cosine_sim(att_dim, name='query_attention')
    ans_cos_sim = project_cosine_sim(att_dim, name='candidate_attention')
    stop_gate = termination_gate(name='terminate_prob')
    prev_stop = 0
    for step in range(max_steps):
        context_attention_weight = context_cos_sim(status, context_memory)
        query_attention_weight = query_cos_sim(status, query_memory)
        context_attention = sequence.reduce_sum(times(context_attention_weight, context_memory), name='C-Att')
        query_attention = sequence.reduce_sum(times(query_attention_weight, query_memory), name='Q-Att')
        attention = ops.splice(query_attention, context_attention, name='att-sp')
        status = gru(attention, status).output
        termination_prob = stop_gate(status)
        ans_attention = ans_cos_sim(status, context_memory)
        output[step * 2] = ans_attention
        if step < max_steps - 1:
            stop_prob = prev_stop + ops.log(termination_prob, name='log_stop')
        else:
            stop_prob = prev_stop
        output[step * 2 + 1] = sequence.broadcast_as(ops.exp(stop_prob, name='exp_log_stop'), output[step * 2], name='Stop_{0}'.format(step))
        prev_stop += ops.log(1 - termination_prob, name='log_non_stop')

    final_ans = None
    for step in range(max_steps):
        if final_ans is None:
            final_ans = output[step * 2] * output[step * 2 + 1]
        else:
            final_ans += output[step * 2] * output[step * 2 + 1]
    combine_func = combine(output + [final_ans], name='Attention_func')
    return combine_func
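
# The prev_stop / stop_prob bookkeeping above accumulates termination probabilities
# in log space: step k is weighted by t_k * prod_{j<k} (1 - t_j), and the last step
# absorbs all remaining mass, so the step weights always sum to 1. A minimal NumPy
# sketch of the same arithmetic (illustration only, not the CNTK graph):
import numpy as np

def step_weights(termination_probs):
    """Per-step weights produced by the log-space stop-probability bookkeeping."""
    weights = []
    prev_stop = 0.0                               # running sum of log(1 - t_j)
    for step, t in enumerate(termination_probs):
        if step < len(termination_probs) - 1:
            stop_prob = prev_stop + np.log(t)     # log(t_k * prod_{j<k} (1 - t_j))
        else:
            stop_prob = prev_stop                 # last step takes the remaining mass
        weights.append(np.exp(stop_prob))
        prev_stop += np.log(1 - t)
    return np.array(weights)

w = step_weights(np.random.uniform(0.05, 0.95, size=5))
assert np.isclose(w.sum(), 1.0)
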
def seq_softmax(x, name=''):
    """
    Compute softmax along a sequence of values
    """
    x_exp = exp((x - seq_max(x)) * 10)
    x_softmax = element_divide(x_exp, sequence.broadcast_as(sequence.reduce_sum(x_exp), x), name=name)
    return x_softmax
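
# seq_softmax subtracts the per-sequence max before exponentiating (the usual
# numerical-stability trick) and scales by 10, i.e. a sharpened softmax with
# temperature 0.1. A minimal NumPy equivalent, assuming a plain 1-D array stands in
# for the sequence axis (illustration only):
import numpy as np

def seq_softmax_np(x, sharpness=10.0):
    x_exp = np.exp((x - x.max()) * sharpness)     # stable, sharpened exponentials
    return x_exp / x_exp.sum()                    # normalize over the sequence axis

probs = seq_softmax_np(np.array([0.1, 0.5, 0.2]))
assert np.isclose(probs.sum(), 1.0)
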
def Stabilizer(steepness=4, enable_self_stabilization=enable_self_stabilization_default_or_False, name=''):
    if _is_given(enable_self_stabilization):
        raise NotImplementedError('Stabilizer: enable_self_stabilization flag not implemented yet')
    #enable_self_stabilization = enable_self_stabilization if _is_given(enable_self_stabilization) else _current_default_options.enable_self_stabilization
    #if not enable_self_stabilization: # disabled (typically through global option)
    #    return identity

    # parameters bound to this Function
    param = Parameter((1), init=0.99537863, name='stabilizer_param')  # 1/steepness * ln(e^steepness - 1) for steepness==4
    #param = Parameter((1), init=1, name='stabilizer_param')          # 1/steepness * ln(e^steepness - 1) for steepness==4
    # TODO: compute this strange value directly in Python

    # expression
    x = Placeholder(name='stabilizer_arg')

    # sharpened Softplus: 1/steepness * ln(1 + e^(steepness * beta))
    # this behaves linearly for weights around 1, yet guarantees positiveness
    # TODO: risk of confusion; can these functions be namespaced?
    beta = log(1 + exp(steepness * param)) * (1 / steepness)  # perf BUGBUG: "log() / steepness" should optimize to the same thing

    apply_x = beta * x
    return Block(apply_x, 'Stabilizer', name, Record(beta=beta), make_block=True)
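
# The sharpened Softplus used for beta is close to the identity for param values
# around 1 yet can never go negative, so the learned scale stays strictly positive.
# A small standalone check of both properties (illustrates the formula only, not the
# CNTK graph; standard library only):
import math

def sharpened_softplus(p, steepness=4):
    return math.log(1 + math.exp(steepness * p)) / steepness

assert abs(sharpened_softplus(1.0) - 1.0) < 1e-2   # roughly linear around 1
assert sharpened_softplus(-5.0) > 0                # still strictly positive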