def merge_experts_contribution(alpha_list, concat_outputs):
    # weighted sum on locations
    contributions = []
    for ii in range(num_user):
        alpha = util.slice_layer(1, ii, ii + 1)(alpha_list)
        contribution = util.slice_layer(1, ii, ii + 1)(concat_outputs)
        contributions.append(multiply([alpha, contribution]))
    contributions = Lambda(lambda x: K.concatenate(x, axis=1))(contributions)
    outputs = reduce_sum_layer(contributions)
    outputs = expand_dim_layer(outputs)
    return outputs
def merge_experts_contribution2(alpha_list, concat_hidden):
    # weighted sum on hidden states
    contributions = []
    for ii in range(num_user):
        alpha = util.slice_layer(1, ii, ii + 1)(alpha_list)
        contribution = util.slice_layer(1, ii, ii + 1)(concat_hidden)
        contributions.append(multiply([alpha, contribution]))
    contributions = Lambda(lambda x: K.concatenate(x, axis=1))(contributions)
    outputs = reduce_sum_layer(contributions)
    final_dense = Dense(num_decoder_tokens, activation='tanh')
    outputs = final_dense(outputs)
    outputs = expand_dim_layer(outputs)
    return outputs
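# A minimal numpy sketch (illustration only, not part of the model) of what the two
# merge functions above compute: each expert's contribution is scaled by its softmax
# weight alpha, and the scaled contributions are summed over the expert axis. The
# helper name and the 2-D shapes are assumptions made for this standalone example.
def _weighted_expert_sum_sketch(alphas, expert_outputs):
    import numpy as np
    alphas = np.asarray(alphas).reshape(-1, 1)      # (num_experts, 1)
    expert_outputs = np.asarray(expert_outputs)     # (num_experts, output_dim)
    return (alphas * expert_outputs).sum(axis=0)    # (output_dim,)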
def get_CNN_fea(saliency_inputs, time_ind, final_dim=256):
    saliency_inputs_slice = util.slice_layer(1, time_ind, time_ind + 1)(saliency_inputs)
    _saliency = conv1(get_dim1_layer(saliency_inputs_slice))
    _saliency = conv2(_saliency)
    _saliency = pooling(_saliency)
    _saliency = conv3(_saliency)
    _saliency = conv4(_saliency)
    _saliency = pooling(_saliency)
    _saliency = conv5(_saliency)
    _saliency = Flatten()(_saliency)
    _saliency = Dense(final_dim, activation='relu')(_saliency)
    return _saliency
def likelihood_loss(y_true, y_pred):
    """
    If we assume the distribution follows N(mean_pred, var_pred), then we can use the
    ground-truth samples to compute the likelihood. Use the NLL as the cost.
    """
    # Note that var = sigma**2
    ux = util.slice_layer(2, 0, 1)(y_pred)
    uy = util.slice_layer(2, 1, 2)(y_pred)
    uz = util.slice_layer(2, 2, 3)(y_pred)
    varx = util.slice_layer(2, 3, 4)(y_pred)
    vary = util.slice_layer(2, 4, 5)(y_pred)
    varz = util.slice_layer(2, 5, 6)(y_pred)

    cliplayer = Lambda(lambda x: K.clip(K.abs(x), min_value=0.0001, max_value=2))
    cliplayer2 = Lambda(lambda x: K.clip(x, min_value=-2000, max_value=2000))
    varx = cliplayer(varx)
    vary = cliplayer(vary)
    varz = cliplayer(varz)

    ux = K.repeat_elements(ux, 30, axis=-1)
    uy = K.repeat_elements(uy, 30, axis=-1)
    uz = K.repeat_elements(uz, 30, axis=-1)
    varx = K.repeat_elements(varx, 30, axis=-1)
    vary = K.repeat_elements(vary, 30, axis=-1)
    varz = K.repeat_elements(varz, 30, axis=-1)

    x = y_true[:, :, 0::3]
    y = y_true[:, :, 1::3]
    z = y_true[:, :, 2::3]

    lossx = K.log(varx + K.epsilon()) + ((x - ux)**2) / (varx + K.epsilon())
    lossy = K.log(vary + K.epsilon()) + ((y - uy)**2) / (vary + K.epsilon())
    lossz = K.log(varz + K.epsilon()) + ((z - uz)**2) / (varz + K.epsilon())
    # lossx = varx - 1 + ((x - ux)**2) / (varx + K.epsilon())
    # lossy = vary - 1 + ((y - uy)**2) / (vary + K.epsilon())
    # lossz = varz - 1 + ((z - uz)**2) / (varz + K.epsilon())
    lossx = cliplayer2(lossx)
    lossy = cliplayer2(lossy)
    lossz = cliplayer2(lossz)

    # constraint on x, y, z (unit norm); disabled when lambda_xyz = 0
    lambda_xyz = 0
    lossxyz = lambda_xyz * (1 - (ux**2 + uy**2 + uz**2))**2

    loss = K.mean(K.sum(K.sum(lossx + lossy + lossz + lossxyz, axis=2), axis=1))
    return loss / cfg.running_length / cfg.fps
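# Hedged sanity-check sketch (not used by the model): up to an additive constant, each
# per-axis term in likelihood_loss is the Gaussian negative log-likelihood
# log(var) + (x - mu)**2 / var. The helper below (a hypothetical name) reproduces one
# axis of that term in plain numpy, mirroring the clipping above, so the Keras loss can
# be cross-checked outside the graph.
def _gaussian_nll_one_axis(x, mu, var, eps=1e-7):
    import numpy as np
    var = np.clip(np.abs(np.asarray(var)), 1e-4, 2.0)  # same range as cliplayer
    x = np.asarray(x)
    mu = np.asarray(mu)
    return np.log(var + eps) + ((x - mu) ** 2) / (var + eps)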
                       return_state=True)
encoder_dense = Dense(3, activation='tanh')
if cfg.predict_mean_var:
    decoder_dense = Dense(num_decoder_tokens, activation=None)
else:
    decoder_dense = Dense(3, activation=None)

## concat states
all_outputs = []
inputs = decoder_inputs
all_outputs_target_past = []
for time_ind in range(max_encoder_seq_length):
    # predict for target user's past (reconstruction)
    encoder_outputs_slice = slice_layer(1, time_ind, time_ind + 1)(pst_outputs_sqns)
    # get cnn feature
    cnn_input = get_dim1_layer(
        slice_layer(1, time_ind, time_ind + 1)(encoder_inputs_oth))
    cnn_oth_output3 = _get_cnn_fea(cnn_input)
    concat_cnn_state = Concatenatelayer_dim3(
        [cnn_oth_output3, get_dim1_layer(encoder_outputs_slice)])
    outputs = encoder_dense(concat_cnn_state)
    all_outputs_target_past.append(outputs)

for time_ind in range(max_decoder_seq_length):
    # decoder
    fut_outputs_sqns0, fut_state_h, fut_state_c = convlstm_decoder([inputs] + states0)
    states0 = [fut_state_h, fut_state_c]
if shared_LSTM:
    lstm = LSTM(latent_dim,
                stateful=cfg.stateful_across_batch,
                return_state=True,
                return_sequences=True)
if shared_WiVi:
    W_i = Dense(256, activation='relu')
    V_i = Dense(256, activation='relu')

get_dim2_layer = Lambda(lambda x: x[:, :, 0, :])
expand_dim2_layer = Lambda(lambda x: K.expand_dims(x, 2))

oth_states_list = []
for user_ind in range(num_user - 1):
    if shared_LSTM:
        oth_states, state_h_oth, state_c_oth = lstm(
            get_dim2_layer(
                util.slice_layer(2, user_ind, user_ind + 1)(others_inputs)))
    else:
        oth_states, state_h_oth, state_c_oth = lstm_pool[user_ind](
            get_dim2_layer(
                util.slice_layer(2, user_ind, user_ind + 1)(others_inputs)))
    oth_states_list.append(expand_dim2_layer(oth_states))
oth_states_list = Lambda(lambda x: K.concatenate(x, axis=2))(
    oth_states_list)  # (batch,10,33,64) or (batch,20,33,64)


# ************************************************************
def ui_usi_similarity(u_i_list, u_si_list):
    logit = reduce_sum_layer_1(multiply([u_si_list, u_i_list]))  # (batch,34)
    alpha_list = Softmax()(logit)
    return alpha_list
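# Minimal numpy sketch (illustration only) of the gating computed by ui_usi_similarity:
# the logit for each expert is the inner product of u_i and u_si over the feature axis,
# and a softmax over experts gives the mixing weights alpha. The (num_experts, feature_dim)
# shapes are assumptions for this standalone example, not the model's batched tensors.
def _dot_product_gate_sketch(u_i, u_si):
    import numpy as np
    logits = (np.asarray(u_i) * np.asarray(u_si)).sum(axis=-1)  # (num_experts,)
    logits = logits - logits.max()                              # numerical stability
    weights = np.exp(logits)
    return weights / weights.sum()                              # softmax over experts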
# single layer LSTM
if not cfg.input_mean_var:
    inputs = Input(shape=(None, num_encoder_tokens))
else:
    inputs = Input(shape=(None, num_decoder_tokens))
lstm = LSTM(latent_dim, return_state=True)
# encoder_outputs, state_h, state_c = lstm(inputs)
# states = [state_h, state_c]
output_dense = Dense(num_decoder_tokens, activation='tanh')

all_outputs = []
for time_ind in range(max_decoder_seq_length):
    this_inputs = util.slice_layer(1, time_ind, time_ind + 1)(inputs)
    if time_ind == 0:
        decoder_states, state_h, state_c = lstm(this_inputs)  # no initial states
    else:
        decoder_states, state_h, state_c = lstm(this_inputs, initial_state=states)
    outputs = output_dense(decoder_states)
    all_outputs.append(expand_dim_layer(outputs))
    # this_inputs = outputs
    states = [state_h, state_c]

all_outputs = Lambda(lambda x: K.concatenate(x, axis=1))(all_outputs)
model = Model(inputs, all_outputs)
model.compile(optimizer='Adam', loss='mean_squared_error', metrics=['accuracy'])
                       return_state=True)
if cfg.predict_mean_var:
    encoder_dense = Dense(6, activation='tanh')
    decoder_dense = Dense(6, activation=None)
else:
    encoder_dense = Dense(3, activation='tanh')
    decoder_dense = Dense(3, activation=None)

## concat states
all_outputs = []
all_outputs_oth = []
if not cfg.teacher_forcing:
    inputs = decoder_inputs
else:
    inputs = util.slice_layer(1, 0, 1)(decoder_inputs)

all_outputs_target_past = []
for time_ind in range(max_encoder_seq_length):
    # predict for others' past (reconstruction)
    outputs_sqns_oth_slice = util.slice_layer(1, time_ind, time_ind + 1)(outputs_sqns_oth)
    outputs_oth = pred_conv_lstm_dense(outputs_sqns_oth_slice)
    all_outputs_oth.append(outputs_oth)
    # predict for target user's past (reconstruction)
    if use_fclstm_tar:
        encoder_outputs_slice = util.slice_layer(1, time_ind, time_ind + 1)(encoder_outputs)
    else:
        encoder_outputs_slice = util.slice_layer(1, time_ind, time_ind +
# build decoder model
if is_train:
    decoder_inputs = Input(shape=(None, num_decoder_tokens), name='decoder_input')
else:
    decoder_inputs = Input(shape=(1, num_decoder_tokens))
# decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_gru = GRU(latent_dim, return_sequences=True, return_state=True)
decoder_dense = Dense(num_decoder_tokens, activation='tanh')

# latent_inputs = Input(shape=(vae_latent_dim,), name='z_sampling')  # used as the initial states!
# h0 = Dense(latent_dim, activation='tanh')(latent_inputs)
h0 = Dense(latent_dim, activation='tanh')(encoder(encoder_inputs)[2])

all_outputs = []
if is_train:
    inputs = util.slice_layer(1, 0, 1)(decoder_inputs)
else:
    inputs = decoder_inputs
states = h0
for time_ind in range(max_decoder_seq_length):
    teacher_key = np.random.randint(1, max_decoder_seq_length // 2)
    # also use the same embedding for the decoder
    inputs = conv0(inputs)
    inputs = conv1(inputs)
    inputs = conv2(inputs)
    # decoder_states, state_h, state_c = decoder_lstm(inputs, initial_state=states)
    # states = [state_h, state_c]
    decoder_states, state_h = decoder_gru(inputs, initial_state=states)
    states = state_h
    outputs = decoder_dense(decoder_states)
    all_outputs.append(outputs)
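# Standalone tf.keras sketch (assumed shapes and layer sizes, independent of the model
# above) of the design used here: a Dense projection of an encoder state is used as the
# initial hidden state of the GRU decoder, and the decoder is then stepped one timestep
# at a time while its returned state is carried forward.
def _gru_decoder_init_sketch(latent_dim=64, num_tokens=6):
    from tensorflow.keras.layers import Input, GRU, Dense
    from tensorflow.keras.models import Model
    enc_state = Input(shape=(latent_dim,))                 # e.g. an encoder state or VAE latent
    step_in = Input(shape=(1, num_tokens))                 # one decoder timestep
    h0 = Dense(latent_dim, activation='tanh')(enc_state)   # project to the decoder state size
    out, h = GRU(latent_dim, return_sequences=True,
                 return_state=True)(step_in, initial_state=h0)
    pred = Dense(num_tokens, activation='tanh')(out)
    return Model([enc_state, step_in], [pred, h])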
decoder_dense = Dense(num_decoder_tokens, activation='tanh')

all_outputs = []
inputs = decoder_inputs
for time_ind in range(max_decoder_seq_length):
    # if cfg.include_time_ind:  # as input
    #     this_time_ind_input = util.slice_layer(1, time_ind, time_ind + 1)(time_ind_input)
    #     inputs = Concatenatelayer_1([inputs, this_time_ind_input])
    if use_one_layer:
        decoder_states, state_h, state_c = decoder_lstm(inputs, initial_state=states)
        states = [state_h, state_c]
        if cfg.include_time_ind:  # as embedding input
            this_time_ind_input = util.slice_layer(1, time_ind, time_ind + 1)(time_ind_input)
            decoder_states = Concatenatelayer_1([decoder_states, this_time_ind_input])
        outputs = decoder_dense(decoder_states)
    else:
        decoder1_outputs, state_decoder1_h, state_decoder1_c = decoder_lstm1(
            inputs, initial_state=decoder1_states_inputs)
        decoder1_states_inputs = [state_decoder1_h, state_decoder1_c]
        decoder2_outputs, state_decoder2_h, state_decoder2_c = decoder_lstm2(
            decoder1_outputs, initial_state=decoder2_states_inputs)
        decoder2_states_inputs = [state_decoder2_h, state_decoder2_c]
        outputs = decoder_dense(decoder2_outputs)
    all_outputs.append(outputs)
    inputs = outputs
all_outputs = []
inputs = decoder_inputs
for time_ind in range(max_decoder_seq_length):
    decoder1_outputs, state_decoder1_h, state_decoder1_c = decoder_lstm1(
        inputs, initial_state=decoder1_states_inputs)
    decoder1_states_inputs = [state_decoder1_h, state_decoder1_c]
    decoder2_outputs, state_decoder2_h, state_decoder2_c = decoder_lstm2(
        decoder1_outputs, initial_state=decoder2_states_inputs)
    decoder2_states_inputs = [state_decoder2_h, state_decoder2_c]
    decoder_pred = decoder_dense(decoder2_outputs)

    if oth_from_past:
        # use the last 10 locations as the h_i
        gt_mean_var_oth = util.slice_layer(
            1, time_ind + max_encoder_seq_length - 10,
            time_ind + max_encoder_seq_length)(others_inputs)  # (,10,33,6)
        gt_mean_var_oth = Permute((2, 3, 1))(gt_mean_var_oth)  # shape=(?, 33, 6, 10)
        gt_mean_var_oth = oth_past_dense(gt_mean_var_oth)      # shape=(?, 33, 6, 1)
        gt_mean_var_oth = Permute((3, 1, 2))(gt_mean_var_oth)  # permute back: shape=(?, 1, 33, 6)
    else:
        gt_mean_var_oth = util.slice_layer(1, time_ind, time_ind + 1)(others_inputs)

    concat_outputs = Concatenatelayer([get_dim1_layer(gt_mean_var_oth), decoder_pred])
    if mlp_mixing:
        concat_outputs = Flatten()(concat_outputs)
        outputs = mixing(concat_outputs)
        outputs = expand_dim_layer(outputs)
    if ame:
        # concat_outputs_flat = Flatten()(concat_outputs)
        # h_all = expand_dim_layer(concat_outputs_flat)  # (batch,1,34*6)
        u_i_list = []
        # outputs = pred_conv_lstm_conv3(outputs)
        # outputs = bnlayer3(outputs)
        # outputs = pred_conv_lstm_conv4(outputs)
        # outputs = bnlayer4(outputs)
        # outputs = pred_conv_lstm_conv5(outputs)
        # outputs = bnlayer5(outputs)
        # outputs = pred_conv_lstm_conv6(outputs)

        # residual
        # outputs = Add()([outputs, squeeze_for_residual(get_dim_layer(fut_outputs_sqns))])
        # outputs = Add()([outputs, get_dim_layer(inputs)])
        outputs = expand_dim_layer(outputs)
        outputs = expand_dim_layer(outputs)

    if cfg.predict_mean_var and cfg.sample_and_refeed:
        # for training
        ### generated from gaussian
        ux_temp = slice_layer(2, 0, 1)(outputs)
        uy_temp = slice_layer(2, 1, 2)(outputs)
        uz_temp = slice_layer(2, 2, 3)(outputs)
        varx_temp = slice_layer(2, 3, 4)(outputs)
        vary_temp = slice_layer(2, 4, 5)(outputs)
        varz_temp = slice_layer(2, 5, 6)(outputs)
        temp_newdata = expand_dim_layer(
            expand_dim_layer(
                Concatenatelayer1([
                    generate_fake_batch_layer([ux_temp, varx_temp]),
                    generate_fake_batch_layer([uy_temp, vary_temp]),
                    generate_fake_batch_layer([uz_temp, varz_temp])
                ])))
        inputs = temp_newdata
    else:
    if not cfg.teacher_forcing:
        # 2-layer fclstm, without teacher forcing
        decoder1_outputs, state_decoder1_h, state_decoder1_c = decoder_lstm1(
            inputs, initial_state=decoder1_states_inputs)
        decoder1_states_inputs = [state_decoder1_h, state_decoder1_c]
        decoder2_outputs, state_decoder2_h, state_decoder2_c = decoder_lstm2(
            decoder1_outputs, initial_state=decoder2_states_inputs)
        decoder2_states_inputs = [state_decoder2_h, state_decoder2_c]

    if target_user_only:
        outputs = decoder_dense(decoder2_outputs)
    else:
        if model_others:
            # model others' trend
            if others_mlp:
                others_fut_inputs_slice = util.slice_layer(1, time_ind, time_ind + 1)(others_fut_inputs)
                others_fut_inputs1 = Flatten()(others_fut_inputs_slice)
                others_fut_inputs1 = others_dense1(others_fut_inputs1)
                others_fut_inputs1 = others_dense2(others_fut_inputs1)
                if cfg.teacher_forcing:
                    concat_state = Concatenatelayer([
                        others_fut_inputs1,
                        get_dim1_layer(
                            util.slice_layer(1, time_ind, time_ind + 1)(decoder2_outputs))
                    ])
                else:
                    concat_state = Concatenatelayer(
                        [others_fut_inputs1, get_dim1_layer(decoder2_outputs)])
                outputs = expand_dim_layer(decoder_dense(concat_state))
            elif others_lstm:
                # LSTM only
                others_fut_inputs2_slice = util.slice_layer(1, time_ind, time_ind + 1)(others_fut_inputs2[0])
                concat_state = Concatenatelayer(
                    [get_dim1_layer(others_fut_inputs2_slice), get_dim1_layer(decoder2_outputs)])
                ### use Gated Linear Unit instead of concatenating
                # concat_state = GLU_layer([get_dim1_layer(others_fut_inputs2_slice), get_dim1_layer(decoder2_outputs)])
                outputs = expand_dim_layer(decoder_dense(concat_state))
## concat states
all_outputs = []
inputs = decoder_inputs
for time_ind in range(max_decoder_seq_length):
    # Run the decoder on one timestep
    decoder_states, state_h, state_c = decoder_lstm(inputs, initial_state=states)

    # ### note: Keras ConvLSTM is not stateful by default (stateful=False); the hidden
    # ### states do not need to be fed back here because the ConvLSTM already ran over
    # ### the whole future sequence to produce fut_outputs_sqns.
    # fut_outputs, others_state_h, others_state_c = other_fut_lstm(others_fut_inputs)
    # # fut_outputs, others_state_h, others_state_c = other_fut_lstm(others_fut_inputs, initial_state=others_states)  # errors?!!!
    # fut_outputs = identity_layer(fut_outputs_sqns[:, time_ind, :, :, :])
    fut_outputs = util.slice_layer(1, time_ind, time_ind + 1)(fut_outputs_sqns)

    convlstm_state = flatten_layer(fut_outputs)
    convlstm_state = flatten_conv_lstm_state_dense(convlstm_state)
    concat_state = Concatenatelayer([get_dim1_layer(decoder_states), convlstm_state])
    outputs = decoder_dense(concat_state)
    outputs = expand_dim_layer(outputs)
    all_outputs.append(outputs)

    inputs = outputs
    states = [state_h, state_c]
    # others_fut_inputs = others_fut_inputs  # TODO: feed gt others for next step
    # others_states = [others_state_h, others_state_c]

# Concatenate all predictions
all_outputs = []
inputs = decoder_inputs
for time_ind in range(max_decoder_seq_length):
    if not cfg.teacher_forcing:
        # 2-layer fclstm, without teacher forcing
        decoder1_outputs, state_decoder1_h, state_decoder1_c = decoder_lstm1(
            inputs, initial_state=decoder1_states_inputs)
        decoder1_states_inputs = [state_decoder1_h, state_decoder1_c]
        decoder2_outputs, state_decoder2_h, state_decoder2_c = decoder_lstm2(
            decoder1_outputs, initial_state=decoder2_states_inputs)
        decoder2_states_inputs = [state_decoder2_h, state_decoder2_c]

    if model_others:
        # model others' trend
        if others_mlp:
            others_fut_inputs_slice = util.slice_layer(1, time_ind, time_ind + 1)(others_fut_inputs)
            others_fut_inputs1 = Flatten()(others_fut_inputs_slice)
            others_fut_inputs1 = others_dense1(others_fut_inputs1)
            others_fut_inputs1 = others_dense2(others_fut_inputs1)
            if cfg.teacher_forcing:
                concat_state = Concatenatelayer([
                    others_fut_inputs1,
                    get_dim1_layer(
                        util.slice_layer(1, time_ind, time_ind + 1)(decoder2_outputs))
                ])
            else:
                concat_state = Concatenatelayer(
                    [others_fut_inputs1, get_dim1_layer(decoder2_outputs)])
            outputs = expand_dim_layer(decoder_dense(concat_state))