Example #1
def localise_by_random_selection(number_of_place_to_fix, target_weights):
    """
    Randomly select weight positions to fix from the layers in target_weights.
    """
    import numpy as np

    total_indices = []
    for idx_to_tl, vs in target_weights.items():
        t_w, lname = vs
        if not model_util.is_LSTM(lname):
            l_indices = list(np.ndindex(t_w.shape))
            total_indices.extend(zip([idx_to_tl] * len(l_indices), l_indices))
        else:  # handle layers with more than one weight matrix (e.g., LSTM)
            for idx_to_w, a_t_w in enumerate(t_w):
                l_indices = list(np.ndindex(a_t_w.shape))
                total_indices.extend(
                    zip([(idx_to_tl, idx_to_w)] * len(l_indices), l_indices))

    np.random.shuffle(total_indices)
    if number_of_place_to_fix > 0 and number_of_place_to_fix < len(
            total_indices):
        selected_indices = np.random.choice(np.arange(len(total_indices)),
                                            number_of_place_to_fix,
                                            replace=False)
        indices_to_places_to_fix = [
            total_indices[idx] for idx in selected_indices
        ]
    else:
        indices_to_places_to_fix = total_indices

    return indices_to_places_to_fix
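
A minimal usage sketch (not part of the original module): it assumes the function lives alongside `model_util` as above, and the layer index and weight shape are made up for illustration.

# hypothetical target_weights in the format returned by get_target_weights (Example #2):
# layer index -> [weight tensor, layer class name]
import numpy as np
target_weights = {2: [np.random.randn(4, 3), 'Dense']}
# sample 5 of the 4 * 3 = 12 candidate positions
places_to_fix = localise_by_random_selection(5, target_weights)
# each entry is a (layer index, weight position) pair, e.g. (2, (1, 0))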
Example #2
def get_target_weights(model, path_to_keras_model, indices_to_target=None):
    """
    Return the weights of the layers denoted by indices_to_target,
    or of all supported trainable layers (Dense, Conv, LSTM) if it is None.
    """
    import re
    from tensorflow.keras.models import load_model

    # regex patterns for the class names of the supported layers
    targeting_clname_pattns = ['Dense.*', 'Conv.*', '.*LSTM.*']
    is_target = lambda clname, targets: (targets is None) or any(
        bool(re.match(t, clname)) for t in targets)

    if model is None:
        assert path_to_keras_model is not None
        model = load_model(path_to_keras_model, compile=False)

    target_weights = {}  # key = layer index, value: [weight value, layer name]
    if indices_to_target is not None:
        num_layers = len(model.layers)
        indices_to_target = [
            idx if idx >= 0 else num_layers + idx for idx in indices_to_target
        ]

        for i, layer in enumerate(model.layers):
            if i in indices_to_target:
                ws = layer.get_weights()
                assert len(ws) > 0, "the target layer doesn't have weights"
                # target only the kernel weight, not the bias
                target_weights[i] = [ws[0], type(layer).__name__]
    else:
        for i, layer in enumerate(model.layers):
            class_name = type(layer).__name__
            if is_target(class_name, targeting_clname_pattns):
                ws = layer.get_weights()
                if len(ws):  # has weight
                    if model_util.is_FC(class_name) or model_util.is_C2D(
                            class_name):
                        target_weights[i] = [ws[0], type(layer).__name__]
                    elif model_util.is_LSTM(class_name):
                        # for LSTM, even without bias, a fault can be in the weights of the kernel
                        # or the recurrent kernel (hidden state handling)
                        assert len(ws) == 3, ws
                        # index 0: for the kernel, index 1: for the recurrent kernel
                        target_weights[i] = [ws[:-1], type(layer).__name__]
                    else:
                        print("{} not supported yet".format(class_name))
                        assert False

    return target_weights
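
A minimal usage sketch (assumes a TF2/Keras environment; the two-layer toy model is made up):

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
mdl = Sequential([Dense(8, input_shape=(4,)), Dense(2)])
# negative indices are normalised against the number of layers,
# so -1 denotes the last layer (index 1 here)
tws = get_target_weights(mdl, None, indices_to_target=[-1])
print(tws[1][1], tws[1][0].shape)  # Dense (8, 2)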
Example #3
def gen_and_run_model(mdl,
                      path_to_patch,
                      X,
                      y,
                      num_label,
                      has_lstm_layer=False,
                      is_multi_label=True,
                      need_act=False,
                      batch_size=None):
    """
	** this is the part that should be fixed (this is a temporary fix)
	"""
    import pandas as pd
    import utils.kfunc_util as kfunc_util
    import utils.data_util as data_util
    from collections.abc import Iterable

    act_func = tf.nn.relu if need_act else None
    patch = pd.read_pickle(path_to_patch)
    indices_to_tls = sorted(
        patch.keys(), key=lambda v: v[0] if isinstance(v, Iterable) else v)
    if is_multi_label:
        formated_y = data_util.format_label(y, num_label)
    else:
        formated_y = y

    if not has_lstm_layer:
        k_fn_mdl_lst = kfunc_util.generate_base_mdl(
            mdl,
            X,
            indices_to_tls=indices_to_tls,
            batch_size=batch_size,
            act_func=act_func)

        predictions = kfunc_util.compute_kfunc(
            k_fn_mdl_lst,
            formated_y, [patch[idx] for idx in indices_to_tls],
            batch_size=batch_size)[0]
    else:
        from utils.gen_frame_graph import build_mdl_lst
        from tensorflow.keras.models import Model

        # compute previous outputs
        min_idx_to_tl = np.min([
            idx if not isinstance(idx, Iterable) else idx[0]
            for idx in indices_to_tls
        ])
        prev_l = mdl.layers[min_idx_to_tl - 1 if min_idx_to_tl > 0 else 0]
        if model_util.is_Input(type(prev_l).__name__):
            # the previous layer is an input layer
            prev_outputs = X
        else:  # otherwise, compute the output of the previous layer
            t_mdl = Model(inputs=mdl.input, outputs=prev_l.output)
            prev_outputs = t_mdl.predict(X)
        ##
        k_fn_mdl = build_mdl_lst(mdl, prev_outputs.shape[1:], indices_to_tls)
        init_weights = {}
        init_biases = {}
        for idx_to_tl in indices_to_tls:
            idx_to_tl = idx_to_tl[0] if isinstance(idx_to_tl, tuple) else idx_to_tl
            ws = mdl.layers[idx_to_tl].get_weights()
            lname = type(mdl.layers[idx_to_tl]).__name__
            if model_util.is_FC(lname) or model_util.is_C2D(lname):
                init_weights[idx_to_tl] = ws[0]
                init_biases[idx_to_tl] = ws[1]
            elif model_util.is_LSTM(lname):
                # get only the kernel and recurrent kernel, not the bias
                for i in range(2):
                    init_weights[(idx_to_tl, i)] = ws[i]
                init_biases[idx_to_tl] = ws[-1]
            else:
                print("Not supported layer: {}".format(lname))
                assert False

        chunks = data_util.return_chunks(len(X), batch_size=batch_size)
        predictions = model_util.predict_with_new_delat(
            k_fn_mdl, patch, min_idx_to_tl, init_biases, init_weights,
            prev_outputs, chunks)

    if len(predictions.shape) > len(
            formated_y.shape) and predictions.shape[1] == 1:
        predictions = np.squeeze(predictions, axis=1)

    if is_multi_label:
        pred_labels = np.argmax(predictions, axis=1)
    else:
        pred_labels = np.round(predictions).flatten()

    aft_preds = []
    aft_preds_column = ['index', 'true', 'pred', 'flag']
    for i, (true_label, pred_label) in enumerate(zip(y, pred_labels)):
        aft_preds.append([i, true_label, pred_label, true_label == pred_label])

    aft_pred_df = pd.DataFrame(aft_preds, columns=aft_preds_column)
    return aft_pred_df
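
A sketch of the patch-file format consumed above, as implied by the code: a pickled dict mapping a target-layer index (or an (LSTM layer index, kernel index) tuple) to the repaired weight array. The shapes and the path are hypothetical.

import numpy as np
import pandas as pd
patch = {2: np.random.randn(8, 2)}  # hypothetical repaired kernel for layer 2
pd.to_pickle(patch, 'patch.pkl')
# gen_and_run_model(mdl, 'patch.pkl', X, y, num_label=2) would then evaluate
# the model with this kernel substituted in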
Example #4
def localise_by_gradient(X,
                         y,
                         indices_to_chgd,
                         indices_to_unchgd,
                         target_weights,
                         path_to_keras_model=None,
                         is_multi_label=True):
    """
	localise using chgd & unchgd
	"""
    from collections.abc import Iterable

    total_cands = {}
    # set loss func
    loss_func = model_util.get_loss_func(is_multi_label=is_multi_label)
    ## slice inputs
    for idx_to_tl, vs in target_weights.items():
        t_w, lname = vs
        #print ("targeting layer {} ({})".format(idx_to_tl, lname))
        if model_util.is_C2D(lname) or model_util.is_FC(lname):  # either FC or C2D
            # for changed inputs
            grad_scndcr_for_chgd = compute_gradient_to_loss(
                path_to_keras_model,
                idx_to_tl,
                X[indices_to_chgd],
                y[indices_to_chgd],
                loss_func=loss_func,
                by_batch=True)
            # for unchanged inputs
            grad_scndcr_for_unchgd = compute_gradient_to_loss(
                path_to_keras_model,
                idx_to_tl,
                X[indices_to_unchgd],
                y[indices_to_unchgd],
                loss_func=loss_func,
                by_batch=True)

            assert t_w.shape == grad_scndcr_for_chgd.shape, "{} vs {}".format(
                t_w.shape, grad_scndcr_for_chgd.shape)
            total_cands[idx_to_tl] = {
                'shape': grad_scndcr_for_chgd.shape,
                'costs': grad_scndcr_for_chgd.flatten() /
                         (1. + grad_scndcr_for_unchgd.flatten())
            }
        elif model_util.is_LSTM(lname):
            # for changed inputs
            grad_scndcr_for_chgd = compute_gradient_to_loss(
                path_to_keras_model,
                idx_to_tl,
                X[indices_to_chgd],
                y[indices_to_chgd],
                loss_func=loss_func,
                by_batch=True)
            # for unchanged inputs
            grad_scndcr_for_unchgd = compute_gradient_to_loss(
                path_to_keras_model,
                idx_to_tl,
                X[indices_to_unchgd],
                y[indices_to_unchgd],
                loss_func=loss_func,
                by_batch=True)

            # check the shapes of the kernel (index 0) and the recurrent
            # kernel (index 1) weights
            assert t_w[0].shape == grad_scndcr_for_chgd[0].shape, \
                "{} vs {}".format(t_w[0].shape, grad_scndcr_for_chgd[0].shape)
            assert t_w[1].shape == grad_scndcr_for_chgd[1].shape, \
                "{} vs {}".format(t_w[1].shape, grad_scndcr_for_chgd[1].shape)

            # generate total candidates
            total_cands[idx_to_tl] = {'shape': [], 'costs': []}
            for _grad_scndr_chgd, _grad_scndr_unchgd in zip(
                    grad_scndcr_for_chgd, grad_scndcr_for_unchgd):
                # _grad_scndr_chgd & _grad_scndr_unchgd are for either the
                # kernel or the recurrent kernel
                _costs = _grad_scndr_chgd.flatten() / (
                    1. + _grad_scndr_unchgd.flatten())
                total_cands[idx_to_tl]['shape'].append(_grad_scndr_chgd.shape)
                total_cands[idx_to_tl]['costs'].append(_costs)
        else:
            print("{} not supported yet".format(lname))
            assert False

    indices_to_tl = list(total_cands.keys())
    costs_and_keys = []
    for idx_to_tl in indices_to_tl:
        if not model_util.is_LSTM(target_weights[idx_to_tl][1]):
            for local_i, c in enumerate(total_cands[idx_to_tl]['costs']):
                cost_and_key = ([
                    idx_to_tl,
                    np.unravel_index(local_i, total_cands[idx_to_tl]['shape'])
                ], c)
                costs_and_keys.append(cost_and_key)
        else:
            num = len(total_cands[idx_to_tl]['shape'])
            for idx_to_w in range(num):
                for local_i, c in enumerate(
                        total_cands[idx_to_tl]['costs'][idx_to_w]):
                    cost_and_key = ([
                        (idx_to_tl, idx_to_w),
                        np.unravel_index(
                            local_i, total_cands[idx_to_tl]['shape'][idx_to_w])
                    ], c)
                    costs_and_keys.append(cost_and_key)

    sorted_costs_and_keys = sorted(costs_and_keys,
                                   key=lambda vs: vs[1],
                                   reverse=True)
    return sorted_costs_and_keys
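
A self-contained toy of the ranking rule used above (all gradient values made up): each weight position is scored by its gradient magnitude on the changed inputs, discounted by the magnitude on the unchanged inputs, and positions are sorted by descending score.

import numpy as np
grad_chgd = np.array([[0.9, 0.1], [0.5, 0.4]])
grad_unchgd = np.array([[0.1, 0.8], [0.5, 0.0]])
costs = grad_chgd.flatten() / (1. + grad_unchgd.flatten())
order = np.argsort(costs)[::-1]
ranked = [(np.unravel_index(i, grad_chgd.shape), costs[i]) for i in order]
print(ranked)  # position (0, 0) comes first with score 0.9/1.1 ~= 0.82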
Example #5
def localise_by_chgd_unchgd(X,
                            y,
                            indices_to_chgd,
                            indices_to_unchgd,
                            target_weights,
                            path_to_keras_model=None,
                            is_multi_label=True):
    """
	Find those likely to be highly influential to the changed behaviour 
	while less influential to the unchanged behaviour
	"""
    from collections.abc import Iterable
    #loc_start_time = time.time()
    #print ("Layers to inspect", list(target_weights.keys()))
    # compute FI and GL with changed inputs
    #target_weights = {k:target_weights[k] for k in [2]}
    total_cands_chgd = compute_FI_and_GL(
        X,
        y,
        indices_to_chgd,
        target_weights,
        is_multi_label=is_multi_label,
        path_to_keras_model=path_to_keras_model)

    # compute FI and GL with unchanged inputs
    total_cands_unchgd = compute_FI_and_GL(
        X,
        y,
        indices_to_unchgd,
        target_weights,
        is_multi_label=is_multi_label,
        path_to_keras_model=path_to_keras_model)

    indices_to_tl = list(total_cands_chgd.keys())
    costs_and_keys = []
    indices_to_nodes = []
    shapes = {}
    for idx_to_tl in tqdm(indices_to_tl):
        if not model_util.is_LSTM(target_weights[idx_to_tl][1]):
            # only one weight matrix to process
            cost_from_chgd = total_cands_chgd[idx_to_tl]['costs']
            cost_from_unchgd = total_cands_unchgd[idx_to_tl]['costs']
            # key: more influential on the changed behaviour and less
            # influential on the unchanged behaviour
            costs_combined = cost_from_chgd / (1. + cost_from_unchgd)  # shape = (N, 2)
            shapes[idx_to_tl] = total_cands_chgd[idx_to_tl]['shape']

            for i, c in enumerate(costs_combined):
                costs_and_keys.append(([idx_to_tl, i], c))
                indices_to_nodes.append(
                    [idx_to_tl,
                     np.unravel_index(i, shapes[idx_to_tl])])
        else:  # LSTM: one cost array per weight matrix (kernel, recurrent kernel)
            num = len(total_cands_unchgd[idx_to_tl]['shape'])
            shapes[idx_to_tl] = []
            for idx_to_pair in range(num):
                cost_from_chgd = total_cands_chgd[idx_to_tl]['costs'][idx_to_pair]
                cost_from_unchgd = total_cands_unchgd[idx_to_tl]['costs'][idx_to_pair]
                costs_combined = cost_from_chgd / (1. + cost_from_unchgd)  # shape = (N, 2)
                shapes[idx_to_tl].append(
                    total_cands_chgd[idx_to_tl]['shape'][idx_to_pair])

                for i, c in enumerate(costs_combined):
                    costs_and_keys.append(([(idx_to_tl, idx_to_pair), i], c))
                    indices_to_nodes.append([
                        (idx_to_tl, idx_to_pair),
                        np.unravel_index(i, shapes[idx_to_tl][idx_to_pair])
                    ])

    costs = np.asarray([vs[1] for vs in costs_and_keys])
    #t4 = time.time()
    _costs = costs.copy()
    is_efficient = np.arange(costs.shape[0])
    next_point_index = 0  # Next index in the is_efficient array to search for
    while next_point_index < len(_costs):
        nondominated_point_mask = np.any(_costs > _costs[next_point_index],
                                         axis=1)
        nondominated_point_mask[next_point_index] = True
        is_efficient = is_efficient[
            nondominated_point_mask]  # Remove dominated points
        _costs = _costs[nondominated_point_mask]
        next_point_index = np.sum(
            nondominated_point_mask[:next_point_index]) + 1

    pareto_front = [
        tuple(v)
        for v in np.asarray(indices_to_nodes, dtype=object)[is_efficient]
    ]
    #t5 = time.time()
    #print ("Time for computing the pareto front: {}".format(t5 - t4))
    #loc_end_time = time.time()
    #print ("Time for total localisation: {}".format(loc_end_time - loc_start_time))
    return pareto_front, costs_and_keys
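
The while loop above is the standard iterative non-dominated filter; a self-contained toy run on made-up (GL, FI) cost pairs shows the effect:

import numpy as np
costs = np.array([[0.9, 0.1], [0.5, 0.5], [0.2, 0.8], [0.4, 0.4]])
_costs = costs.copy()
is_efficient = np.arange(costs.shape[0])
next_point_index = 0
while next_point_index < len(_costs):
    nondominated_point_mask = np.any(_costs > _costs[next_point_index], axis=1)
    nondominated_point_mask[next_point_index] = True
    is_efficient = is_efficient[nondominated_point_mask]
    _costs = _costs[nondominated_point_mask]
    next_point_index = np.sum(nondominated_point_mask[:next_point_index]) + 1
print(is_efficient)  # [0 1 2]: (0.4, 0.4) is dominated by (0.5, 0.5)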
Example #6
def compute_FI_and_GL(X,
                      y,
                      indices_to_target,
                      target_weights,
                      is_multi_label=True,
                      path_to_keras_model=None):
    """
	compute FL and GL for the given inputs
	"""

    ## Now, start localisation !!! ##
    from sklearn.preprocessing import Normalizer
    from collections.abc import Iterable
    norm_scaler = Normalizer(norm="l1")
    total_cands = {}
    FIs = None
    grad_scndcr = None

    #t0 = time.time()
    ## slice inputs
    target_X = X[indices_to_target]
    target_y = y[indices_to_target]

    # get loss func
    loss_func = model_util.get_loss_func(is_multi_label=is_multi_label)
    model = None
    for idx_to_tl, vs in target_weights.items():
        t1 = time.time()
        t_w, lname = vs
        model = load_model(path_to_keras_model, compile=False)
        if idx_to_tl == 0:
            # meaning the model doesn't specify the input layer explicitly
            prev_output = target_X
        else:
            prev_output = model.layers[idx_to_tl - 1].output
        layer_config = model.layers[idx_to_tl].get_config()

        if model_util.is_FC(lname):
            from_front = []
            if idx_to_tl == 0 or idx_to_tl - 1 == 0:
                prev_output = target_X
            else:
                t_model = Model(inputs=model.input,
                                outputs=model.layers[idx_to_tl - 1].output)
                prev_output = t_model.predict(target_X)
            if len(prev_output.shape) == 3:
                prev_output = prev_output.reshape(prev_output.shape[0],
                                                  prev_output.shape[-1])

            for idx in tqdm(range(t_w.shape[-1])):
                assert int(prev_output.shape[-1]) == t_w.shape[0], \
                    "{} vs {}".format(int(prev_output.shape[-1]), t_w.shape[0])

                # shape = prev_output.shape
                output = np.multiply(prev_output, t_w[:, idx])
                output = np.abs(output)
                output = norm_scaler.fit_transform(output)
                output = np.mean(output, axis=0)
                from_front.append(output)

            from_front = np.asarray(from_front)
            from_front = from_front.T
            from_behind = compute_gradient_to_output(path_to_keras_model,
                                                     idx_to_tl, target_X)
            #print ("shape", from_front.shape, from_behind.shape)
            FIs = from_front * from_behind
            ############ FI end #########

            # Gradient
            grad_scndcr = compute_gradient_to_loss(path_to_keras_model,
                                                   idx_to_tl,
                                                   target_X,
                                                   target_y,
                                                   loss_func=loss_func)
            # G end
        elif model_util.is_C2D(lname):
            is_channel_first = layer_config['data_format'] == 'channels_first'
            if idx_to_tl == 0 or idx_to_tl - 1 == 0:
                prev_output_v = target_X
            else:
                t_model = Model(inputs=model.input,
                                outputs=model.layers[idx_to_tl - 1].output)
                prev_output_v = t_model.predict(target_X)
            tr_prev_output_v = np.moveaxis(
                prev_output_v, [1, 2, 3],
                [3, 1, 2]) if is_channel_first else prev_output_v

            kernel_shape = t_w.shape[:2]
            strides = layer_config['strides']
            padding_type = layer_config['padding']
            if padding_type == 'valid':
                paddings = [0, 0]
            else:
                if padding_type == 'same':
                    # P = ((S-1)*W - S + F)/2, with W = the input spatial
                    # size, F = the kernel size, and S = the stride
                    if is_channel_first:
                        spatial_shape = [int(v) for v in prev_output.shape[2:]]
                    else:
                        spatial_shape = [int(v) for v in prev_output.shape[1:-1]]
                    paddings = [
                        int(((strides[i] - 1) * spatial_shape[i] - strides[i] +
                             kernel_shape[i]) / 2) for i in range(2)
                    ]
                elif not isinstance(padding_type, str) and isinstance(
                        padding_type, Iterable):  # explicit paddings given
                    paddings = list(padding_type)
                    if len(paddings) == 1:
                        paddings = [paddings[0], paddings[0]]
                else:
                    print("padding type: {} not supported".format(padding_type))
                    assert False

                # add padding
                if is_channel_first:
                    paddings_per_axis = [[0, 0], [0, 0],
                                         [paddings[0], paddings[0]],
                                         [paddings[1], paddings[1]]]
                else:
                    paddings_per_axis = [[0, 0], [paddings[0], paddings[0]],
                                         [paddings[1], paddings[1]], [0, 0]]

                tr_prev_output_v = np.pad(tr_prev_output_v,
                                          paddings_per_axis,
                                          mode='constant',
                                          constant_values=0)  # zero-padding

            if is_channel_first:
                num_kernels = int(prev_output.shape[1])  # Channel_in
            else:  # channels_last
                assert layer_config['data_format'] == 'channels_last', \
                    layer_config['data_format']
                num_kernels = int(prev_output.shape[-1])  # Channel_in
            assert num_kernels == t_w.shape[2], "{} vs {}".format(
                num_kernels, t_w.shape[2])
            #print ("t_w***", t_w.shape)

            # H x W
            if is_channel_first:
                # the last two (the first two are the number of inputs and
                # the number of kernels (Channel_in))
                input_shape = [int(v) for v in prev_output.shape[2:]]
            else:
                input_shape = [int(v) for v in prev_output.shape[1:-1]]

            # (W1 - F + 2P)/S + 1, W1 = input volume, F = kernel, P = padding
            n_mv_0 = int((input_shape[0] - kernel_shape[0] + 2 * paddings[0]) /
                         strides[0] + 1)  # H_out
            n_mv_1 = int((input_shape[1] - kernel_shape[1] + 2 * paddings[1]) /
                         strides[1] + 1)  # W_out
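            # a worked example with hypothetical numbers: a 28x28 input,
            # a 3x3 kernel, stride 1, and 'same' padding P = 1 give
            # (28 - 3 + 2*1)/1 + 1 = 28, i.e. the spatial size is preserved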

            n_output_channel = t_w.shape[-1]  # Channel_out
            from_front = []
            # move axis for easier computation
            for idx_ol in tqdm(range(n_output_channel)):  # t_w.shape[-1]
                for i in range(n_mv_0):  # H
                    for j in range(n_mv_1):  # W
                        r0 = i * strides[0]
                        c0 = j * strides[1]
                        curr_prev_output_slice = tr_prev_output_v[
                            :, r0:r0 + kernel_shape[0],
                            c0:c0 + kernel_shape[1], :]
                        output = curr_prev_output_slice * t_w[:, :, :, idx_ol]
                        sum_output = np.sum(np.abs(output))
                        output = output / sum_output
                        output = np.nan_to_num(output, posinf=0.)
                        output = np.mean(output, axis=0)
                        from_front.append(output)

            from_front = np.asarray(from_front)
            #from_front.shape: [Channel_out * n_mv_0 * n_mv_1, F1, F2, Channel_in]
            if is_channel_first:
                from_front = from_front.reshape(
                    (n_output_channel, n_mv_0, n_mv_1, kernel_shape[0],
                     kernel_shape[1], int(prev_output.shape[1])))
            else:  # channels_last
                from_front = from_front.reshape(
                    (n_mv_0, n_mv_1, n_output_channel, kernel_shape[0],
                     kernel_shape[1], int(prev_output.shape[-1])))

            # [F1,F2,Channel_in, Channel_out, n_mv_0, n_mv_1]
            # 	or [F1,F2,Channel_in, n_mv_0, n_mv_1,Channel_out]
            from_front = np.moveaxis(from_front, [0, 1, 2], [3, 4, 5])
            # [Channel_out, H_out(n_mv_0), W_out(n_mv_1)]
            from_behind = compute_gradient_to_output(path_to_keras_model,
                                                     idx_to_tl,
                                                     target_X,
                                                     by_batch=True)

            #t1 = time.time()
            # [F1,F2,Channel_in, Channel_out, n_mv_0, n_mv_1] (channels_firs)
            # or [F1,F2,Channel_in,n_mv_0, n_mv_1,Channel_out] (channels_last)
            FIs = from_front * from_behind
            #t2 = time.time()
            #print ('Time for multiplying front and behind results: {}'.format(t2 - t1))
            #FIs = np.mean(np.mean(FIs, axis = -1), axis = -1) # [F1, F2, Channel_in, Channel_out]
            if is_channel_first:
                FIs = np.sum(np.sum(FIs, axis=-1),
                             axis=-1)  # [F1, F2, Channel_in, Channel_out]
            else:
                FIs = np.sum(np.sum(FIs, axis=-2),
                             axis=-2)  # [F1, F2, Channel_in, Channel_out]
            #t3 = time.time()
            #print ('Time for computing mean for FIs: {}'.format(t3 - t2))
            ## Gradient
            # will be [F1, F2, Channel_in, Channel_out]
            grad_scndcr = compute_gradient_to_loss(path_to_keras_model,
                                                   idx_to_tl,
                                                   target_X,
                                                   target_y,
                                                   by_batch=True,
                                                   loss_func=loss_func)
        elif model_util.is_LSTM(lname):
            from scipy.special import expit as sigmoid
            num_weights = 2
            assert len(t_w) == num_weights, t_w
            # t_w_kernel's shape: (input_feature_size, 4 * num_units);
            # t_w_recurr_kernel's shape: (num_units, 4 * num_units)
            t_w_kernel, t_w_recurr_kernel = t_w

            # get the previous output, which will be the input of the lstm
            if model_util.is_Input(type(model.layers[idx_to_tl - 1]).__name__):
                prev_output = target_X
            else:
                # shape = (batch_size, time_steps, input_feature_size)
                t_model = Model(inputs=model.input,
                                outputs=model.layers[idx_to_tl - 1].output)
                prev_output = t_model.predict(target_X)

            assert len(prev_output.shape) == 3, prev_output.shape
            # the dimension of the features processed by the model
            num_features = prev_output.shape[-1]

            num_units = t_w_recurr_kernel.shape[0]
            assert t_w_kernel.shape[0] == num_features, \
                "{} (kernel) vs {} (input)".format(t_w_kernel.shape[0],
                                                   num_features)

            # hidden state and cell state sequences computation
            # generate a temporary model that only contains the target lstm layer
            # but with the modification to return sequences of hidden and cell states
            temp_lstm_layer_inst = lstm_layer.LSTM_Layer(
                model.layers[idx_to_tl])
            hstates_sequence, cell_states_sequence = temp_lstm_layer_inst.gen_lstm_layer_from_another(
                prev_output)
            init_hstates, init_cell_states = lstm_layer.LSTM_Layer.get_initial_state(
                model.layers[idx_to_tl])
            if init_hstates is None:
                init_hstates = np.zeros((len(target_X), num_units))
            if init_cell_states is None:
                # shape = (batch_size, num_units)
                init_cell_states = np.zeros((len(target_X), num_units))

            # shape = (batch_size, time_steps + 1, num_units)
            hstates_sequence = np.insert(hstates_sequence,
                                         0,
                                         init_hstates,
                                         axis=1)
            # shape = (batch_size, time_steps + 1, num_units)
            cell_states_sequence = np.insert(cell_states_sequence,
                                             0,
                                             init_cell_states,
                                             axis=1)
            # shape = (4 * num_units,)
            bias = model.layers[idx_to_tl].get_weights()[-1]
            indices_to_each_gates = np.array_split(np.arange(num_units * 4), 4)
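            # e.g., with a hypothetical num_units = 2, np.array_split(np.arange(8), 4)
            # yields [0, 1], [2, 3], [4, 5], [6, 7]: the columns of the input,
            # forget, cand, and output gates, respectively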

            ## prepare all the intermediate outputs and the variables that will be used later
            idx_to_input_gate = 0
            idx_to_forget_gate = 1
            idx_to_cand_gate = 2
            idx_to_output_gate = 3

            # for the kernel, the per-gate weight shape is (input_feature_size, num_units);
            # for the recurrent kernel, (num_units, num_units); for the bias, (num_units,)
            # the shape of all the intermediate outputs is (batch_size, time_steps, num_units)

            # input gate
            t_w_kernel_I = t_w_kernel[:, indices_to_each_gates[idx_to_input_gate]]
            t_w_recurr_kernel_I = t_w_recurr_kernel[:, indices_to_each_gates[idx_to_input_gate]]
            bias_I = bias[indices_to_each_gates[idx_to_input_gate]]
            I = sigmoid(
                np.dot(prev_output, t_w_kernel_I) +
                np.dot(hstates_sequence[:, :-1, :], t_w_recurr_kernel_I) + bias_I)

            # forget gate
            t_w_kernel_F = t_w_kernel[:, indices_to_each_gates[idx_to_forget_gate]]
            t_w_recurr_kernel_F = t_w_recurr_kernel[:, indices_to_each_gates[idx_to_forget_gate]]
            bias_F = bias[indices_to_each_gates[idx_to_forget_gate]]
            F = sigmoid(
                np.dot(prev_output, t_w_kernel_F) +
                np.dot(hstates_sequence[:, :-1, :], t_w_recurr_kernel_F) + bias_F)

            # candidate (cell input) gate
            t_w_kernel_C = t_w_kernel[:, indices_to_each_gates[idx_to_cand_gate]]
            t_w_recurr_kernel_C = t_w_recurr_kernel[:, indices_to_each_gates[idx_to_cand_gate]]
            bias_C = bias[indices_to_each_gates[idx_to_cand_gate]]
            C = np.tanh(
                np.dot(prev_output, t_w_kernel_C) +
                np.dot(hstates_sequence[:, :-1, :], t_w_recurr_kernel_C) + bias_C)

            # output gate; shape = (batch_size, time_steps, num_units)
            t_w_kernel_O = t_w_kernel[:, indices_to_each_gates[idx_to_output_gate]]
            t_w_recurr_kernel_O = t_w_recurr_kernel[:, indices_to_each_gates[idx_to_output_gate]]
            bias_O = bias[indices_to_each_gates[idx_to_output_gate]]
            O = sigmoid(
                np.dot(prev_output, t_w_kernel_O) +
                np.dot(hstates_sequence[:, :-1, :], t_w_recurr_kernel_O) + bias_O)

            # set the arguments used to compute the forward impact of the
            # neural weights in these four gates
            t_w_kernels = {
                'input': t_w_kernel_I,
                'forget': t_w_kernel_F,
                'cand': t_w_kernel_C,
                'output': t_w_kernel_O
            }
            t_w_recurr_kernels = {
                'input': t_w_recurr_kernel_I,
                'forget': t_w_recurr_kernel_F,
                'cand': t_w_recurr_kernel_C,
                'output': t_w_recurr_kernel_O
            }

            consts = {}
            consts['input'] = get_constants('input', F, I, C, O,
                                            cell_states_sequence)
            consts['forget'] = get_constants('forget', F, I, C, O,
                                             cell_states_sequence)
            consts['cand'] = get_constants('cand', F, I, C, O,
                                           cell_states_sequence)
            consts['output'] = get_constants('output', F, I, C, O,
                                             cell_states_sequence)

            # from_front's shape = (num_units, (num_features + num_units) * 4)
            # gate_orders = ['input', 'forget', 'cand', 'output']
            from_front, gate_orders = lstm_local_front_FI_for_target_all(
                prev_output, hstates_sequence[:, :-1, :], num_units,
                t_w_kernels, t_w_recurr_kernels, consts)

            from_front = from_front.T  # ((num_features + num_units) * 4, num_units)
            N_k_rk_w = int(from_front.shape[0] / 4)
            assert N_k_rk_w == num_features + num_units, "{} vs {}".format(
                N_k_rk_w, num_features + num_units)

            ## from behind
            from_behind = compute_gradient_to_output(
                path_to_keras_model, idx_to_tl, target_X,
                by_batch=True)  # shape = (num_units,)

            #t1 = time.time()
            # shape = (N_k_rk_w, num_units)
            FIs_combined = from_front * from_behind
            #print ("Shape", from_behind.shape, FIs_combined.shape)
            #t2 = time.time()
            #print ('Time for multiplying front and behind results: {}'.format(t2 - t1))

            # reshaping
            # t_w_kernel's shape: (num_features, num_units * 4)
            FIs_kernel = np.zeros(t_w_kernel.shape)
            # t_w_recurr_kernel's shape: (num_units, num_units * 4)
            FIs_recurr_kernel = np.zeros(t_w_recurr_kernel.shape)
            # from (4 * N_k_rk_w, num_units) to 4 x (N_k_rk_w, num_units)
            for i, FI_p_gate in enumerate(np.array_split(FIs_combined, 4, axis=0)):
                # FI_p_gate (shape = (N_k_rk_w, num_units)) is split into
                # (num_features, num_units) for the kernel and (num_units, num_units)
                # for the recurrent kernel, since the front-FI rows stack the
                # kernel weights before the recurrent kernel weights
                indices_to_features = np.arange(num_features)
                indices_to_units = np.arange(num_units) + num_features
                # shape = (num_features, num_units)
                FIs_kernel[:, i * num_units:(i + 1) * num_units] = \
                    FI_p_gate[indices_to_features]
                # shape = (num_units, num_units)
                FIs_recurr_kernel[:, i * num_units:(i + 1) * num_units] = \
                    FI_p_gate[indices_to_units]

            #t3 = time.time()
            # [(num_features, num_units * 4), (num_units, num_units * 4)]
            FIs = [FIs_kernel, FIs_recurr_kernel]
            #print ('Time for formatting: {}'.format(t3 - t2))

            ## Gradient
            grad_scndcr = compute_gradient_to_loss(path_to_keras_model,
                                                   idx_to_tl,
                                                   target_X,
                                                   target_y,
                                                   by_batch=True,
                                                   loss_func=loss_func)

        else:
            print("Currently not supported: {} (should have been filtered out earlier)".
                  format(lname))
            import sys
            sys.exit()

        #t2 = time.time()
        #print ("Time for computing cost for the {} layer: {}".format(idx_to_tl, t2 - t1))
        if not model_util.is_LSTM(target_weights[idx_to_tl][1]):
            # only one weight variable to process
            pairs = np.asarray([grad_scndcr.flatten(), FIs.flatten()]).T
            total_cands[idx_to_tl] = {'shape': FIs.shape, 'costs': pairs}
        else:  # LSTM: one pair array per weight matrix (kernel, recurrent kernel)
            total_cands[idx_to_tl] = {'shape': [], 'costs': []}
            for _FIs, _grad_scndcr in zip(FIs, grad_scndcr):
                pairs = np.asarray([_grad_scndcr.flatten(), _FIs.flatten()]).T
                total_cands[idx_to_tl]['shape'].append(_FIs.shape)
                total_cands[idx_to_tl]['costs'].append(pairs)

    #t3 = time.time()
    #print ("Time for computing total costs: {}".format(t3 - t0))
    return total_cands
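
A self-contained toy of the dense-layer "forward impact" front term computed above: each weight's absolute contribution prev_output * w[:, idx] is L1-normalised per input sample and averaged over the batch. All values below are made up.

import numpy as np
from sklearn.preprocessing import Normalizer
norm_scaler = Normalizer(norm="l1")
prev_output = np.abs(np.random.randn(16, 4))  # activations: (batch, in_features)
t_w = np.random.randn(4, 3)                   # dense kernel: (in_features, out_features)
from_front = []
for idx in range(t_w.shape[-1]):
    contrib = np.abs(prev_output * t_w[:, idx])   # contribution to output unit idx
    contrib = norm_scaler.fit_transform(contrib)  # L1-normalise each sample's row
    from_front.append(contrib.mean(axis=0))       # average over the batch
from_front = np.asarray(from_front).T             # (in_features, out_features)
print(from_front.shape)  # (4, 3)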