# load the skeleton normalisation --Lio didn't normalise video input, but should we?
import cPickle
f = open('CNN_normalization.pkl','rb')
CNN_normalization = cPickle.load(f)
Mean_CNN = CNN_normalization ['Mean_CNN']
Std_CNN = CNN_normalization['Std_CNN']

# customized data loader for both video module and skeleton module
loader = DataLoader_with_skeleton_normalisation(src, tr.batch_size, Mean_CNN, Std_CNN) # Lio changed it to read from HDF5 files

####################################################################
# 3DCNN for video module
#################################################################### 
# we load the CNN parameteres here
video_cnn = conv3d_chalearn(x, use, lr, batch, net, reg, drop, mom, tr, res_dir)

#####################################################################
# fuse the ConvNet output with skeleton output  -- need to change here
######################################################################  
out = video_cnn.out
# some activation inspection
insp =  []
for insp_temp in video_cnn.insp:    insp.append(insp_temp)
insp = T.stack(insp)

# softmax layer
layers.append(LogRegr(out, rng=tr.rng, n_in=net.hidden_vid, 
    W_scale=net.W_scale[-1], b_scale=net.b_scale[-1], n_out=net.n_class))

    def __init__(self, res_dir, load_path):

        self.layers = []  # only contain the layers from fusion
        self.insp_mean = []  # inspection for each layer mean activation
        self.insp_std = []  # inspection for each layer std activation
        self.params = []  # parameter list
        self.idx_mini = T.lscalar(name="idx_mini")  # minibatch index
        self.idx_micro = T.lscalar(name="idx_micro")  # microbatch index

        # symbolic variables
        self.x = ndtensor(len(tr.in_shape))(name='x')  # video input
        self.y = T.ivector(name='y')  # labels
        # symbolic variables
        self.x_skeleton = ndtensor(len(tr._skeleon_in_shape))(
            name='x_skeleton')  # video input

        if use.drop:
            drop.p_vid = shared(float32(drop.p_vid_val))
            drop.p_hidden = shared(float32(drop.p_hidden_val))
        video_cnn = conv3d_chalearn(self.x, use, lr, batch, net, reg, drop, mom, \
                                             tr, res_dir, load_path)

        dbn = GRBM_DBN(numpy_rng=random.RandomState(123), n_ins=891, \
                hidden_layers_sizes=[2000, 2000, 1000], n_outs=101, input_x=self.x_skeleton, label=self.y )
        # we load the pretrained DBN skeleton parameteres here
        if use.load == True:
            dbn.load(os.path.join(load_path, 'dbn_2015-06-19-11-34-24.npy'))

        #####################################################################
        # fuse the ConvNet output with skeleton output  -- need to change here
        ######################################################################
        out = T.concatenate([video_cnn.out, dbn.sigmoid_layers[-1].output],
                            axis=1)

        #####################################################################
        # wudi add the mean and standard deviation of the activation values to exam the neural net
        # Reference: Understanding the difficulty of training deep feedforward neural networks, Xavier Glorot, Yoshua Bengio
        #####################################################################
        insp_mean_list = []
        insp_std_list = []
        insp_mean_list.extend(dbn.out_mean)
        insp_mean_list.extend(video_cnn.insp_mean)
        insp_std_list.extend(dbn.out_std)
        insp_std_list.extend(video_cnn.insp_std)

        ######################################################################
        #MLP layer
        self.layers.append(
            HiddenLayer(out,
                        n_in=net.hidden,
                        n_out=net.hidden,
                        rng=tr.rng,
                        W_scale=net.W_scale[-1],
                        b_scale=net.b_scale[-1],
                        activation=net.activation))
        out = self.layers[-1].output

        if tr.inspect:
            insp_mean_list.extend([T.mean(out)])
            insp_std_list.extend([T.std(out)])
        self.insp_mean = T.stacklists(insp_mean_list)
        self.insp_std = T.stacklists(insp_std_list)

        if use.drop:
            out = DropoutLayer(out, rng=tr.rng, p=drop.p_hidden).output

        ######################################################################
        # softmax layer
        self.layers.append(
            LogRegr(out,
                    rng=tr.rng,
                    n_in=net.hidden,
                    W_scale=net.W_scale[-1],
                    b_scale=net.b_scale[-1],
                    n_out=net.n_class))

        self.p_y_given_x = self.layers[-1].p_y_given_x
        ######################################################################
        # cost function
        self.cost = self.layers[-1].negative_log_likelihood(self.y)

        # function computing the number of errors
        self.errors = self.layers[-1].errors(self.y)

        # parameter list
        for layer in video_cnn.layers:
            self.params.extend(layer.params)

        # pre-trained dbn parameter last layer  (W, b) doesn't need to incorporate into the params
        # for calculating the gradient
        self.params.extend(dbn.params[:-2])

        # MLP hidden layer params
        self.params.extend(self.layers[-2].params)
        # softmax layer params
        self.params.extend(self.layers[-1].params)
        # number of inputs for MLP = (# maps last stage)*(# convnets)*(resulting video shape) + trajectory size
        print 'MLP:', video_cnn.n_in_MLP, "->", net.hidden_penultimate, "+", net.hidden_traj, '->', \
           net.hidden, '->', net.hidden, '->', net.n_class, ""

        return
Std_CNN = CNN_normalization['Std_CNN']

# customized data loader for both video module and skeleton module
loader = DataLoader_with_skeleton_normalisation(
    src, tr.batch_size, Mean_CNN,
    Std_CNN)  # Lio changed it to read from HDF5 files

####################################################################
# 3DCNN for video module
####################################################################
#if u have already trained the network before, the might pause for some reason, u can load the updated paramters
# we load the CNN parameteres here
#use.load = True
#load_path = '/home/zhiquan/fancy/meterials/chalearn2014_fancy_data/result_temp/3dcnn/try/70.9% 2018.05.05.02.28.21/'
#video_cnn = conv3d_chalearn(x, use, lr, batch, net, reg, drop, mom, tr, res_dir,load_path)
video_cnn = conv3d_chalearn(x, use, lr, batch, net, reg, drop, mom, tr,
                            res_dir)

#####################################################################
# fuse the ConvNet output with skeleton output  -- need to change here
######################################################################
out = video_cnn.out
# some activation inspection
insp = []
for insp_temp in video_cnn.insp_mean:
    insp.append(insp_temp)
insp = T.stack(insp)

# softmax layer
layers.append(
    LogRegr(out,
            rng=tr.rng,
    def __init__(self, res_dir, load_path):
        
        self.layers = [] # only contain the layers from fusion
        self.insp_mean = [] # inspection for each layer mean activation
        self.insp_std = []  # inspection for each layer std activation
        self.params = [] # parameter list
        self.idx_mini = T.lscalar(name="idx_mini") # minibatch index
        self.idx_micro = T.lscalar(name="idx_micro") # microbatch index

        # symbolic variables
        self.x = ndtensor(len(tr.in_shape))(name = 'x') # video input
        self.y = T.ivector(name = 'y') # labels
        # symbolic variables
        self.x_skeleton = ndtensor(len(tr._skeleon_in_shape))(name = 'x_skeleton') # video input

        if use.drop: 
            drop.p_vid = shared(float32(drop.p_vid_val) )
            drop.p_hidden = shared(float32(drop.p_hidden_val))
        video_cnn = conv3d_chalearn(self.x, use, lr, batch, net, reg, drop, mom, \
                                             tr, res_dir, load_path)

        dbn = GRBM_DBN(numpy_rng=random.RandomState(123), n_ins=891, \
                hidden_layers_sizes=[2000, 2000, 1000], n_outs=101, input_x=self.x_skeleton, label=self.y ) 
        # we load the pretrained DBN skeleton parameteres here
        if use.load == True: dbn.load(os.path.join(load_path,'dbn_2015-06-19-11-34-24.npy'))


        #####################################################################
        # fuse the ConvNet output with skeleton output  -- need to change here
        ######################################################################  
        out = T.concatenate([video_cnn.out, dbn.sigmoid_layers[-1].output], axis=1)

        #####################################################################
        # wudi add the mean and standard deviation of the activation values to exam the neural net
        # Reference: Understanding the difficulty of training deep feedforward neural networks, Xavier Glorot, Yoshua Bengio
        #####################################################################
        insp_mean_list = []
        insp_std_list = []
        insp_mean_list.extend(dbn.out_mean)
        insp_mean_list.extend(video_cnn.insp_mean)
        insp_std_list.extend(dbn.out_std)
        insp_std_list.extend(video_cnn.insp_std)

        ######################################################################
        #MLP layer                
        self.layers.append(HiddenLayer(out, n_in=net.hidden, n_out=net.hidden, rng=tr.rng, 
            W_scale=net.W_scale[-1], b_scale=net.b_scale[-1], activation=net.activation))
        out = self.layers[-1].output

        if tr.inspect: 
            insp_mean_list.extend([T.mean(out)])
            insp_std_list.extend([T.std(out)])
        self.insp_mean = T.stacklists(insp_mean_list)
        self.insp_std = T.stacklists(insp_std_list)


        if use.drop: out = DropoutLayer(out, rng=tr.rng, p=drop.p_hidden).output

        ######################################################################
        # softmax layer
        self.layers.append(LogRegr(out, rng=tr.rng, n_in=net.hidden, 
            W_scale=net.W_scale[-1], b_scale=net.b_scale[-1], n_out=net.n_class))

        self.p_y_given_x = self.layers[-1].p_y_given_x
        ######################################################################
        # cost function
        self.cost = self.layers[-1].negative_log_likelihood(self.y)

        # function computing the number of errors
        self.errors = self.layers[-1].errors(self.y)

        # parameter list
        for layer in video_cnn.layers: 
            self.params.extend(layer.params)

        # pre-trained dbn parameter last layer  (W, b) doesn't need to incorporate into the params
        # for calculating the gradient
        self.params.extend(dbn.params[:-2])

        # MLP hidden layer params
        self.params.extend(self.layers[-2].params)
        # softmax layer params
        self.params.extend(self.layers[-1].params)
        # number of inputs for MLP = (# maps last stage)*(# convnets)*(resulting video shape) + trajectory size
        print 'MLP:', video_cnn.n_in_MLP, "->", net.hidden_penultimate, "+", net.hidden_traj, '->', \
           net.hidden, '->', net.hidden, '->', net.n_class, ""

        return