Example #1
    def setUp(self):

        u_dim = 2
        y_dim = 3
        ts_length = 20
        sequences_no = 3
        #U, Y = generate_data( sequences_no, ts_length, u_dim = u_dim, y_dim = y_dim)
        U_2, Y_2 = generate_data(sequences_no * 2,
                                 ts_length,
                                 u_dim=u_dim,
                                 y_dim=y_dim)

        Q = 3  # 200 # Inducing points num. Take a small number for speed

        back_cstr = True
        inference_method = 'svi'
        minibatch_inference = True

        #        # 1 layer:
        #        wins = [0, win_out] # 0-th is output layer
        #        nDims = [out_train.shape[1],1]

        # 2 layers:
        win_out = 3
        win_in = 2
        wins = [0, win_out, win_out]
        nDims = [y_dim, 2, 3]

        MLP_dims = [3, 2]  # 300, 200 in the full run; kept small for speed.
        #print("Input window:  ", win_in)
        #print("Output window:  ", win_out)

        data_streamer = RandomPermutationDataStreamer(Y_2, U_2)
        minibatch_index, minibatch_indices, Y_mb, X_mb = \
            data_streamer.next_minibatch()

        m_1 = autoreg.DeepAutoreg_new(
            wins,
            Y_mb,
            U=X_mb,
            U_win=win_in,
            num_inducing=Q,
            back_cstr=back_cstr,
            MLP_dims=MLP_dims,
            nDims=nDims,
            init='Y',  # how to initialize hidden states means
            X_variance=0.05,  # how to initialize hidden states variances
            inference_method=inference_method,  # Inference method
            minibatch_inference=minibatch_inference,
            mb_inf_tot_data_size=sequences_no * 2,
            mb_inf_init_xs_means='all',
            mb_inf_init_xs_vars='all',
            mb_inf_sample_idxes=minibatch_indices,
            # 1 layer:
            # kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True),
            #        GPy.kern.RBF(win_in + win_out,ARD=True,inv_l=True)] )

            # 2 layers:
            kernels=[
                GPy.kern.RBF(win_out * nDims[1], ARD=True, inv_l=True),
                GPy.kern.RBF(win_out * nDims[1] + win_out * nDims[2],
                             ARD=True,
                             inv_l=True),
                GPy.kern.RBF(win_out * nDims[2] + win_in * u_dim,
                             ARD=True,
                             inv_l=True)
            ])

        self.model_1 = m_1
        self.model_1._trigger_params_changed()

        self.mll_1_1 = float(self.model_1._log_marginal_likelihood)
        # np.hstack needs a sequence, not a generator
        self.g_mll_1_1 = np.hstack([
            self.model_1[pp.replace(' ', '_')].gradient.flatten()
            for pp in self.model_1.parameter_names()
            if ('init_Xs' not in pp) and ('X_var' not in pp)
        ]).copy()
        #self.g_mll_1_1 = self.model_1._log_likelihood_gradients().copy()

        self.model_2 = copy.deepcopy(m_1)

        self.model_1.set_DataStreamer(data_streamer)
        self.model_1._trigger_params_changed()

        self.model_1._next_minibatch()
        self.model_1._trigger_params_changed()

        self.mll_1_2 = float(self.model_1._log_marginal_likelihood)
        self.g_mll_1_2 = np.hstack([
            self.model_1[pp.replace(' ', '_')].gradient.flatten()
            for pp in self.model_1.parameter_names()
            if ('init_Xs' not in pp) and ('X_var' not in pp)
        ]).copy()
        #self.g_mll_1_2 = self.model_1._log_likelihood_gradients().copy()

        data_streamer_1 = StdMemoryDataStreamer(Y_2, U_2, sequences_no)

        self.model_1.set_DataStreamer(data_streamer_1)
        self.model_1._next_minibatch()
        self.model_1._trigger_params_changed()

        self.mll_2_1 = float(self.model_1._log_marginal_likelihood)

        # exclude 'init_Xs' and 'X_var' from gradients
        self.g_mll_2_1 = np.hstack([
            self.model_1[pp.replace(' ', '_')].gradient.flatten()
            for pp in self.model_1.parameter_names()
            if ('init_Xs' not in pp) and ('X_var' not in pp)
        ]).copy()

        #import pdb; pdb.set_trace()

        self.model_1._next_minibatch()
        self.model_1._trigger_params_changed()

        self.mll_2_2 = float(self.model_1._log_marginal_likelihood)

        # exclude 'init_Xs' and 'X_var' from gradients
        self.g_mll_2_2 = np.hstack([
            self.model_1[pp.replace(' ', '_')].gradient.flatten()
            for pp in self.model_1.parameter_names()
            if ('init_Xs' not in pp) and ('X_var' not in pp)
        ]).copy()
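
    # Hypothetical refactoring sketch (not in the original file): the
    # gradient-stacking idiom above repeats four times in setUp and could
    # be factored out into a helper like this one.
    @staticmethod
    def _collect_gradients(model):
        """Stack the gradients of all parameters except the minibatch-local
        'init_Xs' and 'X_var' ones (mirrors the inline code above)."""
        return np.hstack([
            model[pp.replace(' ', '_')].gradient.flatten()
            for pp in model.parameter_names()
            if ('init_Xs' not in pp) and ('X_var' not in pp)
        ]).copy()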
Example #2
    def setUp(self):

        print("ho-ho")

        u_dim = 2
        y_dim = 3
        U, Y = generate_data(3, 20, u_dim=2, y_dim=3)

        Q = 3  # 200 # Inducing points num. Take a small number for speed

        back_cstr = True
        inference_method = 'svi'
        minibatch_inference = True

        #        # 1 layer:
        #        wins = [0, win_out] # 0-th is output layer
        #        nDims = [out_train.shape[1],1]

        # 2 layers:
        win_out = 3
        win_in = 2
        wins = [0, win_out, win_out]
        nDims = [y_dim, 2, 3]

        MLP_dims = [3, 2]  # 300, 200 in the full run; kept small for speed.
        #print("Input window:  ", win_in)
        #print("Output window:  ", win_out)

        m = autoreg.DeepAutoreg_new(
            wins,
            Y,
            U=U,
            U_win=win_in,
            num_inducing=Q,
            back_cstr=back_cstr,
            MLP_dims=MLP_dims,
            nDims=nDims,
            init='Y',  # how to initialize hidden states means
            X_variance=0.05,  # how to initialize hidden states variances
            inference_method=inference_method,  # Inference method
            minibatch_inference=minibatch_inference,
            mb_inf_tot_data_size=len(Y),
            mb_inf_init_xs_means='one',
            mb_inf_init_xs_vars='one',
            mb_inf_sample_idxes=range(len(Y)),
            # 1 layer:
            # kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True),
            #        GPy.kern.RBF(win_in + win_out,ARD=True,inv_l=True)] )

            # 2 layers:
            kernels=[
                GPy.kern.RBF(win_out * nDims[1], ARD=True, inv_l=True),
                GPy.kern.RBF(win_out * nDims[1] + win_out * nDims[2],
                             ARD=True,
                             inv_l=True),
                GPy.kern.RBF(win_out * nDims[2] + win_in * u_dim,
                             ARD=True,
                             inv_l=True)
            ])

        self.model_1 = m
        self.model_1._trigger_params_changed()

        self.model_2 = copy.deepcopy(m)

        data_streamer = TrivialDataStreamer(Y, U)
        self.model_2.set_DataStreamer(data_streamer)
        self.model_2._trigger_params_changed()
        print("ho-ho")
Example #3
def svi_test_1(debug=False,
               train_model=False,
               model=1,
               second_model_svi=False,
               input_scaling_factor=1):
    """
    After new svi classes are implemented the first test is chaking that non-svi
    inference is not broken.
    
    Basically, two similar models are created using corresponding classes and
    the results are tested. This case is tested when second_model_svi= False.
    
    We can also test 
    """
    experiment_path = '/Users/grigoral/work/code/RGP/examples'

    #data = load_data()
    data = load_data_xyz()

    # In[7]:

    y = data['Y']
    u = data['Yxyz_list']
    u_flat = np.vstack(u)

    lbls = data['lbls']
    data_out_train = y

    # Question: why are the first 3 dimensions removed? # 44 and 56: output variable is 0.
    data_out_train = y[:, 3:]
    data_out_mean = data_out_train.mean(axis=0)
    data_out_std = data_out_train.std(axis=0)
    data_out_train = (y[:, 3:] - data_out_mean) / data_out_std
    #data_out_train_list = [data_out_train[np.where(lbls[:,i]==1)[0]][1:] for i in range(lbls.shape[1])]
    data_out_train_list = [
        data_out_train[np.where(lbls[:, i] == 1)[0]]
        for i in range(lbls.shape[1])
    ]

    # Create controls
    #data_in_train_list = [y[np.where(lbls[:,i]==1)[0]][:,2][1:] - y[np.where(lbls[:,i]==1)[0]][:,2][:-1] for i in range(lbls.shape[1])]
    #from scipy.ndimage.filters import gaussian_filter1d
    #data_in_train_list = [np.ones(d.shape+(1,))*d.mean() for d in data_in_train_list]

    ##data_in_train_list = [gaussian_filter1d(d,8.)[:,None] for d in data_in_train_list]
    ##data_in_train_list = [np.vstack([d[:10],d]) for d in data_in_train_list]

    data_in_train_list = u
    u_flat_mean = u_flat.mean(axis=0)
    u_flat_std = u_flat.std(axis=0)
    data_in_train = (u_flat - u_flat_mean) / u_flat_std

    #data_in_train_list = u
    data_in_train_list = [(d - u_flat_mean) / u_flat_std
                          for d in data_in_train_list]

    # In[8]:

    #    print data_in_train_list[0].shape
    #    print data_out_train_list[0].shape
    #
    #    for i in range(len(data_in_train_list)):
    #        plt.figure()
    #        plt.plot(data_in_train_list[i], 'x-')
    #        plt.title(i)
    #        print data_in_train_list[i].shape[0]

    # In[9]:

    print(y.shape)
    print(data_out_train.shape)
    print(u_flat.shape)
    print(data_in_train.shape)

    # In[10]:

    if debug:
        import pdb
        pdb.set_trace()
    ytest = data['Ytest']
    lblstest = data['lblstest']
    u = data['Yxyz_list_test']

    #data_out_test = ytest
    data_out_test = ytest[:, 3:]

    data_out_test = (ytest[:, 3:] - data_out_mean) / data_out_std

    #data_out_test_list = [data_out_test[np.where(lblstest[:,i]==1)[0]][1:] for i in range(lblstest.shape[1])]
    data_out_test_list = [
        data_out_test[np.where(lblstest[:, i] == 1)[0]]
        for i in range(lblstest.shape[1])
    ]

    # Create controls
    #data_in_test_list = [ytest[np.where(lblstest[:,i]==1)[0]][:,2][1:] - ytest[np.where(lblstest[:,i]==1)[0]][:,2][:-1] for i in range(lblstest.shape[1])]
    #data_in_test_list = [np.ones(d.shape+(1,))*d.mean() for d in data_in_test_list]

    #data_in_test_list = u

    data_in_test_list = u
    #data_in_test = (u_flat-u_flat_mean)/u_flat_std
    data_in_test_list = [(d - u_flat_mean) / u_flat_std for d in u]

    # ## Fit a model without NN-constraint

    # In[11]:

    # Down-scaling the input signals
    #data_in_train_list = [d*0.1 for d in data_in_train_list]
    #data_in_test_list = [d*0.1 for d in data_in_test_list]
    #data_in_train = data_in_train*0.1

    # In[13]:

    if debug:
        import pdb
        pdb.set_trace()
    #=============================
    # Initialize a model
    #=============================

    Q = 100  # 200
    win_in = 20  # 20
    win_out = 20  # 20
    use_controls = True
    back_cstr = False

    if input_scaling_factor is None:
        input_scaling_factor = 1

    if model == 1:
        # create the model
        if use_controls:
            #m = autoreg.DeepAutoreg([0, win_out], data_out_train, U=data_in_train, U_win=win_in, X_variance=0.05,
            #                    num_inducing=Q, back_cstr=back_cstr, MLP_dims=[300,200], nDims=[data_out_train.shape[1],1],
            #                     kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True, useGPU=True),
            #                     GPy.kern.RBF(win_out+win_in,ARD=True,inv_l=True, useGPU=True)])

            # Model without lists
            #        m = autoreg.DeepAutoreg([0, win_out, win_out], data_out_train, U=data_in_train, U_win=win_in, X_variance=0.05,
            #                            num_inducing=Q, back_cstr=back_cstr, MLP_dims=[300,200], nDims=[data_out_train.shape[1],1,1],
            #                             kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True, useGPU=False),
            #                             GPy.kern.RBF(win_out+win_out,ARD=True,inv_l=True, useGPU=False),
            #                             GPy.kern.RBF(win_out+win_in,ARD=True,inv_l=True, useGPU=False)])

            # Model with lists
            m = autoreg.DeepAutoreg(
                [0, win_out, win_out],
                data_out_train_list,
                U=[d * input_scaling_factor for d in data_in_train_list],
                U_win=win_in,
                X_variance=0.05,
                num_inducing=Q,
                back_cstr=back_cstr,
                MLP_dims=[300, 200],
                nDims=[data_out_train.shape[1], 1, 1],
                kernels=[
                    GPy.kern.RBF(win_out, ARD=True, inv_l=True, useGPU=False),
                    GPy.kern.RBF(win_out + win_out,
                                 ARD=True,
                                 inv_l=True,
                                 useGPU=False),
                    GPy.kern.RBF(win_out + win_in,
                                 ARD=True,
                                 inv_l=True,
                                 useGPU=False)
                ])

            if not second_model_svi:
                m_svi = autoreg.DeepAutoreg_new(
                    [0, win_out, win_out],
                    data_out_train_list,
                    U=[d * input_scaling_factor for d in data_in_train_list],
                    U_win=win_in,
                    X_variance=0.05,
                    num_inducing=Q,
                    back_cstr=back_cstr,
                    MLP_dims=[300, 200],
                    nDims=[data_out_train.shape[1], 1, 1],
                    kernels=[
                        GPy.kern.RBF(win_out,
                                     ARD=True,
                                     inv_l=True,
                                     useGPU=False),
                        GPy.kern.RBF(win_out + win_out,
                                     ARD=True,
                                     inv_l=True,
                                     useGPU=False),
                        GPy.kern.RBF(win_out + win_in,
                                     ARD=True,
                                     inv_l=True,
                                     useGPU=False)
                    ])

                m_svi.param_array[:] = m.param_array
                m_svi._trigger_params_changed()

            else:
                m_svi = autoreg.DeepAutoreg_new(
                    [0, win_out, win_out],
                    data_out_train_list,
                    U=[d * input_scaling_factor for d in data_in_train_list],
                    U_win=win_in,
                    X_variance=0.05,
                    num_inducing=Q,
                    back_cstr=back_cstr,
                    MLP_dims=[300, 200],
                    nDims=[data_out_train.shape[1], 1, 1],
                    kernels=[
                        GPy.kern.RBF(win_out,
                                     ARD=True,
                                     inv_l=True,
                                     useGPU=False),
                        GPy.kern.RBF(win_out + win_out,
                                     ARD=True,
                                     inv_l=True,
                                     useGPU=False),
                        GPy.kern.RBF(win_out + win_in,
                                     ARD=True,
                                     inv_l=True,
                                     useGPU=False)
                    ],
                    inference_method='svi')

            # used with back_cstr=True at the end of the notebook
    #        m = autoreg.DeepAutoreg([0, win_out], data_out_train_list, U=[d*0.1 for d in data_in_train_list], U_win=win_in, X_variance=0.05,
    #                        num_inducing=Q, back_cstr=back_cstr, MLP_dims=[500,200], nDims=[data_out_train.shape[1],1],
    #                         kernels=[GPy.kern.MLP(win_out,bias_variance=10.),
    #                         GPy.kern.MLP(win_out+win_in,bias_variance=10.)])
        else:
            m = autoreg.DeepAutoreg([0, win_out],
                                    data_in_train,
                                    U=None,
                                    U_win=win_in,
                                    X_variance=0.05,
                                    num_inducing=Q,
                                    back_cstr=back_cstr,
                                    MLP_dims=[200, 100],
                                    nDims=[data_out_train.shape[1], 1],
                                    kernels=[
                                        GPy.kern.RBF(win_out,
                                                     ARD=True,
                                                     inv_l=True,
                                                     useGPU=False),
                                        GPy.kern.RBF(win_out,
                                                     ARD=True,
                                                     inv_l=True,
                                                     useGPU=False)
                                    ])

            if not second_model_svi:
                m_svi = autoreg.DeepAutoreg_new(
                    [0, win_out, win_out],
                    data_out_train_list,
                    U=[d * input_scaling_factor for d in data_in_train_list],
                    U_win=win_in,
                    X_variance=0.05,
                    num_inducing=Q,
                    back_cstr=back_cstr,
                    MLP_dims=[300, 200],
                    nDims=[data_out_train.shape[1], 1, 1],
                    kernels=[
                        GPy.kern.RBF(win_out,
                                     ARD=True,
                                     inv_l=True,
                                     useGPU=False),
                        GPy.kern.RBF(win_out + win_out,
                                     ARD=True,
                                     inv_l=True,
                                     useGPU=False),
                        GPy.kern.RBF(win_out + win_in,
                                     ARD=True,
                                     inv_l=True,
                                     useGPU=False)
                    ])

                m_svi.param_array[:] = m.param_array
                m_svi._trigger_params_changed()

            else:
                m_svi = autoreg.DeepAutoreg_new(
                    [0, win_out, win_out],
                    data_out_train_list,
                    U=[d * input_scaling_factor for d in data_in_train_list],
                    U_win=win_in,
                    X_variance=0.05,
                    num_inducing=Q,
                    back_cstr=back_cstr,
                    MLP_dims=[300, 200],
                    nDims=[data_out_train.shape[1], 1, 1],
                    kernels=[
                        GPy.kern.RBF(win_out,
                                     ARD=True,
                                     inv_l=True,
                                     useGPU=False),
                        GPy.kern.RBF(win_out + win_out,
                                     ARD=True,
                                     inv_l=True,
                                     useGPU=False),
                        GPy.kern.RBF(win_out + win_in,
                                     ARD=True,
                                     inv_l=True,
                                     useGPU=False)
                    ],
                    inference_method='svi')

    elif model == 2:
        # Question: there is no b term in the MLP regularization.
        #=============================
        # Model with NN-constraint
        #=============================
        Q = 500
        win_in = 20
        win_out = 20

        use_controls = True
        back_cstr = True

        m = autoreg.DeepAutoreg(
            [0, win_out],
            data_out_train_list,
            U=[d * input_scaling_factor for d in data_in_train_list],
            U_win=win_in,
            X_variance=0.05,
            num_inducing=Q,
            back_cstr=back_cstr,
            MLP_dims=[500, 200],
            nDims=[data_out_train.shape[1], 1],
            kernels=[
                GPy.kern.MLP(win_out, bias_variance=10.),
                GPy.kern.MLP(win_out + win_in, bias_variance=10.)
            ])
        #                      kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True, useGPU=True),
        #                      GPy.kern.RBF(win_out+win_in,ARD=True,inv_l=True, useGPU=True)])
        if not second_model_svi:
            m_svi = autoreg.DeepAutoreg_new(
                [0, win_out],
                data_out_train_list,
                U=[d * input_scaling_factor for d in data_in_train_list],
                U_win=win_in,
                X_variance=0.05,
                num_inducing=Q,
                back_cstr=back_cstr,
                MLP_dims=[500, 200],
                nDims=[data_out_train.shape[1], 1],
                kernels=[
                    GPy.kern.MLP(win_out, bias_variance=10.),
                    GPy.kern.MLP(win_out + win_in, bias_variance=10.)
                ])
            #                      kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True, useGPU=True),
            #                      GPy.kern.RBF(win_out+win_in,ARD=True,inv_l=True, useGPU=True)])

            m_svi.param_array[:] = m.param_array
            m_svi._trigger_params_changed()

        else:
            m_svi = autoreg.DeepAutoreg_new(
                [0, win_out],
                data_out_train_list,
                U=[d * input_scaling_factor for d in data_in_train_list],
                U_win=win_in,
                X_variance=0.05,
                num_inducing=Q,
                back_cstr=back_cstr,
                MLP_dims=[500, 200],
                nDims=[data_out_train.shape[1], 1],
                kernels=[
                    GPy.kern.MLP(win_out, bias_variance=10.),
                    GPy.kern.MLP(win_out + win_in, bias_variance=10.)
                ],
                inference_method='svi')

        #                      kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True, useGPU=True),
        #                      GPy.kern.RBF(win_out+win_in,ARD=True,inv_l=True, useGPU=True)])

    print("Old model:")
    print(m)
    print("New model:")
    print(m_svi)

    if not second_model_svi:
        print(
            "Maximum ll difference:  ",
            np.max(
                np.abs(m._log_marginal_likelihood -
                       m_svi._log_marginal_likelihood)))
        print(
            "Maximum ll_grad difference:  ",
            np.max(
                np.abs(m._log_likelihood_gradients() -
                       m_svi._log_likelihood_gradients())))

    # Notebook-style trick: export all locals for interactive inspection.
    globals().update(locals())
    return  # Alex
Example #4
def rgp_experiment_raw(p_task_name, p_iteration, train_U, train_Y, p_init_runs,
                       p_max_runs, p_num_layers, p_hidden_dims,
                       p_inference_method, p_back_cstr, p_MLP_Dims, p_Q,
                       p_win_in, p_win_out, p_init, p_x_init_var):
    """
    Experiment file for NON MINIBATCH inference.
    So, DeepAutoreg is run here.
    
    Inputs:
    -------------------------------
        p_task_name: string
            Experiment name, used only in file name
        p_iteration: int or string
            Iteration of the experiment, used only in file name
    
        p_init_runs: int:
             Number of initial runs when likelihood variances and covariance magnitudes are fixed
        p_max_runs: int
            Maximum runs of general optimization
        p_num_layers: int [1,2]
            Number of RGP layers
        p_hidden_dims: list[ length is the number of hidden layers]
            Dimensions of hidden layers
        p_inference_method: string
            If 'svi' then SVI inference is used.
        p_back_cstr: bool
            Use back constrains or not.
        p_MLP_Dims: list[length is the number of MLP hidden layers, ignoring input and output layers]
            Values are the number of neurons at each layer.
        p_Q: int
            Number of inducing points
        p_win_in, p_win_out: int
            Inpput window and hidden layer window.
        p_init: string 'Y', 'rand', 'zero'
            Initialization of RGP hidden layers
        p_x_init_var: float
            Initial variance for X, usually 0.05 for data close to normalized data.
    """
    win_in = p_win_in  # 20
    win_out = p_win_out  # 20

    inference_method = p_inference_method if p_inference_method == 'svi' else None
    #import pdb; pdb.set_trace()

    if p_num_layers == 1:
        # 1 layer:
        wins = [0, win_out]  # 0-th is output layer
        nDims = [train_Y.shape[1], p_hidden_dims[0]]

        kernels = [
            GPy.kern.RBF(win_out, ARD=True, inv_l=True),
            GPy.kern.RBF(win_in + win_out, ARD=True, inv_l=True)
        ]
    elif p_num_layers == 2:
        # 2 layers:
        wins = [0, win_out, win_out]
        nDims = [train_Y.shape[1], p_hidden_dims[0], p_hidden_dims[1]]

        kernels = [
            GPy.kern.RBF(win_out, ARD=True, inv_l=True),
            GPy.kern.RBF(win_out + win_out, ARD=True, inv_l=True),
            GPy.kern.RBF(win_out + win_in, ARD=True, inv_l=True)
        ]
    else:
        raise NotImplementedError()

    print("Input window:  ", win_in)
    print("Output window:  ", win_out)

    m = autoreg.DeepAutoreg_new(
        wins,
        train_Y,
        U=train_U,
        U_win=win_in,
        num_inducing=p_Q,
        back_cstr=p_back_cstr,
        MLP_dims=p_MLP_Dims,
        nDims=nDims,
        init=p_init,  # how to initialize hidden states means
        X_variance=p_x_init_var,  # 0.05; how to initialize hidden states variances
        inference_method=inference_method,  # Inference method
        kernels=kernels)

    # Pattern for the model name: task_name, inf_meth, wins (layers), Q,
    # backcstr, MLP_dims, nDims, init, x_init.
    inf_meth_str = 'reg' if inference_method is None else inference_method
    backcstr_str = (str(p_back_cstr) if not p_back_cstr else
                    str(p_back_cstr) + '_' + str(p_MLP_Dims))
    model_file_name = ('%s_%s--inf_meth=%s--backcstr=%s--wins=%s_%s--Q=%i'
                       '--nDims=%s--init=%s--x_init=%s') % (
                           p_task_name, str(p_iteration), inf_meth_str,
                           backcstr_str, str(win_in), str(wins), p_Q,
                           str(nDims), p_init, str(p_x_init_var))

    print('Model file name:  ', model_file_name)
    print(m)

    #import pdb; pdb.set_trace()
    #Initialization
    # Here layer numbers are different than in initialization. 0-th layer is the top one
    for i in range(m.nLayers):
        m.layers[i].kern.inv_l[:] = np.mean(
            1. / ((m.layers[i].X.mean.values.max(0) -
                   m.layers[i].X.mean.values.min(0)) / np.sqrt(2.)))
        m.layers[i].likelihood.variance[:] = 0.01 * train_Y.var()
        m.layers[i].kern.variance.fix(warning=False)
        m.layers[i].likelihood.fix(warning=False)
    print(m)

    #init_runs = 50 if out_train.shape[0]<1000 else 100
    print("Init runs:  ", p_init_runs)
    m.optimize('bfgs', messages=1, max_iters=p_init_runs)
    for i in range(m.nLayers):
        m.layers[i].kern.variance.constrain_positive(warning=False)
        m.layers[i].likelihood.constrain_positive(warning=False)
    m.optimize('bfgs', messages=1, max_iters=p_max_runs)

    io.savemat(model_file_name, {'params': m.param_array[:]})
    print(m)

    return -float(m._log_marginal_likelihood), m
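

# Example invocation of rgp_experiment_raw (a sketch only: generate_data is
# the toy-data helper used in the tests above, and all parameter values here
# are illustrative, chosen small for speed):
#
#   U, Y = generate_data(3, 20, u_dim=2, y_dim=3)
#   neg_mll, model = rgp_experiment_raw(
#       'toy_task', 0, U, Y,
#       p_init_runs=10, p_max_runs=100,
#       p_num_layers=2, p_hidden_dims=[2, 3],
#       p_inference_method=None,  # regular (non-SVI) inference
#       p_back_cstr=False, p_MLP_Dims=[3, 2],
#       p_Q=3, p_win_in=2, p_win_out=3,
#       p_init='Y', p_x_init_var=0.05)
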
def svi_test_5():
    """
    This function tests the initial MLP implementation.
    """

    u_dim = 2
    y_dim = 3
    U, Y = generate_data(3, 20, u_dim=2, y_dim=3)

    Q = 3  # 200 # Inducing points num. Take a small number for speed

    back_cstr = True
    inference_method = 'svi'
    minibatch_inference = True

    #        # 1 layer:
    #        wins = [0, win_out] # 0-th is output layer
    #        nDims = [out_train.shape[1],1]

    # 2 layers:
    win_out = 3
    win_in = 2
    wins = [0, win_out, win_out]
    nDims = [y_dim, 2, 3]

    MLP_dims = [3, 2]  # 300, 200 in the full run; kept small for speed.
    #print("Input window:  ", win_in)
    #print("Output window:  ", win_out)

    m = autoreg.DeepAutoreg_new(
        wins,
        Y,
        U=U,
        U_win=win_in,
        num_inducing=Q,
        back_cstr=back_cstr,
        MLP_dims=MLP_dims,
        nDims=nDims,
        init='Y',  # how to initialize hidden states means
        X_variance=0.05,  # how to initialize hidden states variances
        inference_method=inference_method,  # Inference method
        minibatch_inference=minibatch_inference,
        mb_inf_init_xs_vals='mlp',
        # 1 layer:
        # kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True),
        #        GPy.kern.RBF(win_in + win_out,ARD=True,inv_l=True)] )

        # 2 layers:
        kernels=[
            GPy.kern.RBF(win_out * nDims[1], ARD=True, inv_l=True),
            GPy.kern.RBF(win_out * nDims[1] + win_out * nDims[2],
                         ARD=True,
                         inv_l=True),
            GPy.kern.RBF(win_out * nDims[2] + win_in * u_dim,
                         ARD=True,
                         inv_l=True)
        ])

    model_1 = m
    model_1._trigger_params_changed()

    mll_1 = model_1._log_marginal_likelihood
    g_mll_1 = model_1._log_likelihood_gradients()

    return  # early return: the streamer checks below are not reached

    data_streamer = RandomPermutationDataStreamer(Y, U)
    model_1.set_DataStreamer(data_streamer)
    model_1._trigger_params_changed()

    model_1._next_minibatch()
    model_1._trigger_params_changed()

    np.testing.assert_equal(model_1._log_marginal_likelihood,
                            mll_1,
                            err_msg="Likelihoods must be equal")
    np.testing.assert_array_equal(model_1._log_likelihood_gradients(),
                                  g_mll_1,
                                  err_msg="Likelihood gradients must be equal")

    model_1._next_minibatch()
    model_1._trigger_params_changed()

    np.testing.assert_equal(model_1._log_marginal_likelihood,
                            mll_1,
                            err_msg="Likelihoods must be equal")
    np.testing.assert_array_equal(model_1._log_likelihood_gradients(),
                                  g_mll_1,
                                  err_msg="Likelihood gradients must be equal")
def svi_test_4():
    """
    This function tests the model with minibatch inference turned on, with
    a separate initial value for every sample and one latent-space variance
    for every sample.

    Gradients are not compared but are tested separately.
    """
    u_dim = 2
    y_dim = 3
    ts_length = 20
    sequences_no = 3
    U, Y = generate_data(sequences_no, ts_length, u_dim=u_dim, y_dim=y_dim)

    Q = 3  # 200 # Inducing points num. Take a small number for speed

    back_cstr = True
    inference_method = 'svi'
    minibatch_inference = True

    #        # 1 layer:
    #        wins = [0, win_out] # 0-th is output layer
    #        nDims = [out_train.shape[1],1]

    # 2 layers:
    win_out = 3
    win_in = 2
    wins = [0, win_out, win_out]
    nDims = [y_dim, 2, 3]

    MLP_dims = [3, 2]  # 300, 200 in the full run; kept small for speed.
    #print("Input window:  ", win_in)
    #print("Output window:  ", win_out)

    data_streamer = TrivialDataStreamer(Y, U)
    minibatch_index, minibatch_indices, Y_mb, X_mb = \
        data_streamer.next_minibatch()

    m = autoreg.DeepAutoreg_new(
        wins,
        Y_mb,
        U=X_mb,
        U_win=win_in,
        num_inducing=Q,
        back_cstr=back_cstr,
        MLP_dims=MLP_dims,
        nDims=nDims,
        init='Y',  # how to initialize hidden states means
        X_variance=0.05,  # how to initialize hidden states variances
        inference_method=inference_method,  # Inference method
        minibatch_inference=minibatch_inference,
        mb_inf_tot_data_size=sequences_no,
        mb_inf_init_xs_means='all',
        mb_inf_init_xs_vars='all',
        mb_inf_sample_idxes=minibatch_indices,
        # 1 layer:
        # kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True),
        #        GPy.kern.RBF(win_in + win_out,ARD=True,inv_l=True)] )

        # 2 layers:
        kernels=[
            GPy.kern.RBF(win_out * nDims[1], ARD=True, inv_l=True),
            GPy.kern.RBF(win_out * nDims[1] + win_out * nDims[2],
                         ARD=True,
                         inv_l=True),
            GPy.kern.RBF(win_out * nDims[2] + win_in * u_dim,
                         ARD=True,
                         inv_l=True)
        ])

    model_1 = m
    model_1._trigger_params_changed()

    mll_1 = model_1._log_marginal_likelihood
    g_mll_1 = model_1._log_likelihood_gradients()

    #self.assertTrue(self.model_1.checkgrad())
    #model_1.checkgrad(verbose=True)
    #return
    data_streamer = RandomPermutationDataStreamer(Y, U)
    #data_streamer = TrivialDataStreamer(Y, U)

    model_1.set_DataStreamer(data_streamer)
    model_1._trigger_params_changed()

    #model_1.checkgrad(verbose=True)

    model_1._next_minibatch()
    model_1._trigger_params_changed()

    model_1.checkgrad(verbose=True)

    np.testing.assert_equal(model_1._log_marginal_likelihood,
                            mll_1,
                            err_msg="Likelihoods must be equal")
    np.testing.assert_array_equal(model_1._log_likelihood_gradients(),
                                  g_mll_1,
                                  err_msg="Likelihood gradients must be equal")

    model_1._next_minibatch()
    model_1._trigger_params_changed()

    np.testing.assert_equal(model_1._log_marginal_likelihood,
                            mll_1,
                            err_msg="Likelihoods must be equal")
    np.testing.assert_array_equal(model_1._log_likelihood_gradients(),
                                  g_mll_1,
                                  err_msg="Likelihood gradients must be equal")
def svi_test_2():
    """
    The goal of this function is to compare the minibatch SVI with
    the non-minibatch SVI.
    """

    trainned_models_folder_name = "/Users/grigoral/work/code/RGP/examples/identif_trainded"

    Q = 3  # 200 # Inducing points num
    win_in = task.win_in  # 20
    win_out = task.win_out  # 20
    use_controls = True
    back_cstr = True
    inference_method = 'svi'
    minibatch_inference = True
    # 1 layer:
    wins = [0, win_out]  # 0-th is output layer
    nDims = [out_train.shape[1], 1]

    # 2 layers:
    # wins = [0, win_out, win_out]
    # nDims = [out_train.shape[1],1,1]

    MLP_dims = [3, 2]  # 300, 200 in the full run; kept small for speed
    print("Input window:  ", win_in)
    print("Output window:  ", win_out)

    m = autoreg.DeepAutoreg_new(
        wins,
        out_train,
        U=in_train,
        U_win=win_in,
        num_inducing=Q,
        back_cstr=back_cstr,
        MLP_dims=MLP_dims,
        nDims=nDims,
        init='Y',  # how to initialize hidden states means
        X_variance=0.05,  # how to initialize hidden states variances
        inference_method=inference_method,  # Inference method
        minibatch_inference=minibatch_inference,
        # 1 layer:
        kernels=[
            GPy.kern.RBF(win_out, ARD=True, inv_l=True),
            GPy.kern.RBF(win_in + win_out, ARD=True, inv_l=True)
        ])

    # 2 layers:
    #kernels=[GPy.kern.RBF(win_out,ARD=True,inv_l=True),
    #         GPy.kern.RBF(win_out+win_out,ARD=True,inv_l=True),
    #         GPy.kern.RBF(win_out+win_in,ARD=True,inv_l=True)])

    data_streamer = TrivialDataStreamer(out_train, in_train)
    m.set_DataStreamer(data_streamer)

    m._trigger_params_changed()
    print(m)
    m._next_minibatch()
    m._trigger_params_changed()
    #m = autoreg.DeepAutoreg([0,win_out],out_train, U=in_train, U_win=win_in,X_variance=0.01,
    #                        num_inducing=50)

    # Pattern for the model name: task_name, inf_meth, wins (layers), Q,
    # backcstr, MLP_dims, nDims.
    model_file_name = '%s--inf_meth=%s--wins=%s--Q=%i--backcstr=%i--nDims=%s' % (
        task.name, 'reg' if inference_method is None else inference_method,
        str(wins), Q, back_cstr, str(nDims))
    if back_cstr:
        model_file_name += '--MLP_dims=%s' % (MLP_dims, )

    print('Model file name:  ', model_file_name)
    print(m)
    m.checkgrad(verbose=True)
    return  # early return: the initialization and training code below is not reached

    # ### Model initialization:

    # In[36]:

    # Here layer numbers are different than in initialization. 0-th layer is the top one
    for i in range(m.nLayers):
        m.layers[i].kern.inv_l[:] = np.mean(
            1. / ((m.layers[i].X.mean.values.max(0) -
                   m.layers[i].X.mean.values.min(0)) / np.sqrt(2.)))
        m.layers[i].likelihood.variance[:] = 0.01 * out_train.var()
        m.layers[i].kern.variance.fix(warning=False)
        m.layers[i].likelihood.fix(warning=False)
    print(m)

    # In[37]:

    print(m.layer_1.kern.inv_l)
    print(m.layer_0.kern.inv_l)
    print(
        np.mean(1. / (
            (m.layer_1.X.mean.values.max(0) - m.layer_1.X.mean.values.min(0)) /
            np.sqrt(2.))))

    # In[38]:

    # Plot initialization of hidden layer:
    def plot_hidden_states(fig_no,
                           layer,
                           layer_start_point=None,
                           layer_end_point=None,
                           data_start_point=None,
                           data_end_point=None):
        if layer_start_point is None: layer_start_point = 0
        if layer_end_point is None: layer_end_point = len(layer.mean)

        if data_start_point is None: data_start_point = 0
        if data_end_point is None: data_end_point = len(out_train)

        data = out_train[data_start_point:data_end_point]
        layer_means = layer.mean[layer_start_point:layer_end_point]
        layer_vars = layer.variance[layer_start_point:layer_end_point]

        fig4 = plt.figure(fig_no, figsize=(10, 8))
        ax1 = plt.subplot(1, 1, 1)
        fig4.suptitle('Hidden layer plotting')
        ax1.plot(out_train[data_start_point:data_end_point],
                 label="Orig data Train_out",
                 color='b')
        ax1.plot(layer_means, label='pred mean', color='r')
        ax1.plot(layer_means + 2 * np.sqrt(layer_vars),
                 label='pred var',
                 color='r',
                 linestyle='--')
        ax1.plot(layer_means - 2 * np.sqrt(layer_vars),
                 label='pred var',
                 color='r',
                 linestyle='--')
        ax1.legend(loc=4)
        ax1.set_title('Hidden layer vs Training data')

        del ax1

    plot_hidden_states(5, m.layer_1.qX_0)
    #plot_hidden_states(6,m.layer_2.qX_0)

    # ### Model training:

    # In[39]:

    #init_runs = 50 if out_train.shape[0]<1000 else 100
    init_runs = 100
    print("Init runs:  ", init_runs)
    m.optimize('bfgs', messages=1, max_iters=init_runs)
    for i in range(m.nLayers):
        m.layers[i].kern.variance.constrain_positive(warning=False)
        m.layers[i].likelihood.constrain_positive(warning=False)
    m.optimize('bfgs', messages=1, max_iters=10000)

    print(m)

    # ### Look at trained parameters

    # In[40]:

    if hasattr(m, 'layer_1'):
        print("Layer 1:  ")
        print("States means (min and max), shapes:  ",
              m.layer_1.qX_0.mean.min(), m.layer_1.qX_0.mean.max(),
              m.layer_1.qX_0.mean.shape)
        print("States variances (min and max), shapes:  ",
              m.layer_1.qX_0.variance.min(), m.layer_1.qX_0.variance.max(),
              m.layer_1.qX_0.variance.shape)
        print("Inverse langthscales (min and max), shapes:  ",
              m.layer_1.rbf.inv_lengthscale.min(),
              m.layer_1.rbf.inv_lengthscale.max(),
              m.layer_1.rbf.inv_lengthscale.shape)

    if hasattr(m, 'layer_0'):
        print("")
        print("Layer 0 (output):  ")
        print("Inverse langthscales (min and max), shapes:  ",
              m.layer_0.rbf.inv_lengthscale.min(),
              m.layer_0.rbf.inv_lengthscale.max(),
              m.layer_0.rbf.inv_lengthscale.shape)

    # In[41]:

    print(m.layer_0.rbf.inv_lengthscale)

    # In[42]:

    print(m.layer_1.rbf.inv_lengthscale)

    # ### Analyze and plot model on test data:

    # In[43]:

    # Free-run on the train data

    # initialize to last part of trained latent states
    #init_Xs = [None, m.layer_1.qX_0[0:win_out]] # init_Xs for train prediction

    # initialize to zeros
    init_Xs = None
    predictions_train = m.freerun(init_Xs=init_Xs, U=in_train, m_match=True)

    # initialize to last part of trainig latent states
    #init_Xs = [None, m.layer_1.qX_0[-win_out:] ] # init_Xs for test prediction
    #U_test = np.vstack( (in_train[-win_in:], in_test) )

    # initialize to zeros
    init_Xs = None
    U_test = in_test

    # Free-run on the test data
    predictions_test = m.freerun(init_Xs=init_Xs, U=U_test, m_match=True)
    del init_Xs, U_test

    # In[44]:

    # Plot predictions
    def plot_predictions(fig_no,
                         posterior_train,
                         posterior_test=None,
                         layer_no=None):
        """
        Plots the output data along with posterior of the layer.
        Used for plotting the hidden states or
        
        layer_no: int or Normal posterior
            plot states of this layer (0-th is output). There is also some logic about compting
            the MSE, and aligning with actual data.
        """

        if layer_no is None:  #default
            layer_no = 1

        if posterior_test is None:
            no_test_data = True
        else:
            no_test_data = False

        if isinstance(posterior_train, list):
            # standard layer no (like in printing the model)
            layer_in_list = len(posterior_train) - 1 - layer_no
            predictions_train_layer = posterior_train[layer_in_list]
        else:
            predictions_train_layer = posterior_train

        if not no_test_data:
            if isinstance(posterior_test, list):
                predictions_test_layer = posterior_test[layer_in_list]
            else:
                predictions_test_layer = posterior_test

        # Aligning the data ->
        # The training or test data can be longer than the layer data
        # because of the initial window.
        if out_train.shape[0] > predictions_train_layer.mean.shape[0]:
            out_train_tmp = out_train[win_out:]
        else:
            out_train_tmp = out_train

        if out_test.shape[0] > predictions_test_layer.mean.shape[0]:
            out_test_tmp = out_test[win_out:]
        else:
            out_test_tmp = out_test
        # Aligning the data <-

        if layer_no == 0:
            # Compute RMSE; the first "win_out" output values are no longer ignored.
            train_rmse = [
                comp_RMSE(predictions_train_layer.mean, out_train_tmp)
            ]
            print("Train overall RMSE: ", str(train_rmse))

            if not no_test_data:
                # Compute RMSE ignoring the first output values of length "win_out"
                test_rmse = [
                    comp_RMSE(predictions_test_layer.mean, out_test_tmp)
                ]
                print("Test overall RMSE: ", str(test_rmse))

        # Plot predictions:
        if not no_test_data:
            fig5 = plt.figure(fig_no, figsize=(20, 8))
        else:
            fig5 = plt.figure(fig_no, figsize=(10, 8))

        fig5.suptitle('Predictions on Training and Test data')
        if not no_test_data:
            ax1 = plt.subplot(1, 2, 1)
        else:
            ax1 = plt.subplot(1, 1, 1)
        ax1.plot(out_train_tmp, label="Train_out", color='b')
        ax1.plot(predictions_train_layer.mean, label='pred mean', color='r')
        ax1.plot(predictions_train_layer.mean +
                 2 * np.sqrt(predictions_train_layer.variance),
                 label='pred var',
                 color='r',
                 linestyle='--')
        ax1.plot(predictions_train_layer.mean -
                 2 * np.sqrt(predictions_train_layer.variance),
                 label='pred var',
                 color='r',
                 linestyle='--')
        ax1.legend(loc=4)
        ax1.set_title('Predictions on Train')

        if not no_test_data:
            ax2 = plt.subplot(1, 2, 2)
            ax2.plot(out_test_tmp, label="Test_out", color='b')

            ax2.plot(predictions_test_layer.mean, label='pred mean', color='r')
            #ax2.plot( predictions_test_layer.mean +\
            #                 2*np.sqrt( predictions_test_layer.variance ), label = 'pred var', color='r', linestyle='--' )
            #ax2.plot( predictions_test_layer.mean -\
            #                 2*np.sqrt( predictions_test_layer.variance ), label = 'pred var', color='r', linestyle='--' )
            ax2.legend(loc=4)
            ax2.set_title('Predictions on Test')

            del ax2
        del ax1

    plot_predictions(7, predictions_train, predictions_test, layer_no=0)
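
    # The same helper can also be pointed at a hidden layer (sketch; per the
    # docstring, layer 0 is the output layer, so layer_no=1 selects the first
    # hidden layer):
    #plot_predictions(8, predictions_train, predictions_test, layer_no=1)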