示例#1
0
    def __init__(self,
                 discount=0.95,
                 exploration_rate=1.0,
                 exploration_rate_decay=.99,
                 target_every=2):
        """Store the learning hyperparameters and build the networks.

        Args:
            discount: how much future rewards are valued relative to the
                current reward.
            exploration_rate: initial exploration rate.
            exploration_rate_decay: decay factor that moves the agent from
                exploration toward exploitation.
            target_every: number of iterations to skip before the prediction
                network is swapped with a target network.
        """
        # --- hyperparameters -------------------------------------------------
        self.discount = discount
        self.exploration_rate = exploration_rate
        self.exploration_rate_decay = exploration_rate_decay
        self.target_every = target_every

        # --- network layout --------------------------------------------------
        # Per the original author's notes the body plan has 6 input neurons
        # (one per metabolite concentration plus one for the time horizon) and
        # a single output neuron approximating the value of the only action.
        # NOTE(review): the layout lives in the CSV file; confirm it there.
        self.bodyplan = read_bodyplan("crpm/data/abbc_bodyplan.csv")

        # --- prediction network ----------------------------------------------
        self.prednet = init_ffn(self.bodyplan)
        # current prediction error; nothing has been evaluated yet
        self.loss = None

        # --- target networks -------------------------------------------------
        # Four target networks, each built separately from the same body plan
        # (they are not copies of the prediction network).
        (self.targetnet1,
         self.targetnet2,
         self.targetnet3,
         self.targetnet4) = [init_ffn(self.bodyplan) for _ in range(4)]

        # counter used to decide when a target network should be refreshed
        # from the prediction network
        self.iteration = 0
示例#2
0
def test_solve_numberadder():
    """Gradient descent should pull number-adder weights from 1.1 back to 1.

    The layer-1 weights are all set to 1.1, the model is trained with mean
    squared error on numberadder.csv, and the trained weights must be within
    0.5% of 1.0.
    """
    import numpy as np
    from crpm.ffn_bodyplan import read_bodyplan, init_ffn
    from crpm.dataset import load_dataset
    from crpm.gradientdecent import gradientdecent

    # shallow network described by the number-adder body plan file
    model = init_ffn(read_bodyplan("crpm/data/numberadder_bodyplan.csv"))

    # start every layer-1 weight at 1.1 (biases are left as initialized)
    weight_shape = model[1]["weight"].shape
    model[1]["weight"] = np.full(weight_shape, 1.1)

    # rows 0-4 of the dataset are the inputs, the last row is the target
    _, data = load_dataset("crpm/data/numberadder.csv")
    inputs = data[0:5, ]
    targets = data[-1, ]

    # the assertion below relies on training updating `model` in place
    _, _, _ = gradientdecent(model, inputs, targets, "mse")

    print(model[1]["weight"])

    assert np.allclose(model[1]["weight"], 1.0, rtol=.005)
示例#3
0
def test_fwdprop_numberadder():
    """Forward propagation with all-ones weights should add its inputs.

    The prediction for numberadder.csv must match the last row of the
    dataset (the dependent variable) to a relative tolerance of 1e-7.
    """
    import numpy as np
    from crpm.ffn_bodyplan import read_bodyplan, init_ffn
    from crpm.dataset import load_dataset
    from crpm.fwdprop import fwdprop

    # shallow network described by the number-adder body plan file
    model = init_ffn(read_bodyplan("crpm/data/numberadder_bodyplan.csv"))

    # replace the layer-1 weights with ones (biases are left as initialized)
    weight_shape = model[1]["weight"].shape
    model[1]["weight"] = np.ones(weight_shape)

    # rows 0-4 are the independent variables, the last row is the target
    __, data = load_dataset("crpm/data/numberadder.csv")
    indepvars, depvars = data[0:5, ], data[-1, ]

    # only the prediction is needed; the second return value is discarded
    prediction, __ = fwdprop(indepvars, model)

    assert np.allclose(depvars, prediction, rtol=1E-7)
示例#4
0
def test_backprop_numberadder():
    """A solved number adder should feel zero forces of the proper shape.

    With all layer-1 weights equal to one, the forces returned by backprop
    for the top layer must be (numerically) zero, with weight forces shaped
    (1, 5) and bias forces shaped (1, 1).
    """
    import numpy as np
    from crpm.ffn_bodyplan import read_bodyplan, init_ffn
    from crpm.dataset import load_dataset
    from crpm.fwdprop import fwdprop
    from crpm.lossfunctions import loss
    from crpm.backprop import backprop

    # shallow network described by the number-adder body plan file
    addermodel = init_ffn(read_bodyplan("crpm/data/numberadder_bodyplan.csv"))

    # replace the layer-1 weights with ones (biases are left as initialized)
    addermodel[1]["weight"] = np.ones(addermodel[1]["weight"].shape)

    # forward pass, mean-squared-error derivative, then backward pass
    __, data = load_dataset("crpm/data/numberadder.csv")
    pred, state = fwdprop(data[0:5,], addermodel)
    __, dloss = loss("mse", pred, data[-1,])
    forces, _ = backprop(addermodel, state, dloss)

    top_forces = forces[-1]
    expected_shapes = (("fweight", (1, 5)), ("fbias", (1, 1)))
    for key, shape in expected_shapes:
        assert top_forces[key].shape == shape
        # shift by one so the relative tolerance is measured against 1
        # rather than against the expected value of zero
        assert np.allclose(1 + top_forces[key], 1, rtol=1E-7)
示例#5
0
def test_numadd_forcedir():
    """A number adder whose weights start above 1 should feel negative forces.

    All layer-1 weights are set to 1.1; every weight force returned by
    backprop for the top layer must then be strictly negative.
    """
    import numpy as np
    from crpm.ffn_bodyplan import read_bodyplan, init_ffn
    from crpm.dataset import load_dataset
    from crpm.fwdprop import fwdprop
    from crpm.lossfunctions import loss
    from crpm.backprop import backprop

    # shallow network described by the number-adder body plan file
    addermodel = init_ffn(read_bodyplan("crpm/data/numberadder_bodyplan.csv"))

    # start every layer-1 weight at 1.1 (biases are left as initialized)
    weight_shape = addermodel[1]["weight"].shape
    addermodel[1]["weight"] = np.full(weight_shape, 1.1)

    # forward pass, mean-squared-error derivative, then backward pass
    __, data = load_dataset("crpm/data/numberadder.csv")
    pred, state = fwdprop(data[0:5,], addermodel)
    __, dloss = loss("mse", pred, data[-1,])
    forces, _ = backprop(addermodel, state, dloss)

    weight_forces = forces[-1]["fweight"]
    assert np.all(weight_forces < 0)
示例#6
0
    def __init__(self, desc, std=None, pre=None, post=None):
        """Define a model from a description, with optional static pre- and
        post-processing bodies and an initial weight distribution parameter.

        Args:
            desc: body plan description. A ``str`` is handed to
                ``read_bodyplan`` (file description); a ``list`` is used
                directly as the body plan.
            std: weight distribution parameter, stored as ``self.weightstd``
                and forwarded to ``init_ffn`` as ``weightstd``.
            pre: optional iterable of layer dicts used as a static
                pre-processing body. Each layer's ``"desc"`` entry is
                modified in place (a suffix is appended).
            post: optional iterable of layer dicts used as a static
                post-processing body. Each layer's ``"desc"`` entry is
                modified in place (a suffix is appended).

        Raises:
            TypeError: if ``desc`` is neither a ``str`` nor a ``list``.
        """
        from crpm.ffn_bodyplan import read_bodyplan
        from crpm.ffn_bodyplan import init_ffn

        #save weight variance parameter
        self.weightstd = std

        #get bodyplan from a file description or from a list description;
        #anything else is rejected here instead of failing later with an
        #AttributeError on the missing self.bodyplan
        if isinstance(desc, str):
            self.bodyplan = read_bodyplan(desc)
        elif isinstance(desc, list):
            self.bodyplan = desc
        else:
            raise TypeError(
                "desc must be a bodyplan file path (str) or a bodyplan "
                "list, got " + type(desc).__name__)

        #define model from bodyplan
        self.body = init_ffn(self.bodyplan, weightstd=self.weightstd)

        #link static pre-processing body
        self.pre = pre
        #append indicator in description if applicable
        if self.pre is not None:
            for layer in self.pre:
                layer["desc"] = layer["desc"] + ' static pre-processor'

        #link static post-processing body
        self.post = post
        #append indicator in description if applicable
        if self.post is not None:
            for layer in self.post:
                layer["desc"] = layer["desc"] + ' static post-processor'
示例#7
0
def setup_afmodel():
    """Return the afmodel prototype network and the arrays stored with it.

    Returns:
        A 5-tuple ``(prototype, a0, a1, a2, a3)`` where ``prototype`` is the
        model built from ``afmodel_bodyplan.csv`` and ``a0..a3`` are the
        first four arrays of ``afmodel.npz`` in the archive's key order.
        According to the original author's note these are cohort1 data,
        cohort1 labels, cohort2 data and cohort2 labels.

    Raises:
        IndexError: if the archive holds fewer than four arrays.
    """

    import numpy as np
    from crpm.ffn_bodyplan import read_bodyplan
    from crpm.ffn_bodyplan import init_ffn

    #create model from  bodyplan file
    bodyplan = read_bodyplan("crpm/data/afmodel_bodyplan.csv")

    #create model
    prototype = init_ffn(bodyplan)

    #load data; the context manager closes the archive once the arrays
    #have been read so the file handle is not leaked
    with np.load("crpm/data/afmodel.npz") as data:
        #list of keys in data (each key names an individual array)
        keylist = list(data.keys())

        #indexing the archive reads each array fully into memory, so the
        #returned arrays stay valid after the archive is closed
        return (prototype, data[keylist[0]], data[keylist[1]],
                data[keylist[2]], data[keylist[3]])
示例#8
0
def setup_toruscases_deep():
    """Build the deep in-torus model and load the in-torus dataset.

    Returns:
        A tuple ``(model, data)``: the network created from
        ``intorus_deep_bodyplan.csv`` and the second value returned by
        ``load_dataset`` for ``intorus.csv``.
    """

    from crpm.ffn_bodyplan import read_bodyplan, init_ffn
    from crpm.dataset import load_dataset

    # network described by the deep body plan file
    network = init_ffn(read_bodyplan("crpm/data/intorus_deep_bodyplan.csv"))

    # the first value returned by the loader is not needed here
    _unused, dataset = load_dataset("crpm/data/intorus.csv")

    return network, dataset
示例#9
0
def setup_numberadder():
    """Build the number-adder model and load the number-adder dataset.

    Returns:
        A tuple ``(model, keys, data)``: the network created from
        ``numberadder_bodyplan.csv`` followed by both values returned by
        ``load_dataset`` for ``numberadder.csv``.
    """

    from crpm.ffn_bodyplan import read_bodyplan, init_ffn
    from crpm.dataset import load_dataset

    # network described by the body plan file
    network = init_ffn(read_bodyplan("crpm/data/numberadder_bodyplan.csv"))

    # both values returned by the loader are handed back to the caller
    labels, dataset = load_dataset("crpm/data/numberadder.csv")

    return network, labels, dataset
示例#10
0
def setup_periodiccases():
    """Build the periodic-cases model and load the periodic-cases dataset.

    Returns:
        A tuple ``(model, data)``: the network created from
        ``periodiccases_bodyplan.csv`` and the second value returned by
        ``load_dataset`` for ``periodiccases.csv``.
    """

    from crpm.ffn_bodyplan import read_bodyplan, init_ffn
    from crpm.dataset import load_dataset

    # network described by the body plan file
    network = init_ffn(read_bodyplan("crpm/data/periodiccases_bodyplan.csv"))

    # the first value returned by the loader is not needed here
    _unused, dataset = load_dataset("crpm/data/periodiccases.csv")

    return network, dataset
示例#11
0
def setup_multicorrel_deep_c():
    """Build the deep multicorrel model and load the multicorrel C dataset.

    Returns:
        A tuple ``(model, data)``: the network created from
        ``multicorrel_deep_bodyplan.csv`` and the second value returned by
        ``load_dataset`` for ``multicorrel_C.csv``.
    """

    from crpm.ffn_bodyplan import read_bodyplan, init_ffn
    from crpm.dataset import load_dataset

    # network described by the deep body plan file
    network = init_ffn(
        read_bodyplan("crpm/data/multicorrel_deep_bodyplan.csv"))

    # the first value returned by the loader is not needed here
    _unused, dataset = load_dataset("crpm/data/multicorrel_C.csv")

    return network, dataset
示例#12
0
def setup_overfitting_shallow():
    """Build the shallow overfitting model and load its two datasets.

    Returns:
        A tuple ``(model, keys, traindata, validdata)``. ``keys`` comes from
        the validation file; the first key and the first row of both data
        arrays are dropped before returning.
    """

    from crpm.ffn_bodyplan import read_bodyplan, init_ffn
    from crpm.dataset import load_dataset

    # network described by the shallow body plan file
    network = init_ffn(
        read_bodyplan("crpm/data/overfitting_shallow_bodyplan.csv"))

    # training keys are discarded; the validation keys are the ones returned
    _unused, training = load_dataset("crpm/data/overfitting_training.csv")
    labels, validation = load_dataset("crpm/data/overfitting_validation.csv")

    # strip the leading key / leading row from everything handed back
    trimmed_labels = labels[1:]
    trimmed_training = training[1:, :]
    trimmed_validation = validation[1:, :]

    return network, trimmed_labels, trimmed_training, trimmed_validation
示例#13
0
    def reinit(self, std=None):
        """Reinitialize the FFN body from the stored body plan.

        Args:
            std: optional new weight distribution parameter. When given it
                replaces ``self.weightstd`` before the body is rebuilt; when
                ``None`` the stored value is reused.

        Returns:
            None. ``self.body`` is replaced by a freshly initialized body.
        """
        # Imported here so this method does not rely on a module-level
        # import of init_ffn being present.
        from crpm.ffn_bodyplan import init_ffn

        #always inform user model is being reinitialized
        print("Reinitialing FFN body!")

        #reset weight distribution if given
        if std is not None:
            self.weightstd = std

        #define model from bodyplan
        self.body = init_ffn(self.bodyplan, weightstd=self.weightstd)
示例#14
0
def test_init_ffn_types():
    """Every layer dictionary should hold values of the expected types.

    All layers carry an int index, an int size and a str activation name.
    Layers with a positive index additionally carry a float regularization
    value and ndarray weights and biases.
    """
    import numpy as np

    from crpm.ffn_bodyplan import read_bodyplan, init_ffn

    model = init_ffn(read_bodyplan("crpm/data/example_ffn_bodyplan.csv"))

    # entries every layer must have
    common_types = (("layer", int), ("n", int), ("activation", str))
    # entries only layers above the input layer must have
    connected_types = (("regval", float),
                       ("weight", np.ndarray),
                       ("bias", np.ndarray))

    for layer in model:
        for key, expected_type in common_types:
            assert isinstance(layer[key], expected_type)

        # layer 0 is not checked for trainable parameters
        if layer["layer"] <= 0:
            continue

        for key, expected_type in connected_types:
            assert isinstance(layer[key], expected_type)
示例#15
0
def test_solve_numberadder():
    """Langevin dynamics should solve the number adder from weights of 1.5.

    The cost reported after training must be lower than the initial mean
    squared error, and the trained layer-1 weights must be within 0.5% of 1.
    """
    import numpy as np
    from crpm.ffn_bodyplan import read_bodyplan, init_ffn
    from crpm.dataset import load_dataset
    from crpm.fwdprop import fwdprop
    from crpm.lossfunctions import loss
    from crpm.langevindynamics import langevindynamics

    # training and held-out data; rows 0-4 are inputs, the last row is the
    # target in both files
    __, data = load_dataset("crpm/data/numberadder.csv")
    __, testdata = load_dataset("crpm/data/numberadder_test.csv")

    # shallow network described by the number-adder body plan file
    model = init_ffn(read_bodyplan("crpm/data/numberadder_bodyplan.csv"))

    # start every layer-1 weight at 1.5 (biases are left as initialized)
    weight_shape = model[1]["weight"].shape
    model[1]["weight"] = np.full(weight_shape, 1.5)

    # mean squared error before any training
    initial_pred, __ = fwdprop(data[0:5,], model)
    icost, __ = loss("mse", initial_pred, data[-1,])
    print("icost = " + str(icost))
    print(model[1]["weight"])

    # train with mean squared error; the assertions below rely on training
    # updating `model` in place
    training_args = (model, data[0:5,], data[-1,], "mse",
                     testdata[0:5,], testdata[-1,])
    __, cost = langevindynamics(*training_args,
                                maxepoch=int(3E5), maxbuffer=int(1E3))
    print("cost =" + str(cost))
    print(model[1]["weight"])

    assert icost > cost
    assert np.allclose(model[1]["weight"], 1.0, rtol=.005)
示例#16
0
def setup_spectra2():
    """Return the spectra2 model and the first array stored with it.

    Returns:
        A tuple ``(model, data)`` where ``model`` is built from
        ``spectra2_bodyplan.csv`` and ``data`` is the first array of
        ``spectra2.npz`` in the archive's key order.

    Raises:
        IndexError: if the archive holds no arrays.
    """

    import numpy as np
    from crpm.ffn_bodyplan import read_bodyplan
    from crpm.ffn_bodyplan import init_ffn

    #create model from  bodyplan file
    bodyplan = read_bodyplan("crpm/data/spectra2_bodyplan.csv")

    #create model
    model = init_ffn(bodyplan)

    #load data; the context manager closes the archive once the array has
    #been read so the file handle is not leaked
    with np.load("crpm/data/spectra2.npz") as data:
        #list of keys in data (each key names an individual array)
        keylist = list(data.keys())

        #indexing the archive reads the array fully into memory, so the
        #returned array stays valid after the archive is closed
        return model, data[keylist[0]]
示例#17
0
def test_init_ffn():
    """The model built from example_ffn_bodyplan.csv should match the plan.

    Checks, for the five layers 0-4: the layer index, the layer size, the
    weight and bias shapes of layers 1-4, and the activation names.
    """

    from crpm.ffn_bodyplan import read_bodyplan, init_ffn

    model = init_ffn(read_bodyplan("crpm/data/example_ffn_bodyplan.csv"))

    # expected layout, indexed by layer number
    sizes = (2, 3, 5, 7, 1)
    activations = ('linear', 'relu', 'relu', 'relu', 'logistic')
    # layers that have incoming connections (everything above the input)
    connected = range(1, 5)

    for index in range(5):
        assert model[index]["layer"] == index

    for index, size in enumerate(sizes):
        assert model[index]["n"] == size

    # weights are (nodes in this layer, nodes in the previous layer)
    for index in connected:
        assert model[index]["weight"].shape == (sizes[index],
                                                sizes[index - 1])

    # biases are column vectors with one entry per node
    for index in connected:
        assert model[index]["bias"].shape == (sizes[index], 1)

    for index, name in enumerate(activations):
        assert model[index]["activation"] == name
示例#18
0
文件: som.py 项目: dmontemayor/CRPM
def init_som(model, state, n=100, nx=None, ny=None, hcp=False):
    """Initialize a self-organizing map from an FFN classifier model.

    The map reuses the model's body plan with the top layer replaced by
    ``n`` "gaussian" mapping nodes. Node weights are seeded from an
    eigendecomposition of the covariance of the penultimate-layer activity.

    Args:
        model: FFN model whose final layer is mapped. Its top layer must
            have "logistic" or "softmax" activation.
        state: per-layer state list; ``state[-2]["activity"]`` is read as the
            penultimate-layer activity. Means and standard deviations are
            taken along axis 1, so features are presumably in rows and
            observations in columns - TODO confirm against the caller.
        n: number of mapping nodes, default is 100.
        nx: number of nodes in x direction (forwarded to ``coords``).
        ny: number of nodes in y direction (forwarded to ``coords``).
        hcp: boolean indicating use of hexagonal close packing, default is
            False (forwarded to ``coords``).

    Returns:
        A tuple ``(som, nclass)``: the initialized map and the number of
        clusters, taken as the size of the model's top layer but never less
        than 2.

    Raises:
        ValueError: if the model's top layer is neither logistic nor softmax.
    """

    #make sure ffn top layer has logistic or softmax activation; checked
    #first so an unsuitable model is rejected before any work is done
    if model[-1]["activation"] not in ("logistic", "softmax"):
        raise ValueError("som::init_map - input model is not a classifier.")

    import numpy as np
    from scipy.spatial import distance_matrix
    from crpm.ffn_bodyplan import get_bodyplan
    from crpm.ffn_bodyplan import init_ffn

    #define number of clusters from size of top layer
    nclass = max(model[-1]["n"], 2)

    #get model bodyplan
    bodyplan = get_bodyplan(model)

    #edit bodyplan toplayer to reflect number of mapping nodes
    bodyplan[-1]["n"] = n
    bodyplan[-1]["activation"] = "gaussian"

    #create map (named som so the builtin map is not shadowed)
    som = init_ffn(bodyplan)

    #add node geometry to top layer and save unit cell scale factor
    som[-1]["coord"], scale = coords(n, nx, ny, hcp)

    #calculate node pair distances in mapping space for given geometry
    som[-1]["nodedist"] = distance_matrix(som[-1]["coord"], som[-1]["coord"])

    #halve the scale factor: the coordinates are divided by it below, so
    #this doubles the unit-scaled coordinates
    scale = np.multiply(scale, 0.5)

    #initialize node weights based on
    #first 3 principal components of the penultimate layer activity

    #penultimate layer activity
    act = state[-2]["activity"]
    # calculate the mean of each feature
    mact = np.mean(act, axis=1)
    # mean center the features
    cact = act.T - mact
    # calculate covariance matrix of centered features
    vact = np.cov(cact.T)
    # eigendecomposition of covariance matrix
    # NOTE(review): np.linalg.eig returns eigenvectors as columns and does
    # not sort the eigenvalues, while the projection below uses the first
    # three rows of `vectors` - confirm this is the intended selection.
    values, vectors = np.linalg.eig(vact)
    #calculate feature standard deviation for scaling
    sig = np.std(act, axis=1)[:, None]

    #diagnostic output, kept so the function's printed output is unchanged
    print(mact)
    print(sig)
    print(values)
    print(vectors)

    #add zero vectors if number of features is less than 3
    if vectors.shape[0] < 3:
        zerovectors = np.zeros((3 - vectors.shape[0], vectors.shape[1]))
        vectors = np.vstack((vectors, zerovectors))

    #project node coordinates onto first 3 principal coordinates
    #unit scale coordinates then scale by feature stdev then translate to feature mean
    som[-1]["weight"] = (
        (som[-1]["coord"] / scale).dot(vectors[0:3, :])) * sig.T + mact[:, None].T

    return som, nclass
示例#19
0
def contrastivedivergence(model,
                          data,
                          validata=None,
                          ncd=1,
                          maxepoch=100,
                          nadj=10,
                          momentum=.5,
                          batchsize=10,
                          finetune=6):
    """unfold and train fnn model by contrastive divergence

        Args:
            model: deep FFN model
            data: features in rows, observations in columns.
            cd: number of contrastive divergence steps
            maxepoch: hard limit of learning iterations default is 100
            nadj: period of learning rate adjustment in units of epochs
            momentum: fraction of previous change in weight carried over to
                      next weight update step
        Returns: exit condition and trained unfolded model.
            Exit conditions are 0) learning converged, 1) learning not
            converged, and -1) learning cannot be performed.
            Training will modify model.
    """

    import numpy as np
    from crpm.activationfunctions import activation
    from crpm.ffn_bodyplan import get_bodyplan
    from crpm.ffn_bodyplan import copy_bodyplan
    from crpm.ffn_bodyplan import push_bodyplanlayer
    from crpm.ffn_bodyplan import init_ffn

    #init exit condition to default
    exitcond = 0

    #get model bodyplan
    bodyplan = get_bodyplan(model)
    #get number of model layers
    nlayer = len(model)
    #copy bodyplan
    unfolded_bodyplan = copy_bodyplan(bodyplan)
    #push layers in reversed order to create a symmetric bodyplan
    for layer in reversed(bodyplan[:-1]):
        push_bodyplanlayer(unfolded_bodyplan, layer)
    #create unfolded model from symmetric bodyplan
    smodel = init_ffn(unfolded_bodyplan)
    #print(smodel)

    #return symmetric model if maxepoch = 0
    if maxepoch < 1:
        return exitcond, smodel

    #define minibatches
    #get number of observations in data
    nobv = data.shape[1]
    #calculate number of minibatches needed
    batchsize = int(batchsize)
    nbatch = nobv // batchsize
    #get randomized observation index
    data = data.T
    np.random.shuffle(data)
    data = data.T

    #alpha norm scales learning rate by max force relative to weight
    alpha_norm = 10**(-finetune)
    #alpha_norm = 1E-8#7#5E-6

    #initialize previous layer activity with input data for layer 0
    prevlayeractivity = data

    #do the same for the validation data
    validprevlayeractivity = validata
    if validata is None:
        #use last 20% of batches for validation
        vbatch = nbatch // 5
        nbatch = nbatch - vbatch
        prevlayeractivity = data[:, 0:nbatch * batchsize]
        validprevlayeractivity = data[:, nbatch * batchsize:]

    # loop over first half of symmetric model begining with layer 1
    for layerindex in range(1, nlayer):

        #encoding index is = layerindex
        #decoding index is = 2*nlayer - layerindex +1
        decodeindex = 2 * nlayer - (layerindex + 1)

        #define layers
        vislayer = smodel[decodeindex]
        hidlayer = smodel[layerindex]

        #get number of nodes per layer
        nv = vislayer["n"]
        nh = hidlayer["n"]

        #initialize connecting weights ±4sqrt(6/(nv+nh))
        hidlayer["weight"] = ((np.random.rand(nh, nv) - 1 / 2) * 8 *
                              np.sqrt(6 / (nh + nv)))

        #determine appropriate RBM type
        vtype = vislayer["activation"]
        htype = hidlayer["activation"]
        rbmtype = None
        #1. binary
        if vtype == "logistic" and htype == "logistic":
            rbmtype = "binary"

            #define activity for visible layer
            def vsample():
                """returns logistic visible layer activity given hiddenlayer state"""
                stimulus = np.add(hidlayer["weight"].T.dot(hstate),
                                  vislayer["bias"])
                return activation("logistic", stimulus)

            #define activity for hidden layer
            def hsample():
                """returns logistic hidden layer activity and stocastic binary state given visible layer activity"""
                stimulus = np.add(hidlayer["weight"].dot(vact),
                                  hidlayer["bias"])
                hact = activation("logistic", stimulus)
                return hact, hact > np.random.random(hact.shape)

            #define free energy equation for binary-binary RBM
            def feng(act):
                #visible bias term: dim (1,m)
                #vbterm = -np.sum(np.multiply(act, vislayer["bias"]), axis=0)
                vbterm = -vislayer["bias"].T.dot(act)

                #hidden layer stimulus : dim (nh,m)
                stimulus = np.add(hidlayer["weight"].dot(act),
                                  hidlayer["bias"])

                # init hidden term : dim (nh,m)
                #hidden_term = activation("vacuum",stimulus)
                #for exp(stim) term numerical stability
                #first calc where stimulus is negative
                #xidx = np.where(stimulus < 0)
                #hidden term function for negative stimulus
                #hidden_term[xidx] = np.log(1+np.exp(stimulus[xidx]))
                #then calc where stimulus is not negative
                #xidx = np.where(stimulus >= 0)
                #hidden term function for not negative stimulus
                #hidden_term[xidx] = stimulus[xidx]+np.log(1+np.exp(-stimulus[xidx]))
                hidden_term = np.where(
                    stimulus < 0, np.log(1 + np.exp(stimulus)),
                    stimulus + np.log(1 + np.exp(-stimulus)))

                #sum over hidden units to get true hidden_term : dim (1,m)
                hidden_term = np.sum(hidden_term, axis=0)

                #free energy = sum over samples (visible_bias_term - hidden_term)
                return np.sum(vbterm - hidden_term)

        #2. Gaussian-Bernoulli
        if vtype == "linear" and htype == "logistic":
            rbmtype = "gaussian-bernoulli"
            #Get standard deviation for real-valued visible units
            sigma = np.std(prevlayeractivity, axis=1, keepdims=True)

            #define activity for visible layer
            def vsample():
                """returns linear plus gaussian noise visible layer activity given hidden layer state"""
                stimulus = np.add(hidlayer["weight"].T.dot(hstate) * sigma,
                                  vislayer["bias"])
                return np.random.normal(loc=stimulus, scale=sigma)

            #define activity for hidden layer
            def hsample():
                """returns logistic hidden layer activity and stocastic binary state given scaled visible layer activity"""
                stimulus = np.add(hidlayer["weight"].dot(vact / sigma),
                                  hidlayer["bias"])
                act = activation("logistic", stimulus)
                return act, act > np.random.random(act.shape)

            #define free energy equation for Gaussian - Bernoulli RBM
            def feng(act):

                #hidden layer stimulus : dim (nh,m)
                stimulus = np.add(hidlayer["weight"].dot(act),
                                  hidlayer["bias"])

                # init hidden term : dim (nh,m)
                #hidden_term = activation("vacuum",stimulus)
                #for exp(stim) term numerical stability
                #first calc where stimulus is negative
                #xidx = np.where(stimulus < 0)
                #hidden term function for negative stimulus
                #hidden_term[xidx] = np.log(1+np.exp(stimulus[xidx]))
                #then calc where stimulus is not negative
                #xidx = np.where(stimulus >= 0)
                #hidden term function for not negative stimulus
                #hidden_term[xidx] = stimulus[xidx]+np.log(1+np.exp(-stimulus[xidx]))
                hidden_term = np.where(
                    stimulus < 0, np.log(1 + np.exp(stimulus)),
                    stimulus + np.log(1 + np.exp(-stimulus)))

                #sum over hidden units to get true hidden_term : dim (1,m)
                hidden_term = np.sum(hidden_term, axis=0)

                #visible bias term: dim (1,m)
                vbterm = -vislayer["bias"].T.dot(act)

                #square term
                sqterm = np.trace(
                    act.T.dot(act) +
                    vislayer["bias"].T.dot(vislayer["bias"])) / 2

                #free energy = vbterm +[act^2 +vbias^2]/2 - hidden_term)
                return np.sum(vbterm - hidden_term) + sqterm

        #3. Bernoulli-Gaussian
        if vtype == "logistic" and htype == "linear":
            rbmtype = "bernoulli-gaussian"

            #define activity for visible layer
            def vsample():
                """returns logistic visible layer activity given unit scaled hidden layer activity"""
                stimulus = np.add(hidlayer["weight"].T.dot(hstate),
                                  vislayer["bias"])
                return activation("logistic", stimulus)

            #define activity for hidden layer
            def hsample():
                """returns linear plus unit var gaussian noise hidden layer activity and stocastic state given vislayer activity"""
                stimulus = np.add(hidlayer["weight"].dot(vact),
                                  hidlayer["bias"])
                return stimulus, np.random.normal(loc=stimulus)

            #define free energy equation for Gaussian - Bernoulli RBM
            print(
                "free energy function is not properly defined for Bernouli-Gaussian RBM"
            )

            def feng(act):
                stimulus = np.add(hidlayer["weight"].dot(act),
                                  hidlayer["bias"])
                #visible bias term
                vbterm = -np.transpose(act).dot(vislayer["bias"])
                vbtemp = np.add(
                    np.transpose(act).dot(act),
                    np.transpose(vislayer["bias"].dot(vislayer["bias"])))
                vbterm = np.add(vbterm, vbtemp / 2).T
                # init hidden term
                hidden_term = activation("vacuum", stimulus)
                #for exp(stim) term numerical stability
                #first calc where stimulus is negative
                xidx = np.where(stimulus < 0)
                #hidden term function for negative stimulus
                hidden_term[xidx] = np.log(1 + np.exp(stimulus[xidx]))
                #then calc where stimulus is not negative
                xidx = np.where(stimulus >= 0)
                #hidden term function for not negative stimulus
                hidden_term[xidx] = stimulus[xidx] + np.log(
                    1 + np.exp(-stimulus[xidx]))
                #free energy = visible_bias_term - hidden_term
                return np.sum(vbterm - np.sum(hidden_term, axis=0))

        #4. exit if unknown RBM type
        if rbmtype == None:
            exitcond = -1  #cannot run contrastive divergence on this model
            print(
                "Error in contrastivedivergence.py: cannot find appropriate RBM type."
            )
            print("Ensure model has only logistic or linear layers.")
            print(
                "Also ensure linear layers are not adjacent - that would be pointless btw."
            )
            return exitcond, smodel

        # continuous loop over learning steps (use exit conditions)
        print("training " + rbmtype + " RBM in layer " + str(layerindex))
        continuelearning = True
        momentum_adj = 0
        epoch = 0
        err = 0
        dweight = np.zeros(hidlayer["weight"].shape)
        dhbias = np.zeros(hidlayer["bias"].shape)
        dvbias = np.zeros(vislayer["bias"].shape)
        valid_feng = np.full(nadj, feng(validprevlayeractivity))
        train_feng = np.full(nadj, feng(prevlayeractivity))
        #freeeng = np.full(nadj, feng(validprevlayeractivity)
        #                  -feng(prevlayeractivity))
        #freeeng0 = np.copy(freeeng)
        earlystop = False
        while continuelearning:
            #increment epoch counter
            epoch += 1
            #print("epoch = "+str(epoch))

            #loop over minibatches
            for batch in range(nbatch):

                #get minibatch
                minibatch = prevlayeractivity[:,
                                              batch * batchsize:(batch + 1) *
                                              batchsize]

                # get visible layer activity
                vact = minibatch

                # get hidden layer activity and poshidstates
                hact, hstate = hsample()

                # get product of visible layer and hidden layer actvities
                pprod = hact.dot(vact.T)

                # get sum of visible layer activity
                pvsum = np.sum(vact, axis=1, keepdims=True)

                # get sum of hidden layer activity
                phsum = np.sum(hact, axis=1, keepdims=True)

                # loop over ncd Gibbs sampling iterations (at least one iteration)
                continuegibbs = True
                gibbs = 0
                while continuegibbs:
                    #increment gibbs counter
                    gibbs += 1
                    # get visible layer activity | hidden layer states
                    vact = vsample()
                    # sample hidden layer state | visible layer activity
                    hact, _ = hsample()
                    # use hidden layer activity instead of state for subsequent
                    # iterations so we overwrite hstate with the activity
                    hstate = np.copy(hact)
                    #exit condition
                    if gibbs >= ncd:
                        continuegibbs = False
                # get product of visible layer and hidden layer actvities
                nprod = hact.dot(vact.T)
                # get sum of visible layer activity
                nvsum = np.sum(vact, axis=1, keepdims=True)
                # get sum of hidden layer activity
                nhsum = np.sum(hact, axis=1, keepdims=True)

                # accumulate error
                err += np.sum(np.square(minibatch - vact))

                # get forces on visible layer biases
                dvbias0 = dvbias
                dvbias = (pvsum - nvsum) / batchsize

                # get forces on the hidden layer biases
                dhbias0 = dhbias
                dhbias = (phsum - nhsum) / batchsize

                #calculate forces on weights
                dweight0 = dweight
                dweight = (pprod - nprod) / batchsize
                #add regularization penalty term if specified by layer
                if hidlayer["regval"] > 0:
                    if hidlayer["lreg"] == 1:
                        dweight -= hidlayer["regval"] * np.sign(
                            hidlayer["weight"])
                    if hidlayer["lreg"] == 2:
                        dweight -= hidlayer["regval"] * hidlayer["weight"]

                #adjust learning rate to ensure integrator doesn't break
                if np.all(abs(dweight) >= np.finfo(float).eps):
                    alpha = alpha_norm * np.max(
                        np.divide(hidlayer["weight"], dweight))
                #print(alpha)

                #update weights with momentum term
                hidlayer["weight"] += momentum_adj * dweight0 + alpha * dweight

                # update visible layer biases with momentum term
                vislayer["bias"] += momentum_adj * dvbias0 + alpha * dvbias

                # update hidden layer biases with momentum term
                hidlayer["bias"] += momentum_adj * dhbias0 + alpha * dhbias

            # periodically check free energy for overfitting
            valid_feng[epoch % nadj] = feng(validprevlayeractivity)
            train_feng[epoch % nadj] = feng(prevlayeractivity)
            #freeeng[epoch%nadj] = feng(validprevlayeractivity)-feng(prevlayeractivity)
            #print(np.mean(freeeng))
            if epoch % nadj == (nadj - 1):
                #default turn off momentum
                momentum_adj = 0
                #if train_feng inc then turn off momentum and continue training
                #else
                if np.polyfit(np.arange(nadj), train_feng, 1)[0] < 0:
                    #   if valid_feng is inc then initiate earlystopping
                    if np.polyfit(np.arange(nadj), valid_feng, 1)[0] > 0:
                        earlystop = True
                #   else turn on momentum and continue training
                    else:
                        momentum_adj = momentum

                #if np.mean(freeeng) > np.mean(freeeng0)+0*np.std(freeeng0):
                #    #initiate naive earlystopping
                #    earlystop = True
                #    print("Free engergy prev = " +str(np.mean(freeeng0)))
                #    print("Free engergy curr = " +str(np.mean(freeeng)))
                #freeeng0 = np.copy(freeeng)

            # - EXIT CONDITIONS -
            #exit if learning is taking too long
            if epoch > int(maxepoch):
                print(
                    "Warning contrastivedivergence.py: Training is taking a long time!"
                    + " - Try increasing maxepoch - Training will end")
                exitcond = 1
                continuelearning = False
            #exit if naive early stopping has been engaged
            if earlystop:
                print(
                    "Warning contrastivedivergence.py: early stopping after " +
                    str(epoch) + " epochs")
                continuelearning = False

        #symmetrize weights: visible layer gets the transpose of the hidden layer weights
        vislayer["weight"] = hidlayer["weight"].T

        #write the trained hidlayer back into the original model
        model[layerindex] = hidlayer

        #promote prevlayeractivity to current hidlayer activity
        vact = np.copy(prevlayeractivity)
        prevlayeractivity, _ = hsample()

        #promote validation data to current hidden layer too
        vact = np.copy(validprevlayeractivity)
        validprevlayeractivity, _ = hsample()

    # return exit condition
    return exitcond, smodel