Example #1
def test_classify_spectra2():
    """test spectra2 can find two groups
    """

    import numpy as np
    from crpm.setup_spectra2 import setup_spectra2
    from crpm.dynamics import computecost
    from crpm.gradientdecent import gradientdecent
    from crpm.analyzebinaryclassifier import analyzebinaryclassifier

    #init numpy seed
    np.random.seed(40017)

    #setup model
    discriminator, data = setup_spectra2()

    #partition data (labels on first row)
    nobv = data.shape[1]
    cutoff = 2 * nobv // 3
    target = data[0, :cutoff]
    train = data[1:, :cutoff]
    vtarget = data[0, cutoff:]
    valid = data[1:, cutoff:]

    #analyze untrained discriminator
    pred, icost = computecost(discriminator, valid, vtarget, "bce")
    roc, ireport = analyzebinaryclassifier(pred, vtarget)
    if ireport["AreaUnderCurve"] < .5:
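        #AUC < 0.5 means the raw predictions anti-correlate with the labels, so the
        #equivalent classifier is scored on the flipped (1 - vtarget) labels instead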
        #flip labels
        pred, icost = computecost(discriminator, valid, 1 - vtarget, "bce")
        roc, ireport = analyzebinaryclassifier(pred, 1 - vtarget)
    print(ireport)
    #plotroc(roc)

    #train discriminator
    pred, cost, _ = gradientdecent(discriminator,
                                   train,
                                   target,
                                   "bce",
                                   valid,
                                   vtarget,
                                   earlystop=True,
                                   finetune=7)

    #analyze discriminator
    print("analyze trained discriminator to iden subtype")
    pred, cost = computecost(discriminator, valid, vtarget, "bce")
    roc, report = analyzebinaryclassifier(pred, vtarget)
    if report["AreaUnderCurve"] < .5:
        #flip labels
        pred, cost = computecost(discriminator, valid, 1 - vtarget, "bce")
        roc, report = analyzebinaryclassifier(pred, 1 - vtarget)
    print(report)
    #plotroc(roc)

    #assert discriminator can be trained by binary cross entropy error
    assert icost > cost
    assert report["AreaUnderCurve"] > ireport["AreaUnderCurve"]
    assert report["AreaUnderCurve"] > .8
Example #2
def test_encode_periodiccases_deep():
    """test periodiccases can be encoded
    """
    import numpy as np
    from crpm.setup_periodiccases import setup_periodiccases_deep
    from crpm.contrastivedivergence import contrastivedivergence
    from crpm.dynamics import computecost

    #init numpy seed
    np.random.seed(40017)

    #setup model
    model, data = setup_periodiccases_deep()
    nx = data.shape[0]
    nsample = data.shape[1]

    #remove discriminating layer
    prototype = model[0:-1]

    #partition training and validation data
    valid = data[1:nx, 0:nsample // 3]
    #validtargets = data[0,0:nsample//3]
    train = data[1:nx, nsample // 3:nsample]
    #targets =data[0,nsample//3:nsample]

    #return untrained autoencoder
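    #(maxepoch=0 skips all CD updates, so this just builds the autoencoder stack
    # from the prototype and provides a baseline for the reconstruction error below)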
    _, autoencoder = contrastivedivergence(prototype, train, maxepoch=0)

    #calculate initial reconstruction error
    pred, icost = computecost(autoencoder, valid, valid, "mse")

    #train prototype
    _, autoencoder = contrastivedivergence(prototype,
                                           train,
                                           validata=valid,
                                           ncd=5,
                                           maxepoch=500,
                                           momentum=.05)

    #calculate final reconstruction error
    pred, cost = computecost(autoencoder, valid, valid, "mse")

    #diagnostic
    print(icost)
    print(cost)

    #assert learning is taking place
    assert icost > cost
Example #3
def test_encode_nestedcs():
    """test nested cs data can be encoded
    """

    import numpy as np
    from crpm.setup_nestedcs import setup_nestedcs
    from crpm.contrastivedivergence import contrastivedivergence
    from crpm.dynamics import computecost
    from crpm.analyzebinaryclassifier import analyzebinaryclassifier
    from crpm.ffn_bodyplan import stack_new_layer
    from crpm.gradientdecent import gradientdecent
    #init numpy seed
    np.random.seed(40017)

    #setup model
    model, data = setup_nestedcs()

    #remove discriminating layer
    prototype = model[0:-1]

    #explicitly remove labels from data
    #labels = data[2, :]
    data = data[0:2, :]

    #return untrained autoencoder
    _, autoencoder = contrastivedivergence(prototype, data, maxepoch=0)

    #calculate initial mean squared error
    pred, icost = computecost(autoencoder, data, data, "mse")

    #train model
    _, autoencoder = contrastivedivergence(prototype,
                                           data,
                                           ncd=10,
                                           nadj=10,
                                           maxepoch=1000,
                                           momentum=0.1,
                                           batchsize=10,
                                           finetune=6)

    #calculate final mean squared error
    pred, cost = computecost(autoencoder, data, data, "mse")

    #assert learning is taking place
    assert icost > cost
Example #4
def r_test_pretrain_periodiccases_deep():
    """test pretained periodiccases model encodes better than non pretrained model
    """
    import numpy as np
    from crpm.setup_periodiccases import setup_periodiccases_deep
    from crpm.dynamics import computecost

    from crpm.gradientdecent import gradientdecent
    from crpm.ffn_bodyplan import reinit_ffn
    from crpm.contrastivedivergence import contrastivedivergence
    from crpm.analyzebinaryclassifier import analyzebinaryclassifier
    from crpm.ffn_bodyplan import stack_new_layer
    from crpm.ffn_bodyplan import copy_ffn
    #from crpm.analyzebinaryclassifier import plotroc

    #init numpy seed
    np.random.seed(40017)

    #setup model
    model, data = setup_periodiccases_deep()
    nx = data.shape[0]
    nsample = data.shape[1]

    #partition training and validation data
    valid = data[1:nx, 0:nsample // 3]
    validtargets = data[0, 0:nsample // 3]
    train = data[1:nx, nsample // 3:nsample]
    targets = data[0, nsample // 3:nsample]

    #remove discriminating layer
    prototype = model[0:-1]

    #re-init prototype
    prototype = reinit_ffn(prototype)

    #return untrained autoencoder
    _, autoencoder = contrastivedivergence(prototype, train, maxepoch=0)

    #calculate initial reconstruction error
    pred, icost_encoder = computecost(autoencoder, valid, valid, "mse")

    #conventional training autoencoder
    pred, gcost_encoder, _ = gradientdecent(autoencoder,
                                            train,
                                            train,
                                            "mse",
                                            valid,
                                            valid,
                                            maxepoch=1E6,
                                            earlystop=True,
                                            healforces=False,
                                            finetune=9)

    #assert auto encoder can be conventionally trained
    assert gcost_encoder < icost_encoder

    #re-init prototype
    prototype = reinit_ffn(prototype)

    #CD train autoencoder
    _, autoencoder = contrastivedivergence(prototype,
                                           train,
                                           validata=valid,
                                           ncd=10,
                                           batchsize=20,
                                           nadj=10,
                                           maxepoch=500,
                                           momentum=0.05,
                                           finetune=6)

    #calculate reconstruction error
    pred, cost_encoder = computecost(autoencoder, valid, valid, "mse")
    print(cost_encoder)

    #assert reconstruction error is less than initial recon error
    assert cost_encoder < icost_encoder

    #fine-tune autoencoder
    pred, fcost_encoder, _ = gradientdecent(autoencoder,
                                            train,
                                            train,
                                            "mse",
                                            valid,
                                            valid,
                                            maxepoch=1E6,
                                            earlystop=True,
                                            healforces=False,
                                            finetune=9)

    #assert final reconstruction error is not greater than previous recon error
    assert fcost_encoder <= cost_encoder

    #assert final reconstruction error is not greater than with conventional training
    assert fcost_encoder <= gcost_encoder
Example #5
def r_test_encode_spectra2():
    """test spectra2 can be encoded
    """

    import numpy as np
    from crpm.setup_spectra2 import setup_spectra2
    from crpm.lossfunctions import loss
    from crpm.gradientdecent import gradientdecent
    from crpm.analyzebinaryclassifier import analyzebinaryclassifier
    from crpm.contrastivedivergence import contrastivedivergence
    from crpm.ffn import FFN
    from crpm.dynamics import computecost

    from crpm.fwdprop import fwdprop

    #init numpy seed
    np.random.seed(40017)

    #setup model
    model, data = setup_spectra2()

    #remove discriminating layer
    prototype = model[0:-1]

    #partition data (labels on first row)
    nobv = data.shape[1]
    cutoff = 2 * nobv // 3
    #target = data[0, :cutoff]
    train = data[1:, :cutoff]
    #vtarget = data[0, cutoff:]
    valid = data[1:, cutoff:]

    #return untrained autoencoder
    _, autoencoder = contrastivedivergence(prototype, train, maxepoch=0)

    #calculate initial reconstruction error
    pred, icost = computecost(autoencoder, valid, valid, "mse")
    print("init recon error = " + str(icost))

    #train prototype
    #_, autoencoder = contrastivedivergence(prototype, train, validata=valid,
    #                                       ncd=1,
    #                                       batchsize=50,
    #                                       nadj=10,
    #                                       maxepoch=100,
    #                                       momentum=0.0)
    _, autoencoder = contrastivedivergence(prototype,
                                           train,
                                           validata=valid,
                                           ncd=1,
                                           batchsize=10,
                                           nadj=10,
                                           maxepoch=1000,
                                           momentum=0.9,
                                           finetune=7)

    #calculate final reconstruction error
    pred, cost = computecost(autoencoder, valid, valid, "mse")
    print("pretrained recon error = " + str(cost))

    #assert learning is taking place
    assert icost > cost
Example #6
def r_test_spectra2():
    """test spectra2 can be encoded and generated
    """

    import numpy as np
    from crpm.setup_spectra2 import setup_spectra2
    from crpm.dynamics import computecost
    from crpm.analyzebinaryclassifier import analyzebinaryclassifier
    #from crpm.lossfunctions import loss
    #from crpm.analyzebinaryclassifier import plotroc
    from crpm.gradientdecent import gradientdecent
    from crpm.contrastivedivergence import contrastivedivergence
    #from crpm.ffn import FFN
    from crpm.ffn_bodyplan import stack_new_layer
    from crpm.ffn_bodyplan import copy_ffn
    from crpm.fwdprop import fwdprop
    from crpm.backprop import backprop
    #from crpm.dynamics import computeforces
    #from crpm.dynamics import maxforce

    from crpm.gan import gan
    #import matplotlib
    #matplotlib.use('TkAgg')
    #import matplotlib.pyplot as plt

    #init numpy seed
    np.random.seed(40017)

    #setup model
    prototype, data = setup_spectra2()

    #get prototype depth
    nlayer = len(prototype)

    #get data dimensions
    nfeat = data.shape[0]
    nobv = data.shape[1]

    #zscore data
    tdata = np.divide(data - np.mean(data, axis=1, keepdims=True),
                      np.std(data, axis=1, keepdims=True))

    #transform features into boltzmann like probs
    #tdata = np.exp(-data)
    #partfunc = np.sum(tdata, axis=1, keepdims = True)
    #tdata = np.divide(tdata,partfunc) #normalize
    #tdata = np.divide(tdata, np.max(tdata, axis=1, keepdims=True))#scale features by maxintensity

    #plt.plot(data[:,0])
    #plt.show()
    #plt.plot(tdata[:,0])
    #plt.show()
    #data = tdata

    #partition data (labels on first row)
    ntrain = 2 * nobv // 3
    target = data[0, :ntrain]
    train = data[1:, :ntrain]
    vtarget = data[0, ntrain:]
    valid = data[1:, ntrain:]

    #return untrained autoencoder
    _, autoencoder = contrastivedivergence(prototype, train, maxepoch=0)

    #calculate initial reconstruction error
    pred, ireconerr = computecost(autoencoder, valid, valid, "mse")
    print("init recon error = " + str(ireconerr))

    ##train prototype
    #_, autoencoder = contrastivedivergence(prototype, train,
    #                                       ncd=2,
    #                                       batchsize=50,
    #                                       nadj=10,
    #                                       maxepoch=100,
    #                                       momentum=0.1)
    #train prototype
    _, autoencoder = contrastivedivergence(prototype,
                                           train,
                                           validata=valid,
                                           ncd=1,
                                           batchsize=50,
                                           nadj=10,
                                           maxepoch=100,
                                           momentum=0.0)

    #calculate final reconstruction error
    pred, reconerr = computecost(autoencoder, valid, valid, "mse")
    print("pretrained recon error = " + str(reconerr))

    #assert learning is taking place by reduced recon error.
    assert ireconerr > reconerr

    # ----- Discriminator -----
    #create discriminator
    discriminator = copy_ffn(autoencoder[0:len(prototype)])
    discriminator = stack_new_layer(discriminator, n=1, activation="logistic")
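    #the discriminator reuses the pretrained encoder layers and adds a single
    #logistic output unit for binary classification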
    #analyze trained binary classifier
    pred, icost = computecost(discriminator, valid, vtarget, "bce")
    roc, ireport = analyzebinaryclassifier(pred, vtarget)
    if ireport["AreaUnderCurve"] < .5:
        #flip labels
        pred, icost = computecost(discriminator, valid, 1 - vtarget, "bce")
        roc, ireport = analyzebinaryclassifier(pred, 1 - vtarget)
    print(ireport)
    #plotroc(roc)

    #train discriminator
    pred, cost, _ = gradientdecent(discriminator,
                                   train,
                                   target,
                                   "bce",
                                   valid,
                                   vtarget,
                                   earlystop=True,
                                   finetune=6)

    #analyze trained binary classifier
    pred, cost = computecost(discriminator, valid, vtarget, "bce")
    roc, report = analyzebinaryclassifier(pred, vtarget)
    if report["AreaUnderCurve"] < .5:
        #flip labels
        pred, cost = computecost(discriminator, valid, 1 - vtarget, "bce")
        roc, report = analyzebinaryclassifier(pred, 1 - vtarget)
    print(report)
    #plotroc(roc)

    #assert discriminator can be trained by binary cross entropy error
    assert icost > cost

    #assert discriminator has potential to identify two classes
    assert report["AreaUnderCurve"] > ireport["AreaUnderCurve"]
    #assert report["AreaUnderCurve"] > .6

    # ----- generator -----

    #create generator from decoder
    generator = copy_ffn(autoencoder[len(prototype):len(autoencoder)])

    #adjust regularization
    for layer in generator:
        layer["regval"] = 0  #.00001

    #correct layer indices
    for idx, layer in enumerate(generator):
        layer["layer"] = idx

    #generate fake samples
    nfake = 600
    ncode = generator[0]["n"]
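    #draw nfake random latent codes (uniform on [0, 1)) and decode them into fake samples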
    fake, _ = fwdprop(np.random.rand(ncode, nfake), generator)

    #calculate initial reconstruction error
    pred, fkreconerr = computecost(autoencoder, fake, fake, "mse")
    print("init fake recon error = " + str(fkreconerr))

    #assert fake data recon error is better than untrained recon error
    assert fkreconerr < ireconerr

    #-- Start GAN training---

    ganerr = gan(generator,
                 discriminator,
                 train,
                 maxepoch=20000,
                 batchsize=50,
                 finetune=6.3)

    #assert generator fools discriminator at least some of the time (bce < 0.8)
    assert ganerr[-1, 1] < .8

    #def moving_average(a, n=3) :
    #    ret = np.cumsum(a, dtype=float)
    #    ret[n:] = ret[n:] - ret[:-n]
    #    return ret[n - 1:] / n

    #fig = plt.figure()
    #plt.plot(ganerr[:, 0], ganerr[:, 1])
    #plt.plot(moving_average(ganerr[:, 0], n=20), moving_average(ganerr[:, 1], n=20))
    #plt.plot(ganerr[0, 0], ganerr[0, 1], marker="D", color="green", markersize=10)
    #plt.plot(ganerr[-1, 0], ganerr[-1, 1], marker="8", color="red", markersize=10)
    #plt.xlabel("discriminator error")
    #plt.ylabel("generator error")
    #plt.show()

    #print("final report")
    #print(report)
    #plotroc(roc)

    assert False
Example #7
def test_afnetwork():
    """test AF network patients can be encoded and generated
    """
    #import matplotlib
    #matplotlib.use('TkAgg')
    #import matplotlib.pyplot as plt
    #import matplotlib.patches as mpatches

    import numpy as np
    from crpm.setup_afmodel import setup_afmodel

    from crpm.dynamics import computecost
    from crpm.analyzebinaryclassifier import analyzebinaryclassifier
    #from crpm.lossfunctions import loss
    from crpm.analyzebinaryclassifier import plotroc
    from crpm.gradientdecent import gradientdecent
    from crpm.contrastivedivergence import contrastivedivergence
    #from crpm.ffn import FFN
    from crpm.ffn_bodyplan import stack_new_layer
    from crpm.ffn_bodyplan import copy_ffn
    from crpm.fwdprop import fwdprop
    #from crpm.backprop import backprop
    #from crpm.dynamics import computeforces
    #from crpm.dynamics import maxforce

    from crpm.gan import gan

    #init numpy seed
    np.random.seed(40017)

    #setup model
    prototype, train, target, valid, vtarget = setup_afmodel()

    #trim data
    #maxobv = 150
    #train = train[:,:maxobv]
    #valid = valid[:,:maxobv]
    #target = target[:maxobv]
    #vtarget = vtarget[:maxobv]

    #get prototype depth
    nlayer = len(prototype)

    #get data dimensions
    nfeat = train.shape[0]
    nobv = train.shape[1]

    #return untrained autoencoder
    _, autoencoder = contrastivedivergence(prototype, train, maxepoch=0)

    # ----- Discriminator -----

    #create discriminator
    discriminator = copy_ffn(autoencoder[0:len(prototype)])
    discriminator = stack_new_layer(discriminator, n=1, activation="logistic")

    print("analyze untrained discriminator to iden subtype")
    pred, icost = computecost(discriminator, valid, vtarget, "bce")
    roc, ireport = analyzebinaryclassifier(pred, vtarget)
    if ireport["AreaUnderCurve"] < .5:
        #flip labels
        pred, icost = computecost(discriminator, valid, 1 - vtarget, "bce")
        roc, ireport = analyzebinaryclassifier(pred, 1 - vtarget)
    print(ireport)
    #plotroc(roc)

    #train discriminator
    pred, cost, _ = gradientdecent(discriminator,
                                   train,
                                   target,
                                   "bce",
                                   valid,
                                   vtarget,
                                   earlystop=True,
                                   finetune=7)

    print("analyze trained discriminator to iden subtype")
    pred, cost = computecost(discriminator, valid, vtarget, "bce")
    roc, report = analyzebinaryclassifier(pred, vtarget)
    if report["AreaUnderCurve"] < .5:
        #flip labels
        pred, cost = computecost(discriminator, valid, 1 - vtarget, "bce")
        roc, report = analyzebinaryclassifier(pred, 1 - vtarget)
    print(report)
    #plotroc(roc)

    #assert discriminator can be trained by binary cross entropy error
    #assert icost > cost

    #assert discriminator has potential to iden two classes
    #assert report["AreaUnderCurve"] > ireport["AreaUnderCurve"]
    #assert report["AreaUnderCurve"] > .55

    # ----- GENERATOR -----

    #create generator from decoder
    generator = copy_ffn(autoencoder[len(prototype) - 1:len(autoencoder)])

    #correct layer indices
    for idx, layer in enumerate(generator):
        layer["layer"] = idx

    #assert False
    #-- Main GAN training---
    #ganerr = gan(generator, discriminator, train,
    #                   maxepoch=100000, batchsize=1, finetune=6)
    ganerr = gan(generator,
                 discriminator,
                 train,
                 maxepoch=100000,
                 batchsize=1,
                 finetune=6)

    #def moving_average(a, n=3) :
    #    ret = np.cumsum(a, dtype=float)
    #    ret[n:] = ret[n:] - ret[:-n]
    #    return ret[n - 1:] / n

    #ganerr[:,2] = np.log(ganerr[:,2]) #plot density error on logscale
    #discerrbar = moving_average(ganerr[:, 0], n=20)
    #generrbar = moving_average(ganerr[:, 1], n=20)
    #autoerrbar = moving_average(ganerr[:, 2], n=20)

    #assert generator fools discriminator at least some of the time (bce < 0.65)
    print(ganerr[-1, 1])
    assert ganerr[-1, 1] < .65

    #fig = plt.figure()
    #plt.plot(ganerr[:, 0], ganerr[:, 1])
    #plt.plot(discerrbar, generrbar)
    #plt.plot(discerrbar[0], generrbar[0], marker="D", color="green", markersize=10)
    #plt.plot(discerrbar[-1], generrbar[-1], marker="8", color="red", markersize=10)
    #plt.xlabel("discriminator error")
    #plt.ylabel("generator error")
    #plt.show()

    #fig = plt.figure()
    #plt.plot(ganerr[:, 0], ganerr[:, 2])
    #plt.plot(discerrbar, autoerrbar)
    #plt.plot(discerrbar[0], autoerrbar[0], marker="D", color="green", markersize=10)
    #plt.plot(discerrbar[-1], autoerrbar[-1], marker="8", color="red", markersize=10)
    #plt.xlabel("discriminator error")
    #plt.ylabel("encoder error")
    #plt.show()

    #generate fake data for every training sample
    nsample = train.shape[1]
    fake, _ = fwdprop(np.random.rand(generator[0]["n"], nsample), generator)
    #merge training and fake data
    gandata = np.hstack((train, fake))
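    #label real training samples 1 and generated samples 0 for the discriminator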
    ganlabels = np.hstack((np.repeat(1, nsample), np.repeat(0, nsample)))

    print("analyze trained discriminator on fake vs training set")
    pred, cost = computecost(discriminator, gandata, ganlabels, "bce")
    roc, report = analyzebinaryclassifier(pred, ganlabels)
    if report["AreaUnderCurve"] < .5:
        #flip labels
        pred, cost = computecost(discriminator, gandata, ganlabels, "bce")
        roc, report = analyzebinaryclassifier(pred, 1 - ganlabels)
    print(report)
    #plotroc(roc)

    #gen fake data for every validation sample
    nsample = valid.shape[1]
    fake, _ = fwdprop(np.random.rand(generator[0]["n"], nsample), generator)
    #merge validation and fake data
    gandata = np.hstack((valid, fake))
    ganlabels = np.hstack((np.repeat(1, nsample), np.repeat(0, nsample)))

    print("analyze trained discriminator on fake vs vaidation set")
    pred, costv = computecost(discriminator, gandata, ganlabels, "bce")
    roc, reportv = analyzebinaryclassifier(pred, ganlabels)
    if reportv["AreaUnderCurve"] < .5:
        #flip labels
        pred, costv = computecost(discriminator, gandata, 1 - ganlabels, "bce")
        roc, reportv = analyzebinaryclassifier(pred, 1 - ganlabels)
    print(reportv)
    #plotroc(roc)

    #assert discriminator has poor potential to iden fake data
    assert reportv["AreaUnderCurve"] < .55

    #get fake data the discriminator thinks is real
    pred, _ = fwdprop(fake, discriminator)
    spoof = fake[:, pred[0, :] > report["OptimalThreshold"]]
Example #8
#nested helper from gradientdecent (Example #9); it closes over is_validating, model,
#validata, valitargets, data, targets, and lossname from the enclosing scope
def out_sample_error():
    if is_validating:
        pred, cost = computecost(model, validata, valitargets, lossname)
    else:
        pred, cost = computecost(model, data, targets, lossname)
    return pred, cost
Example #9
def gradientdecent(model,
                   data,
                   targets,
                   lossname,
                   validata=None,
                   valitargets=None,
                   maxepoch=1E6,
                   earlystop=False,
                   healforces=True,
                   finetune=6):
    """train fnn model by gradient decent

        Args:
            model: FFN object or as the body in FFN class
            data: training data with features in columns and observation in rows
            targets: labels with targets in columns and observation in rows
            lossname: loss function string defined in crmp.lossfunctions
            validata: data used to calculate out-sample error
            valitargets: targets used to calculate out-sample error
            maxiteration: hard limit of learning iterations default is 10000
        Returns: final predictions and cost along with exit condition.
            Exit conditions are 0) learning converged, 1) learning not
            converged, 2) learning was stopped early, and -1) learning diverged.
            Training will modify model.
    """

    import numpy as np
    from crpm.dynamics import setupdynamics
    #from crpm.dynamics import normalizelearningrate
    from crpm.dynamics import computecost
    from crpm.dynamics import computeforces
    from crpm.dynamics import maxforce
    from crpm.ffn_bodyplan import copy_ffn
    from crpm.ffn import FFN

    #convergence test constants
    #alpha norm scales learning rate by max force relative to weight
    alpha_norm = 10**(-finetune)
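    #e.g. the default finetune=6 gives alpha_norm = 1E-6; larger finetune values take smaller steps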
    #alpha_norm = 1E-8#7#5E-6
    #alpha_norm = 1E-7#5 #scales learning rate by max force relative to weight
    nbuffer = 500
    maxslope = -1E-6  #max learning slope should be negative but close to zero
    tgrid = np.array(range(nbuffer))
    tsum = np.sum(tgrid)
    tvar = nbuffer * np.sum(np.multiply(tgrid, tgrid)) - tsum * tsum
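    #tsum and tvar are the fixed terms of a least-squares fit of cost vs. step index
    #over the buffer: slope = (nbuffer*sum(t*cost) - tsum*sum(cost)) / tvar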

    #setup dynamics if requested (allows for reinit to heal bad forces)
    if healforces:
        forces = setupdynamics(model, data, targets, lossname)
    else:
        forces = computeforces(model, data, targets, lossname)

    #check if using validation set
    is_validating = not ((validata is None) or (valitargets is None))

    #define out-sample error calculator
    def out_sample_error():
        if is_validating:
            pred, cost = computecost(model, validata, valitargets, lossname)
        else:
            pred, cost = computecost(model, data, targets, lossname)
        return pred, cost

    #calculate out-sample error
    _, cost = out_sample_error()

    #init best error and model
    best_cost = np.copy(cost)
    if isinstance(model, FFN):
        best_model = model.copy()
    else:
        best_model = copy_ffn(model)

    #iterate training until:
    # 1) cost converges - defined as when slope of costbuffer is greater than -1e-6
    # or
    # 2) out-sample error increases
    # or
    # 3) cost diverges - defined true when cost > 1E16
    # or
    # 4) too many iterations - hardcoded to ensure loop exit
    continuelearning = True
    #Do not do any learning if maxepoch is not a positive integer
    if maxepoch < 1:
        continuelearning = False
    count = 0
    exitcond = 0
    while continuelearning:

        #clear cost buffer
        costbuffer = []

        #normalize learning rate alpha based on current forces
        alpha = alpha_norm * maxforce(model, forces)
        #alpha = normalizelearningrate(model, forces, alpha_norm)

        #loop for training steps in buffer
        for i in tgrid:

            #update current learning step
            count += 1

            #update body weights and biases
            body = model
            if isinstance(model, FFN):
                body = model.body

            #loop over layers
            for layer in forces:
                index = layer["layer"]
                body[index]["weight"] = body[index]["weight"] + alpha * layer["fweight"]
                body[index]["bias"] = body[index]["bias"] + alpha * layer["fbias"]

            #compute forces
            forces = computeforces(model, data, targets, lossname)

            #record cost
            _, cost = computecost(model, data, targets, lossname)
            costbuffer.append(cost)

        #calculate cost slope to check for convergence
        slope = nbuffer * np.sum(np.multiply(
            tgrid, costbuffer)) - tsum * np.sum(costbuffer)
        slope = slope / tvar

        #calculate out-sample error
        _, cost = out_sample_error()

        #Record best error and save model
        if cost <= best_cost:
            best_cost = np.copy(cost)
            if isinstance(model, FFN):
                best_model = model.copy()
            else:
                best_model = copy_ffn(model)

        # - EXIT CONDITIONS -
        #exit if learning is taking too long
        if count > int(maxepoch):
            print("Warning gradientdecent.py: Training is taking a long time!" +
                  " - Try increasing maxepoch - Training will end")
            exitcond = 1
            continuelearning = False
        #exit if learning has plateaued
        if slope > maxslope:
            exitcond = 0
            continuelearning = False
        #exit if early stopping and error has risen
        if earlystop and cost > best_cost:
            print("early stopping")
            exitcond = 2
            continuelearning = False
        #exit if cost has diverged
        if cost > 1E16:
            print("Warning gradientdecent.py: diverging cost function " +
                  "- try lowering learning rate or increasing regularization constant " +
                  "- training will end.")
            exitcond = -1
            continuelearning = False

    #restore the best model found during training before computing final predictions
    if isinstance(model, FFN):
        model.body = best_model.body
    else:
        model[:] = best_model

    #return predictions and cost
    return (*out_sample_error(), exitcond)