def computeforces(model, data, targets, lossname):
    """Compute the forces on the weights and biases of a model.

    The data is forward propagated through the model, the derivative of the
    loss named ``lossname`` is evaluated against ``targets`` and that
    derivative is back propagated to obtain the forces.

    Args:
        model: either an ``FFN`` object (exposing ``pre``, ``body`` and
            ``post`` networks, where ``pre`` and ``post`` may be None) or a
            bare network accepted directly by ``fwdprop`` and ``backprop``.
        data: input handed to ``fwdprop``.
        targets: targets handed to ``loss``.
        lossname: name of the loss function handed to ``loss``.

    Returns:
        The forces returned by ``backprop``: for an ``FFN`` these are the
        forces on the body network only, otherwise the forces on ``model``.
    """
    from crpm.fwdprop import fwdprop
    from crpm.lossfunctions import loss
    from crpm.backprop import backprop
    from crpm.ffn import FFN

    if isinstance(model, FFN):
        pred = data

        #pre fwd prop if any (its state is not needed: pre is not trained here)
        if model.pre is not None:
            pred, _ = fwdprop(pred, model.pre)

        #body fwd prop
        pred, state = fwdprop(pred, model.body)
        logit = state[-1]["stimulus"]
        activation = model.body[-1]["activation"]

        #post fwd prop if any; the final layer is then the last post layer
        if model.post is not None:
            pred, poststate = fwdprop(pred, model.post)
            logit = poststate[-1]["stimulus"]
            activation = model.post[-1]["activation"]

        #turn off from-logit if final layer was not logit and loss name is not bce
        if not (activation == "logit" and lossname == "bce"):
            logit = None

        #get derivative of loss function
        _, dloss = loss(lossname, pred, targets, logit)

        #post back prop if any, to carry the derivative down to the body output
        if model.post is not None:
            _, dloss = backprop(model.post, poststate, dloss)

        #body back prop to get forces
        forces, _ = backprop(model.body, state, dloss)

        #return FFN forces
        return forces

    #If not FFN object then simply return body forces
    pred, state = fwdprop(data, model)

    #pass the final stimulus only for a logit output with bce loss; in every
    #other case turn off from-logit. (Previously logit was left unbound in the
    #logit+bce case, which raised UnboundLocalError at the loss call.)
    if model[-1]["activation"] == "logit" and lossname == "bce":
        logit = state[-1]["stimulus"]
    else:
        logit = None

    _, dloss = loss(lossname, pred, targets, logit)
    forces, _ = backprop(model, state, dloss)
    return forces
def computecost(model, data, targets, lossname):
    """Forward propagate data through a model and evaluate its cost.

    Args:
        model: an ``FFN`` object (``pre``/``body``/``post`` networks, where
            ``pre`` and ``post`` may be None) or a bare network accepted by
            ``fwdprop``.
        data: input handed to ``fwdprop``.
        targets: targets handed to ``loss``.
        lossname: name of the loss function handed to ``loss``.

    Returns:
        Tuple ``(pred, cost)``: the final predictions and the first value
        returned by ``loss`` for them.
    """
    from crpm.fwdprop import fwdprop
    from crpm.lossfunctions import loss
    from crpm.ffn import FFN

    if not isinstance(model, FFN):
        #bare network: a single forward pass gives the predictions
        output, _ = fwdprop(data, model)
    else:
        #FFN object: chain optional pre, mandatory body, optional post
        output = data
        if model.pre is not None:
            output, _ = fwdprop(output, model.pre)
        output, _ = fwdprop(output, model.body)
        if model.post is not None:
            output, _ = fwdprop(output, model.post)

    #cost of the final predictions
    total, _ = loss(lossname, output, targets)

    return output, total
def test_solve_nestedcs_bce():
    """Check that gradient descent lowers the BCE of the nested-Cs model."""
    import numpy as np
    from crpm.setup_nestedcs import setup_nestedcs
    from crpm.fwdprop import fwdprop
    from crpm.lossfunctions import loss
    from crpm.gradientdecent import gradientdecent

    #fix the numpy seed so the run is reproducible
    np.random.seed(40017)

    #build model and data
    model, data = setup_nestedcs()

    #binary cross entropy error before training
    #(first two rows are the inputs, the last row holds the targets)
    initial_pred, _ = fwdprop(data[0:2, ], model)
    icost, _ = loss("bce", initial_pred, data[-1, ])

    #train the model
    _, cost, _ = gradientdecent(model, data[0:2, ], data[-1, ], "bce")

    #training must improve the error and reach the target threshold
    assert icost > cost
    assert cost < .29
def test_solve_nestedcs_bce():
    """Check that Langevin dynamics lowers the BCE of the nested-Cs model."""
    import numpy as np
    from crpm.setup_nestedcs import setup_nestedcs
    from crpm.fwdprop import fwdprop
    from crpm.lossfunctions import loss
    from crpm.langevindynamics import langevindynamics

    #fix the numpy seed so the run is reproducible
    np.random.seed(40017)

    #build model and data
    model, data = setup_nestedcs()

    #binary cross entropy error before training
    #(first two rows are the inputs, the last row holds the targets)
    initial_pred, _ = fwdprop(data[0:2, ], model)
    icost, _ = loss("bce", initial_pred, data[-1, ])

    #train the model
    _, cost = langevindynamics(
        model,
        data[0:2, ],
        data[-1, ],
        "bce",
        maxepoch=2000,
        maxbuffer=100,
    )

    #training must improve the error and reach the target threshold
    assert icost > cost
    assert cost < .29
def test_numadd_forcedir():
    """Check that a number adder with initial weights > 1 has negative forces."""
    import numpy as np
    from crpm.ffn_bodyplan import read_bodyplan
    from crpm.ffn_bodyplan import init_ffn
    from crpm.dataset import load_dataset
    from crpm.fwdprop import fwdprop
    from crpm.lossfunctions import loss
    from crpm.backprop import backprop

    #build the number adder model from its shallow bodyplan
    addermodel = init_ffn(read_bodyplan("crpm/data/numberadder_bodyplan.csv"))

    #overwrite the layer 1 weights with 1.1 everywhere
    weightshape = addermodel[1]["weight"].shape
    addermodel[1]["weight"] = np.full(weightshape, 1.1)

    #forces under mean squared error on numberadder.csv
    #(first five rows are the inputs, the last row holds the targets)
    _, data = load_dataset("crpm/data/numberadder.csv")
    pred, state = fwdprop(data[0:5, ], addermodel)
    _, dloss = loss("mse", pred, data[-1, ])
    forces, _ = backprop(addermodel, state, dloss)

    #every weight force on the last layer must push the weights down
    lastweightforce = forces[-1]["fweight"]
    assert np.all(lastweightforce < 0)
def test_backprop_numberadder():
    """Check that a solved number adder has zero forces of the proper shape."""
    import numpy as np
    from crpm.ffn_bodyplan import read_bodyplan
    from crpm.ffn_bodyplan import init_ffn
    from crpm.dataset import load_dataset
    from crpm.fwdprop import fwdprop
    from crpm.lossfunctions import loss
    from crpm.backprop import backprop

    #build the number adder model from its shallow bodyplan
    addermodel = init_ffn(read_bodyplan("crpm/data/numberadder_bodyplan.csv"))

    #overwrite the layer 1 weights with ones (the exact solution)
    weightshape = addermodel[1]["weight"].shape
    addermodel[1]["weight"] = np.ones(weightshape)

    #forces under mean squared error on numberadder.csv
    #(first five rows are the inputs, the last row holds the targets)
    _, data = load_dataset("crpm/data/numberadder.csv")
    pred, state = fwdprop(data[0:5, ], addermodel)
    _, dloss = loss("mse", pred, data[-1, ])
    forces, _ = backprop(addermodel, state, dloss)

    lastlayer = forces[-1]

    #weight forces: one per input, all numerically zero
    #(compared as 1+force against 1 so the relative tolerance is meaningful)
    assert lastlayer["fweight"].shape == (1, 5)
    assert np.allclose(1 + lastlayer["fweight"], 1, rtol=1E-7)

    #bias force: a single value, numerically zero
    assert lastlayer["fbias"].shape == (1, 1)
    assert np.allclose(1 + lastlayer["fbias"], 1, rtol=1E-7)
def test_mse():
    """Check the mean squared error cost and its derivative."""
    #three rows of ones compared against one row of targets
    targets = np.array([1, 1, 2, 3, 6])
    pred = np.ones((3, 5))

    #expected derivative is the plain difference (targets broadcast over rows)
    expected_dloss = pred - targets

    cost, dloss = loss("mse", pred, targets)

    assert cost == 3
    assert np.all(dloss == expected_dloss)
def test_solve_numberadder():
    """Check the number adder can be solved starting from preset weights."""
    import numpy as np
    from crpm.ffn_bodyplan import read_bodyplan
    from crpm.dataset import load_dataset
    from crpm.ffn_bodyplan import init_ffn
    from crpm.fwdprop import fwdprop
    from crpm.lossfunctions import loss
    from crpm.langevindynamics import langevindynamics

    #training and testing data
    _, data = load_dataset("crpm/data/numberadder.csv")
    _, testdata = load_dataset("crpm/data/numberadder_test.csv")

    #number adder model from its shallow bodyplan (5 inputs, 1 output)
    model = init_ffn(read_bodyplan("crpm/data/numberadder_bodyplan.csv"))

    #start from layer 1 weights of 1.5 everywhere
    model[1]["weight"] = np.full(model[1]["weight"].shape, 1.5)

    #mean squared error before training
    #(first five rows are the inputs, the last row holds the targets)
    initial_pred, _ = fwdprop(data[0:5, ], model)
    icost, _ = loss("mse", initial_pred, data[-1, ])
    print("icost = "+str(icost))
    print(model[1]["weight"])

    #train with mean squared error, validating against the test set
    _, cost = langevindynamics(
        model,
        data[0:5, ],
        data[-1, ],
        "mse",
        testdata[0:5, ],
        testdata[-1, ],
        maxepoch=300000,
        maxbuffer=1000,
    )
    print("cost ="+str(cost))
    print(model[1]["weight"])

    #error must drop and the weights must converge to 1
    assert icost > cost
    assert np.allclose(model[1]["weight"], 1.0, rtol=.005)
def pretrain(self, state, validation=None):
    """Pretrain the prediction network by contrastive divergence.

    Observations flagged in ``validation`` are excluded; the remaining
    columns of ``state`` are passed to ``contrastivedivergence`` together
    with ``self.prednet``. The mean squared reconstruction error before and
    after training is printed, and the four target networks are then
    re-initialized as copies of ``self.prednet``.

    Args:
        state: 2-D array with features in rows and observations in columns.
        validation: optional mask with one entry per observation (column of
            ``state``); entries that are True are held out of training.
            Assumed to be a boolean numpy array since it is negated with
            ``~`` -- TODO confirm against callers. Defaults to holding out
            nothing.

    Returns:
        None. Also returns early (after printing a message) when the mask
        length does not match the number of observations or when no
        observation is left for training.
    """
    #make sure inputs all have the same number of observations
    nobv = state.shape[1]
    if validation is not None and validation.shape[0] != nobv:
        print("runtime error in pretrain: inconsistent number of observations!")
        return

    #get network input size
    nfeat = state.shape[0]

    if validation is None:
        #no observation is held out. The mask must have one entry per
        #observation (column), not per feature, because it indexes columns.
        validation = np.full(nobv, False)

    #partition out validation patients from dataset
    intrain = ~validation
    ntrain = np.sum(intrain)

    #exit if too few participated
    if ntrain < 1:
        print("too few participants found for training")
        return

    #otherwise proceed with training
    data = state[:, intrain].reshape((nfeat, ntrain))

    #Left off here - need to pop off last layer in model and add random
    #weight to target and prediction nets

    #return untrained autoencoder (zero training epochs)
    _, autoencoder = contrastivedivergence(self.prednet, data, maxepoch=0)
    print(autoencoder)

    #calculate initial mean squared error
    pred, _ = fwdprop(data, autoencoder)
    icost, _ = loss("mse", pred, data)
    print(icost)

    #train model
    _, autoencoder = contrastivedivergence(self.prednet, data, maxepoch=100)

    #calculate final mean squared error
    pred, _ = fwdprop(data, autoencoder)
    cost, _ = loss("mse", pred, data)
    print(icost)
    print(cost)

    #reinit the target networks with the prediction network
    #NOTE(review): this relies on contrastivedivergence having updated
    #self.prednet itself; the returned autoencoder is not stored -- confirm.
    self.targetnet1 = copy_ffn(self.prednet)
    self.targetnet2 = copy_ffn(self.prednet)
    self.targetnet3 = copy_ffn(self.prednet)
    self.targetnet4 = copy_ffn(self.prednet)
def test_solve_toruscases_bce():
    """Check that the torus cases classifier can be trained with BCE."""
    import numpy as np
    from crpm.setup_toruscases import setup_toruscases
    from crpm.fwdprop import fwdprop
    from crpm.lossfunctions import loss
    from crpm.gradientdecent import gradientdecent
    from crpm.analyzebinaryclassifier import analyzebinaryclassifier

    #fix the numpy seed so the run is reproducible
    np.random.seed(40017)

    #build model and data (row 0 holds the targets, remaining rows the inputs)
    model, data = setup_toruscases()
    nx = data.shape[0]
    nsample = data.shape[1]

    #first third of the samples validates, the rest trains
    cut = nsample // 3
    valid = data[1:nx, 0:cut]
    validtargets = data[0, 0:cut]
    train = data[1:nx, cut:nsample]
    targets = data[0, cut:nsample]

    #binary cross entropy error on the training set before training
    pred, _ = fwdprop(train, model)
    icost, _ = loss("bce", pred, targets)

    #analyze the untrained classifier on the validation set; when it ranks
    #worse than chance, flip its predictions and re-evaluate
    pred, _ = fwdprop(valid, model)
    _, ireport = analyzebinaryclassifier(pred, validtargets)
    if ireport["AreaUnderCurve"] < .5:
        pred = 1 - pred
        icost, _ = loss("bce", pred, validtargets)
        _, ireport = analyzebinaryclassifier(pred, validtargets)
    print(ireport)

    #train with early stopping against the validation set
    pred, cost, _ = gradientdecent(
        model, train, targets, "bce", valid, validtargets, earlystop=True
    )

    #analyze the trained classifier the same way
    pred, _ = fwdprop(valid, model)
    _, report = analyzebinaryclassifier(pred, validtargets)
    if report["AreaUnderCurve"] < .5:
        pred = 1 - pred
        cost, _ = loss("bce", pred, validtargets)
        _, report = analyzebinaryclassifier(pred, validtargets)
    print(report)

    print(icost)
    print(cost)

    #training must improve the error and give a usable classifier
    assert icost > cost
    assert cost < .4
    assert report["MatthewsCorrCoef"] > .1
def gan(generator, discriminator, data, maxepoch=500, batchsize=10, finetune=6):
    """Train a generative adversarial network by semi gradient descent.

    Every epoch draws a fresh minibatch of data and of noise and performs
    four updates:
      1. discriminator on real data (BCE against targets of 1);
      2. generator as a decoder of the discriminator's penultimate layer
         output (MSE reconstruction of the real minibatch);
      3. discriminator on generated data (BCE against targets of 0);
      4. generator through the discriminator (BCE against targets of 1).

    The weights and biases of both models are updated in place.

    Args:
        generator: ffn model with number of nodes in output layer equal to
            the number of features in the training data. Its input layer
            must be linear (gaussian noise is sampled) or logistic (uniform
            noise is sampled).
        discriminator: ffn model with single node logistic in the output
            layer and number of nodes in the input layer equal to the number
            of features in the training data. Its penultimate layer must
            match the generator input layer in size and activation.
        data: training data with features in rows and observations in
            columns.
        maxepoch: optional maximum number of training steps (positive int).
        batchsize: optional size of minibatch for SGD training (positive
            int); capped at the number of observations.
        finetune: tuning parameter that scales inversely with learning step;
            the learning rate regulator is ``10**(-finetune)``.

    Returns:
        Array of shape ``(min(maxepoch, 5000), 4)`` whose rows hold
        ``[derr, gerr, autoerr, epoch]`` for the recorded epochs: the
        discriminator BCE on the generated minibatch, the generator BCE, the
        autoencoder MSE and the epoch index. Returns None (after printing a
        warning) when any input check fails.
    """
    import numpy as np
    from crpm.fwdprop import fwdprop
    from crpm.lossfunctions import loss
    from crpm.backprop import backprop
    from crpm.dynamics import maxforce

    #get data dimensions
    nfeat = data.shape[0]
    nobv = data.shape[1]

    # ----- check input -----

    def isnotpositiveint(var):
        """ will return true if var is not a positive integer"""
        return not isinstance(var, int) or var <= 0

    #check discriminator has logistic output
    if discriminator[-1]["activation"] != "logistic":
        print("Warning: discriminator should have logistic output.")
        return None

    #check discriminator has single node output
    if discriminator[-1]["n"] != 1:
        print("Warning: discriminator should output a single number.")
        return None

    #check generator outputs a value for all features in the training data
    if generator[-1]["n"] != nfeat:
        print("Warning: number of nodes in generator ouptut layer should be "
              + "equal to number of rows in data.")
        return None

    #check generator has linear or logistic input
    if (generator[0]["activation"] != "linear"
            and generator[0]["activation"] != "logistic"):
        print("Warning: generator must have linear or logistic input.")
        return None

    #check discriminator penultimate layer has same size and activation as
    #generator input layer
    if (discriminator[-2]["activation"] != generator[0]["activation"]
            or discriminator[-2]["n"] != generator[0]["n"]):
        print("Warning: discriminator penultimate layer must match generator input layer.")
        return None

    #check for positive number of training steps
    if isnotpositiveint(maxepoch):
        print("Warning maxepoch is not a positive integer!")
        return None

    #check for positive minibatch size
    if isnotpositiveint(batchsize):
        print("Warning batchsize is not a positive integer!")
        return None

    # ----- start GAN training -----

    #record at most maxpts epochs: every nadj-th epoch after an initial delay
    nadj = 1
    delay = 0
    maxpts = 5000
    if maxepoch > maxpts:
        nadj = maxepoch // maxpts
        delay = maxepoch % maxpts
    else:
        maxpts = maxepoch

    #init ganerr record for discriminator bce, generator bce, encoder mse,
    #and epoch
    ganerr = np.empty((maxpts, 4))

    #NOTE: tracking and restoring the best autoencoding
    #discriminator-generator pair is not implemented; the models are left in
    #their final state.

    #correct minibatch size if larger than number of observations in data
    minibatch = min(batchsize, nobv)

    #learning rate regulator
    alpha_norm = 10**(-finetune)

    #get number of generator encoding nodes and the kind of noise they expect
    ncode = generator[0]["n"]
    codeactivation = generator[0]["activation"]

    def applyforces(model, forces):
        """Step the weights and biases of model along forces, in place."""
        #normalize learning rate alpha based on current forces
        alpha = alpha_norm * maxforce(model, forces)
        for layer in forces:
            index = layer["layer"]
            model[index]["weight"] = (model[index]["weight"]
                                      + alpha * layer["fweight"])
            model[index]["bias"] = (model[index]["bias"]
                                    + alpha * layer["fbias"])

    #loop over epochs
    for epoch in range(maxepoch):

        #select mini batch from data
        sel = np.random.choice(nobv, size=minibatch, replace=False)
        real = data[:, sel]

        #sample mini batch of noise
        if codeactivation == "linear":
            #sample gaussian distribution
            noise = np.random.randn(ncode, minibatch)
        elif codeactivation == "logistic":
            #sample uniform distribution
            noise = np.random.rand(ncode, minibatch)

        # - - Train discriminator to detect real data:
        #     increase TPR (decr T1err)
        pred, discstate = fwdprop(real, discriminator)
        derr, dloss = loss("bce", pred, np.repeat(1, minibatch))
        forces, _ = backprop(discriminator, discstate, dloss)
        applyforces(discriminator, forces)

        # - - Train generator to reproduce discriminator latent representation:
        #     autoencoding to increase FNR? (incr T2err?)
        #     should improve mode collapse
        #fwd prop encoder (discriminator up to penultimate layer)
        latent, _ = fwdprop(real, discriminator[:-1])
        #fwd prop decoder (generator)
        recon, genstate = fwdprop(latent, generator)
        #compute autoencoder reconstruction error
        autoerr, dloss = loss("mse", recon, real)
        #compute forces on decoder (generator) and update it
        forces, _ = backprop(generator, genstate, dloss)
        applyforces(generator, forces)

        # - - Train discriminator to detect fake data:
        #     increase TNR (decr T2err)
        #generate fake data; genstate is reused below for the generator update
        fake, genstate = fwdprop(noise, generator)
        pred, discstate = fwdprop(fake, discriminator)
        derr, dloss = loss("bce", pred, np.repeat(0, minibatch))
        forces, _ = backprop(discriminator, discstate, dloss)
        applyforces(discriminator, forces)

        # - - Train generator to fool discriminator:
        #     increase FPR (incr T1err)
        #compute discriminator state due to fake data
        pred, discstate = fwdprop(fake, discriminator)
        #calculate derivative of misclassification error
        gerr, dloss = loss("bce", pred, np.repeat(1, minibatch))
        #back prop gradient through the discriminator down to its input
        _, dact = backprop(discriminator, discstate, dloss)
        #get forces on generator and update it
        forces, _ = backprop(generator, genstate, dact)
        applyforces(generator, forces)

        #book keeping
        idx = epoch - delay
        if idx % nadj == 0 and idx >= 0:
            ganerr[idx // nadj, :] = [derr, gerr, autoerr, epoch]

    return ganerr