def copy(self):
    """Build and return a duplicate of this FFN.

    A new FFN is constructed from this network's ``bodyplan`` together with
    its ``weightstd``, ``pre`` and ``post`` attributes. The body of the new
    network is then replaced by ``copy_ffn(self.body)``.

    Returns:
        The newly constructed FFN.
    """
    from crpm.ffn_bodyplan import copy_ffn

    #build a fresh network from the same construction arguments
    duplicate = FFN(self.bodyplan,
                    std=self.weightstd,
                    pre=self.pre,
                    post=self.post)

    #swap the freshly initialised body for a copy of the current one
    duplicate.body = copy_ffn(self.body)

    return duplicate
def update(self, state, action, reward, new_state, validation=None):
    """Train on new data, then do the per-iteration bookkeeping.

    After delegating to ``self.train`` the method:
      * rotates the network queue by one position whenever ``self.iteration``
        is a multiple of ``self.target_every`` (prednet <- targetnet1 <-
        targetnet2 <- targetnet3 <- targetnet4 <- old prednet), copying each
        network with ``copy_ffn``;
      * multiplies ``self.exploration_rate`` by
        ``self.exploration_rate_decay`` (less gambling);
      * increments ``self.iteration``.

    Args:
        state, action, reward, new_state, validation: forwarded unchanged
            to ``self.train``.
    """
    #train our model with the new data
    self.train(state, action, reward, new_state, validation)

    #periodically shift the prediction network into the target network queue
    if self.iteration % self.target_every == 0:
        queue = ("prednet", "targetnet1", "targetnet2", "targetnet3",
                 "targetnet4")
        #hold on to the prediction network before it is overwritten
        held = copy_ffn(self.prednet)
        #every slot takes a copy of the slot that follows it
        for dst, src in zip(queue, queue[1:]):
            setattr(self, dst, copy_ffn(getattr(self, src)))
        #the old prediction network goes to the back of the queue
        setattr(self, queue[-1], copy_ffn(held))

    #shift the exploration rate toward zero
    self.exploration_rate *= self.exploration_rate_decay

    #increment iteration counter
    self.iteration += 1
def r_test_spectra2():
    """test spectra2 can be encoded and generated

    End-to-end scenario, in this order:
      1. seed numpy and load the prototype network and data from
         setup_spectra2;
      2. split the data 2/3 train, 1/3 validation (labels on the first row);
      3. build an autoencoder by contrastive divergence and assert that
         training lowers the validation reconstruction error;
      4. build a discriminator from the first len(prototype) layers plus one
         logistic unit, train it by gradient descent and assert that both
         the cost and the area under the ROC curve improve;
      5. build a generator from the remaining layers, assert that its
         samples reconstruct better than the untrained baseline, then run
         GAN training and assert on the last row of the returned error table.

    NOTE(review): the function ends with an unconditional ``assert False``,
    so it always fails when run to completion. The ``r_`` name prefix
    presumably keeps it out of automatic test collection - confirm.
    """
    import numpy as np
    from crpm.setup_spectra2 import setup_spectra2
    from crpm.dynamics import computecost
    from crpm.analyzebinaryclassifier import analyzebinaryclassifier
    #from crpm.lossfunctions import loss
    #from crpm.analyzebinaryclassifier import plotroc
    from crpm.gradientdecent import gradientdecent
    from crpm.contrastivedivergence import contrastivedivergence
    #from crpm.ffn import FFN
    from crpm.ffn_bodyplan import stack_new_layer
    from crpm.ffn_bodyplan import copy_ffn
    from crpm.fwdprop import fwdprop
    #NOTE(review): backprop is imported but never used below
    from crpm.backprop import backprop
    #from crpm.dynamics import computeforces
    #from crpm.dynamics import maxforce
    from crpm.gan import gan

    #import matplotlib
    #matplotlib.use('TkAgg')
    #import matplotlib.pyplot as plt

    #init numpy seed (every random draw below depends on this stream,
    #so the order of the following calls matters)
    np.random.seed(40017)

    #setup model
    prototype, data = setup_spectra2()

    #get prototype depth (not used below)
    nlayer = len(prototype)

    #get data dimensions: rows are features, columns are observations
    #(nfeat is not used below)
    nfeat = data.shape[0]
    nobv = data.shape[1]

    #zscore data
    #NOTE(review): tdata is never used because "data = tdata" below is
    #commented out, so the un-normalized data is what gets partitioned
    tdata = np.divide(data - np.mean(data, axis=1, keepdims=True),
                      np.std(data, axis=1, keepdims=True))

    #transform features into boltzmann like probs
    #tdata = np.exp(-data)
    #partfunc = np.sum(tdata, axis=1, keepdims = True)
    #tdata = np.divide(tdata,partfunc) #normalize
    #tdata = np.divide(tdata, np.max(tdata, axis=1, keepdims=True))#scale features by maxintensity

    #plt.plot(data[:,0])
    #plt.show()
    #plt.plot(tdata[:,0])
    #plt.show()
    #data = tdata

    #partition data (labels on first row): first 2/3 of the columns train,
    #the remainder validate
    ntrain = 2 * nobv // 3
    target = data[0, :ntrain]
    train = data[1:, :ntrain]
    vtarget = data[0, ntrain:]
    valid = data[1:, ntrain:]

    #return untrained autoencoder (maxepoch=0)
    _, autoencoder = contrastivedivergence(prototype, train, maxepoch=0)

    #calculate initial reconstruction error
    pred, ireconerr = computecost(autoencoder, valid, valid, "mse")
    print("init recon error = " + str(ireconerr))

    ##train prototype
    #_, autoencoder = contrastivedivergence(prototype, train,
    #                                       ncd=2,
    #                                       batchsize=50,
    #                                       nadj=10,
    #                                       maxepoch=100,
    #                                       momentum=0.1)

    #train prototype
    _, autoencoder = contrastivedivergence(prototype, train,
                                           validata=valid,
                                           ncd=1,
                                           batchsize=50,
                                           nadj=10,
                                           maxepoch=100,
                                           momentum=0.0)

    #calculate final reconstruction error
    pred, reconerr = computecost(autoencoder, valid, valid, "mse")
    print("pretrained recon error = " + str(reconerr))

    #assert learning is taking place by reduced recon error.
    assert ireconerr > reconerr

    # ----- Discriminator -----

    #create discriminator: the first len(prototype) autoencoder layers
    #plus a single logistic output unit
    discriminator = copy_ffn(autoencoder[0:len(prototype)])
    discriminator = stack_new_layer(discriminator, n=1, activation="logistic")

    #analyze the binary classifier before it is trained
    pred, icost = computecost(discriminator, valid, vtarget, "bce")
    roc, ireport = analyzebinaryclassifier(pred, vtarget)
    if ireport["AreaUnderCurve"] < .5:
        #flip labels so the reported AUC is measured against the
        #better-matching labelling
        pred, icost = computecost(discriminator, valid, 1 - vtarget, "bce")
        roc, ireport = analyzebinaryclassifier(pred, 1 - vtarget)
    print(ireport)
    #plotroc(roc)

    #train discriminator
    pred, cost, _ = gradientdecent(discriminator, train, target, "bce",
                                   valid, vtarget,
                                   earlystop=True,
                                   finetune=6)

    #analyze trained binary classifier
    pred, cost = computecost(discriminator, valid, vtarget, "bce")
    roc, report = analyzebinaryclassifier(pred, vtarget)
    if report["AreaUnderCurve"] < .5:
        #flip labels
        pred, cost = computecost(discriminator, valid, 1 - vtarget, "bce")
        roc, report = analyzebinaryclassifier(pred, 1 - vtarget)
    print(report)
    #plotroc(roc)

    #assert discriminator can be trained by binary cross entropy error
    assert icost > cost

    #assert discriminator has potential to identify the two classes
    assert report["AreaUnderCurve"] > ireport["AreaUnderCurve"]
    #assert report["AreaUnderCurve"] > .6

    # ----- generator -----

    #create generator from decoder (the autoencoder layers after the
    #first len(prototype) ones)
    generator = copy_ffn(autoencoder[len(prototype):len(autoencoder)])

    #adjust regularization
    for layer in generator:
        layer["regval"] = 0 #.00001

    #correct label indices so layer numbering restarts at 0
    idx = 0
    for layer in generator:
        generator[idx]["layer"] = idx
        idx += 1

    #generate fake samples from uniform random codes
    nfake = 600
    ncode = generator[0]["n"]
    fake, _ = fwdprop(np.random.rand(ncode, nfake), generator)

    #calculate reconstruction error of the fake samples
    pred, fkreconerr = computecost(autoencoder, fake, fake, "mse")
    print("init fake recon error = " + str(fkreconerr))

    #assert fake data recon error is better than untrained recon error
    assert fkreconerr < ireconerr

    #-- Start GAN training---
    ganerr = gan(generator, discriminator, train,
                 maxepoch=20000, batchsize=50, finetune=6.3)

    #assert generator fools discriminator at least some of the time bce<80%.
    #(the plot labels below treat column 0 as the discriminator error and
    #column 1 as the generator error)
    assert ganerr[-1, 1] < .8

    #def moving_average(a, n=3) :
    #    ret = np.cumsum(a, dtype=float)
    #    ret[n:] = ret[n:] - ret[:-n]
    #    return ret[n - 1:] / n

    #fig = plt.figure()
    #plt.plot(ganerr[:, 0], ganerr[:, 1])
    #plt.plot(moving_average(ganerr[:, 0], n=20), moving_average(ganerr[:, 1], n=20))
    #plt.plot(ganerr[0, 0], ganerr[0, 1], marker="D", color="green", markersize=10)
    #plt.plot(ganerr[-1, 0], ganerr[-1, 1], marker="8", color="red", markersize=10)
    #plt.xlabel("discriminator error")
    #plt.ylabel("generator error")
    #plt.show()

    #print("final report")
    #print(report)
    #plotroc(roc)

    #NOTE(review): unconditional failure - presumably left in to force the
    #captured output to be shown; confirm before enabling this test
    assert False
def test_afnetwork():
    """test AF network patients can be encoded and generated

    Scenario, in this order (all random draws come from one seeded numpy
    stream, so the order of the calls matters):
      1. load prototype, training and validation sets from setup_afmodel;
      2. build an untrained autoencoder (contrastive divergence with
         maxepoch=0) and derive a discriminator from its first
         len(prototype) layers plus one logistic unit;
      3. report discriminator quality before and after gradient descent;
      4. derive a generator from the remaining autoencoder layers and run
         GAN training, asserting on the last row of the returned error
         table;
      5. check that the discriminator separates generated samples from
         validation samples poorly (AUC < .55).

    Whenever a report has AUC < .5 the analysis is repeated with flipped
    labels (1 - labels) for BOTH the cost and the ROC analysis, so the cost
    and the report always describe the same labelling.
    """
    import numpy as np
    from crpm.setup_afmodel import setup_afmodel
    from crpm.dynamics import computecost
    from crpm.analyzebinaryclassifier import analyzebinaryclassifier
    #plotroc is only needed by the commented-out plotroc(roc) calls below
    from crpm.analyzebinaryclassifier import plotroc
    from crpm.gradientdecent import gradientdecent
    from crpm.contrastivedivergence import contrastivedivergence
    from crpm.ffn_bodyplan import stack_new_layer
    from crpm.ffn_bodyplan import copy_ffn
    from crpm.fwdprop import fwdprop
    from crpm.gan import gan

    #init numpy seed
    np.random.seed(40017)

    #setup model
    prototype, train, target, valid, vtarget = setup_afmodel()

    #return untrained autoencoder
    _, autoencoder = contrastivedivergence(prototype, train, maxepoch=0)

    # ----- Discriminator -----

    #create discriminator
    discriminator = copy_ffn(autoencoder[0:len(prototype)])
    discriminator = stack_new_layer(discriminator, n=1, activation="logistic")

    print("analyze untrained discriminator to iden subtype")
    pred, icost = computecost(discriminator, valid, vtarget, "bce")
    roc, ireport = analyzebinaryclassifier(pred, vtarget)
    if ireport["AreaUnderCurve"] < .5:
        #flip labels
        pred, icost = computecost(discriminator, valid, 1 - vtarget, "bce")
        roc, ireport = analyzebinaryclassifier(pred, 1 - vtarget)
    print(ireport)
    #plotroc(roc)

    #train discriminator
    pred, cost, _ = gradientdecent(discriminator, train, target, "bce",
                                   valid, vtarget,
                                   earlystop=True,
                                   finetune=7)

    print("analyze trained discriminator to iden subtype")
    pred, cost = computecost(discriminator, valid, vtarget, "bce")
    roc, report = analyzebinaryclassifier(pred, vtarget)
    if report["AreaUnderCurve"] < .5:
        #flip labels
        pred, cost = computecost(discriminator, valid, 1 - vtarget, "bce")
        roc, report = analyzebinaryclassifier(pred, 1 - vtarget)
    print(report)
    #plotroc(roc)

    #disabled checks on the subtype classifier (kept for reference):
    #assert icost > cost
    #assert report["AreaUnderCurve"] > ireport["AreaUnderCurve"]
    #assert report["AreaUnderCurve"] > .55

    # ----- GENERATOR -----

    #create generator from decoder; the slice starts one layer before
    #len(prototype)
    generator = copy_ffn(autoencoder[len(prototype) - 1:len(autoencoder)])

    #correct label indices so layer numbering restarts at 0
    for idx, layer in enumerate(generator):
        layer["layer"] = idx

    #-- Main GAN training---
    ganerr = gan(generator, discriminator, train,
                 maxepoch=100000, batchsize=1, finetune=6)

    #assert generator fools discriminator at least some of the time bce<65%.
    print(ganerr[-1, 1])
    assert ganerr[-1, 1] < .65

    #generate fake data for every training sample
    nsample = train.shape[1]
    fake, _ = fwdprop(np.random.rand(generator[0]["n"], nsample), generator)

    #merge training and fake data (label 1 = real, 0 = generated)
    gandata = np.hstack((train, fake))
    ganlabels = np.hstack((np.repeat(1, nsample), np.repeat(0, nsample)))

    print("analyze trained discriminator on fake vs training set")
    pred, cost = computecost(discriminator, gandata, ganlabels, "bce")
    roc, report = analyzebinaryclassifier(pred, ganlabels)
    if report["AreaUnderCurve"] < .5:
        #flip labels; the cost has to be recomputed against the flipped
        #labels too, otherwise cost and report describe different labellings
        pred, cost = computecost(discriminator, gandata, 1 - ganlabels, "bce")
        roc, report = analyzebinaryclassifier(pred, 1 - ganlabels)
    print(report)
    #plotroc(roc)

    #gen fake data for every validation sample
    nsample = valid.shape[1]
    fake, _ = fwdprop(np.random.rand(generator[0]["n"], nsample), generator)

    #merge validation and fake data
    gandata = np.hstack((valid, fake))
    ganlabels = np.hstack((np.repeat(1, nsample), np.repeat(0, nsample)))

    print("analyze trained discriminator on fake vs vaidation set")
    pred, costv = computecost(discriminator, gandata, ganlabels, "bce")
    roc, reportv = analyzebinaryclassifier(pred, ganlabels)
    if reportv["AreaUnderCurve"] < .5:
        #flip labels
        pred, costv = computecost(discriminator, gandata, 1 - ganlabels, "bce")
        roc, reportv = analyzebinaryclassifier(pred, 1 - ganlabels)
    print(reportv)
    #plotroc(roc)

    #assert discriminator has poor potential to identify fake data
    assert reportv["AreaUnderCurve"] < .55

    #get fake data the discriminator thinks is real
    #NOTE(review): the threshold comes from the training-set report while
    #fake is the validation-sized batch - confirm report (not reportv) is
    #the intended source
    pred, _ = fwdprop(fake, discriminator)
    spoof = fake[:, pred[0, :] > report["OptimalThreshold"]]
def langevindynamics(model, data, targets, lossname,
                     validata=None, valitargets=None,
                     maxepoch=int(1E6), maxbuffer=int(1E3), finetune=6):
    """train fnn model by langevin dynamics

    Training proceeds in windows of 500 integration steps. Each step moves
    weights and biases by half a time step, recomputes the forces, updates
    the momenta ("weightdot"/"biasdot") with friction, force and gaussian
    noise, then moves the positions by the second half step. After every
    window the out-sample cost is evaluated, the best model so far is
    recorded and the temperature is adjusted by the thermostat.

    Args:
        model: sequence of layer dicts; this function reads and writes the
            "weight", "bias", "weightdot" and "biasdot" entries and reads
            "n" (also of the preceding layer).
        data: training inputs handed to the crpm dynamics helpers.
        targets: training targets handed to the crpm dynamics helpers.
        lossname: loss function name handed to the crpm dynamics helpers.
        validata: data used to calculate out-sample error
        valitargets: targets used to calculate out-sample error. The
            training set is used instead unless BOTH validata and
            valitargets are given.
        maxepoch: limit on integration steps; it is checked once per window,
            so training stops after the first window that ends with more
            than maxepoch steps taken.
        maxbuffer: length of the circular history of out-sample costs used
            by the thermostat.
        finetune: the learning rate is 10**(-finetune) times
            maxforce(model, forces).

    Returns:
        (predictions, cost) from the out-sample error calculator, evaluated
        on the best model found. Training modifies model in place; on
        return model holds that best model.
    """
    import numpy as np
    import copy
    from crpm.dynamics import setupdynamics
    from crpm.dynamics import computecost
    from crpm.dynamics import computeforces
    from crpm.dynamics import maxforce
    from crpm.ffn_bodyplan import copy_ffn
    from crpm.pvalue import righttailpvalue

    #learning rate scale relative to the max force
    alpha_norm = 10**(-finetune)
    #number of integration steps per window
    nbuffer = 500

    #langevin hyper parameters
    downtemp = 0.95 #fraction by which temperature is decreased
    uptemp = 1.05 #fraction by which temperature is increased

    #init langevin parameters
    gamma = 5E-2 #viscosity or friction
    invbeta = 1E-6 #temperature ~ 1/beta

    #setup dynamics
    forces = setupdynamics(model, data, targets, lossname)

    #check if using validation set
    is_validating = not ((validata is None) or (valitargets is None))

    #define out-sample error calculator
    def out_sample_error():
        if is_validating:
            pred, cost = computecost(model, validata, valitargets, lossname)
        else:
            pred, cost = computecost(model, data, targets, lossname)
        return pred, cost

    #calculate out-sample error
    _, cost = out_sample_error()

    #init best error and model
    best_cost = copy.copy(cost)
    best_model = copy_ffn(model)

    #init cost history
    costhistory = np.full(maxbuffer, cost)

    #iterate training until:
    # 1) cost diverges - defined true when cost > 1E16
    # or
    # 2) too many iterations - hardcoded to ensure loop exit
    epoch = 0
    window = 0
    continuelearning = True
    while continuelearning:

        #save cost at beginning of window
        init_cost = copy.copy(cost)

        #normalize learning rate alpha based on current forces
        alpha = alpha_norm * maxforce(model, forces)

        #calculate langevin dynamics factors
        timestep = np.sqrt(2 * alpha)
        halftimestep = timestep / 2
        littled = np.exp(-gamma * timestep)
        littleq = (1 - littled) / gamma
        #NOTE(review): the noise amplitude uses gamma*gamma although littled
        #is the per-step damping factor - confirm this is intended
        sigma = np.sqrt(invbeta * (1 - gamma * gamma))

        #loop for training steps in window
        for _ in range(nbuffer):

            #update current learning step
            epoch += 1

            #update model positions by half step
            for layer in forces:
                index = layer["layer"]
                model[index]["weight"] = (
                    model[index]["weight"] +
                    halftimestep * model[index]["weightdot"])
                model[index]["bias"] = (
                    model[index]["bias"] +
                    halftimestep * model[index]["biasdot"])

            #compute forces
            forces = computeforces(model, data, targets, lossname)

            #update model momenta by whole step
            for layer in forces:
                index = layer["layer"]
                ncurr = model[index]["n"]
                nprev = model[index - 1]["n"]
                model[index]["weightdot"] = (
                    littled * model[index]["weightdot"] +
                    littleq * layer["fweight"] +
                    sigma * np.random.randn(ncurr, nprev))
                model[index]["biasdot"] = (
                    littled * model[index]["biasdot"] +
                    littleq * layer["fbias"] +
                    sigma * np.random.randn(ncurr, 1))

            #update model positions by second half-step
            for layer in forces:
                index = layer["layer"]
                model[index]["weight"] = (
                    model[index]["weight"] +
                    halftimestep * model[index]["weightdot"])
                model[index]["bias"] = (
                    model[index]["bias"] +
                    halftimestep * model[index]["biasdot"])

        #calculate out-sample error
        _, cost = out_sample_error()

        #increment window counter and save out sample error in cost history
        window += 1
        costhistory[window % maxbuffer] = copy.copy(cost)

        #Record best error and save model
        if cost <= best_cost:
            best_cost = copy.copy(cost)
            best_model = copy_ffn(model)

        #change in out-sample (validation) error over this window
        out_slope = (cost - init_cost)

        #Thermostat
        #if out_slope is negative then decrease temperature
        #else increase temperature with probability p_out, where p_out is
        #the right-tailed p-value of the current out sample error within
        #the out sample error history.
        if out_slope < 0:
            invbeta *= downtemp
        else:
            pvalue = righttailpvalue(np.array([cost]), costhistory)
            if np.random.random() <= pvalue:
                invbeta *= uptemp

        # - EXIT CONDITIONS -
        #exit if learning is taking too long
        if epoch > maxepoch:
            print(
                "Warning langevindynamics.py: Training is taking a long time!"
                + " - Try increaseing maxepoch - Training will end")
            continuelearning = False
        #exit if cost has diverged
        if cost > 1E16:
            print(
                "Warning langevindynamics.py: diverging cost function " +
                "- try lowering learning rate or inc regularization constant" +
                " - training will end.")
            continuelearning = False

    #return best model: write the best layers back INTO the caller's model.
    #Rebinding the local name (model = copy_ffn(best_model)) would leave the
    #caller holding the last, noisy state while the returned predictions
    #describe the best one. best_model is a private copy, so its contents
    #can be handed over directly.
    for layer, best_layer in zip(model, best_model):
        layer.update(best_layer)

    #return predictions and cost
    return out_sample_error()
def pretrain(self, state, validation=None):
    """ will pretrain deep network model by contrastive divergence

    Args:
        state: 2-D array indexed as state[feature, observation].
        validation: optional boolean mask with one entry per observation
            (its length is checked against state.shape[1]); observations
            flagged True are left out of training. Defaults to using every
            observation.

    Returns:
        None. The method prints a message and returns early when the mask
        length does not match the number of observations or when no
        observation is left for training. Otherwise it prints the
        reconstruction costs and re-initialises targetnet1..targetnet4 from
        copies of prednet.
    """
    #make sure input all have the same number of observations
    nobv = state.shape[1]
    if validation is not None and validation.shape[0] != nobv:
        print(
            "runtime error in pretrain: inconsistent number of observations!"
        )
        return

    #get network input size
    nfeat = state.shape[0]

    if validation is None:
        #manually set validation data to False: the mask selects COLUMNS of
        #state, so it needs one entry per observation (state.shape[1]), not
        #one per feature
        validation = np.full(nobv, False)

    #partition out validation patients from dataset
    intrain = ~validation
    nobv = np.sum(intrain)

    #exit if too few participated
    if nobv < 1:
        print("too few participants found for training")
        return

    #otherwise proceed with training
    data = state[:, intrain].reshape((nfeat, nobv))

    #TODO (left off here by original author): need to pop off last layer in
    #model and add random weight to target and prediction nets

    #return untrained autoencoder
    _, autoencoder = contrastivedivergence(self.prednet, data, maxepoch=0)
    print(autoencoder)

    #calculate initial mean squared error
    pred, _ = fwdprop(data, autoencoder)
    icost, _ = loss("mse", pred, data)
    print(icost)

    #train model
    _, autoencoder = contrastivedivergence(self.prednet, data, maxepoch=100)

    #calculate final mean squared error
    pred, _ = fwdprop(data, autoencoder)
    cost, _ = loss("mse", pred, data)
    print(icost)
    print(cost)

    #reinit the target network(s) with the prediction network
    self.targetnet1 = copy_ffn(self.prednet)
    self.targetnet2 = copy_ffn(self.prednet)
    self.targetnet3 = copy_ffn(self.prednet)
    self.targetnet4 = copy_ffn(self.prednet)
def gradientdecent(model, data, targets, lossname,
                   validata=None, valitargets=None,
                   maxepoch=1E6, earlystop=False, healforces=True, finetune=6):
    """train fnn model by gradient decent

    Training proceeds in windows of 500 steps. After each window the slope
    of the in-sample cost over the window is fitted, the out-sample cost is
    evaluated, the best model so far is recorded and the exit conditions
    are checked.

    Args:
        model: FFN object or as the body in FFN class
        data: training inputs handed to the crpm dynamics helpers.
        targets: training targets handed to the crpm dynamics helpers.
        lossname: loss function string defined in crpm.lossfunctions
        validata: data used to calculate out-sample error
        valitargets: targets used to calculate out-sample error. The
            training set is used instead unless BOTH validata and
            valitargets are given.
        maxepoch: limit on learning steps; no learning is done when it is
            below 1. It is checked once per window, so training stops after
            the first window that ends with more than int(maxepoch) steps.
        earlystop: when True, stop as soon as the out-sample cost of a
            window is worse than the best one seen.
        healforces: when True forces are initialised with setupdynamics
            (allows for reinit to heal bad forces), otherwise with
            computeforces.
        finetune: the learning rate is 10**(-finetune) times
            maxforce(model, forces).

    Returns:
        final predictions and cost along with exit condition.
        Exit conditions are 0) learning converged, 1) learning not
        converged, 2) learning was stopped early, and -1) learning diverged.
        When several conditions hold in the same window the last one checked
        wins (order: 1, 0, 2, -1).
        Training will modify model; on return model holds the best model
        found and the predictions and cost are evaluated on it.
    """
    import numpy as np
    from crpm.dynamics import setupdynamics
    from crpm.dynamics import computecost
    from crpm.dynamics import computeforces
    from crpm.dynamics import maxforce
    from crpm.ffn_bodyplan import copy_ffn
    from crpm.ffn import FFN

    #convergence test constants
    #alpha norm scales learning rate by max force relative to weight
    alpha_norm = 10**(-finetune)
    nbuffer = 500
    maxslope = -1E-6 #max learning slope should be negative but close to zero
    tgrid = np.array(range(nbuffer))
    tsum = np.sum(tgrid)
    tvar = nbuffer * np.sum(np.multiply(tgrid, tgrid)) - tsum * tsum

    #setup dynamics if requested (allows for reinit to heal bad forces)
    if healforces:
        forces = setupdynamics(model, data, targets, lossname)
    else:
        forces = computeforces(model, data, targets, lossname)

    #check if using validation set
    is_validating = not ((validata is None) or (valitargets is None))

    #define out-sample error calculator
    def out_sample_error():
        if is_validating:
            pred, cost = computecost(model, validata, valitargets, lossname)
        else:
            pred, cost = computecost(model, data, targets, lossname)
        return pred, cost

    #calculate out-sample error
    _, cost = out_sample_error()

    #init best error and model
    best_cost = np.copy(cost)
    if isinstance(model, FFN):
        best_model = model.copy()
    else:
        best_model = copy_ffn(model)

    #iterate training until:
    # 1) cost converges - defined as when slope of costbuffer is greater
    #    than -1e-6
    # or
    # 2) out-sample error increases
    # or
    # 3) cost diverges - defined true when cost > 1E16
    # or
    # 4) too many iterations - hardcoded to ensure loop exit
    continuelearning = True

    #Do not do any learning if maxepoch is not a positive integer
    if maxepoch < 1:
        continuelearning = False

    count = 0
    exitcond = 0
    while continuelearning:

        #clear cost buffer
        costbuffer = []

        #normalize learning rate alpha based on current forces
        alpha = alpha_norm * maxforce(model, forces)

        #loop for training steps in buffer
        for _ in tgrid:

            #update current learning step
            count += 1

            #update body weights and biases
            body = model.body if isinstance(model, FFN) else model

            #loop over layer
            for layer in forces:
                index = layer["layer"]
                body[index]["weight"] = (
                    body[index]["weight"] + alpha * layer["fweight"])
                body[index]["bias"] = (
                    body[index]["bias"] + alpha * layer["fbias"])

            #compute forces
            forces = computeforces(model, data, targets, lossname)

            #record cost
            _, cost = computecost(model, data, targets, lossname)
            costbuffer.append(cost)

        #calculate cost slope to check for convergence
        slope = nbuffer * np.sum(np.multiply(
            tgrid, costbuffer)) - tsum * np.sum(costbuffer)
        slope = slope / tvar

        #calculate out-sample error
        _, cost = out_sample_error()

        #Record best error and save model
        if cost <= best_cost:
            best_cost = np.copy(cost)
            if isinstance(model, FFN):
                best_model = model.copy()
            else:
                best_model = copy_ffn(model)

        # - EXIT CONDITIONS -
        #exit if learning is taking too long
        if count > int(maxepoch):
            print(
                "Warning gradientdecent.py: Training is taking a long time!" +
                " - Try increaseing maxepoch - Training will end")
            exitcond = 1
            continuelearning = False
        #exit if learning has plateaued
        if slope > maxslope:
            exitcond = 0
            continuelearning = False
        #exit if early stopping and error has risen
        if earlystop and cost > best_cost:
            print("early stopping")
            exitcond = 2
            continuelearning = False
        #exit if cost has diverged
        if cost > 1E16:
            print(
                "Warning gradientdecent.py: diverging cost function " +
                "- try lowering learning rate or inc regularization constant "
                + "- training will end.")
            exitcond = -1
            continuelearning = False

    #return best model: write the best layers back INTO the caller's model.
    #The previous code assigned best_model from model here, which threw the
    #tracked best model away and left the last (possibly worse) weights in
    #place. best_model is a private copy, so its contents can be handed
    #over directly.
    body = model.body if isinstance(model, FFN) else model
    best_body = best_model.body if isinstance(model, FFN) else best_model
    for layer, best_layer in zip(body, best_body):
        layer.update(best_layer)

    #return predictions and cost
    return (*out_sample_error(), exitcond)