def train(self, X, R, Q, Y, gamma=1, nIterations=100, weightPrecision=0, errorPrecision=0, verbose=False):
    X = self._standardizeX(X)

    def objectiveF(w):
        self._unpack(w)
        Y, _ = self._forward_pass(X)
        return 0.5 * np.mean((R + gamma * Q - Y)**2)

    def gradF(w):
        self._unpack(w)
        Y, Z = self._forward_pass(X)
        nSamples = X.shape[0]
        delta = -(R + gamma * Q - Y) / nSamples
        dVs, dW = self._backward_pass(delta, Z)
        return self._pack(dVs, dW)

    scgresult = scg.scg(self._pack(self.Vs, self.W), objectiveF, gradF,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        iterationVariable=self.iteration,
                        ftracep=True,
                        verbose=verbose)

    self._unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = scgresult['ftrace']
    self.numberOfIterations = len(self.errorTrace)
    self.trained.value = True
    return self

def train(self, X, T, nIterations=100, verbose=False, weightPrecision=0, errorPrecision=0):
    if self.Xmeans is None:
        self.Xmeans = X.mean(axis=0)
        self.Xstds = X.std(axis=0)
        self.Xconstant = self.Xstds == 0
        self.XstdsFixed = copy(self.Xstds)
        self.XstdsFixed[self.Xconstant] = 1
    X = self._standardizeX(X)

    if T.ndim == 1:
        T = T.reshape((-1, 1))

    if self.Tmeans is None:
        self.Tmeans = T.mean(axis=0)
        self.Tstds = T.std(axis=0)
        self.Tconstant = self.Tstds == 0
        self.TstdsFixed = copy(self.Tstds)
        self.TstdsFixed[self.Tconstant] = 1
    T = self._standardizeT(T)

    # Local functions used by scg()

    def objectiveF(w):
        self._unpack(w)
        Y, _ = self._forward_pass(X)
        return 0.5 * np.mean((Y - T)**2)

    def gradF(w):
        self._unpack(w)
        Y, Z = self._forward_pass(X)
        delta = (Y - T) / (X.shape[0] * T.shape[1])
        dVs, dW = self._backward_pass(delta, Z)
        return self._pack(dVs, dW)

    scgresult = scg.scg(self._pack(self.Vs, self.W), objectiveF, gradF,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        verbose=verbose,
                        ftracep=True)

    self._unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = np.sqrt(scgresult['ftrace'])  # * self.Tstds  # to unstandardize the MSEs
    self.numberOfIterations = len(self.errorTrace)
    self.trained = True
    return self

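# Illustrative, self-contained sketch (not part of the original code) of the
# standardization used by the train() variant above: columns of X with zero
# standard deviation get their divisor replaced by 1 (XstdsFixed), so the
# division never produces NaNs and constant columns simply standardize to 0.
# All names below are local to this sketch.
import numpy as np
from copy import copy

X = np.array([[1.0, 5.0],
              [2.0, 5.0],
              [3.0, 5.0]])          # second column is constant

Xmeans = X.mean(axis=0)
Xstds = X.std(axis=0)
Xconstant = Xstds == 0
XstdsFixed = copy(Xstds)
XstdsFixed[Xconstant] = 1

Xst = (X - Xmeans) / XstdsFixed
print(Xst)                           # the constant column becomes all zeros
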
def train(self, X, T, nIterations=100, weightPrecision=0, errorPrecision=0, verbose=False):
    if self.Xmeans is None:
        self.Xmeans = X.mean(axis=0)
        self.Xstds = X.std(axis=0)
        self.Xconstant = self.Xstds == 0
        self.XstdsFixed = copy(self.Xstds)
        self.XstdsFixed[self.Xconstant] = 1
    X = self._standardizeX(X)

    self.classes, counts = np.unique(T, return_counts=True)
    self.mostCommonClass = self.classes[np.argmax(counts)]  # to break ties
    if self.no != len(self.classes):
        raise ValueError(" In NeuralNetworkClassifier, the number of outputs must equal\n"
                         " the number of classes in the training data. The given number of outputs\n"
                         " is %d and number of classes is %d. Try changing the number of outputs in the\n"
                         " call to NeuralNetworkClassifier()." % (self.no, len(self.classes)))
    T = makeIndicatorVars(T)

    # Local functions used by gradientDescent.scg()

    def objectiveF(w):
        self._unpack(w)
        Y, _ = self._forward_pass(X)
        Y = self._multinomialize(Y)
        Y[Y == 0] = sys.float_info.epsilon
        return -np.mean(T * np.log(Y))

    def gradF(w):
        self._unpack(w)
        Y, Z = self._forward_pass(X)
        Y = self._multinomialize(Y)
        delta = (Y - T) / (X.shape[0] * T.shape[1])
        dVs, dW = self._backward_pass(delta, Z)
        return self._pack(dVs, dW)

    scgresult = scg.scg(self._pack(self.Vs, self.W), objectiveF, gradF,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        ftracep=True,
                        verbose=verbose)

    self._unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = scgresult['ftrace']
    self.numberOfIterations = len(self.errorTrace) - 1
    self.trained = True
    return self

def train(self, X, T, nIterations=100, weightPrecision=0, errorPrecision=0, verbose=False):
    self.timings = []

    if not isinstance(self.layers[0], ConvolutionalLayer):
        if X.shape[1] != self.layers[0].nInputs:
            print('Number of columns in X ({}) does not equal the number of declared network inputs ({}).'
                  .format(X.shape[1], self.layers[0].nInputs))
            return  # sys.exit(1)
        if self.Xmeans is None:
            self.Xmeans = X.mean(axis=0)
            self.Xstds = X.std(axis=0)
            # self.Xconstant = (self.Xstds == 0).reshape((1,-1))
            self.Xconstant = self.Xstds == 0
            self.XstdsFixed = copy(self.Xstds)
            self.XstdsFixed[self.Xconstant] = 1
        X = self._standardizeX(X)
    else:
        pass  # print("TO-DO: Add standardization code to ConvolutionalLayer")

    T = self._preProcessTargets(T)

    if isinstance(self.layers[0], ConvolutionalLayer):
        self.layers[0].updateConvolution = True

    scgresult = scg.scg(self._pack([layer.W for layer in self.layers]),
                        self._objectiveF, self._gradF, X, T,
                        evalFunc=self._scg_verbose_eval(),
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        iterationVariable=self.iteration,
                        ftracep=True,
                        verbose=verbose)

    self._unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace += scgresult['ftrace'].tolist()
    self.numberOfIterations = len(self.errorTrace) - 1
    # self.trained.value = True
    self.trained = True
    return self

def train(self, X, T, nIterations=100, weightPrecision=0, errorPrecision=0, verbose=False):
    if self.Xmeans is None:
        self.Xmeans = X.mean(axis=0)
        self.Xstds = X.std(axis=0)
    X = self._standardizeX(X)

    if T.ndim == 1:
        T = T.reshape((-1, 1))

    if self.Tmeans is None:
        self.Tmeans = T.mean(axis=0)
        self.Tstds = T.std(axis=0)
    T = self._standardizeT(T)

    # Local functions used by gradientDescent.scg()

    def objectiveF(w):
        self._unpack(w)
        Y, _ = self._forward_pass(X)
        return 0.5 * np.mean((Y - T)**2)

    def gradF(w):
        self._unpack(w)
        Y, Z = self._forward_pass(X)
        delta = (Y - T) / (X.shape[0] * T.shape[1])
        dVs, dW = self._backward_pass(delta, Z)
        return self._pack(dVs, dW)

    scgresult = scg.scg(self._pack(self.Vs, self.W), objectiveF, gradF,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        iterationVariable=self.iteration,
                        ftracep=True,
                        verbose=verbose)

    self._unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = scgresult['ftrace']
    self.numberOfIterations = len(self.errorTrace) - 1
    self.trained.value = True
    return self

def train(self, X, T, nIterations=100, weightPrecision=0, errorPrecision=0, verbose=False):
    if self.Xmeans is None:
        self.Xmeans = X.mean(axis=0)
        self.Xstds = X.std(axis=0)
    X = self._standardizeX(X)

    self.classes = np.unique(T)
    if self.no != len(self.classes) - 1:
        raise ValueError(" In NeuralNetworkClassifier, the number of outputs must be one less than\n"
                         " the number of classes in the training data. The given number of outputs\n"
                         " is %d and number of classes is %d. Try changing the number of outputs in the\n"
                         " call to NeuralNetworkClassifier()." % (self.no, len(self.classes)))
    T = ml.makeIndicatorVars(T)

    # Local functions used by gradientDescent.scg()

    def objectiveF(w):
        self._unpack(w)
        Y, _ = self._forward_pass(X)
        Y = self._multinomialize(Y)
        return -np.mean(T * np.log(Y))

    def gradF(w):
        self._unpack(w)
        Y, Z = self._forward_pass(X)
        Y = self._multinomialize(Y)
        delta = (Y[:, :-1] - T[:, :-1]) / (X.shape[0] * (T.shape[1] - 1))
        dVs, dW = self._backward_pass(delta, Z)
        return self._pack(dVs, dW)

    scgresult = scg.scg(self._pack(self.Vs, self.W), objectiveF, gradF,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        iterationVariable=self.iteration,
                        ftracep=True,
                        verbose=verbose)

    self._unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = scgresult['ftrace']
    self.numberOfIterations = len(self.errorTrace) - 1
    self.trained.value = True
    return self

def train(self, X, T, nIterations=100, weightPrecision=0, errorPrecision=0, verbose=False):
    if self.Xmeans is None:
        self.Xmeans = X.mean(axis=0)
        self.Xstds = X.std(axis=0)
        self.Xconstant = self.Xstds == 0
        self.XstdsFixed = copy(self.Xstds)
        self.XstdsFixed[self.Xconstant] = 1
    X = self._standardizeX(X)

    self.classes, counts = np.unique(T, return_counts=True)
    self.mostCommonClass = self.classes[np.argmax(counts)]  # to break ties
    if self.no != len(self.classes) - 1:
        raise ValueError(" In NeuralNetworkClassifier, the number of outputs must be one less than\n"
                         " the number of classes in the training data. The given number of outputs\n"
                         " is %d and number of classes is %d. Try changing the number of outputs in the\n"
                         " call to NeuralNetworkClassifier()." % (self.no, len(self.classes)))
    T = makeIndicatorVars(T)

    # Local functions used by gradientDescent.scg()

    def objectiveF(w):
        self._unpack(w)
        Y, _ = self._forward_pass(X)
        Y = self._multinomialize(Y)
        Y[Y == 0] = sys.float_info.epsilon
        return -np.mean(T * np.log(Y))

    def gradF(w):
        self._unpack(w)
        Y, Z = self._forward_pass(X)
        Y = self._multinomialize(Y)
        delta = (Y[:, :-1] - T[:, :-1]) / (X.shape[0] * (T.shape[1] - 1))
        dVs, dW = self._backward_pass(delta, Z)
        return self._pack(dVs, dW)

    scgresult = scg.scg(self._pack(self.Vs, self.W), objectiveF, gradF,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        ftracep=True,
                        verbose=verbose)

    self._unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = scgresult['ftrace']
    self.numberOfIterations = len(self.errorTrace) - 1
    self.trained = True
    return self

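# Illustrative, self-contained sketch (not part of the original code) of the
# K-1 output parameterization used by the classifier variant above: the network
# produces K-1 scores, a fixed score of 0 is appended for the last class, and a
# softmax turns them into K class probabilities.  This is one hypothetical
# reading of _multinomialize; the actual implementation is not shown here.
import numpy as np

scores = np.array([[2.0, -1.0],        # K-1 = 2 scores per sample, so K = 3 classes
                   [0.5,  0.5]])
scoresWithLast = np.hstack((scores, np.zeros((scores.shape[0], 1))))
expf = np.exp(scoresWithLast)
probs = expf / expf.sum(axis=1, keepdims=True)
print(probs, probs.sum(axis=1))        # each row sums to 1
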
def train(self, X, T, nIterations=100, verbose=False, weightPrecision=0, errorPrecision=0):
    if self.Xmeans is None:
        self.Xmeans = X.mean(axis=0)
        self.Xstds = X.std(axis=0)
        self.Xconstant = self.Xstds == 0
        self.XstdsFixed = copy(self.Xstds)
        self.XstdsFixed[self.Xconstant] = 1
    X = self.standardizeX(X)

    if T.ndim == 1:
        T = T.reshape((-1, 1))

    if self.Tmeans is None:
        self.Tmeans = T.mean(axis=0)
        self.Tstds = T.std(axis=0)
        self.Tconstant = self.Tstds == 0
        self.TstdsFixed = copy(self.Tstds)
        self.TstdsFixed[self.Tconstant] = 1
    T = self.standardizeT(T)

    # Local functions used by scg()

    def pack(Vs, W):
        return np.hstack([V.flat for V in Vs] + [W.flat])

    def unpack(w):
        first = 0
        numInThisLayer = self.ni
        for i in range(len(self.Vs)):
            self.Vs[i][:] = w[first:first + (numInThisLayer + 1) * self.nhs[i]].reshape(
                (numInThisLayer + 1, self.nhs[i]))
            first += (numInThisLayer + 1) * self.nhs[i]
            numInThisLayer = self.nhs[i]
        self.W[:] = w[first:].reshape((numInThisLayer + 1, self.no))

    def objectiveF(w):
        unpack(w)
        Zprev = X
        for i in range(len(self.nhs)):
            V = self.Vs[i]
            Zprev = np.tanh(np.dot(Zprev, V[1:, :]) + V[0:1, :])  # handling bias weight without adding column of 1's
        Y = np.dot(Zprev, self.W[1:, :]) + self.W[0:1, :]
        return np.mean((T - Y)**2)

    def gradF(w):
        unpack(w)
        Zprev = X
        Z = [Zprev]
        for i in range(len(self.nhs)):
            V = self.Vs[i]
            Zprev = np.tanh(np.dot(Zprev, V[1:, :]) + V[0:1, :])
            Z.append(Zprev)
        Y = np.dot(Zprev, self.W[1:, :]) + self.W[0:1, :]
        delta = -(T - Y) / (X.shape[0] * T.shape[1])
        dW = 2 * np.vstack((np.dot(np.ones((1, delta.shape[0])), delta),
                            np.dot(Z[-1].T, delta)))
        dVs = []
        delta = (1 - Z[-1]**2) * np.dot(delta, self.W[1:, :].T)
        for Zi in range(len(self.nhs), 0, -1):
            Vi = Zi - 1  # because X is first element of Z
            dV = 2 * np.vstack((np.dot(np.ones((1, delta.shape[0])), delta),
                                np.dot(Z[Zi - 1].T, delta)))
            dVs.insert(0, dV)
            delta = np.dot(delta, self.Vs[Vi][1:, :].T) * (1 - Z[Zi - 1]**2)
        return pack(dVs, dW)

    scgresult = scg.scg(pack(self.Vs, self.W), objectiveF, gradF,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        verbose=verbose,
                        ftracep=True)

    unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = np.sqrt(scgresult['ftrace'])  # * self.Tstds  # to unstandardize the MSEs
    self.numberOfIterations = len(self.errorTrace)
    self.trained = True
    return self

def train(self, X, T, nIterations=100, verbose=False, weightPrecision=0, errorPrecision=0, saveWeightsHistory=False):
    if self.Xmeans is None:
        self.Xmeans = X.mean(axis=0)
        self.Xstds = X.std(axis=0)
        self.Xconstant = self.Xstds == 0
        self.XstdsFixed = copy(self.Xstds)
        self.XstdsFixed[self.Xconstant] = 1
    X = self.standardizeX(X)

    if T.ndim == 1:
        T = T.reshape((-1, 1))

    if self.Tmeans is None:
        self.Tmeans = T.mean(axis=0)
        self.Tstds = T.std(axis=0)
        self.Tconstant = self.Tstds == 0
        self.TstdsFixed = copy(self.Tstds)
        self.TstdsFixed[self.Tconstant] = 1
    T = self.standardizeT(T)

    def objectiveF(w):
        self.unpack(w)
        Zprev = X
        for i in range(len(self.nhs)):
            V = self.Vs[i]
            Zprev = np.tanh(Zprev @ V[1:, :] + V[0:1, :])  # handling bias weight without adding column of 1's
        Y = Zprev @ self.W[1:, :] + self.W[0:1, :]
        return np.mean((T - Y)**2)

    def gradF(w):
        self.unpack(w)
        Zprev = X
        Z = [Zprev]
        for i in range(len(self.nhs)):
            V = self.Vs[i]
            Zprev = np.tanh(Zprev @ V[1:, :] + V[0:1, :])
            Z.append(Zprev)
        Y = Zprev @ self.W[1:, :] + self.W[0:1, :]
        delta = -(T - Y) / (X.shape[0] * T.shape[1])
        dW = 2 * np.vstack((np.ones((1, delta.shape[0])) @ delta, Z[-1].T @ delta))
        dVs = []
        delta = (1 - Z[-1]**2) * (delta @ self.W[1:, :].T)
        for Zi in range(len(self.nhs), 0, -1):
            Vi = Zi - 1  # because X is first element of Z
            dV = 2 * np.vstack((np.ones((1, delta.shape[0])) @ delta, Z[Zi - 1].T @ delta))
            dVs.insert(0, dV)
            delta = (delta @ self.Vs[Vi][1:, :].T) * (1 - Z[Zi - 1]**2)
        return self.pack(dVs, dW)

    scgresult = scg.scg(self.pack(self.Vs, self.W), objectiveF, gradF,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        verbose=verbose,
                        ftracep=True,
                        xtracep=saveWeightsHistory)

    self.unpack(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = scgresult['ftrace']  # * self.Tstds  # to unstandardize the MSEs
    self.numberOfIterations = len(self.errorTrace)
    self.trained = True
    self.weightsHistory = scgresult['xtrace'] if saveWeightsHistory else None
    return self

def train(self, X, T, nIterations=100, verbose=False, weightPrecision=0, errorPrecision=0):
    if type(T) == np.ndarray:
        ## regular np version.
        if self.Xmeans is None:
            self.Xmeans = X.mean(axis=0)
            self.Xstds = X.std(axis=0)
            self.Xconstant = self.Xstds == 0
            self.XstdsFixed = copy(self.Xstds)
            self.XstdsFixed[self.Xconstant] = 1
        X = self._standardizeX(X)

        if T.ndim == 1:
            T = T.reshape((-1, 1))

        if self.Tmeans is None:
            self.Tmeans = T.mean(axis=0)
            self.Tstds = T.std(axis=0)
            self.Tconstant = self.Tstds == 0
            self.TstdsFixed = copy(self.Tstds)
            self.TstdsFixed[self.Tconstant] = 1
        T = self._standardizeT(T)

        # Local functions used by scg()

        def objectiveF(w):
            self._unpack(w)
            Y, _ = self._forward_pass(X)
            return 0.5 * np.mean((Y - T)**2)

        def gradF(w):
            self._unpack(w)
            Y, Z = self._forward_pass(X)
            delta = (Y - T) / (X.shape[0] * T.shape[1])
            dVs, dW = self._backward_pass(delta, Z)
            return self._pack(dVs, dW)

        scgresult = scg.scg(self._pack(self.Vs, self.W), objectiveF, gradF,
                            xPrecision=weightPrecision,
                            fPrecision=errorPrecision,
                            nIterations=nIterations,
                            verbose=verbose,
                            ftracep=True)

        self._unpack(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace = np.sqrt(scgresult['ftrace'])  # * self.Tstds  # to unstandardize the MSEs
        self.numberOfIterations = len(self.errorTrace)
        self.trained = True

    else:
        ## Tensor version
        # if self.Xmeans is None:  # don't standardize?
        #     self.Xmeans = X.mean().item()
        #     self.Xstds = X.std(False).item()
        #     self.Xconstant = self.Xstds == 0
        #     self.XstdsFixed = copy(self.Xstds)
        # X = self._standardizeX(X)

        if T.dim() == 1:
            T = T.reshape((-1, 1))

        # if self.Tmeans is None:
        #     self.Tmeans = T.mean().item()
        #     self.Tstds = T.std(False).item()
        #     self.Tconstant = self.Tstds == 0
        #     self.TstdsFixed = copy(self.Tstds)
        #     self.TstdsFixed[self.Tconstant] = 1
        # T = self._standardizeT(T)

        # haven't changed these yet
        def objectiveF(w):
            self._unpack(w)
            Y, _ = self._forward_pass(X)
            return 0.5 * np.mean((Y - T)**2)

        def gradF(w):
            self._unpack(w)
            Y, Z = self._forward_pass(X)
            delta = (Y - T) / (X.shape[0] * T.shape[1])
            dVs, dW = self._backward_pass(delta, Z)
            return self._pack(dVs, dW)

        scgresult = scg.scg(self._pack(self.Vs, self.W), objectiveF, gradF,
                            xPrecision=weightPrecision,
                            fPrecision=errorPrecision,
                            nIterations=nIterations,
                            verbose=verbose,
                            ftracep=True)

        self._unpack(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace = np.sqrt(scgresult['ftrace'])  # * self.Tstds  # to unstandardize the MSEs
        self.numberOfIterations = len(self.errorTrace)
        self.trained = True

    return self


def use(self, X, allOutputs=False):
    Xst = self._standardizeX(X)
    Y, Z = self._forward_pass(Xst)
    Y = self._unstandardizeT(Y)
    if Z is None:
        return (Y, None) if allOutputs else Y
    else:
        return (Y, Z[1:]) if allOutputs else Y


def getNumberOfIterations(self):
    return self.numberOfIterations


def getErrorTrace(self):
    return self.errorTrace


def draw(self, inputNames=None, outputNames=None):
    ml.draw(self.Vs + [self.W], inputNames, outputNames)


def _forward_pass(self, X):
    if self.nhs is None:
        # no hidden units, just linear output layer
        Y = np.dot(X, self.W[1:, :]) + self.W[0:1, :]
        Zs = [X]
    else:
        Zprev = X
        Zs = [Zprev]
        for i in range(len(self.nhs)):
            V = self.Vs[i]
            Zprev = np.tanh(np.dot(Zprev, V[1:, :]) + V[0:1, :])
            Zs.append(Zprev)
        Y = np.dot(Zprev, self.W[1:, :]) + self.W[0:1, :]
    return Y, Zs


def _backward_pass(self, delta, Z):
    if self.nhs is None:
        # no hidden units, just linear output layer
        dW = np.vstack((np.dot(np.ones((1, delta.shape[0])), delta),
                        np.dot(Z[0].T, delta)))
        dVs = None
    else:
        dW = np.vstack((np.dot(np.ones((1, delta.shape[0])), delta),
                        np.dot(Z[-1].T, delta)))
        dVs = []
        delta = (1 - Z[-1]**2) * np.dot(delta, self.W[1:, :].T)
        for Zi in range(len(self.nhs), 0, -1):
            Vi = Zi - 1  # because X is first element of Z
            dV = np.vstack((np.dot(np.ones((1, delta.shape[0])), delta),
                            np.dot(Z[Zi - 1].T, delta)))
            dVs.insert(0, dV)
            delta = np.dot(delta, self.Vs[Vi][1:, :].T) * (1 - Z[Zi - 1]**2)
    return dVs, dW


def _standardizeX(self, X):
    result = (X - self.Xmeans) / self.XstdsFixed
    # result[:,self.Xconstant] = 0.0
    return result


def _unstandardizeX(self, Xs):
    return self.Xstds * Xs + self.Xmeans


def _standardizeT(self, T):
    result = (T - self.Tmeans) / self.TstdsFixed
    # result[:,self.Tconstant] = 0.0
    return result


def _unstandardizeT(self, Ts):
    return self.Tstds * Ts + self.Tmeans


def _pack(self, Vs, W):
    if Vs is None:
        return np.array(W.flat)
    else:
        return np.hstack([V.flat for V in Vs] + [W.flat])


def _unpack(self, w):
    if self.nhs is None:
        self.W[:] = w.reshape((self.ni + 1, self.no))
    else:
        first = 0
        numInThisLayer = self.ni
        for i in range(len(self.Vs)):
            self.Vs[i][:] = w[first:first + (numInThisLayer + 1) * self.nhs[i]].reshape(
                (numInThisLayer + 1, self.nhs[i]))
            first += (numInThisLayer + 1) * self.nhs[i]
            numInThisLayer = self.nhs[i]
        self.W[:] = w[first:].reshape((numInThisLayer + 1, self.no))


def __repr__(self):
    str = 'NeuralNetwork({}, {}, {})'.format(self.ni, self.nhs, self.no)
    # str += ' Standardization parameters' + (' not' if self.Xmeans == None else '') + ' calculated.'
    if self.trained:
        str += '\n Network was trained for {} iterations. Final error is {}.'.format(
            self.numberOfIterations, self.errorTrace[-1])
    else:
        str += ' Network is not trained.'
    return str

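# Illustrative, self-contained check (not part of the original code) that the
# tanh backward-pass formulas used above agree with a finite-difference
# gradient estimate for a tiny one-hidden-layer network.  All names below are
# local to this sketch.
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(5, 3))            # 5 samples, 3 inputs
T = rng.normal(size=(5, 2))            # 2 target outputs
V = 0.1 * rng.normal(size=(4, 4))      # (1 bias + 3 inputs) x 4 hidden units
W = 0.1 * rng.normal(size=(5, 2))      # (1 bias + 4 hidden) x 2 outputs

def error(V, W):
    Z = np.tanh(X @ V[1:, :] + V[0:1, :])
    Y = Z @ W[1:, :] + W[0:1, :]
    return 0.5 * np.mean((Y - T)**2)

# Analytic gradient of the output-layer weights, mirroring _backward_pass.
Z = np.tanh(X @ V[1:, :] + V[0:1, :])
Y = Z @ W[1:, :] + W[0:1, :]
delta = (Y - T) / (X.shape[0] * T.shape[1])
dW = np.vstack((np.ones((1, X.shape[0])) @ delta, Z.T @ delta))

# Finite-difference estimate for one weight; the two printed numbers should
# agree to several decimal places.
eps = 1e-6
Wp, Wm = W.copy(), W.copy()
Wp[2, 1] += eps
Wm[2, 1] -= eps
print(dW[2, 1], (error(V, Wp) - error(V, Wm)) / (2 * eps))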