Example #1
    def train(self,X,R,Q,Y,gamma=1,
                 nIterations=100,weightPrecision=0,errorPrecision=0,verbose=False):
        X = self._standardizeX(X)

        def objectiveF(w):
            self._unpack(w)
            Y,_ = self._forward_pass(X)
            return 0.5 *np.mean((R+gamma*Q-Y)**2)

        def gradF(w):
            self._unpack(w)
            Y,Z = self._forward_pass(X)
            nSamples = X.shape[0]
            delta = -(R + gamma * Q - Y) / nSamples
            dVs,dW = self._backward_pass(delta,Z)
            return self._pack(dVs,dW)

        scgresult = scg.scg(self._pack(self.Vs,self.W), objectiveF, gradF,
                            xPrecision = weightPrecision,
                            fPrecision = errorPrecision,
                            nIterations = nIterations,
                            iterationVariable = self.iteration,
                            ftracep=True,
                            verbose=verbose)

        self._unpack(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace = scgresult['ftrace']
        self.numberOfIterations = len(self.errorTrace)
        self.trained.value = True
        return self
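This first variant fits the network output to a reinforcement-learning style target, R + gamma*Q, instead of fixed targets T; note that the Y argument accepted by train is shadowed by the forward-pass output inside objectiveF and gradF. A minimal sketch of the target construction (the array names and shapes below are purely illustrative):

import numpy as np

# Hypothetical batch: one scalar reward and one bootstrapped next-state value per sample.
nSamples = 5
R = np.random.uniform(-1, 0, size=(nSamples, 1))      # immediate rewards
Qnext = np.random.uniform(-1, 1, size=(nSamples, 1))  # next-state value estimates
gamma = 0.9

# The squared-error objective above pulls the network output toward this TD-style target.
target = R + gamma * Qnext
print(target.shape)  # (5, 1)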
    def train(self,
              X,
              T,
              nIterations=100,
              verbose=False,
              weightPrecision=0,
              errorPrecision=0):

        if self.Xmeans is None:
            self.Xmeans = X.mean(axis=0)
            self.Xstds = X.std(axis=0)
            self.Xconstant = self.Xstds == 0
            self.XstdsFixed = copy(self.Xstds)
            self.XstdsFixed[self.Xconstant] = 1
        X = self._standardizeX(X)

        if T.ndim == 1:
            T = T.reshape((-1, 1))

        if self.Tmeans is None:
            self.Tmeans = T.mean(axis=0)
            self.Tstds = T.std(axis=0)
            self.Tconstant = self.Tstds == 0
            self.TstdsFixed = copy(self.Tstds)
            self.TstdsFixed[self.Tconstant] = 1
        T = self._standardizeT(T)

        # Local functions used by scg()

        def objectiveF(w):
            self._unpack(w)
            Y, _ = self._forward_pass(X)
            return 0.5 * np.mean((Y - T)**2)

        def gradF(w):
            self._unpack(w)
            Y, Z = self._forward_pass(X)
            delta = (Y - T) / (X.shape[0] * T.shape[1])
            dVs, dW = self._backward_pass(delta, Z)
            return self._pack(dVs, dW)

        scgresult = scg.scg(self._pack(self.Vs, self.W),
                            objectiveF,
                            gradF,
                            xPrecision=weightPrecision,
                            fPrecision=errorPrecision,
                            nIterations=nIterations,
                            verbose=verbose,
                            ftracep=True)

        self._unpack(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace = np.sqrt(
            scgresult['ftrace'])  # * self.Tstds # to unstandardize the MSEs
        self.numberOfIterations = len(self.errorTrace)
        self.trained = True
        return self
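The regression variants above assume _standardizeX and _standardizeT helpers that subtract per-column means and divide by standard deviations, with constant columns handled through the XstdsFixed/TstdsFixed arrays (a divisor of 1, so such columns map to zero). A standalone sketch of that assumed behavior, with hypothetical function names:

import numpy as np
from copy import copy

def standardize_fit(A):
    # Per-column mean and std; constant columns get a divisor of 1 (mirrors XstdsFixed).
    means = A.mean(axis=0)
    stds = A.std(axis=0)
    stds_fixed = copy(stds)
    stds_fixed[stds == 0] = 1
    return means, stds_fixed

def standardize_apply(A, means, stds_fixed):
    return (A - means) / stds_fixed

X = np.array([[1.0, 5.0], [2.0, 5.0], [3.0, 5.0]])
means, stds_fixed = standardize_fit(X)
print(standardize_apply(X, means, stds_fixed))  # constant second column becomes all zeros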
    def train(self,
              X,
              T,
              nIterations=100,
              weightPrecision=0,
              errorPrecision=0,
              verbose=False):
        if self.Xmeans is None:
            self.Xmeans = X.mean(axis=0)
            self.Xstds = X.std(axis=0)
            self.Xconstant = self.Xstds == 0
            self.XstdsFixed = copy(self.Xstds)
            self.XstdsFixed[self.Xconstant] = 1
        X = self._standardizeX(X)

        self.classes, counts = np.unique(T, return_counts=True)
        self.mostCommonClass = self.classes[np.argmax(counts)]  # to break ties

        if self.no != len(self.classes):
            raise ValueError(
                " In NeuralNetworkClassifier, the number of outputs must equal\n the number of classes in the training data. The given number of outputs\n is %d and number of classes is %d. Try changing the number of outputs in the\n call to NeuralNetworkClassifier()."
                % (self.no, len(self.classes)))
        T = makeIndicatorVars(T)

        # Local functions used by gradientDescent.scg()
        def objectiveF(w):
            self._unpack(w)
            Y, _ = self._forward_pass(X)
            Y = self._multinomialize(Y)
            Y[Y == 0] = sys.float_info.epsilon
            return -np.mean(T * np.log(Y))

        def gradF(w):
            self._unpack(w)
            Y, Z = self._forward_pass(X)
            Y = self._multinomialize(Y)
            delta = (Y - T) / (X.shape[0] * (T.shape[1]))
            dVs, dW = self._backward_pass(delta, Z)
            return self._pack(dVs, dW)

        scgresult = scg.scg(self._pack(self.Vs, self.W),
                            objectiveF,
                            gradF,
                            xPrecision=weightPrecision,
                            fPrecision=errorPrecision,
                            nIterations=nIterations,
                            ftracep=True,
                            verbose=verbose)

        self._unpack(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace = scgresult['ftrace']
        self.numberOfIterations = len(self.errorTrace) - 1
        self.trained = True
        return self
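This classifier variant depends on makeIndicatorVars and _multinomialize, which are not shown in the excerpt. A plausible sketch of what they compute, assuming the K-output parameterization is an ordinary softmax over one-hot indicator targets:

import numpy as np

def make_indicator_vars(T):
    # One-hot indicator matrix, one column per class (assumed makeIndicatorVars behavior).
    classes = np.unique(T)
    return (T.reshape(-1, 1) == classes.reshape(1, -1)).astype(float)

def softmax(Y):
    # Row-wise softmax (assumed _multinomialize behavior for K outputs).
    fs = np.exp(Y - Y.max(axis=1, keepdims=True))  # shift for numerical stability
    return fs / fs.sum(axis=1, keepdims=True)

T = np.array([0, 2, 1, 2])
print(make_indicator_vars(T))                # 4 x 3 indicator matrix
print(softmax(np.array([[1.0, 2.0, 3.0]])))  # probabilities summing to 1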
    def train(self,
              X,
              T,
              nIterations=100,
              weightPrecision=0,
              errorPrecision=0,
              verbose=False):
        self.timings = []
        if not isinstance(self.layers[0], ConvolutionalLayer):
            if X.shape[1] != self.layers[0].nInputs:
                print(
                    'Number of columns in X ({}) does not equal the number of declared network inputs ({}).'
                    .format(X.shape[1], self.layers[0].nInputs))
                return  #sys.exit(1)
            if self.Xmeans is None:
                self.Xmeans = X.mean(axis=0)
                self.Xstds = X.std(axis=0)
                # self.Xconstant = (self.Xstds == 0).reshape((1,-1))
                self.Xconstant = self.Xstds == 0
                self.XstdsFixed = copy(self.Xstds)
                self.XstdsFixed[self.Xconstant] = 1
            X = self._standardizeX(X)
        else:
            pass  #print("TO-DO: Add standardization code to ConvolutionalLayer")

        T = self._preProcessTargets(T)

        if isinstance(self.layers[0], ConvolutionalLayer):
            self.layers[0].updateConvolution = True

        scgresult = scg.scg(self._pack([layer.W for layer in self.layers]),
                            self._objectiveF,
                            self._gradF,
                            X,
                            T,
                            evalFunc=self._scg_verbose_eval(),
                            xPrecision=weightPrecision,
                            fPrecision=errorPrecision,
                            nIterations=nIterations,
                            iterationVariable=self.iteration,
                            ftracep=True,
                            verbose=verbose)

        self._unpack(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace += scgresult['ftrace'].tolist()
        self.numberOfIterations = len(self.errorTrace) - 1
        # self.trained.value = True
        self.trained = True
        return self
Example #5
    def train(self,
              X,
              T,
              nIterations=100,
              weightPrecision=0,
              errorPrecision=0,
              verbose=False):
        if self.Xmeans is None:
            self.Xmeans = X.mean(axis=0)
            self.Xstds = X.std(axis=0)
        X = self._standardizeX(X)

        if T.ndim == 1:
            T = T.reshape((-1, 1))

        if self.Tmeans is None:
            self.Tmeans = T.mean(axis=0)
            self.Tstds = T.std(axis=0)
        T = self._standardizeT(T)

        # Local functions used by gradientDescent.scg()

        def objectiveF(w):
            self._unpack(w)
            Y, _ = self._forward_pass(X)
            return 0.5 * np.mean((Y - T)**2)

        def gradF(w):
            self._unpack(w)
            Y, Z = self._forward_pass(X)
            delta = (Y - T) / (X.shape[0] * T.shape[1])
            dVs, dW = self._backward_pass(delta, Z)
            return self._pack(dVs, dW)

        scgresult = scg.scg(self._pack(self.Vs, self.W),
                            objectiveF,
                            gradF,
                            xPrecision=weightPrecision,
                            fPrecision=errorPrecision,
                            nIterations=nIterations,
                            iterationVariable=self.iteration,
                            ftracep=True,
                            verbose=verbose)

        self._unpack(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace = scgresult['ftrace']
        self.numberOfIterations = len(self.errorTrace) - 1
        self.trained.value = True
        return self
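Every variant hands an (objectiveF, gradF) pair to scg.scg, so a central-difference gradient check is a convenient sanity test before training. The helper below is illustrative only and is demonstrated on a simple quadratic rather than on a packed weight vector:

import numpy as np

def check_gradient(objectiveF, gradF, w, eps=1e-6):
    # Compare an analytic gradient to central finite differences; return the largest mismatch.
    numeric = np.zeros_like(w)
    for i in range(w.size):
        wp, wm = w.copy(), w.copy()
        wp[i] += eps
        wm[i] -= eps
        numeric[i] = (objectiveF(wp) - objectiveF(wm)) / (2 * eps)
    return np.max(np.abs(numeric - gradF(w)))

w0 = np.random.randn(4)
print(check_gradient(lambda w: 0.5 * np.sum(w**2), lambda w: w, w0))  # should be ~1e-10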
    def train(self,
              X,
              T,
              nIterations=100,
              weightPrecision=0,
              errorPrecision=0,
              verbose=False):
        if self.Xmeans is None:
            self.Xmeans = X.mean(axis=0)
            self.Xstds = X.std(axis=0)
        X = self._standardizeX(X)

        self.classes = np.unique(T)
        if self.no != len(self.classes) - 1:
            raise ValueError(
                " In NeuralNetworkClassifier, the number of outputs must be one less than\n the number of classes in the training data. The given number of outputs\n is %d and number of classes is %d. Try changing the number of outputs in the\n call to NeuralNetworkClassifier()."
                % (self.no, len(self.classes)))
        T = ml.makeIndicatorVars(T)

        # Local functions used by gradientDescent.scg()
        def objectiveF(w):
            self._unpack(w)
            Y, _ = self._forward_pass(X)
            Y = self._multinomialize(Y)
            return -np.mean(T * np.log(Y))

        def gradF(w):
            self._unpack(w)
            Y, Z = self._forward_pass(X)
            Y = self._multinomialize(Y)
            delta = (Y[:, :-1] - T[:, :-1]) / (X.shape[0] * (T.shape[1] - 1))
            dVs, dW = self._backward_pass(delta, Z)
            return self._pack(dVs, dW)

        scgresult = scg.scg(self._pack(self.Vs, self.W),
                            objectiveF,
                            gradF,
                            xPrecision=weightPrecision,
                            fPrecision=errorPrecision,
                            nIterations=nIterations,
                            iterationVariable=self.iteration,
                            ftracep=True,
                            verbose=verbose)

        self._unpack(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace = scgresult['ftrace']
        self.numberOfIterations = len(self.errorTrace) - 1
        self.trained.value = True
        return self
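This classifier variant allocates one fewer output unit than there are classes and backpropagates only through Y[:, :-1], which matches a softmax parameterization in which the last class acts as a reference with an implicit score of zero. A sketch of that assumed _multinomialize behavior:

import numpy as np

def multinomialize_k_minus_1(Y):
    # Append a zero-score reference column, then take a row-wise softmax over all K classes.
    scores = np.hstack((Y, np.zeros((Y.shape[0], 1))))
    fs = np.exp(scores - scores.max(axis=1, keepdims=True))
    return fs / fs.sum(axis=1, keepdims=True)

print(multinomialize_k_minus_1(np.array([[0.5, -0.5]])))  # three class probabilities summing to 1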
    def train(self,X,T,nIterations=100,verbose=False,
              weightPrecision=0,errorPrecision=0):
        
        if self.Xmeans is None:
            self.Xmeans = X.mean(axis=0)
            self.Xstds = X.std(axis=0)
            self.Xconstant = self.Xstds == 0
            self.XstdsFixed = copy(self.Xstds)
            self.XstdsFixed[self.Xconstant] = 1
        X = self._standardizeX(X)

        if T.ndim == 1:
            T = T.reshape((-1,1))

        if self.Tmeans is None:
            self.Tmeans = T.mean(axis=0)
            self.Tstds = T.std(axis=0)
            self.Tconstant = self.Tstds == 0
            self.TstdsFixed = copy(self.Tstds)
            self.TstdsFixed[self.Tconstant] = 1
        T = self._standardizeT(T)

        # Local functions used by scg()

        def objectiveF(w):
            self._unpack(w)
            Y,_ = self._forward_pass(X)
            return 0.5 * np.mean((Y - T)**2)

        def gradF(w):
            self._unpack(w)
            Y,Z = self._forward_pass(X)
            delta = (Y - T) / (X.shape[0] * T.shape[1])
            dVs,dW = self._backward_pass(delta,Z)
            return self._pack(dVs,dW)

        scgresult = scg.scg(self._pack(self.Vs,self.W), objectiveF, gradF,
                            xPrecision = weightPrecision,
                            fPrecision = errorPrecision,
                            nIterations = nIterations,
                            verbose=verbose,
                            ftracep=True)

        self._unpack(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace = np.sqrt(scgresult['ftrace']) # * self.Tstds # to unstandardize the MSEs
        self.numberOfIterations = len(self.errorTrace)
        self.trained = True
        return self
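The commented-out * self.Tstds in the line above hints at converting the standardized error trace back into target units. A minimal sketch of that conversion, with made-up numbers and assuming a single target column:

import numpy as np

ftrace = np.array([0.9, 0.4, 0.2])   # standardized MSE values recorded by scg
Tstds = np.array([3.5])              # standard deviation of the one target column
rmse_in_target_units = np.sqrt(ftrace) * Tstds
print(rmse_in_target_units)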
    def train(self,X,T,
                 nIterations=100,weightPrecision=0,errorPrecision=0,verbose=False):
        if self.Xmeans is None:
            self.Xmeans = X.mean(axis=0)
            self.Xstds = X.std(axis=0)
            self.Xconstant = self.Xstds == 0
            self.XstdsFixed = copy(self.Xstds)
            self.XstdsFixed[self.Xconstant] = 1
        X = self._standardizeX(X)

        self.classes, counts = np.unique(T,return_counts=True)
        self.mostCommonClass = self.classes[np.argmax(counts)]  # to break ties

        if self.no != len(self.classes)-1:
            raise ValueError(" In NeuralNetworkClassifier, the number of outputs must be one less than\n the number of classes in the training data. The given number of outputs\n is %d and number of classes is %d. Try changing the number of outputs in the\n call to NeuralNetworkClassifier()." % (self.no, len(self.classes)))
        T = makeIndicatorVars(T)

        # Local functions used by gradientDescent.scg()
        def objectiveF(w):
            self._unpack(w)
            Y,_ = self._forward_pass(X)
            Y = self._multinomialize(Y)
            Y[Y==0] = sys.float_info.epsilon
            return -np.mean(T * np.log(Y))

        def gradF(w):
            self._unpack(w)
            Y,Z = self._forward_pass(X)
            Y = self._multinomialize(Y)
            delta = (Y[:,:-1] - T[:,:-1]) / (X.shape[0] * (T.shape[1]-1))
            dVs,dW = self._backward_pass(delta,Z)
            return self._pack(dVs,dW)

        scgresult = scg.scg(self._pack(self.Vs,self.W), objectiveF, gradF,
                            xPrecision = weightPrecision,
                            fPrecision = errorPrecision,
                            nIterations = nIterations,
                            ftracep=True,
                            verbose=verbose)

        self._unpack(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace = scgresult['ftrace']
        self.numberOfIterations = len(self.errorTrace) - 1
        self.trained = True
        return self
Example #9
    def train(self,X,T,
              nIterations=100,weightPrecision=0,errorPrecision=0,verbose=False):
        if self.Xmeans is None:
            self.Xmeans = X.mean(axis=0)
            self.Xstds = X.std(axis=0)
        X = self._standardizeX(X)

        if T.ndim == 1:
            T = T.reshape((-1,1))

        if self.Tmeans is None:
            self.Tmeans = T.mean(axis=0)
            self.Tstds = T.std(axis=0)
        T = self._standardizeT(T)

        # Local functions used by gradientDescent.scg()

        def objectiveF(w):
            self._unpack(w)
            Y,_ = self._forward_pass(X)
            return 0.5 * np.mean((Y - T)**2)

        def gradF(w):
            self._unpack(w)
            Y,Z = self._forward_pass(X)
            delta = (Y - T) / (X.shape[0] * T.shape[1])
            dVs,dW = self._backward_pass(delta,Z)
            return self._pack(dVs,dW)

        scgresult = scg.scg(self._pack(self.Vs,self.W), objectiveF, gradF,
                            xPrecision = weightPrecision,
                            fPrecision = errorPrecision,
                            nIterations = nIterations,
                            iterationVariable = self.iteration,
                            ftracep=True,
                            verbose=verbose)

        self._unpack(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace = scgresult['ftrace']
        self.numberOfIterations = len(self.errorTrace) - 1
        self.trained.value = True
        return self
Example #10
    def train(self,
              X,
              R,
              Q,
              Y,
              gamma=1,
              nIterations=100,
              weightPrecision=0,
              errorPrecision=0,
              verbose=False):
        X = self._standardizeX(X)

        def objectiveF(w):
            self._unpack(w)
            Y, _ = self._forward_pass(X)
            return 0.5 * np.mean((R + gamma * Q - Y)**2)

        def gradF(w):
            self._unpack(w)
            Y, Z = self._forward_pass(X)
            nSamples = X.shape[0]
            delta = -(R + gamma * Q - Y) / nSamples
            dVs, dW = self._backward_pass(delta, Z)
            return self._pack(dVs, dW)

        scgresult = scg.scg(self._pack(self.Vs, self.W),
                            objectiveF,
                            gradF,
                            xPrecision=weightPrecision,
                            fPrecision=errorPrecision,
                            nIterations=nIterations,
                            iterationVariable=self.iteration,
                            ftracep=True,
                            verbose=verbose)

        self._unpack(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace = scgresult['ftrace']
        self.numberOfIterations = len(self.errorTrace)
        self.trained.value = True
        return self
Example #11
    def train(self,X,T,nIterations=100,verbose=False,
              weightPrecision=0,errorPrecision=0):
        
        if self.Xmeans is None:
            self.Xmeans = X.mean(axis=0)
            self.Xstds = X.std(axis=0)
            self.Xconstant = self.Xstds == 0
            self.XstdsFixed = copy(self.Xstds)
            self.XstdsFixed[self.Xconstant] = 1
        X = self.standardizeX(X)

        if T.ndim == 1:
            T = T.reshape((-1,1))

        if self.Tmeans is None:
            self.Tmeans = T.mean(axis=0)
            self.Tstds = T.std(axis=0)
            self.Tconstant = self.Tstds == 0
            self.TstdsFixed = copy(self.Tstds)
            self.TstdsFixed[self.Tconstant] = 1
        T = self.standardizeT(T)

        # Local functions used by scg()

        def pack(Vs,W):
            return np.hstack([V.flat for V in Vs] + [W.flat])

        def unpack(w):
            first = 0
            numInThisLayer = self.ni
            for i in range(len(self.Vs)):
                self.Vs[i][:] = w[first:first+(numInThisLayer+1)*self.nhs[i]].reshape((numInThisLayer+1,self.nhs[i]))
                first += (numInThisLayer+1) * self.nhs[i]
                numInThisLayer = self.nhs[i]
            self.W[:] = w[first:].reshape((numInThisLayer+1,self.no))

        def objectiveF(w):
            unpack(w)
            Zprev = X
            for i in range(len(self.nhs)):
                V = self.Vs[i]
                Zprev = np.tanh(np.dot(Zprev,V[1:,:]) + V[0:1,:])  # handling bias weight without adding column of 1's
            Y = np.dot(Zprev, self.W[1:,:]) + self.W[0:1,:]
            return np.mean((T-Y)**2)

        def gradF(w):
            unpack(w)
            Zprev = X
            Z = [Zprev]
            for i in range(len(self.nhs)):
                V = self.Vs[i]
                Zprev = np.tanh(np.dot(Zprev,V[1:,:]) + V[0:1,:])
                Z.append(Zprev)
            Y = np.dot(Zprev, self.W[1:,:]) + self.W[0:1,:]
            delta = -(T - Y) / (X.shape[0] * T.shape[1])
            dW = 2 * np.vstack((np.dot(np.ones((1,delta.shape[0])),delta),  np.dot( Z[-1].T, delta)))
            dVs = []
            delta = (1-Z[-1]**2) * np.dot( delta, self.W[1:,:].T)
            for Zi in range(len(self.nhs),0,-1):
                Vi = Zi - 1 # because X is first element of Z
                dV = 2 * np.vstack(( np.dot(np.ones((1,delta.shape[0])), delta),
                                 np.dot( Z[Zi-1].T, delta)))
                dVs.insert(0,dV)
                delta = np.dot( delta, self.Vs[Vi][1:,:].T) * (1-Z[Zi-1]**2)
            return pack(dVs,dW)

        scgresult = scg.scg(pack(self.Vs,self.W), objectiveF, gradF,
                            xPrecision = weightPrecision,
                            fPrecision = errorPrecision,
                            nIterations = nIterations,
                            verbose=verbose,
                            ftracep=True)

        unpack(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace = np.sqrt(scgresult['ftrace']) # * self.Tstds # to unstandardize the MSEs
        self.numberOfIterations = len(self.errorTrace)
        self.trained = True
        return self
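The inline pack and unpack above fix the flat weight-vector layout that scg.scg optimizes: each hidden layer's (nInputsToLayer + 1) x nUnits matrix, bias row first, followed by the (nUnitsLast + 1) x nOutputs output matrix. A self-contained round-trip check with illustrative shapes:

import numpy as np

V = np.arange(9.0).reshape(3, 3)   # (2 inputs + bias) x 3 hidden units
W = np.arange(4.0).reshape(4, 1)   # (3 hidden units + bias) x 1 output

w = np.hstack([V.flat, W.flat])    # what pack() produces
V2 = w[:9].reshape(3, 3)           # what unpack() recovers for the hidden layer
W2 = w[9:].reshape(4, 1)           # and for the output layer
print(np.array_equal(V, V2), np.array_equal(W, W2))  # True True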
Example #12
    def train(self,
              X,
              T,
              nIterations=100,
              verbose=False,
              weightPrecision=0,
              errorPrecision=0,
              saveWeightsHistory=False):

        if self.Xmeans is None:
            self.Xmeans = X.mean(axis=0)
            self.Xstds = X.std(axis=0)
            self.Xconstant = self.Xstds == 0
            self.XstdsFixed = copy(self.Xstds)
            self.XstdsFixed[self.Xconstant] = 1
        X = self.standardizeX(X)

        if T.ndim == 1:
            T = T.reshape((-1, 1))

        if self.Tmeans is None:
            self.Tmeans = T.mean(axis=0)
            self.Tstds = T.std(axis=0)
            self.Tconstant = self.Tstds == 0
            self.TstdsFixed = copy(self.Tstds)
            self.TstdsFixed[self.Tconstant] = 1
        T = self.standardizeT(T)

        def objectiveF(w):
            self.unpack(w)
            Zprev = X
            for i in range(len(self.nhs)):
                V = self.Vs[i]
                Zprev = np.tanh(
                    Zprev @ V[1:, :] + V[0:1, :]
                )  # handling bias weight without adding column of 1's
            Y = Zprev @ self.W[1:, :] + self.W[0:1, :]
            return np.mean((T - Y)**2)

        def gradF(w):
            self.unpack(w)
            Zprev = X
            Z = [Zprev]
            for i in range(len(self.nhs)):
                V = self.Vs[i]
                Zprev = np.tanh(Zprev @ V[1:, :] + V[0:1, :])
                Z.append(Zprev)
            Y = Zprev @ self.W[1:, :] + self.W[0:1, :]
            delta = -(T - Y) / (X.shape[0] * T.shape[1])
            dW = 2 * np.vstack((np.ones(
                (1, delta.shape[0])) @ delta, Z[-1].T @ delta))
            dVs = []
            delta = (1 - Z[-1]**2) * (delta @ self.W[1:, :].T)
            for Zi in range(len(self.nhs), 0, -1):
                Vi = Zi - 1  # because X is first element of Z
                dV = 2 * np.vstack((np.ones(
                    (1, delta.shape[0])) @ delta, Z[Zi - 1].T @ delta))
                dVs.insert(0, dV)
                delta = (delta @ self.Vs[Vi][1:, :].T) * (1 - Z[Zi - 1]**2)
            return self.pack(dVs, dW)

        scgresult = scg.scg(self.pack(self.Vs, self.W),
                            objectiveF,
                            gradF,
                            xPrecision=weightPrecision,
                            fPrecision=errorPrecision,
                            nIterations=nIterations,
                            verbose=verbose,
                            ftracep=True,
                            xtracep=saveWeightsHistory)

        self.unpack(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace = scgresult[
            'ftrace']  # * self.Tstds # to unstandardize the MSEs
        self.numberOfIterations = len(self.errorTrace)
        self.trained = True
        self.weightsHistory = scgresult[
            'xtrace'] if saveWeightsHistory else None
        return self
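To round out this variant, a hypothetical usage sketch. The class name and a NeuralNetwork(ni, nhs, no) constructor are assumptions inferred from the attributes referenced above, so the class-dependent calls are left commented out; only the data setup actually runs:

import numpy as np

X = np.linspace(0, 10, 50).reshape(-1, 1)   # toy regression inputs
T = np.sin(X)                                # toy targets

# Assumed constructor and calls:
# nnet = NeuralNetwork(1, [10, 10], 1)
# nnet.train(X, T, nIterations=200, saveWeightsHistory=True)
# print(nnet.errorTrace[-1])          # final entry of scg's ftrace
# print(len(nnet.weightsHistory))     # weight vectors recorded via xtracep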
Example #13
    def train(self, X, T, nIterations=100, verbose=False,
              weightPrecision=0, errorPrecision=0):

        if type(T) == np.ndarray:
            ## regular np version.
            if self.Xmeans is None:
                self.Xmeans = X.mean(axis=0)
                self.Xstds = X.std(axis=0)
                self.Xconstant = self.Xstds == 0
                self.XstdsFixed = copy(self.Xstds)
                self.XstdsFixed[self.Xconstant] = 1
            X = self._standardizeX(X)

            if T.ndim == 1:
                T = T.reshape((-1, 1))

            if self.Tmeans is None:
                self.Tmeans = T.mean(axis=0)
                self.Tstds = T.std(axis=0)
                self.Tconstant = self.Tstds == 0
                self.TstdsFixed = copy(self.Tstds)
                self.TstdsFixed[self.Tconstant] = 1
            T = self._standardizeT(T)

            # Local functions used by scg()

            def objectiveF(w):
                self._unpack(w)
                Y, _ = self._forward_pass(X)
                return 0.5 * np.mean((Y - T) ** 2)

            def gradF(w):
                self._unpack(w)
                Y, Z = self._forward_pass(X)
                delta = (Y - T) / (X.shape[0] * T.shape[1])
                dVs, dW = self._backward_pass(delta, Z)
                return self._pack(dVs, dW)

            scgresult = scg.scg(self._pack(self.Vs, self.W), objectiveF, gradF,
                                xPrecision=weightPrecision,
                                fPrecision=errorPrecision,
                                nIterations=nIterations,
                                verbose=verbose,
                                ftracep=True)

            self._unpack(scgresult['x'])
            self.reason = scgresult['reason']
            self.errorTrace = np.sqrt(scgresult['ftrace'])  # * self.Tstds # to unstandardize the MSEs
            self.numberOfIterations = len(self.errorTrace)
            self.trained = True
        else:
            ## Tensor version
            #if self.Xmeans is None:
            #don't standardize?
            #    self.Xmeans = X.mean().item()
            #    self.Xstds = X.std(False).item()
            #    self.Xconstant = self.Xstds == 0
            #    self.XstdsFixed = copy(self.Xstds)
            #X = self._standardizeX(X)

            if T.dim() == 1:
                T = T.reshape((-1, 1))

           # if self.Tmeans is None:
           #     self.Tmeans = T.mean().item()
           #    self.Tstds = T.std(False).item()
           #     self.Tconstant = self.Tstds == 0
           #     self.TstdsFixed = copy(self.Tstds)
           #     self.TstdsFixed[self.Tconstant] = 1
           # T = self._standardizeT(T)

            # haven't changed these yet
            def objectiveF(w):
                self._unpack(w)
                Y, _ = self._forward_pass(X)
                return 0.5 * np.mean((Y - T) ** 2)

            def gradF(w):
                self._unpack(w)
                Y, Z = self._forward_pass(X)
                delta = (Y - T) / (X.shape[0] * T.shape[1])
                dVs, dW = self._backward_pass(delta, Z)
                return self._pack(dVs, dW)

            scgresult = scg.scg(self._pack(self.Vs, self.W), objectiveF, gradF,
                                xPrecision=weightPrecision,
                                fPrecision=errorPrecision,
                                nIterations=nIterations,
                                verbose=verbose,
                                ftracep=True)

            self._unpack(scgresult['x'])
            self.reason = scgresult['reason']
            self.errorTrace = np.sqrt(scgresult['ftrace'])  # * self.Tstds # to unstandardize the MSEs
            self.numberOfIterations = len(self.errorTrace)
            self.trained = True

        return self

    def use(self, X, allOutputs=False):
        Xst = self._standardizeX(X)
        Y, Z = self._forward_pass(Xst)
        Y = self._unstandardizeT(Y)
        if Z is None:
            return (Y, None) if allOutputs else Y
        else:
            return (Y, Z[1:]) if allOutputs else Y

    def getNumberOfIterations(self):
        return self.numberOfIterations

    def getErrorTrace(self):
        return self.errorTrace

    def draw(self, inputNames=None, outputNames=None):
        ml.draw(self.Vs + [self.W], inputNames, outputNames)

    def _forward_pass(self, X):
        if self.nhs is None:
            # no hidden units, just linear output layer
            Y = np.dot(X, self.W[1:, :]) + self.W[0:1, :]
            Zs = [X]
        else:
            Zprev = X
            Zs = [Zprev]
            for i in range(len(self.nhs)):
                V = self.Vs[i]
                Zprev = np.tanh(np.dot(Zprev, V[1:, :]) + V[0:1, :])
                Zs.append(Zprev)
            Y = np.dot(Zprev, self.W[1:, :]) + self.W[0:1, :]
        return Y, Zs

    def _backward_pass(self, delta, Z):
        if self.nhs is None:
            # no hidden units, just linear output layer
            dW = np.vstack((np.dot(np.ones((1, delta.shape[0])), delta), np.dot(Z[0].T, delta)))
            dVs = None
        else:
            dW = np.vstack((np.dot(np.ones((1, delta.shape[0])), delta), np.dot(Z[-1].T, delta)))
            dVs = []
            delta = (1 - Z[-1] ** 2) * np.dot(delta, self.W[1:, :].T)
            for Zi in range(len(self.nhs), 0, -1):
                Vi = Zi - 1  # because X is first element of Z
                dV = np.vstack((np.dot(np.ones((1, delta.shape[0])), delta),
                                np.dot(Z[Zi - 1].T, delta)))
                dVs.insert(0, dV)
                delta = np.dot(delta, self.Vs[Vi][1:, :].T) * (1 - Z[Zi - 1] ** 2)
        return dVs, dW

    def _standardizeX(self, X):
        result = (X - self.Xmeans) / self.XstdsFixed
        # result[:,self.Xconstant] = 0.0
        return result

    def _unstandardizeX(self, Xs):
        return self.Xstds * Xs + self.Xmeans

    def _standardizeT(self, T):
        result = (T - self.Tmeans) / self.TstdsFixed
        # result[:,self.Tconstant] = 0.0
        return result

    def _unstandardizeT(self, Ts):
        return self.Tstds * Ts + self.Tmeans

    def _pack(self, Vs, W):
        if Vs is None:
            return np.array(W.flat)
        else:
            return np.hstack([V.flat for V in Vs] + [W.flat])

    def _unpack(self, w):
        if self.nhs is None:
            self.W[:] = w.reshape((self.ni + 1, self.no))
        else:
            first = 0
            numInThisLayer = self.ni
            for i in range(len(self.Vs)):
                self.Vs[i][:] = w[first:first + (numInThisLayer + 1) * self.nhs[i]].reshape(
                    (numInThisLayer + 1, self.nhs[i]))
                first += (numInThisLayer + 1) * self.nhs[i]
                numInThisLayer = self.nhs[i]
            self.W[:] = w[first:].reshape((numInThisLayer + 1, self.no))

    def __repr__(self):
        str = 'NeuralNetwork({}, {}, {})'.format(self.ni, self.nhs, self.no)
        # str += '  Standardization parameters' + (' not' if self.Xmeans == None else '') + ' calculated.'
        if self.trained:
            str += '\n   Network was trained for {} iterations. Final error is {}.'.format(self.numberOfIterations,
                                                                                           self.errorTrace[-1])
        else:
            str += '  Network is not trained.'
        return str
Example #14
    def train(self,
              X,
              T,
              nIterations=100,
              verbose=False,
              weightPrecision=0,
              errorPrecision=0):

        if self.Xmeans is None:
            self.Xmeans = X.mean(axis=0)
            self.Xstds = X.std(axis=0)
            self.Xconstant = self.Xstds == 0
            self.XstdsFixed = copy(self.Xstds)
            self.XstdsFixed[self.Xconstant] = 1
        X = self.standardizeX(X)

        if T.ndim == 1:
            T = T.reshape((-1, 1))

        if self.Tmeans is None:
            self.Tmeans = T.mean(axis=0)
            self.Tstds = T.std(axis=0)
            self.Tconstant = self.Tstds == 0
            self.TstdsFixed = copy(self.Tstds)
            self.TstdsFixed[self.Tconstant] = 1
        T = self.standardizeT(T)

        # Local functions used by scg()

        def pack(Vs, W):
            return np.hstack([V.flat for V in Vs] + [W.flat])

        def unpack(w):
            first = 0
            numInThisLayer = self.ni
            for i in range(len(self.Vs)):
                self.Vs[i][:] = w[first:first +
                                  (numInThisLayer + 1) * self.nhs[i]].reshape(
                                      (numInThisLayer + 1, self.nhs[i]))
                first += (numInThisLayer + 1) * self.nhs[i]
                numInThisLayer = self.nhs[i]
            self.W[:] = w[first:].reshape((numInThisLayer + 1, self.no))

        def objectiveF(w):
            unpack(w)
            Zprev = X
            for i in range(len(self.nhs)):
                V = self.Vs[i]
                Zprev = np.tanh(
                    np.dot(Zprev, V[1:, :]) + V[0:1, :]
                )  # handling bias weight without adding column of 1's
            Y = np.dot(Zprev, self.W[1:, :]) + self.W[0:1, :]
            return np.mean((T - Y)**2)

        def gradF(w):
            unpack(w)
            Zprev = X
            Z = [Zprev]
            for i in range(len(self.nhs)):
                V = self.Vs[i]
                Zprev = np.tanh(np.dot(Zprev, V[1:, :]) + V[0:1, :])
                Z.append(Zprev)
            Y = np.dot(Zprev, self.W[1:, :]) + self.W[0:1, :]
            delta = -(T - Y) / (X.shape[0] * T.shape[1])
            dW = 2 * np.vstack((np.dot(np.ones(
                (1, delta.shape[0])), delta), np.dot(Z[-1].T, delta)))
            dVs = []
            delta = (1 - Z[-1]**2) * np.dot(delta, self.W[1:, :].T)
            for Zi in range(len(self.nhs), 0, -1):
                Vi = Zi - 1  # because X is first element of Z
                dV = 2 * np.vstack((np.dot(np.ones(
                    (1, delta.shape[0])), delta), np.dot(Z[Zi - 1].T, delta)))
                dVs.insert(0, dV)
                delta = np.dot(delta,
                               self.Vs[Vi][1:, :].T) * (1 - Z[Zi - 1]**2)
            return pack(dVs, dW)

        scgresult = scg.scg(pack(self.Vs, self.W),
                            objectiveF,
                            gradF,
                            xPrecision=weightPrecision,
                            fPrecision=errorPrecision,
                            nIterations=nIterations,
                            verbose=verbose,
                            ftracep=True)

        unpack(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace = np.sqrt(
            scgresult['ftrace'])  # * self.Tstds # to unstandardize the MSEs
        self.numberOfIterations = len(self.errorTrace)
        self.trained = True
        return self