def test_ModelFitandFreez1():
    typs = ["SL", "SL2"]
    for typ in typs:
        # Type=typ (not typs[0]) so every listed model type is actually tested
        NNx = NN.clsNN(1, 1, 2, Type=typ, FreezeModel=[1])
        NN.ModelFit(NNx.model[0], x1, f20(x1),
                    Epochs=10, ReTry=100, StopAt=[1.0e-2])
        oldw = list(NNx.model[0].layers[1].get_weights()[0][0])
        NN.ModelFit(NNx.model[1], x1, f21(x1),
                    Epochs=10, ReTry=100, StopAt=[1.0e-2])
        assert list(NNx.model[0].layers[1].get_weights()[0][0]) == \
            list(NNx.model[1].layers[1].get_weights()[0][0]), \
            "shared layer weights not identical"
        assert list(NNx.model[0].layers[2].get_weights()[0][0]) != \
            list(NNx.model[1].layers[2].get_weights()[0][0]), \
            "last/parallel layer weights identical"
        assert oldw == list(NNx.model[0].layers[1].get_weights()[0][0]), \
            "frozen weights changed"
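
# For context, a minimal standalone Keras sketch of the behavior the test above
# asserts (assumption: NN.clsNN wraps Keras models and FreezeModel=[1] maps to
# layer.trainable = False on the shared layer; names here are illustrative).
# A frozen layer keeps identical weights across fits while trainable layers move.
def _sketch_freeze():
    import numpy as np
    from tensorflow import keras
    inp = keras.Input(shape=(1,))
    shared = keras.layers.Dense(2, name="shared")
    out = keras.layers.Dense(1)(shared(inp))
    model = keras.Model(inp, out)
    shared.trainable = False                      # freeze before compiling
    model.compile(optimizer="adam", loss="mse")
    x = np.linspace(0.0, 1.0, 32).reshape(-1, 1)
    w_before = shared.get_weights()[0].copy()
    model.fit(x, x ** 2, epochs=2, verbose=0)
    # the frozen layer's kernel is bit-for-bit unchanged after training
    assert np.array_equal(w_before, shared.get_weights()[0])
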
def review_ModelFitMultiOut():
    # single model with two outputs
    typ = "STD"
    NNx = NN.clsNN(1, 1, 2, Type=typ)
    Y = np.swapaxes([f20(x1), f21(x1)], 0, 1)
    NN.ModelFit(NNx.model[0], x1, Y, Epochs=10, StopAt=[1.0e-2])
    Ynn = np.swapaxes(NN.ModelPredict(NNx.model[0], x1), 0, 1)
    plot(x1, f20(x1), Ynn[0])
    plot(x1, f21(x1), Ynn[1])
    # two models, one per output
    typs = ["SI", "SL", "SL2"]
    for typ in typs:
        NNx = NN.clsNN(1, 1, 2, Type=typ)
        NN.ModelFit(NNx.model[0], x1, f20(x1), Epochs=10, StopAt=[1.0e-2])
        NN.ModelFit(NNx.model[1], x1, f21(x1), Epochs=10, StopAt=[1.0e-2])
        Ynn = [NN.ModelPredict(NNx.model[0], x1),
               NN.ModelPredict(NNx.model[1], x1)]
        plot(x1, f20(x1), Ynn[0])
        plot(x1, f21(x1), Ynn[1])
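
# Shape note for review_ModelFitMultiOut (illustration only, not repo code):
# stacking the two target curves gives shape (2, N); swapaxes(0, 1) yields the
# (N, 2) layout a two-output model trains on, and predictions are swapped back
# the same way before plotting per output.
def _demo_swapaxes():
    import numpy as np
    y = np.swapaxes([[1, 2, 3], [4, 5, 6]], 0, 1)
    assert y.tolist() == [[1, 4], [2, 5], [3, 6]]
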
def xTrainQToReward(self, FromSequence, StopAt=[1.0e-03, 0]):
    SequenceSample = FromSequence.ReturnSample(self.batch)
    Xs, _, As, Rs, _ = SequenceSample.AsList()
    X_01 = self._RetXActAs01(Xs, As)
    R = Rs
    if FromSequence.Nterminal > 0:
        # Terminal states: fit Q(x, a) directly toward the terminal reward,
        # with one entry per available action for each terminal state
        SequenceTerminal = FromSequence.ReturnSample(
            idxs=FromSequence.ReturnIdx(1))
        _, Xt, _, _, Rt = SequenceTerminal.AsList(multi=4)
        At = [
            j
            for _ in range(SequenceTerminal.len)
            for j in range(len(self.actions))
        ]
        X_01 = self._RetXActAs01(Xs + Xt, As + At)
        R = Rs + Rt
    # Fit
    NN.ModelFit(self.QModel[0], np.array(X_01), np.array(R),
                Epochs=100, StopAt=StopAt)
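
# A minimal sketch of what a state/action combiner like _RetXActAs01 might do
# (assumption: its implementation is not shown here and may differ). The idea:
# append a one-hot action encoding to each state vector so a single network
# can score any (state, action) pair.
def _sketch_RetXActAs01(states, actions, n_actions):
    X = []
    for s, a in zip(states, actions):
        onehot = [0.0] * n_actions
        onehot[a] = 1.0                 # mark the chosen action
        X.append(list(s) + onehot)
    return X
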
def xTrain(self, FromSequence, BellmanIterations, StopAt=[1.0e-03, 0]):
    # Terminal states
    if FromSequence.Nterminal > 0:
        SequenceTerminal = FromSequence.ReturnSample(
            idxs=FromSequence.ReturnIdx(1))
        _, Xt, _, _, Rt = SequenceTerminal.AsList(multi=4)
        At = [
            j
            for _ in range(SequenceTerminal.len)
            for j in range(len(self.actions))
        ]
    for k in range(BellmanIterations):
        # Non-terminal states
        SequenceSample = FromSequence.ReturnSample(self.batch)
        X0s, X1s, As, Rs, _ = SequenceSample.AsList()
        # Q under current policy
        Qpolicy_X0 = self.Predict(X0s)
        Qpolicy_X1 = self.Predict(X1s)
        QpAcn_X0 = [Qpolicy_X0[i][As[i]] for i in range(len(Qpolicy_X0))]
        QpMax_X1 = [max(Qpolicy_X1[i]) for i in range(len(Qpolicy_X1))]
        # Target Q: one-step Bellman backup with learning rate alpha
        # (no explicit discount factor, i.e. gamma = 1)
        QtAcn_X0 = [
            QpAcn_X0[i] + self.alpha * (Rs[i] + QpMax_X1[i] - QpAcn_X0[i])
            for i in range(len(QpAcn_X0))
        ]
        if FromSequence.Nterminal > 0:
            # terminal states train directly toward their terminal reward
            X0s = X0s + Xt
            As = As + At
            QtAcn_X0 = QtAcn_X0 + Rt
        # Generate X as combination of state and action
        XA = self._RetXActAs01(X0s, As)
        print("Iteration " + str(k))
        # Fit
        NN.ModelFit(self.QModel[0], np.array(XA), np.array(QtAcn_X0),
                    Epochs=100, StopAt=StopAt)
    return
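
# Vectorized illustration of the target computed in xTrain above (a standalone
# numpy sketch, not part of the class): with learning rate alpha and implicit
# gamma = 1, Qt = Qp + alpha * (r + max_a' Q(x1, a') - Qp).
def _sketch_q_targets(q_x0_taken, rewards, q_x1_all, alpha):
    import numpy as np
    q_x0_taken = np.asarray(q_x0_taken, dtype=float)  # Q(x0, a) for taken a
    rewards = np.asarray(rewards, dtype=float)
    q_max_x1 = np.asarray(q_x1_all, dtype=float).max(axis=1)  # max over a'
    return q_x0_taken + alpha * (rewards + q_max_x1 - q_x0_taken)
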
def review_ModelFit():
    for typ in ModelTypes:
        NNx = NN.clsNN(1, 1, 1, Type=typ)
        NN.ModelFit(NNx.model[0], x1, f20(x1), Epochs=10, StopAt=[1.0e-3])
        Ynn = NN.ModelPredict(NNx.model[0], x1)
        plot(x1, f20(x1), Ynn)