def __init__(self, IO, SAMPLE_SIZE, BATCH_SIZE, EPOCH, NB_GEN, NB_SEEDER,
             RNN=False, DATA_XPLT=0.5, LEARNING_RATE=1e-6, MOMENTUM=0.5):
    # Parameters
    self.IO = IO
    self.N = SAMPLE_SIZE
    self.BATCH = BATCH_SIZE
    self.EPOCH = EPOCH
    self.NB_GEN = NB_GEN
    self.NB_SEEDER = NB_SEEDER**2
    self.LR = LEARNING_RATE
    self.MM = MOMENTUM
    # generate first ENN step
    self.GRAPH_LIST = [GRAPH_EAT([self.IO, 1], None) for n in range(self.NB_SEEDER - 1)]
    self.SEEDER_LIST = [CTRL_NET(self.IO)]
    for g in self.GRAPH_LIST:
        NEURON_LIST = g.NEURON_LIST
        self.SEEDER_LIST += [pRNN(NEURON_LIST, self.BATCH, self.IO[0], STACK=RNN)]
    self.PARENT = [(-1 * np.ones(self.NB_SEEDER))[None]]
    # best seeder model
    self.BEST_MODEL = 0
    self.OPTIM_BEST = 0
    self.BEST_CRIT = nn.CrossEntropyLoss()
    self.LOSS_BEST = 0
    self.BEST_WEIGHT = []
    # generate loss-optimizer pairs
    self.OPTIM = [torch.optim.SGD(s.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
                  for s in self.SEEDER_LIST]
    self.CRITERION = [nn.CrossEntropyLoss() for n in range(self.NB_SEEDER)]
    self.LOSS = self.NB_SEEDER * [0]
    # number of batches per generation
    self.NB_BATCH_P_GEN = int((DATA_XPLT * self.N * self.EPOCH) / (self.NB_GEN * self.BATCH))
    # selection and accuracy
    self.SCORE_LOSS = []
    self.ACCUR = []  # in %
    self.BEST_SCORE_LOSS = []
    # for next gen (n-plicat) and control group
    self.NB_CONTROL = 1  # always (preference)
    self.NB_CHALLENGE = int(np.sqrt(self.NB_SEEDER) - self.NB_CONTROL)
    self.NB_SURVIVOR = self.NB_CHALLENGE  # square completion
    self.NB_CHILD = int(np.sqrt(self.NB_SEEDER) - 1)
    # FITNESS
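# Pool arithmetic sketch (worked example derived from the attributes above, not
# taken from the original sources): for a constructor argument NB_SEEDER = k the
# internal pool holds k**2 networks, split as
#   1 control + (k-1) survivors + (k-1)**2 mutated children + (k-1) challengers
#   = 1 + 2*(k-1) + (k-1)**2 = k**2,
# which is the "square completion" noted above; e.g. k = 4 gives 16 = 1 + 3 + 9 + 3.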
def __init__(self, arg, NAMED_MEMORY=None, TYPE="class", TIME_DEPENDANT=False):
    # parameters
    self.IO = arg[0]
    self.BATCH = arg[1]
    self.NB_GEN = arg[2]
    self.NB_SEEDER = arg[3]
    self.NB_EPISOD = arg[4]
    self.ALPHA = arg[5]  # 1 - % of predicted (non-random) steps
    self.NB_E_P_G = int(self.NB_EPISOD / self.NB_GEN)
    self.TIME_DEP = TIME_DEPENDANT
    self.TYPE = TYPE
    self.NAMED_M = NAMED_MEMORY
    # generate first ENN model
    self.GRAPH_LIST = [GRAPH_EAT([self.IO, 1], None) for n in range(self.NB_SEEDER - 1)]
    self.SEEDER_LIST = [CTRL_NET(self.IO)]
    for g in self.GRAPH_LIST:
        NEURON_LIST = g.NEURON_LIST
        self.SEEDER_LIST += [pRNN(NEURON_LIST, self.BATCH, self.IO[0], STACK=self.TIME_DEP)]
    # training parameter
    self.NEURON_LIST = []
    self.UPDATE_MODEL()
    # selection
    self.loss = pd.DataFrame(columns=['GEN', 'IDX_SEED', 'EPISOD', 'N_BATCH', 'LOSS_VALUES'])
    self.supp_param = None
    # evolution parameters
    self.NB_CONTROL = int(np.power(self.NB_SEEDER, 1. / 4))
    self.NB_EVOLUTION = int(np.sqrt(self.NB_SEEDER) - 1)  # square completion
    self.NB_CHALLENGE = int(self.NB_SEEDER - (self.NB_EVOLUTION * (self.NB_EVOLUTION + 1) + self.NB_CONTROL))
    # evolution variables
    self.PARENTING = [-1 * np.ones(self.NB_SEEDER)[None]]
    self.PARENTING[0][0][:self.NB_CONTROL] = 0
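# Selection pool sketch (worked example assuming NB_SEEDER = 16; not from the
# original sources):
#   NB_CONTROL   = int(16 ** 0.25)        = 2
#   NB_EVOLUTION = int(sqrt(16) - 1)      = 3
#   NB_CHALLENGE = 16 - (3 * (3 + 1) + 2) = 2
# so each generation keeps 2 controls and 3 survivors, breeds 3 * 3 = 9 mutated
# children, and adds 2 fresh random challengers, summing back to 16 seeders.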
            self.trace[t] = torch.cat([tt[t] for tt in trace])
    else:
        # Only adapted for SGD, if mini-batch, pseudo-rnn perturbation
        idx_end, self.trace[:-1] = self.graph2net(BATCH_)
    # save for t+1
    for t in range(len(self.trace)):
        self.h[t][BATCH_] = self.trace[t][BATCH_].detach()
    # output probs
    return self.trace[idx_end]


if __name__ == '__main__':
    IO = (17, 3)
    BATCH = 16
    # graph part
    from GRAPH_EAT import GRAPH_EAT
    NET = GRAPH_EAT([IO, 1], None)
    print(NET.NEURON_LIST)
    for BOOL in [False, True]:
        # networks
        model = pRNN(NET.NEURON_LIST, BATCH, IO[0], STACK=BOOL)
        # data test
        tensor_in = torch.randn(BATCH, IO[0])
        tensor_out = model(tensor_in[:5])
        # print
        print('\n', tensor_out.shape, model.h[0].shape)
        # init train
        OPTIM = torch.optim.Adam(model.parameters())
        CRITERION = nn.CrossEntropyLoss()
        # step of train
        for i in range(5):
            print(i)
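        # The training loop above only prints the step index in this fragment; a
        # plausible body for one step (an assumption, not the repository's code)
        # could be:
        #   OPTIM.zero_grad()
        #   out = model(tensor_in)
        #   loss = CRITERION(out, torch.randint(IO[1], (BATCH,)))  # dummy targets
        #   loss.backward()
        #   OPTIM.step()
        #   print(i, loss.item())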
class Q_AGENT():
    def __init__(self, *arg, MODEL=None, CTRL=False, NET=None, COOR=None):
        self.P_MIN = 1
        # Parameters
        self.ARG = arg
        self.IO = arg[0]  # image cells, action
        self.NB_P_GEN = arg[1]
        self.batch_size = arg[2]
        self.N_TIME = arg[4]
        self.N_CYCLE = arg[5]
        ## Init
        if CTRL:
            self.NET = GRAPH_EAT(None, self.CONTROL_NETWORK(self.IO))
            MODEL = CTRL_NET(self.IO)
        elif NET == None:
            self.NET = GRAPH_EAT([self.IO, self.P_MIN], None)
        else:
            self.NET = NET
        self.NEURON_LIST = self.NET.NEURON_LIST
        if (MODEL == None) and not CTRL:
            self.MODEL = pRNN(self.NEURON_LIST, self.batch_size, self.IO[0], STACK=True)
        else:
            self.MODEL = MODEL
        # nn optimiser
        self.GAMMA = 0.9
        self.optimizer = torch.optim.Adam(self.MODEL.parameters())
        # alternative: torch.optim.SGD(self.MODEL.parameters(), lr=1e-6, momentum=0.9)
        self.criterion = nn.SmoothL1Loss()  # Huber loss; nn.MSELoss() also possible
        # not a classification loss (same comparison [batch] -> [batch])
        # alternative: nn.NLLLoss(reduction='sum'), negative log likelihood loss ([batch, Nout] -> [batch])
        self.loss = None
        self.LOSS = []
        ## IO coordinates
        X_A = np.mgrid[-1:2, -1:2].reshape((2, -1)).T
        X_B = np.array([[0, 0], [0, 2], [0, 4], [2, 0], [2, 4], [4, 0], [4, 2], [4, 4]]) - [2, 2]
        self.X, self.Y = np.concatenate((X_A, X_B)), np.array([[0, 1], [1, 2], [2, 0]]) - [1, 1]
        ## Data sample (memory: 'old_state', 'action', 'new_state', 'reward', 'terminal')
        self.MEMORY = [[], [], [], [], []]
        self.MEMORY_ = None
        ## Players
        self.prev_state = None

    def INIT_ENV(self, ENV_INPUT):
        self.prev_state = ENV_INPUT.FIRST_STEP_SET(0)

    def PARTY(self, ENV_INPUT):
        # reset game environment (for each batch? important)
        for n in range(self.N_CYCLE):
            self.prev_state = ENV_INPUT.FIRST_STEP_SET(n)
            for t in range(self.N_TIME):  # loop game
                for i in range(self.batch_size):
                    action = self.ACTION(self.prev_state)
                    new_state, reward, DONE = ENV_INPUT.STEP(action)
                    # Memory update
                    if i == self.batch_size - 1:
                        DONE = True
                    self.SEQUENCING(self.prev_state, action, new_state, reward, DONE)
                    # n+1
                    self.prev_state = new_state.copy()
                    # escape loop
                    if DONE == True:
                        break
                # Reinforcement learning
                self.OPTIM()

    ## Action: exploration/exploitation dilemma
    def ACTION(self, Input):
        img_in = torch.tensor(Input, dtype=torch.float)
        # actor-critic (old version)
        action_probs = self.MODEL(img_in)
        # exploration-exploitation dilemma
        DILEMNA = np.squeeze(action_probs.detach().numpy())
        if DILEMNA.sum() == 0 or str(DILEMNA.sum()) == 'nan':
            next_action = np.random.randint(self.IO[1])
        else:
            if DILEMNA.min() < 0:
                DILEMNA = DILEMNA - DILEMNA.min()  # n-1 choice restriction
            ## add dispersion (in the q-table, values are near each other)
            order = np.exp(np.argsort(DILEMNA) + 1)
            # probability
            p_norm = order / order.sum()
            #print(order, p_norm)
            next_action = np.random.choice(self.IO[1], p=p_norm)
        return next_action

    ## Memory sequencing
    def SEQUENCING(self, prev_state, action, new_state, reward, DONE):
        self.MEMORY[0] += [prev_state]
        self.MEMORY[1] += [action]
        self.MEMORY[2] += [new_state]
        self.MEMORY[3] += [reward]
        self.MEMORY[4] += [DONE]
        if DONE:
            self.MEMORY[0] = torch.tensor(np.concatenate(self.MEMORY[0]), dtype=torch.float)
            self.MEMORY[1] = torch.tensor(np.array(self.MEMORY[1]), dtype=torch.long).unsqueeze(1)
            self.MEMORY[2] = torch.tensor(np.concatenate(self.MEMORY[2]), dtype=torch.float)
            self.MEMORY[3] = torch.tensor(np.array(self.MEMORY[3]))
            self.MEMORY[4] = torch.tensor(np.array(self.MEMORY[4]), dtype=torch.int)

    ## Training the Q-table
    def OPTIM(self):
        # extract info
        old_state, action, new_state, reward, DONE = self.MEMORY
        # actor probabilities
        actor = self.MODEL(old_state)
        # Compute predicted Q-values for each action
        pred_q_values_batch = actor.gather(1, action)
        pred_q_values_next = self.MODEL(new_state)
        # Compute the target Q-value for the action performed
        # (Bellman target: r + (1 - done) * gamma * max_a Q(s', a))
        target_q_values_batch = (reward + (1 - DONE) * self.GAMMA * torch.max(pred_q_values_next, 1)[0]).detach().unsqueeze(1)
        self.y = [pred_q_values_batch, target_q_values_batch]
        #[print(i, self.y[i].shape) for i in range(2)]
        #print(self.y[1])
        # zero the parameter gradients
        self.MODEL.zero_grad()
        # Compute the loss
        self.loss = self.criterion(pred_q_values_batch, target_q_values_batch)
        # Do the backward pass
        self.loss.backward()
        self.optimizer.step()
        # save loss
        self.LOSS += [self.loss.item()]
        # reset memory
        self.MEMORY_ = self.MEMORY
        self.MEMORY = [[], [], [], [], []]

    ## reset object
    def RESET(self, PROBA):
        GRAPH = self.NET.NEXT_GEN(-1)
        XY_TUPLE = (self.X, self.Y)
        if np.random.choice((False, True), 1, p=[PROBA, 1 - PROBA])[0]:
            return Q_AGENT(*self.ARG, NET=GRAPH, COOR=XY_TUPLE)
        else:
            return Q_AGENT(*self.ARG, MODEL=self.MODEL, NET=GRAPH, COOR=XY_TUPLE)

    ## mutation
    def MUTATION(self, MUT=None):
        # mutate graph
        GRAPH = self.NET.NEXT_GEN(MUT)
        return Q_AGENT(*self.ARG, NET=GRAPH)

    ## control group
    def CONTROL_NETWORK(self, IO):
        """For the Lyfe problem: not generalized."""
        # init number of connections per layer
        # NB_H_LAYER = 2
        # NB_C_P_LAYER = int(np.sqrt(self.IO[0]) + np.sqrt(self.IO[1]))
        # network equivalence --> switch to 17?
        NET = np.array([[-1, 3, 4, 32, [[2, 0], [2, 1], [2, 2], [2, 3]]],
                        [1, 4, IO[0], 10, [[0, i] for i in range(IO[0])]],
                        [2, 4, 4, 20, [[1, 0], [1, 1], [1, 2], [1, 3]]]])
        # Listing
        LIST_C = np.array([[0, 0, i] for i in range(IO[0])] +
                          [[10, 1, 0], [10, 1, 1], [10, 1, 2], [10, 1, 3],
                           [20, 2, 0], [20, 2, 1], [20, 2, 2], [20, 2, 3]])
        return [IO, NET.copy(), LIST_C.copy()]
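# Usage sketch (illustrative only, not from the repository): a generation loop over
# Q_AGENT instances could chain the methods above roughly as follows, where `env` is
# a hypothetical environment exposing FIRST_STEP_SET/STEP as used in PARTY, and the
# fourth positional argument (arg[3], unused in this fragment) is left as None:
#   agent = Q_AGENT(IO, NB_P_GEN, BATCH_SIZE, None, N_TIME, N_CYCLE)
#   agent.INIT_ENV(env)
#   agent.PARTY(env)            # play and learn; losses accumulate in agent.LOSS
#   child = agent.MUTATION()    # mutated topology, fresh weights
#   clone = agent.RESET(0.5)    # new agent; weights kept with probability 0.5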
def SELECTION(self, GEN, supp_factor=1):
    # selection on the tail (above-median) loss
    TailLoss = np.ones(self.NB_SEEDER)
    # extract data
    sub_loss = self.loss[self.loss.GEN == GEN]
    # check that SGD training data exists (otherwise selection is evolution-only)
    if sub_loss.size > 0:
        gb_seed = sub_loss.groupby('IDX_SEED')
        # tail (above-median) loss per seeder
        for i, g in gb_seed:
            if self.ALPHA != 1:
                Tail_eps = g.EPISOD.min() + (g.EPISOD.max() - g.EPISOD.min()) * self.ALPHA
            else:
                Tail_eps = g.EPISOD.median()
            TailLoss[int(i)] = g[g.EPISOD > Tail_eps].LOSS_VALUES.mean()
        # normalization
        relativeLOSS = (TailLoss - TailLoss.min()) / (TailLoss.max() - TailLoss.min())
    else:
        relativeLOSS = TailLoss
    # combined score coefficient, belongs to [0, 3]
    score = supp_factor + supp_factor * relativeLOSS + relativeLOSS
    # order (lowest score first)
    order = np.argsort(score[self.NB_CONTROL:])
    ### keep control networks
    NET_C = self.SEEDER_LIST[:self.NB_CONTROL]
    ### generation parenting
    PARENT = [0] * self.NB_CONTROL
    ### survivors
    GRAPH_S = []
    NET_S = []
    GRAPH_IDX = list(order[:self.NB_EVOLUTION])
    for i in GRAPH_IDX:
        GRAPH_S += [self.GRAPH_LIST[i]]
        if np.random.choice((True, False), 1, p=[0.9, 0.1]):
            NET_S += [self.SEEDER_LIST[self.NB_CONTROL:][i]]
        else:
            NET_S += [pRNN(GRAPH_S[-1].NEURON_LIST, self.BATCH, self.IO[0], STACK=self.TIME_DEP)]
        PARENT += [i + 1]
    ### mutation
    GRAPH_M = []
    NET_M = []
    for g, j in zip(GRAPH_S, GRAPH_IDX):
        for i in range(self.NB_EVOLUTION):
            GRAPH_M += [g.NEXT_GEN()]
            NET_M += [pRNN(GRAPH_M[-1].NEURON_LIST, self.BATCH, self.IO[0], STACK=self.TIME_DEP)]
            PARENT += [j + 1]
    ### new random challengers
    GRAPH_N = []
    NET_N = []
    for n in range(self.NB_CHALLENGE):
        GRAPH_N += [GRAPH_EAT([self.IO, 1], None)]
        NET_N += [pRNN(GRAPH_N[-1].NEURON_LIST, self.BATCH, self.IO[0], STACK=self.TIME_DEP)]
        PARENT += [-1]
    ### update seeder list and store info
    self.PARENTING += [np.array(PARENT)[None]]
    self.GRAPH_LIST = GRAPH_S + GRAPH_M + GRAPH_N
    self.SEEDER_LIST = NET_C + NET_S + NET_M + NET_N
    ### update model
    self.UPDATE_MODEL()
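# Worked example for the score above (illustrative, assuming supp_factor stays in
# [0, 1]): with supp_factor = 1 and relativeLOSS in [0, 1],
#   score = 1 + 1 * relativeLOSS + relativeLOSS, which ranges over [1, 3];
# with supp_factor = 0 it reduces to relativeLOSS alone, hence the stated [0, 3]
# bound. np.argsort ranks low scores first, so seeders with low tail loss survive.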
def fit(self, DATA, LABEL):
    # generation loop
    for o in tqdm(range(self.NB_GEN)):
        DATA, LABEL = shuffle(DATA, LABEL)
        P = (self.NB_GEN - o) / (2 * self.NB_GEN)  # probability of keeping a survivor's weights
        # compilation
        for n in range(self.NB_BATCH_P_GEN):
            data = torch.tensor(DATA[n * self.BATCH:(n + 1) * self.BATCH].reshape(-1, self.IO[0]),
                                dtype=torch.float)
            target = torch.tensor(LABEL[n * self.BATCH:(n + 1) * self.BATCH]).type(torch.LongTensor)
            # seeders
            for s in range(self.NB_SEEDER):
                self.OPTIM[s].zero_grad()
                output = self.SEEDER_LIST[s](data)
                self.LOSS[s] = self.CRITERION[s](output, target)
                self.LOSS[s].backward()
                self.OPTIM[s].step()
        # score loss
        self.SCORE_LOSS += [torch.tensor(self.LOSS).numpy()[None]]
        # score accuracy
        train_idx = np.random.randint(self.N, size=self.BATCH)
        dt_train = torch.tensor(DATA[train_idx].reshape((-1, self.IO[0])), dtype=torch.float)
        tg_train = torch.tensor(LABEL[train_idx])
        max_idx = self.predict(dt_train, False)
        self.ACCUR += [((max_idx == tg_train).sum(1) / self.BATCH).numpy()[None]]
        # evolution
        SCORE_LIST = ((1 - self.ACCUR[-1]).squeeze()) * (self.SCORE_LOSS[-1].squeeze())  # square effect
        ## fitness (on the accuracy test)
        ORDER = np.argsort(SCORE_LIST[self.NB_CONTROL:]).astype(int)
        # control
        CTRL = self.SEEDER_LIST[:self.NB_CONTROL]
        PARENT = [0] * self.NB_CONTROL
        # survivors (weights kept or reset)
        BEST = []
        B_G_ = []
        B_I = []
        for i in ORDER[:self.NB_SURVIVOR]:
            B_G_ += [self.GRAPH_LIST[i]]
            if np.random.choice((True, False), 1, p=[P, 1 - P]):
                BEST += [self.SEEDER_LIST[self.NB_CONTROL:][i]]
            else:
                BEST += [pRNN(B_G_[-1].NEURON_LIST, self.BATCH, self.IO[0])]
            PARENT += [i + 1]
            B_I += [i + 1]
        # mutation
        MUTS = []
        M_G_ = []
        for g, j in zip(B_G_, B_I):
            for i in range(self.NB_CHILD):
                M_G_ += [g.NEXT_GEN()]
                MUTS += [pRNN(M_G_[-1].NEURON_LIST, self.BATCH, self.IO[0])]
                PARENT += [j]
        # challengers
        NEWS = []
        N_G_ = []
        for n in range(self.NB_CHALLENGE):
            N_G_ += [GRAPH_EAT([self.IO, 1], None)]
            NEWS += [pRNN(N_G_[-1].NEURON_LIST, self.BATCH, self.IO[0])]
            PARENT += [-1]
        # update
        self.SEEDER_LIST = CTRL + BEST + MUTS + NEWS
        self.GRAPH_LIST = B_G_ + M_G_ + N_G_
        self.PARENT += [np.array(PARENT)[None]]
        # regenerate loss-optimizers for the new seeder list
        self.OPTIM = [torch.optim.SGD(s.parameters(), lr=self.LR, momentum=self.MM)
                      for s in self.SEEDER_LIST]
        self.CRITERION = [nn.CrossEntropyLoss() for n in range(self.NB_SEEDER)]
    # compact evolution data
    self.SCORE_LOSS = np.concatenate(self.SCORE_LOSS).T
    self.ACCUR = np.concatenate(self.ACCUR).T
    self.PARENT = np.concatenate(self.PARENT).T
    # retrain the best graph (weight optimization loop)
    self.BEST_MODEL = pRNN(self.GRAPH_LIST[ORDER[0]].NEURON_LIST, self.BATCH, self.IO[0])
    self.OPTIM_BEST = torch.optim.SGD(self.BEST_MODEL.parameters(), lr=self.LR, momentum=self.MM)
    for i in tqdm(range(self.NB_GEN)):
        DATA, LABEL = shuffle(DATA, LABEL)
        for n in range(self.NB_BATCH_P_GEN):
            data = torch.tensor(DATA[n * self.BATCH:(n + 1) * self.BATCH].reshape(-1, self.IO[0]),
                                dtype=torch.float)
            target = torch.tensor(LABEL[n * self.BATCH:(n + 1) * self.BATCH]).type(torch.LongTensor)
            self.OPTIM_BEST.zero_grad()
            output = self.BEST_MODEL(data)
            self.LOSS_BEST = self.BEST_CRIT(output, target)
            self.LOSS_BEST.backward()
            self.OPTIM_BEST.step()
        # score loss
        self.BEST_SCORE_LOSS += [self.LOSS_BEST.detach().numpy()[None]]
    self.BEST_SCORE_LOSS = np.concatenate(self.BEST_SCORE_LOSS)
    # Extract learned weights
    self.BEST_WEIGHT = list(self.BEST_MODEL.parameters())
    # save object
    if not os.path.isdir('OUT'):
        os.makedirs('OUT')
    TIME = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    filehandler = open("OUT" + os.path.sep + "MODEL_" + TIME + ".obj", 'wb')
    pickle.dump(self, filehandler)
    filehandler.close()
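# Reloading a saved run (illustrative sketch; the trainer's class name is not shown
# in this fragment and the timestamped filename below is a placeholder):
#   import pickle
#   with open("OUT" + os.path.sep + "MODEL_<timestamp>.obj", 'rb') as fh:
#       trained = pickle.load(fh)
#   best_model = trained.BEST_MODEL     # the retrained pRNN
#   weights = trained.BEST_WEIGHT       # list of learned parameters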