Example #1
 def __init__(self,
              IO,
              SAMPLE_SIZE,
              BATCH_SIZE,
              EPOCH,
              NB_GEN,
              NB_SEEDER,
              RNN=False,
              DATA_XPLT=0.5,
              LEARNING_RATE=1e-6,
              MOMENTUM=0.5):
     # Parameter
     self.IO = IO
     self.N = SAMPLE_SIZE
     self.BATCH = BATCH_SIZE
     self.EPOCH = EPOCH
     self.NB_GEN = NB_GEN
     self.NB_SEEDER = NB_SEEDER**2
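      # NB_SEEDER is squared so the population later splits cleanly into
      # control + survivors + children + challengers (see end of __init__)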
     self.LR = LEARNING_RATE
     self.MM = MOMENTUM
     # generate first ENN step
     self.GRAPH_LIST = [
         GRAPH_EAT([self.IO, 1], None) for n in range(self.NB_SEEDER - 1)
     ]
     self.SEEDER_LIST = [CTRL_NET(self.IO)]
     for g in self.GRAPH_LIST:
         NEURON_LIST = g.NEURON_LIST
         self.SEEDER_LIST += [
             pRNN(NEURON_LIST, self.BATCH, self.IO[0], STACK=RNN)
         ]
     self.PARENT = [(-1 * np.ones(self.NB_SEEDER))[None]]
     # best seeder model
     self.BEST_MODEL = 0
     self.OPTIM_BEST = 0
     self.BEST_CRIT = nn.CrossEntropyLoss()
     self.LOSS_BEST = 0
     self.BEST_WEIGHT = []
     # generate loss-optimizer
     self.OPTIM = [
         torch.optim.SGD(s.parameters(),
                         lr=LEARNING_RATE,
                         momentum=MOMENTUM) for s in self.SEEDER_LIST
     ]
     self.CRITERION = [nn.CrossEntropyLoss() for n in range(self.NB_SEEDER)]
     self.LOSS = self.NB_SEEDER * [0]
      # compute the number of batches per generation
     self.NB_BATCH_P_GEN = int(
         (DATA_XPLT * self.N * self.EPOCH) / (self.NB_GEN * self.BATCH))
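      # e.g. (illustrative numbers only) DATA_XPLT=0.5, N=60000, EPOCH=10,
      # NB_GEN=100, BATCH=25 -> int(300000 / 2500) = 120 batches per generation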
     # selection and accuracy
     self.SCORE_LOSS = []
     self.ACCUR = []  # in %
     self.BEST_SCORE_LOSS = []
      # next-generation split (n-plicate) and control group
      self.NB_CONTROL = 1  # fixed to a single control network (by preference)
     self.NB_CHALLENGE = int(np.sqrt(self.NB_SEEDER) - self.NB_CONTROL)
     self.NB_SURVIVOR = self.NB_CHALLENGE  # square completion
     self.NB_CHILD = int(np.sqrt(self.NB_SEEDER) - 1)  # FITNESS
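      # with NB_SEEDER = k**2 the partition is exact:
      # 1 control + (k-1) survivors + (k-1)*(k-1) children + (k-1) challengers = k**2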
Example #2
    def __init__(self, *arg, MODEL = None, CTRL=False, NET = None, COOR = None):
        self.P_MIN = 1
        # Parameter
        self.ARG = arg
        self.IO = arg[0] # image cells, action
        self.NB_P_GEN = arg[1]
        self.batch_size = arg[2]
        self.N_TIME = arg[4]
        self.N_CYCLE = arg[5]
        ## Init
        if CTRL :
            self.NET = GRAPH_EAT(None, self.CONTROL_NETWORK(self.IO))
            MODEL = CTRL_NET(self.IO)
        elif NET is None :
            self.NET = GRAPH_EAT([self.IO, self.P_MIN], None)
        else :
            self.NET = NET
        self.NEURON_LIST = self.NET.NEURON_LIST
        if (MODEL is None) and not(CTRL) :
            self.MODEL = pRNN(self.NEURON_LIST, self.batch_size, self.IO[0], STACK=True)
        else :
            self.MODEL = MODEL
        # nn optimiser
        self.GAMMA = 0.9
        self.optimizer = torch.optim.Adam(self.MODEL.parameters()) # alternative: torch.optim.SGD(self.MODEL.parameters(), lr=1e-6, momentum=0.9)
        self.criterion = nn.SmoothL1Loss() # Huber-style loss; nn.MSELoss() also works (regression, compares [batch] -> [batch], not classification)
        #self.criterion = nn.NLLLoss(reduction='sum') #negative log likelihood loss ([batch,Nout]->[batch])
        self.loss = None
        self.LOSS = []

        ## IO Coordinate
        X_A = np.mgrid[-1:2,-1:2].reshape((2,-1)).T
        X_B = np.array([[0,0],[0,2],[0,4],[2,0],[2,4],[4,0],[4,2],[4,4]])-[2,2]
        self.X,self.Y = np.concatenate((X_A,X_B)), np.array([[0,1],[1,2],[2,0]])-[1,1]
        ## Data sample (memory : 'old_state', 'action', 'new_state', 'reward', 'terminal')
        self.MEMORY = [[],[],[],[],[]]
        self.MEMORY_ = None
        ## Players
        self.prev_state = None
Example #3
 def __init__(self,
              arg,
              NAMED_MEMORY=None,
              TYPE="class",
              TIME_DEPENDANT=False):
     # parameter
     self.IO = arg[0]
     self.BATCH = arg[1]
     self.NB_GEN = arg[2]
     self.NB_SEEDER = arg[3]
     self.NB_EPISOD = arg[4]
      self.ALPHA = arg[5]  # 1 - proportion of predicted (non-random) steps
     self.NB_E_P_G = int(self.NB_EPISOD / self.NB_GEN)
     self.TIME_DEP = TIME_DEPENDANT
     self.TYPE = TYPE
     self.NAMED_M = NAMED_MEMORY
     # generate first ENN model
     self.GRAPH_LIST = [
         GRAPH_EAT([self.IO, 1], None) for n in range(self.NB_SEEDER - 1)
     ]
     self.SEEDER_LIST = [CTRL_NET(self.IO)]
     for g in self.GRAPH_LIST:
         NEURON_LIST = g.NEURON_LIST
         self.SEEDER_LIST += [
             pRNN(NEURON_LIST, self.BATCH, self.IO[0], STACK=self.TIME_DEP)
         ]
     # training parameter
     self.NEURON_LIST = []
     self.UPDATE_MODEL()
     # selection
     self.loss = pd.DataFrame(
         columns=['GEN', 'IDX_SEED', 'EPISOD', 'N_BATCH', 'LOSS_VALUES'])
     self.supp_param = None
     # evolution param
     self.NB_CONTROL = int(np.power(self.NB_SEEDER, 1. / 4))
     self.NB_EVOLUTION = int(np.sqrt(self.NB_SEEDER) -
                             1)  # square completion
     self.NB_CHALLENGE = int(self.NB_SEEDER -
                             (self.NB_EVOLUTION *
                              (self.NB_EVOLUTION + 1) + self.NB_CONTROL))
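      # e.g. NB_SEEDER=16: 2 control + 3 survivors + 3*3 mutants + 2 challengers = 16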
     # evolution variable
     self.PARENTING = [-1 * np.ones(self.NB_SEEDER)[None]]
     self.PARENTING[0][0][:self.NB_CONTROL] = 0
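      # parent id convention: 0 = control group, -1 = fresh random network,
      # i+1 = index of the seeder it descends from (see SELECTION)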
Example #4
                self.trace[t] = torch.cat([tt[t] for tt in trace])
        else :
            # Only suited to SGD; with mini-batches this acts as a pseudo-RNN perturbation
            idx_end, self.trace[:-1] = self.graph2net(BATCH_)
            # save for t+1
            for t in range(len(self.trace)):
                self.h[t][BATCH_] = self.trace[t][BATCH_].detach()
        # output probs
        return self.trace[idx_end]

if __name__ == '__main__' :
    IO = (17,3)
    BATCH = 16
    # graph part
    from GRAPH_EAT import GRAPH_EAT
    NET = GRAPH_EAT([IO, 1], None)
    print(NET.NEURON_LIST)
    for BOOL in [False,True] :
        # networks
        model = pRNN(NET.NEURON_LIST, BATCH, IO[0], STACK=BOOL)
        # data test
        tensor_in = torch.randn(BATCH,IO[0])
        tensor_out = model(tensor_in[:5])
        # print
        print('\n', tensor_out.shape, model.h[0].shape)
        # init train
        OPTIM = torch.optim.Adam(model.parameters()) 
        CRITERION = nn.CrossEntropyLoss()
        # step of train
        for i in range(5):
            print(i)
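            # the snippet is truncated here; a minimal assumed training step
            # using the Adam/CrossEntropyLoss pair defined above, with random
            # targets since the example provides no labels
            target = torch.randint(0, IO[1], (BATCH,))
            OPTIM.zero_grad()
            output = model(tensor_in)
            loss = CRITERION(output, target)
            loss.backward()
            OPTIM.step()
            print(loss.item())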
Example #5
class Q_AGENT():
    def __init__(self, *arg, MODEL = None, CTRL=False, NET = None, COOR = None):
        self.P_MIN = 1
        # Parameter
        self.ARG = arg
        self.IO = arg[0] # image cells, action
        self.NB_P_GEN = arg[1]
        self.batch_size = arg[2]
        self.N_TIME = arg[4]
        self.N_CYCLE = arg[5]
        ## Init
        if CTRL :
            self.NET = GRAPH_EAT(None, self.CONTROL_NETWORK(self.IO))
            MODEL = CTRL_NET(self.IO)
        elif NET is None :
            self.NET = GRAPH_EAT([self.IO, self.P_MIN], None)
        else :
            self.NET = NET
        self.NEURON_LIST = self.NET.NEURON_LIST
        if (MODEL is None) and not(CTRL) :
            self.MODEL = pRNN(self.NEURON_LIST, self.batch_size, self.IO[0], STACK=True)
        else :
            self.MODEL = MODEL
        # nn optimiser
        self.GAMMA = 0.9
        self.optimizer = torch.optim.Adam(self.MODEL.parameters()) # alternative: torch.optim.SGD(self.MODEL.parameters(), lr=1e-6, momentum=0.9)
        self.criterion = nn.SmoothL1Loss() # Huber-style loss; nn.MSELoss() also works (regression, compares [batch] -> [batch], not classification)
        #self.criterion = nn.NLLLoss(reduction='sum') #negative log likelihood loss ([batch,Nout]->[batch])
        self.loss = None
        self.LOSS = []

        ## IO Coordinate
        X_A = np.mgrid[-1:2,-1:2].reshape((2,-1)).T
        X_B = np.array([[0,0],[0,2],[0,4],[2,0],[2,4],[4,0],[4,2],[4,4]])-[2,2]
        self.X,self.Y = np.concatenate((X_A,X_B)), np.array([[0,1],[1,2],[2,0]])-[1,1]
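        # X: 17 observation offsets = the 3x3 inner neighbourhood (X_A) plus 8 outer-ring
        # cells at distance 2 (X_B); Y: 3 action-related offsets (all centred on the agent)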
        ## Data sample (memory : 'old_state', 'action', 'new_state', 'reward', 'terminal')
        self.MEMORY = [[],[],[],[],[]]
        self.MEMORY_ = None
        ## Players
        self.prev_state = None
    
    def INIT_ENV(self, ENV_INPUT) :
        self.prev_state = ENV_INPUT.FIRST_STEP_SET(0)

    def PARTY(self, ENV_INPUT):
        # reset the game environment (for each batch? important)
        for n in range(self.N_CYCLE):
            self.prev_state = ENV_INPUT.FIRST_STEP_SET(n)
            for t in range(self.N_TIME):
                # loop game
                for i in range(self.batch_size):
                    action = self.ACTION(self.prev_state)
                    new_state, reward, DONE = ENV_INPUT.STEP(action)
                    # Memory update
                    if i == self.batch_size-1 : DONE = True
                    self.SEQUENCING(self.prev_state,action,new_state,reward,DONE)
                    # n+1
                    self.prev_state = new_state.copy()
                    # escape loop
                    if DONE == True : break                
                # Reinforcement learning
                self.OPTIM()

    ## Action Exploration/Exploitation Dilemma
    def ACTION(self, Input) :
        img_in = torch.tensor(Input, dtype=torch.float)
        # actor-critic (old version)
        action_probs = self.MODEL(img_in)
        # exploration-exploitation dilemma
        DILEMNA = np.squeeze(action_probs.detach().numpy())
        if DILEMNA.sum() == 0 or np.isnan(DILEMNA.sum()) :
            next_action = np.random.randint(self.IO[1])
        else :
            if DILEMNA.min() < 0 : DILEMNA = DILEMNA-DILEMNA.min() # shift to non-negative (n-1 choice restriction)
            ## add dispersion (q-table values are close to each other)
            order = np.exp(np.argsort(np.argsort(DILEMNA))+1) # double argsort -> ranks, so higher-valued actions get higher probability
            # probability
            p_norm = order/order.sum()
            #print(order, p_norm)
            next_action = np.random.choice(self.IO[1], p=p_norm)
        return next_action
    
    ## Memory sequencing
    def SEQUENCING(self, prev_state,action,new_state,reward,DONE):
        self.MEMORY[0] += [prev_state]
        self.MEMORY[1] += [action]
        self.MEMORY[2] += [new_state]
        self.MEMORY[3] += [reward]
        self.MEMORY[4] += [DONE]
        if DONE :
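            # once the episode terminates, pack the lists into tensors:
            # states [T, n_features] (float), actions [T, 1] (long, for gather),
            # rewards [T], terminal flags [T] (int)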
            self.MEMORY[0] = torch.tensor(np.concatenate(self.MEMORY[0]), dtype=torch.float)
            self.MEMORY[1] = torch.tensor(np.array(self.MEMORY[1]),  dtype=torch.long).unsqueeze(1)
            self.MEMORY[2] = torch.tensor(np.concatenate(self.MEMORY[2]), dtype=torch.float)
            self.MEMORY[3] = torch.tensor(np.array(self.MEMORY[3]))
            self.MEMORY[4] = torch.tensor(np.array(self.MEMORY[4]), dtype=torch.int)
    
    ## Training Q-Table
    def OPTIM(self) :
        # extract info
        old_state, action, new_state, reward, DONE = self.MEMORY
        # actor proba
        actor = self.MODEL(old_state)
        # Compute predicted Q-values for each action
        pred_q_values_batch = actor.gather(1, action)
        pred_q_values_next  = self.MODEL(new_state)
        # Compute the target Q-value for the action performed
        target_q_values_batch = (reward+(1-DONE)*self.GAMMA*torch.max(pred_q_values_next, 1)[0]).detach().unsqueeze(1)
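        # i.e. the Bellman target y = r + (1 - done) * GAMMA * max_a' Q(s', a')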
        self.y = [pred_q_values_batch,target_q_values_batch]
        #[print(i,self.y[i].shape) for i in range(2)]
        #print(self.y[1])
        # zero the parameter gradients
        self.MODEL.zero_grad()
        # Compute the loss
        self.loss = self.criterion(pred_q_values_batch,target_q_values_batch)
        # Do backward pass
        self.loss.backward()
        self.optimizer.step()
        # save loss
        self.LOSS += [self.loss.item()]
        # reset memory
        self.MEMORY_ = self.MEMORY
        self.MEMORY = [[],[],[],[],[]]
    
    ## reset object
    def RESET(self, PROBA):
        GRAPH = self.NET.NEXT_GEN(-1)
        XY_TUPLE = (self.X,self.Y)
        if np.random.choice((False,True), 1, p=[PROBA,1-PROBA])[0]:
            return Q_AGENT(*self.ARG, NET = GRAPH, COOR = XY_TUPLE)
        else :
            return Q_AGENT(*self.ARG, MODEL = self.MODEL, NET = GRAPH, COOR = XY_TUPLE)
    
    ## mutation
    def MUTATION(self, MUT = None):
        # mutate graph
        GRAPH = self.NET.NEXT_GEN(MUT)
        return Q_AGENT(*self.ARG, NET = GRAPH)
    
    ## control group
    def CONTROL_NETWORK(self, IO) :
        """
        For Lyfe problem : not generalized
        """
        # init number of connection per layer
        """NB_H_LAYER = 2"""
        """NB_C_P_LAYER = int(np.sqrt(self.IO[0]) + np.sqrt(self.IO[1]))"""
        # network equivalence --> passer à 17 ?
        NET = np.array([[-1, 3, 4, 32, [[2,0],[2,1],[2,2],[2,3]]],
                        [ 1, 4, IO[0], 10, [[0,i] for i in range(IO[0])]],
                        [ 2, 4, 4, 20, [[1,0],[1,1],[1,2],[1,3]]]])
        # Listing
        LIST_C = np.array([[0,0,i] for i in range(IO[0])]+
                          [[10,1,0],[10,1,1],[10,1,2],[10,1,3],
                          [20,2,0],[20,2,1],[20,2,2],[20,2,3]])
        return [IO, NET.copy(), LIST_C.copy()]
Example #6
 def SELECTION(self, GEN, supp_factor=1):
      # above-median (tail) loss selection
     TailLoss = np.ones(self.NB_SEEDER)
     # extract data
     sub_loss = self.loss[self.loss.GEN == GEN]
      # check whether any SGD updates were recorded (otherwise selection is evolution-only)
     if sub_loss.size > 0:
         gb_seed = sub_loss.groupby('IDX_SEED')
          # above-median (tail) loss selection
         for i, g in gb_seed:
             if self.ALPHA != 1:
                 Tail_eps = g.EPISOD.min() + (g.EPISOD.max() -
                                              g.EPISOD.min()) * self.ALPHA
             else:
                 Tail_eps = g.EPISOD.median()
             TailLoss[int(i)] = g[g.EPISOD > Tail_eps].LOSS_VALUES.mean()
         # normalization
         relativeLOSS = (TailLoss - TailLoss.min()) / (TailLoss.max() -
                                                       TailLoss.min())
     else:
         relativeLOSS = TailLoss
      # combined coefficient in [0, 3]; lower is better
     score = supp_factor + supp_factor * relativeLOSS + relativeLOSS
     # order
     order = np.argsort(score[self.NB_CONTROL:])
      ### keep the control networks
     NET_C = self.SEEDER_LIST[:self.NB_CONTROL]
     ### generation parenting
     PARENT = [0] * self.NB_CONTROL
     ### survivor
     GRAPH_S = []
     NET_S = []
     GRAPH_IDX = list(order[:self.NB_EVOLUTION])
     for i in GRAPH_IDX:
         GRAPH_S += [self.GRAPH_LIST[i]]
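          # 90%: keep the survivor's trained network, 10%: rebuild it with fresh weights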
         if np.random.choice((True, False), 1, p=[0.9, 0.1]):
             NET_S += [self.SEEDER_LIST[self.NB_CONTROL:][i]]
         else:
             NET_S += [
                 pRNN(GRAPH_S[-1].NEURON_LIST,
                      self.BATCH,
                      self.IO[0],
                      STACK=self.TIME_DEP)
             ]
         PARENT += [i + 1]
     ### mutation
     GRAPH_M = []
     NET_M = []
     for g, j in zip(GRAPH_S, GRAPH_IDX):
         for i in range(self.NB_EVOLUTION):
             GRAPH_M += [g.NEXT_GEN()]
             NET_M += [
                 pRNN(GRAPH_M[-1].NEURON_LIST,
                      self.BATCH,
                      self.IO[0],
                      STACK=self.TIME_DEP)
             ]
             PARENT += [j + 1]
      ### new random challengers
     GRAPH_N = []
     NET_N = []
     for n in range(self.NB_CHALLENGE):
         GRAPH_N += [GRAPH_EAT([self.IO, 1], None)]
         NET_N += [
             pRNN(GRAPH_N[-1].NEURON_LIST,
                  self.BATCH,
                  self.IO[0],
                  STACK=self.TIME_DEP)
         ]
         PARENT += [-1]
      ### update seeder list and store generation info
     self.PARENTING += [np.array(PARENT)[None]]
     self.GRAPH_LIST = GRAPH_S + GRAPH_M + GRAPH_N
     self.SEEDER_LIST = NET_C + NET_S + NET_M + NET_N
     ### update model
     self.UPDATE_MODEL()
Example #7
 def fit(self, DATA, LABEL):
     # gen loop
     for o in tqdm(range(self.NB_GEN)):
         DATA, LABEL = shuffle(DATA, LABEL)
         P = (self.NB_GEN - o) / (2 * self.NB_GEN)  # proba
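          # P = probability of keeping a survivor's trained weights below;
          # decays from ~0.5 at the first generation towards 0 at the last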
          # mini-batch training for this generation
         for n in range(self.NB_BATCH_P_GEN):
             data = torch.tensor(DATA[n * self.BATCH:(n + 1) *
                                      self.BATCH].reshape(-1, self.IO[0]),
                                 dtype=torch.float)
             target = torch.tensor(LABEL[n * self.BATCH:(n + 1) *
                                         self.BATCH]).type(torch.LongTensor)
              # train each seeder network on this batch
             for s in range(self.NB_SEEDER):
                 self.OPTIM[s].zero_grad()
                 output = self.SEEDER_LIST[s](data)
                 self.LOSS[s] = self.CRITERION[s](output, target)
                 self.LOSS[s].backward()
                 self.OPTIM[s].step()
             # score loss
             self.SCORE_LOSS += [torch.tensor(self.LOSS).numpy()[None]]
         # score accuracy
         train_idx = np.random.randint(self.N, size=self.BATCH)
         dt_train = torch.tensor(DATA[train_idx].reshape((-1, self.IO[0])),
                                 dtype=torch.float)
         tg_train = torch.tensor(LABEL[train_idx])
         max_idx = self.predict(dt_train, False)
         self.ACCUR += [
             ((max_idx == tg_train).sum(1) / self.BATCH).numpy()[None]
         ]
         # evolution
          SCORE_LIST = ((1 - self.ACCUR[-1]).squeeze()) * (
              self.SCORE_LOSS[-1].squeeze())  # error rate x loss (compounding); lower is better
         ## fitness (in accuracy test)
         ORDER = np.argsort(SCORE_LIST[self.NB_CONTROL:]).astype(int)
         # control
         CTRL = self.SEEDER_LIST[:self.NB_CONTROL]
         PARENT = [0] * self.NB_CONTROL
         # survivor (reset weight or not)
         BEST = []
         B_G_ = []
         B_I = []
         for i in ORDER[:self.NB_SURVIVOR]:
             B_G_ += [self.GRAPH_LIST[i]]
             if np.random.choice((True, False), 1, p=[P, 1 - P]):
                 BEST += [self.SEEDER_LIST[self.NB_CONTROL:][i]]
             else:
                 BEST += [
                     pRNN(B_G_[-1].NEURON_LIST, self.BATCH, self.IO[0])
                 ]
             PARENT += [i + 1]
             B_I += [i + 1]
         # mutation
         MUTS = []
         M_G_ = []
         for g, j in zip(B_G_, B_I):
             for i in range(self.NB_CHILD):
                 M_G_ += [g.NEXT_GEN()]
                 MUTS += [
                     pRNN(M_G_[-1].NEURON_LIST, self.BATCH, self.IO[0])
                 ]
                 PARENT += [j]
         # challenger
         NEWS = []
         N_G_ = []
         for n in range(self.NB_CHALLENGE):
             N_G_ += [GRAPH_EAT([self.IO, 1], None)]
             NEWS += [pRNN(N_G_[-1].NEURON_LIST, self.BATCH, self.IO[0])]
             PARENT += [-1]
         # update
         self.SEEDER_LIST = CTRL + BEST + MUTS + NEWS
         self.GRAPH_LIST = B_G_ + M_G_ + N_G_
         self.PARENT += [np.array(PARENT)[None]]
         # generate loss-optimizer
         self.OPTIM = [
             torch.optim.SGD(s.parameters(), lr=self.LR, momentum=self.MM)
             for s in self.SEEDER_LIST
         ]
         self.CRITERION = [
             nn.CrossEntropyLoss() for n in range(self.NB_SEEDER)
         ]
     # compact evolution data
     self.SCORE_LOSS = np.concatenate(self.SCORE_LOSS).T
     self.ACCUR = np.concatenate(self.ACCUR).T
     self.PARENT = np.concatenate(self.PARENT).T
      # final training loop: re-train the best graph's weights from scratch
     self.BEST_MODEL = pRNN(self.GRAPH_LIST[ORDER[0]].NEURON_LIST,
                            self.BATCH, self.IO[0])
     self.OPTIM_BEST = torch.optim.SGD(self.BEST_MODEL.parameters(),
                                       lr=self.LR,
                                       momentum=self.MM)
     for i in tqdm(range(self.NB_GEN)):
         DATA, LABEL = shuffle(DATA, LABEL)
         for n in range(self.NB_BATCH_P_GEN):
             data = torch.tensor(DATA[n * self.BATCH:(n + 1) *
                                      self.BATCH].reshape(-1, self.IO[0]),
                                 dtype=torch.float)
             target = torch.tensor(LABEL[n * self.BATCH:(n + 1) *
                                         self.BATCH]).type(torch.LongTensor)
             self.OPTIM_BEST.zero_grad()
             output = self.BEST_MODEL(data)
             self.LOSS_BEST = self.BEST_CRIT(output, target)
             self.LOSS_BEST.backward()
             self.OPTIM_BEST.step()
             # score loss
             self.BEST_SCORE_LOSS += [self.LOSS_BEST.detach().numpy()[None]]
     self.BEST_SCORE_LOSS = np.concatenate(self.BEST_SCORE_LOSS)
      # Extract learned weights
     self.BEST_WEIGHT = list(self.BEST_MODEL.parameters())
     # save object
     if (not os.path.isdir('OUT')): os.makedirs('OUT')
     TIME = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
     filehandler = open("OUT" + os.path.sep + "MODEL_" + TIME + ".obj",
                        'wb')
     pickle.dump(self, filehandler)
     filehandler.close()