Пример #1
0
 def __init__(self, mode):
     self.mode = mode
     cu.mem('Reinforcement Learning Started')
     self.environment = RegionFilteringEnvironment(
         config.get(mode + 'Database'), mode)
     self.controller = QNetwork()
     cu.mem('QNetwork controller created')
     self.learner = None
     self.agent = RegionFilteringAgent(self.controller, self.learner)
     self.task = RegionFilteringTask(self.environment,
                                     config.get(mode + 'GroundTruth'))
     self.experiment = Experiment(self.task, self.agent)
 def __init__(self, mode):
   self.mode = mode
   cu.mem('Reinforcement Learning Started')
   self.environment = RegionFilteringEnvironment(config.get(mode+'Database'), mode)
   self.controller = QNetwork()
   cu.mem('QNetwork controller created')
   self.learner = None
   self.agent = RegionFilteringAgent(self.controller, self.learner)
   self.task = RegionFilteringTask(self.environment, config.get(mode+'GroundTruth'))
   self.experiment = Experiment(self.task, self.agent)
Пример #3
0
class ReinforcementLearningRunner():
    def __init__(self, mode):
        self.mode = mode
        cu.mem('Reinforcement Learning Started')
        self.environment = RegionFilteringEnvironment(
            config.get(mode + 'Database'), mode)
        self.controller = QNetwork()
        cu.mem('QNetwork controller created')
        self.learner = None
        self.agent = RegionFilteringAgent(self.controller, self.learner)
        self.task = RegionFilteringTask(self.environment,
                                        config.get(mode + 'GroundTruth'))
        self.experiment = Experiment(self.task, self.agent)

    def runEpoch(self, interactions, maxImgs):
        img = 0
        s = cu.tic()
        while img < maxImgs:
            self.experiment.doInteractions(interactions)
            self.agent.learn()
            self.agent.reset()
            self.environment.loadNextEpisode()
            img += 1
        s = cu.toc('Run epoch with ' + str(maxImgs) + ' episodes', s)

    def run(self):
        if self.mode == 'train':
            self.agent.persistMemory = True
            self.agent.startReplayMemory(len(self.environment.db.images),
                                         config.geti('trainInteractions'),
                                         config.geti('stateFeatures'))
            self.train()
        elif self.mode == 'test':
            self.agent.persistMemory = False
            self.test()

    def train(self):
        interactions = config.geti('trainInteractions')
        minEpsilon = config.getf('minTrainingEpsilon')
        epochSize = len(self.environment.db.images) / 2
        epsilon = 1.0
        self.controller.setEpsilonGreedy(epsilon)
        print 'Epoch 0: Exploration'
        self.runEpoch(interactions, len(self.environment.db.images))
        self.learner = QLearning()
        self.agent.learner = self.learner
        epoch = 1
        egEpochs = config.geti('epsilonGreedyEpochs')
        while epoch <= egEpochs:
            epsilon = epsilon - (1.0 - minEpsilon) / float(egEpochs)
            if epsilon < minEpsilon: epsilon = minEpsilon
            self.controller.setEpsilonGreedy(epsilon)
            print 'Epoch', epoch, '(epsilon-greedy:{:5.3f})'.format(epsilon)
            self.runEpoch(interactions, epochSize)
            epoch += 1
        epoch = 1
        maxEpochs = config.geti('exploitLearningEpochs')
        while epoch <= maxEpochs:
            print 'Epoch', epoch + egEpochs, '(exploitation mode: epsilon={:5.3f})'.format(
                epsilon)
            self.runEpoch(interactions, epochSize)
            epoch += 1

    def test(self):
        interactions = config.geti('testInteractions')
        self.controller.setEpsilonGreedy(config.getf('testEpsilon'))
        self.runEpoch(interactions, len(self.environment.db.images))
class ReinforcementLearningRunner():

  def __init__(self, mode):
    self.mode = mode
    cu.mem('Reinforcement Learning Started')
    self.environment = RegionFilteringEnvironment(config.get(mode+'Database'), mode)
    self.controller = QNetwork()
    cu.mem('QNetwork controller created')
    self.learner = None
    self.agent = RegionFilteringAgent(self.controller, self.learner)
    self.task = RegionFilteringTask(self.environment, config.get(mode+'GroundTruth'))
    self.experiment = Experiment(self.task, self.agent)

  def runEpoch(self, interactions, maxImgs):
    img = 0
    s = cu.tic()
    while img < maxImgs:
      self.experiment.doInteractions(interactions)
      self.agent.learn()
      self.agent.reset()
      self.environment.loadNextEpisode()
      img += 1
    s = cu.toc('Run epoch with ' + str(maxImgs) + ' episodes', s)

  def run(self):
    if self.mode == 'train':
      self.agent.persistMemory = True
      self.agent.startReplayMemory(len(self.environment.db.images), config.geti('trainInteractions'), config.geti('stateFeatures'))
      self.train()
    elif self.mode == 'test':
      self.agent.persistMemory = False
      self.test()

  def train(self):
    interactions = config.geti('trainInteractions')
    minEpsilon = config.getf('minTrainingEpsilon')
    epochSize = len(self.environment.db.images)/2
    epsilon = 1.0
    self.controller.setEpsilonGreedy(epsilon)
    print 'Epoch 0: Exploration'
    self.runEpoch(interactions, len(self.environment.db.images))
    self.learner = QLearning()
    self.agent.learner = self.learner
    epoch = 1
    egEpochs = config.geti('epsilonGreedyEpochs')
    while epoch <= egEpochs:
      epsilon = epsilon - (1.0-minEpsilon)/float(egEpochs) 
      if epsilon < minEpsilon: epsilon = minEpsilon
      self.controller.setEpsilonGreedy(epsilon)
      print 'Epoch',epoch ,'(epsilon-greedy:{:5.3f})'.format(epsilon)
      self.runEpoch(interactions, epochSize)
      epoch += 1
    epoch = 1
    maxEpochs = config.geti('exploitLearningEpochs')
    while epoch <= maxEpochs:
      print 'Epoch',epoch+egEpochs,'(exploitation mode: epsilon={:5.3f})'.format(epsilon)
      self.runEpoch(interactions, epochSize)
      epoch += 1

  def test(self):
    interactions = config.geti('testInteractions')
    self.controller.setEpsilonGreedy(config.getf('testEpsilon'))
    self.runEpoch(interactions, len(self.environment.db.images))