Example No. 1
    def __init__(self, args):
        super(A3C_Learner, self).__init__()

        self.actor_id = args.actor_id
        self.game = args.game
        self.gamma = args.gamma
        self.entropy_regularisation_strength = args.entropy_regularisation_strength
        self.batch_size = args.batch_size
        self.checkpoint_interval = args.checkpoint_interval
        self.file_init_weights = args.file_init_weights
        self.name_save_file = args.name_save_file
        self.local_step = 0
        self.global_step = args.global_step
        self.barrier = args.barrier
        self.queue = args.queue
        self.max_global_steps = args.max_global_steps
        self.pipes = args.pipes
        self.thread_step_counter = 1
        self.nb_actions = args.nb_actions
        self.epsilon = args.epsilon
        self.num_actor_learners = args.num_actor_learners
        self.env = atari_environment.AtariEnvironment(args.game,
                                                      visualize=args.visualize)

        self.logger = logging_utils.getLogger(
            __name__ + ":Process {}".format(self.actor_id))
Example No. 2
	def __init__(self, conf):
		self.name = conf['name']
		self.nb_actions = conf['nb_actions']
		self.gamma = conf['gamma']
		self.actor_id = conf.get('actor_id')
		self.entropy_regularisation_strength = conf['entropy_regularisation_strength']
		self.build_network()
		self.create_assign_op_weights()
		self.create_op_loss()

		self._tf_session = tf.Session()

		self._tf_session.run(tf.global_variables_initializer())

		name_logger = __name__
		if self.actor_id is not None:
			name_logger += ":Process {}".format(self.actor_id)
		self.logger = logging_utils.getLogger(name_logger)

		if self.actor_id == 0:
			self.saver = tf.train.Saver(max_to_keep=10)
		
		self.writer = tf.summary.FileWriter(
			'./tf_logs/Process_{}'.format(self.actor_id),
			graph=self._tf_session.graph)

		self._tf_summary_total_episode_reward = tf.placeholder(tf.float32, [])
		self._tf_summary_len_episode = tf.placeholder(tf.float32, [])

		tf.summary.scalar("total_episode_reward", self._tf_summary_total_episode_reward)
		tf.summary.scalar("len_episode", self._tf_summary_len_episode)

		self._tf_summary_op = tf.summary.merge_all()
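
A hypothetical helper method, not part of the original class, sketching how the merged summary op defined above would typically be evaluated and written to the FileWriter; the method name and arguments are assumptions.

	def _write_episode_summary(self, episode_reward, episode_length, global_step):
		# Hypothetical sketch: feed the episode statistics into the
		# placeholders created in __init__ and write the merged summary
		# so it shows up in TensorBoard.
		summary = self._tf_session.run(
			self._tf_summary_op,
			feed_dict={
				self._tf_summary_total_episode_reward: episode_reward,
				self._tf_summary_len_episode: episode_length,
			})
		self.writer.add_summary(summary, global_step)
		self.writer.flush()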
Example No. 3
import sys
import os
import numpy as np
from threading import Thread, Lock
import tensorflow as tf
import logging_utils
import time
from emulator import get_num_actions
import importlib
from q_network import *
from policy_v_network import *
from value_based_actor_learner import *
from policy_based_actor_learner import *
import math

logger = logging_utils.getLogger('main')


def generate_epsilon():
    """ Generate lower limit for decaying epsilon. """
    epsilon = {'limits': [0.1, 0.01, 0.5], 'probs': [0.4, 0.3, 0.3]}
    return np.random.choice(epsilon['limits'], p=epsilon['probs'])


def check_or_create_checkpoint_dir(checkpoint_dir):
    """ Create checkpoint directory if it does not exist """
    if not os.path.exists(checkpoint_dir):
        try:
            os.makedirs(checkpoint_dir)
        except OSError:
            pass
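
A hypothetical illustration, not part of the original file, of how the lower limit sampled by generate_epsilon above is typically used: epsilon is annealed linearly from 1.0 down to the per-learner floor and then held constant. The function name and the annealing horizon are assumptions.

def decayed_epsilon(step, final_epsilon, anneal_steps=1000000):
    """ Linear decay from 1.0 to the sampled floor, then held constant. """
    if step >= anneal_steps:
        return final_epsilon
    return 1.0 - (1.0 - final_epsilon) * (float(step) / anneal_steps)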
Example No. 4
from skimage.transform import resize
from skimage.color import rgb2gray
import numpy as np
import atari_py
import logging_utils
logger = logging_utils.getLogger('emulator')

import matplotlib.pyplot as plt

IMG_SCALE = 255.0
IMG_SIZE_X = 84
IMG_SIZE_Y = 84
NR_IMAGES = 4


def get_num_actions(rom_path, rom_name):
    game_path = atari_py.get_game_path(rom_name)
    ale = atari_py.ALEInterface()
    ale.loadROM(game_path)
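    # Note: despite the name, this returns the minimal action set itself
    # (the list of legal actions), not its length.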
    return ale.getMinimalActionSet()

class Emulator:
    def __init__(self, rom_path, rom_name, visualize, actor_id, rseed, single_life_episode=False):
        
        self.ale = atari_py.ALEInterface()

        self.ale.setInt("random_seed", rseed * (actor_id + 1))

        # For fuller control on explicit action repeat (>= ALE 0.5.0) 
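
A minimal sketch, not part of the original snippet, of the frame preprocessing the module-level constants above suggest: the RGB screen is converted to grayscale and resized to the 84x84 network input. The helper name is an assumption; skimage's rgb2gray already yields floats in [0, 1].

def preprocess_frame(rgb_frame):
    # Hypothetical helper: grayscale + resize to (IMG_SIZE_X, IMG_SIZE_Y).
    return resize(rgb2gray(rgb_frame), (IMG_SIZE_X, IMG_SIZE_Y))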
Example No. 5
import numpy as np
from multiprocessing import Process
import logging_utils
import tensorflow as tf
import ctypes
import pyximport
pyximport.install()
from hogupdatemv import copy, apply_grads_mom_rmsprop, apply_grads_adam
import time
import utils

CHECKPOINT_INTERVAL = 500000

logger = logging_utils.getLogger('actor_learner')


def generate_final_epsilon():
    """ Generate lower limit for decaying epsilon. """
    epsilon = {'limits': [0.1, 0.01, 0.5], 'probs': [0.4, 0.3, 0.3]}
    return np.random.choice(epsilon['limits'], p=epsilon['probs'])


class ActorLearner(Process):
    def __init__(self, args):

        super(ActorLearner, self).__init__()

        self.summ_base_dir = args.summ_base_dir

        self.local_step = 0
        self.global_step = args.global_step
Example No. 6
import argparse
from multiprocessing import Process, Value, Barrier, Queue, Pipe
import numpy as np
from A3C_Learner import A3C_Learner
import time
import logging_utils
import atari_environment

logger = logging_utils.getLogger(__name__)

def main(args):
	logger.debug("CONFIGURATION : {}".format(args))

	#Global shared counter allocated in shared memory ('i' = signed int)
	args.global_step = Value('i', 0)

	#Barrier used to synchronize the actor-learner processes
	args.barrier = Barrier(args.num_actor_learners)

	#Process-safe queue used to communicate between the actor-learner processes
	args.queue = Queue()

	#Number of actions available at each step of the game
	args.nb_actions = atari_environment.get_num_actions(args.game)

	args.visualize = bool(args.visualize)
	
	actor_learners = []
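
A minimal sketch of how the truncated loop typically continues, assuming A3C_Learner is the multiprocessing.Process subclass from Example No. 1 and that each learner is given its own actor_id before being constructed; these per-learner assignments are assumptions.

	for actor_id in range(args.num_actor_learners):
		args.actor_id = actor_id
		learner = A3C_Learner(args)
		actor_learners.append(learner)
		learner.start()

	for learner in actor_learners:
		learner.join()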
Example No. 7
import numpy as np
from multiprocessing import Process 
import logging_utils
import tensorflow as tf
import ctypes
import pyximport; pyximport.install()
from hogupdatemv import copy, apply_grads_mom_rmsprop, apply_grads_adam
import time
import utils

CHECKPOINT_INTERVAL = 500000
 
logger = logging_utils.getLogger('actor_learner')

def generate_final_epsilon():
    """ Generate lower limit for decaying epsilon. """
    epsilon = {'limits': [0.1, 0.01, 0.5], 'probs': [0.4, 0.3, 0.3]}
    return np.random.choice(epsilon['limits'], p=epsilon['probs']) 

class ActorLearner(Process):
    
    def __init__(self, args):
        
        super(ActorLearner, self).__init__()
       
        self.summ_base_dir = args.summ_base_dir
        
        self.local_step = 0
        self.global_step = args.global_step

        self.actor_id = args.actor_id
Example No. 8
import sys
import os
import numpy as np
from threading import Thread, Lock
import tensorflow as tf
import logging_utils
import time
from emulator import get_num_actions
import importlib
from q_network import *
from policy_v_network import *
from value_based_actor_learner import *
from policy_based_actor_learner import *
import math

logger = logging_utils.getLogger('main')

def generate_epsilon():
    """ Generate lower limit for decaying epsilon. """
    epsilon = {'limits': [0.1, 0.01, 0.5], 'probs': [0.4, 0.3, 0.3]}
    return np.random.choice(epsilon['limits'], p=epsilon['probs']) 

def check_or_create_checkpoint_dir(checkpoint_dir):
    """ Create checkpoint directory if it does not exist """
    if not os.path.exists(checkpoint_dir):
        try:
            os.makedirs(checkpoint_dir)
        except OSError:
            pass

def restore_vars(saver, sess, game_name, actor_learner_type, 
Example No. 9
# encoding: utf-8
import abc
import os
from logging_utils import getLogger, SentinelBuilder

from ._compat import with_metaclass

logger = getLogger(__name__)
sentinel = SentinelBuilder(logger)


class Client(with_metaclass(abc.ABCMeta)):

    def is_alive(self, pid):
        try:
            with logger.context(pid=pid), sentinel('Checking pid owner liveness'):
                return self._is_alive(pid)
        except Exception:
            return False

    @abc.abstractmethod
    def _is_alive(self, pid):
        pass # pragma: no cover

    def terminate(self, pid):
        with logger.context(pid=pid), sentinel('Terminating pid owner'):
            return self._terminate(pid)

    @abc.abstractmethod
    def _terminate(self, pid):
        pass # pragma: no cover
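
A hypothetical concrete subclass, not part of the original module, illustrating how the two abstract hooks are meant to be implemented for processes on the local machine:

import signal


class LocalClient(Client):
    """ Hypothetical example: checks and terminates local pids via os.kill. """

    def _is_alive(self, pid):
        # Signal 0 delivers nothing; it only checks that the pid exists
        # and can be signalled, raising OSError otherwise.
        try:
            os.kill(pid, 0)
            return True
        except OSError:
            return False

    def _terminate(self, pid):
        os.kill(pid, signal.SIGTERM)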