Exemplo n.º 1
0
    def __init__(self, load=None, **kwargs):
        """Build the configuration, prepare the model and bind both sockets.

        Args:
            load: passed to ``util.load_params(load, 'train')`` to seed the
                configuration; when ``None`` the configuration starts empty.
            **kwargs: merged into the configuration through ``util.update``
                (together with ``mode=RL.Mode.TRAIN``) before everything is
                forwarded to ``Default.__init__``.
        """
        args = {} if load is None else util.load_params(load, 'train')

        util.update(args, mode=RL.Mode.TRAIN, **kwargs)
        print(args)
        Default.__init__(self, **args)

        # Either pick up the stored model, or initialise one and store it.
        if not self.init:
            self.model.restore()
        else:
            self.model.init()
            self.model.save()

        def endpoint(channel):
            # TCP address on self.dump; the port is derived by util.port
            # from "<model name>/<channel>".
            return "tcp://%s:%d" % (
                self.dump, util.port(self.model.name + "/" + channel))

        zmq_context = zmq.Context.instance()

        # PULL socket for experiences.
        self.experience_socket = zmq_context.socket(zmq.PULL)
        self.experience_socket.bind(endpoint("experience"))

        # PUB socket for parameters.
        self.params_socket = zmq_context.socket(zmq.PUB)
        publish_at = endpoint("params")
        print("Binding params socket to", publish_at)
        self.params_socket.bind(publish_at)

        self.sweep_size = self.batches * self.batch_size
        print("Sweep size", self.sweep_size)

        self.buffer = util.CircularQueue(self.sweep_size)

        self.last_save = time.time()
Exemplo n.º 2
0
  def __init__(self, load=None, **kwargs):
    """Build the configuration, prepare ``self.rl`` and load experiences.

    Args:
      load: passed to ``util.load_params(load, 'train')`` to seed the
        configuration; when ``None`` the configuration starts empty.
      **kwargs: merged into the configuration through ``util.update``
        (together with ``mode=RL.Mode.TRAIN``) before everything is
        forwarded to ``Default.__init__``.
    """
    args = {} if load is None else util.load_params(load, 'train')

    util.update(args, mode=RL.Mode.TRAIN, **kwargs)
    util.pp.pprint(args)
    Default.__init__(self, **args)

    # Either pick up the stored state, or initialise it and store it.
    if not self.init:
      self.rl.restore()
    else:
      self.rl.init()
      self.rl.save()

    print("Loading experiences from", self.data)

    began = time.time()
    self.experiences = hickle.load(self.data)
    elapsed = time.time() - began
    print("Loaded experiences in %d seconds." % elapsed)

    # Make sure the 'initial' entry exists, empty if the file had none.
    if 'initial' not in self.experiences:
      self.experiences['initial'] = []
Exemplo n.º 3
0
def run(**kwargs):
    """Assemble agent parameters, create the CPU and run it.

    Args:
        **kwargs: parameter overrides. A truthy ``load`` entry is passed to
            ``util.load_params(load, 'agent')`` to obtain the base
            parameters; all entries are then merged on top via
            ``util.update``.
    """
    load = kwargs.get('load')
    params = util.load_params(load, 'agent') if load else {}

    util.update(params, **kwargs)
    print(params)

    # Asking for the gui implies running dolphin.
    if params.get('gui'):
        params['dolphin'] = True

    # Without a user directory, fall back to a fresh temporary one.
    if params.get('user') is None:
        params['user'] = tempfile.mkdtemp() + '/'

    print("Creating cpu.")
    cpu = CPU(**params)

    params['cpus'] = cpu.pids

    dolphin = None
    if params.get('dolphin'):
        launcher = DolphinRunner(**params)
        # delay for a bit to let the cpu start up
        time.sleep(2)
        print("Running dolphin.")
        dolphin = launcher()

    print("Running cpu.")
    cpu.run(dolphin_process=dolphin)
Exemplo n.º 4
0
def get_pop_ids(path):
    """Return the population ids for the agent stored at ``path``.

    The size is ``args.pop_size`` when that is truthy, otherwise the
    ``'pop_size'`` entry of the parameters loaded from ``path``. A truthy
    size yields ``[0, ..., size - 1]``; otherwise the result is ``[-1]``.
    """
    agent_params = util.load_params(path)
    size = args.pop_size or agent_params.get('pop_size')
    return list(range(size)) if size else [-1]
Exemplo n.º 5
0
def get_agents(path):
    """Return one ``(path, params, pop_id)`` tuple per population id.

    The ids are ``0 .. pop_size - 1`` when the parameters loaded from
    ``path`` carry a truthy ``'pop_size'``; otherwise the single id ``-1``
    is used.
    """
    params = util.load_params(path)
    count = params.get('pop_size')
    ids = list(range(count)) if count else [-1]
    return [(path, params, member) for member in ids]
Exemplo n.º 6
0
def get_agents(path):
    """Return one ``(path, params, pop_id)`` tuple per population id.

    The population size comes from the ``'pop_size'`` entry of the
    parameters loaded from ``path``; when both it and ``args.pop_size`` are
    truthy, the smaller of the two is used. A truthy size yields the ids
    ``0 .. size - 1``; otherwise the single id ``-1`` is used.
    """
    params = util.load_params(path)
    limit = params.get('pop_size')
    if limit and args.pop_size:
        limit = min(limit, args.pop_size)

    ids = range(limit) if limit else [-1]
    return [(path, params, member) for member in ids]
Exemplo n.º 7
0
    def __init__(self, load=None, **kwargs):
        """Build the configuration, prepare ``self.rl`` and load experiences.

        Args:
            load: passed to ``util.load_params(load, 'train')`` to seed the
                configuration; when ``None`` the configuration starts empty.
            **kwargs: merged into the configuration through ``util.update``
                (together with ``mode=RL.Mode.TRAIN``). ``experience_length``
                is always forced to 6000, replacing any caller value.
        """
        args = {} if load is None else util.load_params(load, 'train')

        kwargs.update(experience_length=6000)
        util.update(args, mode=RL.Mode.TRAIN, **kwargs)
        util.pp.pprint(args)
        Default.__init__(self, **args)

        # Either pick up the stored state, or initialise it and store it.
        if not self.init:
            self.rl.restore()
        else:
            self.rl.init()
            self.rl.save()

        # Default data location: the 'experience' entry under self.rl.path.
        if self.data is None:
            self.data = os.path.join(self.rl.path, 'experience')

        print("Loading experiences from", self.data)

        names = os.listdir(self.data)
        if self.file_limit:
            names = names[:self.file_limit]

        data_paths = [os.path.join(self.data, name) for name in names]

        print("Loading %d experiences." % len(names))

        self.experiences = []
        parallel = True

        if not parallel:
            # Sequential path: unpickle the files one at a time.
            for path in data_paths:
                with open(path, 'rb') as handle:
                    self.experiences.append(pickle.load(handle))
        else:
            # Hand the paths to util.async_map in groups of 100.
            for group in util.chunk(data_paths, 100):
                pending = util.async_map(load_experience, group)
                self.experiences.extend(pending())

        self.valid_size = self.valid_batches * self.batch_size
Exemplo n.º 8
0
    def __init__(self, load=None, **kwargs):
        """Build the configuration, record the IP, bind sockets, prepare model.

        Args:
            load: passed to ``util.load_params(load, 'train')`` to seed the
                configuration; when ``None`` the configuration starts empty.
            **kwargs: merged into the configuration through ``util.update``
                (together with ``mode=RL.Mode.TRAIN``) before everything is
                forwarded to ``Default.__init__``.
        """
        args = {} if load is None else util.load_params(load, 'train')

        util.update(args, mode=RL.Mode.TRAIN, **kwargs)
        util.pp.pprint(args)
        Default.__init__(self, **args)

        # First AF_INET address reported by netifaces for self.dump.
        address = netifaces.ifaddresses(self.dump)[netifaces.AF_INET][0]['addr']

        # Write that address to the 'ip' file under the model path.
        with open(os.path.join(self.model.path, 'ip'), 'w') as ip_file:
            ip_file.write(address)

        def endpoint(channel):
            # TCP address on `address`; the port is derived by util.port
            # from "<model name>/<channel>".
            return "tcp://%s:%d" % (
                address, util.port(self.model.name + "/" + channel))

        zmq_context = zmq.Context.instance()

        # PULL socket for experiences.
        self.experience_socket = zmq_context.socket(zmq.PULL)
        self.experience_socket.bind(endpoint("experience"))

        if self.send:
            import nnpy
            # PUB socket for parameters, only created when sending is on.
            self.params_socket = nnpy.Socket(nnpy.AF_SP, nnpy.PUB)
            publish_at = endpoint("params")
            print("Binding params socket to", publish_at)
            self.params_socket.bind(publish_at)

        self.sweep_size = self.batches * self.batch_size
        print("Sweep size", self.sweep_size)

        # Either pick up the stored model, or initialise one and store it.
        if not self.init:
            self.model.restore()
        else:
            self.model.init()
            self.model.save()

        self.last_save = time.time()
Exemplo n.º 9
0
def run(**kwargs):
    """Assemble agent parameters, create the CPU and run it.

    Args:
        **kwargs: parameter overrides. A truthy ``load`` entry is passed to
            ``util.load_params(load, 'agent')`` to obtain the base
            parameters; all entries are then merged on top via
            ``util.update``.
    """
    load = kwargs.get('load')
    params = util.load_params(load, 'agent') if load else {}

    util.update(params, **kwargs)
    pp.pprint(params)

    # Asking for the gui implies running dolphin.
    if params.get('gui'):
        params['dolphin'] = True

    # Without a user directory, fall back to a fresh temporary one.
    if params.get('user') is None:
        params['user'] = tempfile.mkdtemp() + '/'

    if params.get('random_swap'):
        task_id = os.environ.get('SLURM_ARRAY_TASK_ID')
        if task_id is None:
            # No SLURM array task id in the environment: use a random bit.
            import random
            params['swap'] = random.getrandbits(1)
        else:
            # Otherwise take the parity of the task id.
            params['swap'] = int(task_id) % 2

    print("Creating cpu.")
    cpu = CPU(**params)

    params['cpus'] = cpu.pids

    dolphin = None
    if params.get('dolphin'):
        launcher = DolphinRunner(**params)
        # delay for a bit to let the cpu start up
        time.sleep(2)
        print("Running dolphin.")
        dolphin = launcher()

    print("Running cpu.")
    cpu.run(dolphin_process=dolphin)
Exemplo n.º 10
0
    def __init__(self, load=None, **kwargs):
        """Build the configuration, record the IP, bind sockets, prepare learner.

        Args:
            load: passed to ``util.load_params(load, 'train')`` to seed the
                configuration; when ``None`` the configuration starts empty.
            **kwargs: merged into the configuration through ``util.update``
                before everything is forwarded to ``Default.__init__``.
        """
        args = {} if load is None else util.load_params(load, 'train')

        util.update(args, **kwargs)
        util.pp.pprint(args)
        Default.__init__(self, **args)

        # First AF_INET address reported by netifaces for self.dump.
        address = netifaces.ifaddresses(self.dump)[netifaces.AF_INET][0]['addr']

        # Write that address to the 'ip' file under the learner path.
        util.makedirs(self.learner.path)
        with open(os.path.join(self.learner.path, 'ip'), 'w') as ip_file:
            ip_file.write(address)

        def endpoint(channel):
            # TCP address on `address`; the port is derived by util.port
            # from "<learner path>/<channel>".
            return "tcp://%s:%d" % (
                address, util.port(self.learner.path + "/" + channel))

        # PULL socket for experiences.
        self.experience_socket = nnpy.Socket(nnpy.AF_SP, nnpy.PULL)
        self.experience_socket.bind(endpoint("experience"))

        if self.send:
            # PUB socket for parameters, only created when sending is on.
            self.params_socket = nnpy.Socket(nnpy.AF_SP, nnpy.PUB)
            publish_at = endpoint("params")
            print("Binding params socket to", publish_at)
            self.params_socket.bind(publish_at)

        self.sweep_size = self.batch_size
        print("Sweep size", self.sweep_size)

        # Either pick up the stored learner, or initialise one and store it.
        if not self.init:
            self.learner.restore()
        else:
            self.learner.init()
            self.learner.save()

        self.last_save = time.time()
Exemplo n.º 11
0
# Remaining on/off command-line switches.
parser.add_argument(
    '--profile', action='store_true', help='heap profile trainer')
parser.add_argument(
    '--disk', action='store_true',
    help='run agents and dump experiences to disk')
parser.add_argument(
    '--tenenbaum', action='store_true',
    help='run trainer on higher priority')
parser.add_argument(
    '--use_everything', action='store_true',
    help='run agents on lower priority')

args = parser.parse_args()

params = util.load_params(args.path)

# Both the trainer and the agents are enabled to start with.
run_trainer = run_agents = True

if args.local:
    agent_dump = "localhost"
    trainer_dump = "127.0.0.1"
else:  # running on openmind
    agent_dump = None
    if args.disk:
        run_trainer = False
    elif args.trainer:
        agent_dump = "172.16.24.%s" % args.trainer
        run_trainer = False
    else: