Example #1
 def put(self, actor_id):
     dbid = Actor.get_dbid(g.tenant, actor_id)
     try:
         actor = Actor.from_db(actors_store[dbid])
     except KeyError:
         raise APIException(
             "actor not found: {}'".format(actor_id), 404)
     previous_image = actor.image
     args = self.validate_put(actor)
     args['tenant'] = g.tenant
     update_image = False
     if args['image'] == previous_image:
         args['status'] = actor.status
     else:
         update_image = True
         args['status'] = SUBMITTED
     args['api_server'] = g.api_server
     args['owner'] = g.user
     actor = Actor(**args)
     actors_store[actor.db_id] = actor.to_db()
     if update_image:
         ch = CommandChannel()
         ch.put_cmd(actor_id=actor.db_id, image=actor.image, tenant=args['tenant'])
     # return ok(result={'update_image': str(update_image)},
     #           msg="Actor updated successfully.")
     return ok(result=actor.display(),
               msg="Actor updated successfully.")
Example #2
 def get(self, actor_id, execution_id):
     def get_hypermedia(actor, exc):
         return {'_links': {'self': '{}/actors/v2/{}/executions/{}/logs'.format(actor.api_server, actor.id, exc.id),
                            'owner': '{}/profiles/v2/{}'.format(actor.api_server, actor.owner),
                            'execution': '{}/actors/v2/{}/executions/{}'.format(actor.api_server, actor.id, exc.id)},
                 }
     dbid = Actor.get_dbid(g.tenant, actor_id)
     try:
         actor = Actor.from_db(actors_store[dbid])
     except KeyError:
         raise APIException(
             "actor not found: {}'".format(actor_id), 404)
     try:
         excs = executions_store[dbid]
     except KeyError:
         raise APIException("No executions found for actor {}.".format(actor_id))
     try:
         exc = Execution.from_db(excs[execution_id])
     except KeyError:
         raise APIException("Execution not found {}.".format(execution_id))
     try:
         logs = logs_store[execution_id]
     except KeyError:
         logs = ""
     result={'logs': logs}
     result.update(get_hypermedia(actor, exc))
     return ok(result, msg="Logs retrieved successfully.")
Example #3
def subscribe(actor_id, worker_ch):
    """
    Main loop for the Actor executor worker. Subscribes to the actor's inbox and executes actor
    containers when messages arrive. Also subscribes to the worker channel for future communications.
    :return:
    """
    actor_ch = ActorMsgChannel(actor_id)
    t = threading.Thread(target=process_worker_ch, args=(worker_ch, actor_id, actor_ch))
    t.start()
    print("Worker subscribing to actor channel...")
    while keep_running:
        update_worker_status(actor_id, worker_ch.name, READY)
        try:
            msg = actor_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            continue
        print("Received message {}. Starting actor container...".format(str(msg)))
        message = msg.pop("msg", "")
        try:
            stats, logs = execute_actor(actor_id, worker_ch, image, message, msg)
        except DockerStartContainerError as e:
            print("Got DockerStartContainerError: {}".format(str(e)))
            Actor.set_status(actor_id, ERROR)
            continue
        # add the execution to the actor store
        print("Actor container finished successfully. Got stats object:{}".format(str(stats)))
        exc_id = Execution.add_execution(actor_id, stats)
        Execution.set_logs(exc_id, logs)
Example #4
    def do_save(self):
        for hexsha in self.cached_data:

            val = self.cached_data[hexsha]
            try:
                actor = Actor.objects.get(full_name = val['name'])
            except Actor.DoesNotExist:
                actor = Actor(full_name = val['name'])
                actor.save()
                #Create the actor

            try:
                commit = Commit.objects.get(hexsha = hexsha)
            except Commit.DoesNotExist:
                commit = Commit(hexsha = hexsha, repo = self.repo_model, actor = actor)
                commit.save()

            for path, fun in val['funcs']:
                if not Function.objects.filter(name = fun, path = path).exists():
                    fmodel = Function(name = fun, commit = commit, path = path)
                    fmodel.save()
                    print "Saved  `%s` : `%s`" % (path[-16:], fun)

            for file_name in val['files_changed']:
                FileChange(path = file_name, actor = actor, commit = commit).save()


        self.cached_data.clear()
Example #5
 def get(self, actor_id):
     dbid = Actor.get_dbid(g.tenant, actor_id)
     try:
         actor = Actor.from_db(actors_store[dbid])
     except KeyError:
         raise APIException(
             "actor not found: {}. db_id:{}'".format(actor_id, dbid), 404)
     return ok(result=actor.display(), msg="Actor retrieved successfully.")
Example #6
 def post(self, actor_id):
     id = Actor.get_dbid(g.tenant, actor_id)
     try:
         actor = Actor.from_db(actors_store[id])
     except KeyError:
         raise APIException(
             "actor not found: {}'".format(actor_id), 404)
     args = self.validate_post()
     Execution.add_execution(id, args)
     return ok(result=actor.display(), msg="Actor execution added successfully.")
Example #7
 def get(self, actor_id, ch_name):
     try:
         Actor.from_db(actors_store[actor_id])
     except KeyError:
         raise WorkerException("actor not found: {}'".format(actor_id))
     try:
         worker = get_worker(actor_id, ch_name)
     except WorkerException as e:
         raise APIException(e.message, 404)
     return ok(result=worker, msg="Worker retrieved successfully.")
Example #8
 def post(self, actor_id):
     dbid = Actor.get_dbid(g.tenant, actor_id)
     args = self.validate_post()
     state = args['state']
     try:
         actor = Actor.from_db(actors_store[dbid])
     except KeyError:
         raise APIException(
             "actor not found: {}'".format(actor_id), 404)
     actors_store.update(dbid, 'state', state)
     return ok(result=actor.display(), msg="State updated successfully.")
Example #9
 def post(self, actor_id):
     """Add new permissions for an actor"""
     try:
         Actor.from_db(actors_store[actor_id])
     except KeyError:
         raise APIException(
             "actor not found: {}'".format(actor_id), 404)
     args = self.validate_post()
     add_permission(args['user'], actor_id, args['level'])
     permissions = get_permissions(actor_id)
     return ok(result=permissions, msg="Permission added successfully.")
Example #10
 def get(self, actor_id):
     try:
         Actor.from_db(actors_store[actor_id])
     except KeyError:
         raise APIException(
             "actor not found: {}'".format(actor_id), 404)
     try:
         permissions = get_permissions(actor_id)
     except PermissionsException as e:
         raise APIException(e.message, 404)
     return ok(result=permissions, msg="Permissions retrieved successfully.")
Example #11
 def post(self):
     args = self.validate_post()
     args['executions'] = {}
     args['state'] = ''
     args['subscriptions'] = []
     args['status'] = SUBMITTED
     actor = Actor(args)
     actors_store[actor.id] = actor.to_db()
     ch = CommandChannel()
     ch.put_cmd(actor_id=actor.id, image=actor.image)
     return ok(result=actor, msg="Actor created successfully.")
Example #12
 def post(self):
     args = self.validate_post()
     args['tenant'] = g.tenant
     args['api_server'] = g.api_server
     args['owner'] = g.user
     actor = Actor(**args)
     actors_store[actor.db_id] = actor.to_db()
     ch = CommandChannel()
     ch.put_cmd(actor_id=actor.db_id, image=actor.image, tenant=args['tenant'])
     add_permission(g.user, actor.db_id, 'UPDATE')
     return ok(result=actor.display(), msg="Actor created successfully.", request=request)
Example #13
 def delete(self, actor_id):
     id = Actor.get_dbid(g.tenant, actor_id)
     shutdown_workers(id)
     try:
         actor = Actor.from_db(actors_store[id])
         executions = actor.get('executions') or {}
         for ex_id, val in executions.items():
             del logs_store[ex_id]
     except KeyError:
         print("Did not find actor with id: {}".format(id))
     del actors_store[id]
     del permissions_store[id]
     return ok(result=None, msg='Actor deleted successfully.')
Example #14
    def post(self, actor_id):
        def get_hypermedia(actor, exc):
            return {'_links': {'self': '{}/actors/v2/{}/executions/{}'.format(actor.api_server, actor.id, exc),
                               'owner': '{}/profiles/v2/{}'.format(actor.api_server, actor.owner),
                               'messages': '{}/actors/v2/{}/messages'.format(actor.api_server, actor.id)},}

        args = self.validate_post()
        d = {}
        # build a dictionary of k:v pairs from the query parameters, and pass a single
        # additional object 'message' from within the post payload. Note that 'message'
        # need not be JSON data.
        for k, v in request.args.items():
            if k == 'message':
                continue
            d[k] = v
        if hasattr(g, 'user'):
            d['_abaco_username'] = g.user
        if hasattr(g, 'api_server'):
            d['_abaco_api_server'] = g.api_server
        # if hasattr(g, 'jwt'):
        #     d['_abaco_jwt'] = g.jwt
        # if hasattr(g, 'jwt_server'):
        #     d['_abaco_jwt_server'] = g.jwt_server
        if hasattr(g, 'jwt_header_name'):
            d['_abaco_jwt_header_name'] = g.jwt_header_name
        dbid = Actor.get_dbid(g.tenant, actor_id)
        # create an execution
        exc = Execution.add_execution(dbid, {'cpu': 0,
                                             'io': 0,
                                             'runtime': 0,
                                             'status': SUBMITTED,
                                             'executor': g.user})
        d['_abaco_execution_id'] = exc
        d['_abaco_Content-Type'] = args.get('_abaco_Content-Type', '')
        ch = ActorMsgChannel(actor_id=dbid)
        ch.put_msg(message=args['message'], d=d)
        # make sure at least one worker is available
        workers = Worker.get_workers(dbid)
        actor = Actor.from_db(actors_store[dbid])
        if len(workers.items()) < 1:
            ch = CommandChannel()
            ch.put_cmd(actor_id=dbid, image=actor.image, tenant=g.tenant, num=1, stop_existing=False)
        result={'execution_id': exc, 'msg': args['message']}
        result.update(get_hypermedia(actor, exc))
        case = Config.get('web', 'case')
        if not case == 'camel':
            return ok(result)
        else:
            return ok(dict_to_camel(result))
Example #15
 def post(self, actor_id):
     """Start new workers for an actor"""
     id = Actor.get_dbid(g.tenant, actor_id)
     try:
         actor = Actor.from_db(actors_store[id])
     except KeyError:
         raise APIException(
             "actor not found: {}'".format(actor_id), 404)
     args = self.validate_post()
     num = args.get('num')
     if not num or num == 0:
         num = 1
     ch = CommandChannel()
     ch.put_cmd(actor_id=actor.db_id, image=actor.image, tenant=g.tenant, num=num, stop_existing=False)
     return ok(result=None, msg="Scheduled {} new worker(s) to start.".format(str(num)))
Example #16
 def get(self, actor_id):
     dbid = Actor.get_dbid(g.tenant, actor_id)
     try:
         Actor.from_db(actors_store[dbid])
     except KeyError:
         raise APIException("actor not found: {}'".format(actor_id), 400)
     try:
         workers = Worker.get_workers(dbid)
     except WorkerException as e:
         raise APIException(e.msg, 404)
     result = []
     for id, worker in workers.items():
         worker.update({'id': id})
         result.append(worker)
     return ok(result=result, msg="Workers retrieved successfully.")
Example #17
    def test_serialize_unicode(self):
        """Tests that unicode makes the roundtrip intact"""
        actor_name = u"Za\u017c\u00f3\u0142\u0107"
        movie_title = u'G\u0119\u015bl\u0105 ja\u017a\u0144'
        ac = Actor(name=actor_name)
        mv = Movie(title=movie_title, actor=ac)
        ac.save()
        mv.save()

        serial_str = serializers.serialize(self.serializer_name, [mv])
        self.assertEqual(self._get_field_values(serial_str, "title")[0], movie_title)
        self.assertEqual(self._get_field_values(serial_str, "actor")[0], actor_name)

        obj_list = list(serializers.deserialize(self.serializer_name, serial_str))
        mv_obj = obj_list[0].object
        self.assertEqual(mv_obj.title, movie_title)
Example #18
 def get(self, actor_id):
     dbid = Actor.get_dbid(g.tenant, actor_id)
     try:
         summary = ExecutionsSummary(db_id=dbid)
     except DAOError as e:
         raise APIException("actor not found: {}. DAOError: {}'".format(actor_id, e), 404)
     return ok(result=summary.display(), msg="Actor executions retrieved successfully.")
Example #19
 def get(self, actor_id):
     try:
         actor = Actor.from_db(actors_store[actor_id])
     except KeyError:
         raise APIException(
             "actor not found: {}'".format(actor_id), 404)
     return ok(result=actor, msg="Actor retrieved successfully.")
Example #20
 def get(self, actor_id):
     def get_hypermedia(actor):
         return {'_links': {'self': '{}/actors/v2/{}/messages'.format(actor.api_server, actor.id),
                            'owner': '{}/profiles/v2/{}'.format(actor.api_server, actor.owner),
                            },
                    }
     # check that actor exists
     id = Actor.get_dbid(g.tenant, actor_id)
     try:
         actor = Actor.from_db(actors_store[id])
     except KeyError:
         raise APIException(
             "actor not found: {}'".format(actor_id), 404)
     result={'messages': len(ActorMsgChannel(actor_id=id)._queue._queue)}
     result.update(get_hypermedia(actor))
     return ok(result)
Example #21
 def get(self, actor_id):
     try:
         actor = Actor.from_db(actors_store[actor_id])
         subscriptions = actor.get('subscriptions') or {'subscriptions': None}
     except KeyError:
         raise APIException(
             "actor not found: {}'".format(actor_id), 404)
     return ok(result=subscriptions, msg="Subscriptions retrieved successfully.")
Example #22
 def validate_put(self, actor):
     # inherit derived attributes from the original actor, including id and db_id:
     parser = Actor.request_parser()
     # remove since name is only required for POST, not PUT
     parser.remove_argument('name')
     # this update overrides all required and optional attributes
     actor.update(parser.parse_args())
     return actor
Example #23
 def delete(self, actor_id, ch_name):
     id = Actor.get_dbid(g.tenant, actor_id)
     try:
         worker = Worker.get_worker(id, ch_name)
     except WorkerException as e:
         raise APIException(e.msg, 404)
     shutdown_worker(ch_name)
     return ok(result=None, msg="Worker scheduled to be stopped.")
Example #24
def main(worker_ch_name, image):
    worker_ch = WorkerChannel(name=worker_ch_name)
    # first, attempt to pull image from docker hub:
    try:
        print("Worker pulling image {}...".format(image))
        pull_image(image)
    except DockerError as e:
        # return a message to the spawner that there was an error pulling image and abort
        worker_ch.put({'status': 'error', 'msg': str(e)})
        raise e
    # inform spawner that image pulled successfully
    print("Image pulled successfully")

    # wait to receive message from spawner that it is time to subscribe to the actor channel
    print("Worker waiting on message from spawner...")
    result = worker_ch.put_sync({'status': 'ok'})

    if result['status'] == 'error':
        print("Worker received error message from spawner: {}. Quiting...".format(str(result)))
        raise WorkerException(str(result))
    actor_id = result.get('actor_id')
    tenant = result.get('tenant')
    print("Worker received ok from spawner. Message: {}, actor_id:{}".format(result, actor_id))
    api_server = None
    client_id = None
    client_secret = None
    access_token = None
    refresh_token = None
    if result.get('client') == 'yes':
        api_server = result.get('api_server')
        client_id = result.get('client_id')
        client_secret = result.get('client_secret')
        access_token = result.get('access_token')
        refresh_token = result.get('refresh_token')
    else:
        print("Did not get client:yes, got client:{}".format(result.get('client')))
    Actor.set_status(actor_id, READY)
    subscribe(tenant,
              actor_id,
              api_server,
              client_id,
              client_secret,
              access_token,
              refresh_token,
              worker_ch)
Example #25
def manage_workers(actor_id):
    """Scale workers for an actor if based on message queue size and policy."""
    print("Entering manage_workers for {}".format(actor_id))
    try:
        actor = Actor.from_db(actors_store[actor_id])
    except KeyError:
        print("Did not find actor; returning.")
        return
    workers = Worker.get_workers(actor_id)
Example #26
 def get(self, actor_id):
     # check that actor exists
     try:
         actor = Actor.from_db(actors_store[actor_id])
     except KeyError:
         raise APIException(
             "actor not found: {}'".format(actor_id), 404)
     # TODO
     # retrieve pending messages from the queue
     return ok(result={'messages': []})
Example #27
def handle(socket, address):
    fileobj = socket.makefile('rw')
    while not Actor.by_socket(socket).disconnected:
        line = fileobj.readline()
        if not line:
            Actor.by_socket(socket).flush()
            Actor.by_socket(socket).disconnect()
            continue
        try:
            msg = Message.from_string(line)
            log.debug('<= %s %s' % (repr(msg.target), repr(msg)))
            resp = dispatcher.dispatch(socket, msg)
        except Exception as e:
            log.exception(e)
            actor = Actor.by_socket(socket)
            if actor.is_user() and actor.get_user().registered.nick and actor.get_user().registered.user:
                resp = [
                    Message(actor, 'NOTICE', 'The message your client has just sent could not be parsed or processed.'),
                    Message(actor, 'NOTICE', 'If this is a problem with the server, please open an issue at:'),
                    Message(actor, 'NOTICE', 'https://github.com/abesto/python-ircd'),
                    Message(actor, 'NOTICE', '---'),
                    Message(actor, 'NOTICE', 'The message sent by your client was:'),
                    Message(actor, 'NOTICE', line.strip("\n")),
                    Message(actor, 'NOTICE', 'The error was:'),
                    Message(actor, 'NOTICE', str(e)),
                    Message(actor, 'NOTICE', '---'),
                    Message(actor, 'NOTICE', 'Closing connection.')
                ]
                quit_resp = dispatcher.dispatch(socket, Message(None, 'QUIT', 'Protocol error'))
                if isinstance(quit_resp, list):
                    resp += quit_resp
                else:
                    resp.append(quit_resp)
            else:
                resp = Message(actor, 'ERROR')
            Actor.by_socket(socket).disconnect()

        try:
            router.send(resp)
        except Exception as e:
            log.exception(e)
            Actor.by_socket(socket).disconnect()
Example #28
 def post(self, actor_id):
     args = self.validate_post()
     state = args['state']
     try:
         actor = Actor.from_db(actors_store[actor_id])
     except KeyError:
         raise APIException(
             "actor not found: {}'".format(actor_id), 404)
     actor.state = state
     actors_store[actor_id] = actor.to_db()
     return ok(result=actor, msg="State updated successfully.")
Example #29
 def check_new_params(self, cmd):
     valid, msg = self.check_common(cmd)
     # validate the actor_id
     try:
         actor = Actor.from_db(actors_store[cmd.get('actor_id')])
     except KeyError:
         return False, "Unable to look up actor with id: {}".format(cmd.get('actor_id')), None
     # validate the worker id
     try:
         Worker.get_worker(actor_id=cmd.get('actor_id'), ch_name=cmd.get('worker_id'))
     except WorkerException as e:
         return False, "Unable to look up worker: {}".format(e.msg), None
     return valid, msg, actor.owner
Example #30
 def put(self, actor_id):
     try:
         actor = Actor.from_db(actors_store[actor_id])
     except KeyError:
         raise APIException(
             "actor not found: {}'".format(actor_id), 404)
     args = self.validate_put()
     update_image = False
     args['name'] = actor['name']
     args['id'] = actor['id']
     args['executions'] = actor['executions']
     args['state'] = actor['state']
     if args['image'] == actor.image:
         args['status'] = actor.status
     else:
         update_image = True
         args['status'] = SUBMITTED
     actor = Actor(args)
     actors_store[actor.id] = actor.to_db()
     if update_image:
         ch = CommandChannel()
         ch.put_cmd(actor_id=actor.id, image=actor.image)
     return ok(result=actor, msg="Actor updated successfully.")
Example #31
class TD3(object):
    def __init__(self, state_dim, action_dim, max_action, memory, args):

        # actor
        self.actor = Actor(state_dim,
                           action_dim,
                           max_action,
                           layer_norm=args.layer_norm)
        self.actor_target = Actor(state_dim,
                                  action_dim,
                                  max_action,
                                  layer_norm=args.layer_norm)
        self.actor_target.load_state_dict(self.actor.state_dict())
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                lr=args.actor_lr)

        # critic
        self.critic = CriticTD3(state_dim,
                                action_dim,
                                layer_norm=args.layer_norm)
        self.critic_target = CriticTD3(state_dim,
                                       action_dim,
                                       layer_norm=args.layer_norm)
        self.critic_target.load_state_dict(self.critic.state_dict())
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 lr=args.critic_lr)

        # cuda
        if torch.cuda.is_available():
            self.actor = self.actor.cuda()
            self.actor_target = self.actor_target.cuda()
            self.critic = self.critic.cuda()
            self.critic_target = self.critic_target.cuda()

        # misc
        self.criterion = nn.MSELoss()
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.max_action = max_action
        self.memory = memory

        # hyper-parameters
        self.tau = args.tau
        self.discount = args.discount
        self.batch_size = args.batch_size
        self.policy_noise = args.policy_noise
        self.noise_clip = args.noise_clip
        self.policy_freq = args.policy_freq

    def select_action(self, state, noise=None):
        state = FloatTensor(state.reshape(-1, self.state_dim))
        action = self.actor(state).cpu().data.numpy().flatten()

        if noise is not None:
            action += noise.sample()

        return np.clip(action, -self.max_action, self.max_action)

    def train(self, iterations):

        for it in tqdm(range(iterations)):

            # Sample replay buffer
            x, y, u, r, d = self.memory.sample(self.batch_size)
            state = FloatTensor(x)
            next_state = FloatTensor(y)
            action = FloatTensor(u)
            reward = FloatTensor(r)
            done = FloatTensor(1 - d)

            # Select action according to policy and add clipped noise
            noise = np.clip(
                np.random.normal(0,
                                 self.policy_noise,
                                 size=(self.batch_size, self.action_dim)),
                -self.noise_clip, self.noise_clip)
            next_action = self.actor_target(next_state) + FloatTensor(noise)
            next_action = next_action.clamp(-self.max_action, self.max_action)

            # Q target = reward + discount * min_i(Qi(next_state, pi(next_state)))
            with torch.no_grad():
                target_Q1, target_Q2 = self.critic_target(
                    next_state, next_action)
                target_Q = torch.min(target_Q1, target_Q2)
                target_Q = reward + (done * self.discount * target_Q)

            # Get current Q estimates
            current_Q1, current_Q2 = self.critic(state, action)

            # Compute critic loss
            critic_loss = self.criterion(
                current_Q1, target_Q) + self.criterion(current_Q2, target_Q)

            # Optimize the critic
            self.critic_optimizer.zero_grad()
            critic_loss.backward()
            self.critic_optimizer.step()

            # Delayed policy updates
            if it % self.policy_freq == 0:

                # Compute actor loss
                Q1, Q2 = self.critic(state, self.actor(state))
                actor_loss = -Q1.mean()

                # Optimize the actor
                self.actor_optimizer.zero_grad()
                actor_loss.backward()
                self.actor_optimizer.step()

                # Update the frozen target models
                for param, target_param in zip(
                        self.critic.parameters(),
                        self.critic_target.parameters()):
                    target_param.data.copy_(self.tau * param.data +
                                            (1 - self.tau) * target_param.data)

                for param, target_param in zip(self.actor.parameters(),
                                               self.actor_target.parameters()):
                    target_param.data.copy_(self.tau * param.data +
                                            (1 - self.tau) * target_param.data)

    def load(self, filename):
        self.actor.load_model(filename, "actor")
        self.critic.load_model(filename, "critic")

    def save(self, output):
        self.actor.save_model(output, "actor")
        self.critic.save_model(output, "critic")
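
The "Update the frozen target models" loops above implement Polyak averaging of the target networks. For reference, a minimal, self-contained sketch of that soft-update step; the helper name and the Linear stand-in networks are illustrative and not part of the example:

import torch
import torch.nn as nn

def soft_update(source: nn.Module, target: nn.Module, tau: float) -> None:
    # target <- tau * source + (1 - tau) * target, parameter by parameter
    with torch.no_grad():
        for p, tp in zip(source.parameters(), target.parameters()):
            tp.data.copy_(tau * p.data + (1.0 - tau) * tp.data)

net = nn.Linear(4, 2)
target_net = nn.Linear(4, 2)
target_net.load_state_dict(net.state_dict())  # start from identical weights
soft_update(net, target_net, tau=0.005)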
Example #32
def main():
    env = gym.make('InvertedPendulum-v2')
    # states: [x, theta, x', theta']
    # action: [horizontal force]
    nstates = 4
    nactions = 1

    T = 2048  # environement steps per update
    batch_size = 64
    epochs = 10
    lr = 0.01
    discount = 0.99
    clipping_epsilon = 0.2
    lam = 0.95  # GAE parameter
    total_timesteps = 1000000
    max_ep_length = 1000  # cap on a single episode's length (value assumed; not given in the original)

    actor = Actor(nstates, nactions)
    critic = Critic(nstates)

    n_updates = total_timesteps // T
    if total_timesteps % T != 0:
        n_updates += 1

    n_batches_per_update = T // batch_size
    if T % batch_size != 0:
        n_batches_per_update += 1

    episode_rewards = []
    actor_losses = []
    critic_losses = []

    for update in tqdm(range(n_updates)):
        states, actions, rewards, dones, values, log_probs, ep_rewards = rollout(
            env, actor, critic, T, nstates, max_ep_length)

        episode_rewards += ep_rewards

        advantages, returns = get_advantages_and_returns(
            dones, rewards, values, discount, lam, T)

        idx = np.arange(T)

        for k in range(epochs):
            np.random.default_rng().shuffle(idx)

            for n in range(0, T, batch_size):
                batch_idx = idx[n:n + batch_size]
                batch_states = states[batch_idx]
                batch_actions = actions[batch_idx]
                batch_log_probs = log_probs[batch_idx]
                batch_A = advantages[batch_idx]
                batch_returns = returns[batch_idx]

                _, current_log_probs = actor.forward(batch_states,
                                                     batch_actions,
                                                     requires_grad=True)
                ratios = np.exp(current_log_probs - batch_log_probs)
                clipped_ratios = np.minimum(
                    1 + clipping_epsilon,
                    np.maximum(1 - clipping_epsilon, ratios))

                unclipped_surrogate = ratios * batch_A
                clipped_surrogate = clipped_ratios * batch_A
                actor_loss = -np.minimum(unclipped_surrogate,
                                         clipped_surrogate).mean()

                current_state_values = critic.forward(batch_states,
                                                      requires_grad=True)
                critic_loss = ((current_state_values -
                                batch_returns)**2).mean()

                # derivative of actor_loss w.r.t current_log_probs
                dAL_dlp = -unclipped_surrogate
                # derivative of clipped_ratios w.r.t ratios
                dcr_dr = np.zeros_like(ratios)
                dcr_dr[(ratios < 1 + clipping_epsilon)
                       & (ratios > 1 - clipping_epsilon)] = 1.0
                # only include the derivative of the clipped_ratio if the clipped_ratio was used
                clipped_used_idx = clipped_surrogate < unclipped_surrogate
                dAL_dlp[clipped_used_idx] *= dcr_dr[clipped_used_idx]

                # derivative of critic_loss w.r.t current_state_values
                dCL_dsv = current_state_values - batch_returns

                actor.backward(dAL_dlp)
                critic.backward(dCL_dsv)

                actor.optimization_step(lr)
                critic.optimization_step(lr)

                actor_losses.append(actor_loss)
                critic_losses.append(critic_loss)

    env.close()

    fig, ax = plt.subplots()
    ax.plot(moving_average(episode_rewards, 100))
    plt.show()
    plt.close()

    fig, ax = plt.subplots()
    ax.plot(moving_average(critic_losses, 10))
    plt.show()
    plt.close()
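
The minibatch update above computes the PPO clipped surrogate objective by hand with NumPy. As a compact reference, a self-contained sketch of that loss; the function name and the toy arrays are illustrative, not taken from the example:

import numpy as np

def clipped_surrogate_loss(ratios, advantages, eps=0.2):
    # -mean( min( r*A, clip(r, 1-eps, 1+eps)*A ) ), as in the loop above
    clipped = np.clip(ratios, 1.0 - eps, 1.0 + eps)
    return -np.minimum(ratios * advantages, clipped * advantages).mean()

ratios = np.exp(np.array([-0.4, 0.0, 0.6]))   # exp(new_log_prob - old_log_prob)
advantages = np.array([1.0, -1.0, 2.0])
print(clipped_surrogate_loss(ratios, advantages))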
Example #33
class DDPG:
    """Implementation of DDPG.

    This implementation is adapted to this particular environment running several agents.
    At each time step, the same actor is controlling each agent sequentially.
    """

    def __init__(self, state_size, action_size, config):
        """Initialize algorithm."""
        if config.PER:
            self.memory = PrioritizeReplayBuffer(
                config.BUFFER_SIZE, config.BATCH_SIZE, config.SEED
            )
        else:
            self.memory = ReplayBuffer(
                config.BUFFER_SIZE, config.BATCH_SIZE, config.SEED
            )

        # Randomly initialize critic network and actor
        self.actor = Actor(state_size, action_size, config.SEED).to(device)
        self.critic = Critic(state_size, action_size, config.SEED).to(device)

        # Initialize target networks with weights from actor critic
        # Actor
        self.actor_target = Actor(state_size, action_size, config.SEED).to(device)
        self.actor_target.load_state_dict(self.actor.state_dict())
        # Critic
        self.critic_target = Critic(state_size, action_size, config.SEED).to(device)
        self.critic_target.load_state_dict(self.critic.state_dict())

        # Actor optimizer
        self.actor_optimizer = torch.optim.Adam(
            self.actor.parameters(), lr=config.LR_ACTOR
        )
        # Critic optimizer
        self.critic_optimizer = torch.optim.Adam(
            self.critic.parameters(), lr=config.LR_CRITIC
        )

        self.config = config

        self.t_step = 0

        self.expl_noise = config.EXPL_NOISE

    def step(self, target_sample=None, **kwargs):
        """Run a step of algorithm update."""
        # Sample a random minibatch of transitions
        states, actions, rewards, next_states, dones = self._draw_minibatch()

        # Compute the target Q value
        target_Q = self.critic_target(
            next_states, self.actor_target(next_states)
        ).detach()
        y = rewards + (1 - dones) * self.config.GAMMA * target_Q

        # Update critic by minimizing the loss
        current_Q = self.critic(states, actions)

        # Compute TD error
        td_error = y - current_Q

        if self.config.PER:
            # Get importance_sampling_weights
            weights = torch.Tensor(self.memory.importance_sampling()).unsqueeze(1)
            # Update priorities
            self.memory.update_priorities(td_error.detach().cpu().numpy())
            # Compute critic loss
            critic_loss = torch.mean(weights * td_error ** 2)
        else:
            # Compute critic loss
            critic_loss = torch.mean(td_error ** 2)

        # Optimize critic
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        # Clip gradient
        nn.utils.clip_grad_norm_(self.critic.parameters(), 1)
        self.critic_optimizer.step()

        # Update the actor policy using the sampled policy gradient:
        actor_loss = -self.critic(states, self.actor(states)).mean()
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        # Clip gradient
        nn.utils.clip_grad_norm_(self.actor.parameters(), 1)
        self.actor_optimizer.step()

        # Update target networks
        self.soft_update()

    def train(self, env, num_episode):
        """Train a DDPG agent."""
        scores = []
        scores_window = deque(maxlen=100)

        for episode in range(num_episode):
            # Init state and episode score
            states = env.reset(train_mode=True)
            score = np.zeros(states.shape[0])
            done = False

            # Run episode
            while not done:
                # Select and run action
                actions = self.predict_actions(states)
                # TODO: dynamic low and high selection
                actions = self.add_gaussian_noise(actions, -1, 1)
                next_states, rewards, dones = env.step(actions)

                # Store all n_agent episodes in replay buffer
                for state, action, reward, next_state, done in zip(
                    states, actions, rewards, next_states, dones
                ):
                    self.memory.add(state, action, reward, next_state, done)

                # Update time step
                self.t_step = (self.t_step + 1) % self.config.UPDATE_EVERY

                # Optimisation step if UPDATE_EVERY and enough examples in memory
                if self.t_step == 0 and len(self.memory) > self.config.BATCH_SIZE:
                    for _ in range(self.config.UPDATE_STEPS):
                        self.step()

                # Update state and scores
                states = next_states
                score += rewards

                # End episode if any of the agent is done, to avoid storing too much
                # Done transitions in the replay buffer
                done = any(dones)

            # Keep track of running mean
            scores_window.append(max(score))

            # Append current mean to scores list
            scores.append(np.mean(scores_window))

            # Logging
            print(
                "\rEpisode {}\tAverage Score: {:.2f}, Last Score: {:.2f}".format(
                    episode, np.mean(scores_window), max(score)
                ),
                end="",
            )
            if (episode + 1) % 100 == 0:
                print(
                    "\rEpisode {}\tAverage Score: {:.2f}".format(
                        episode, np.mean(scores_window)
                    )
                )

        return scores

    def soft_update(self):
        """Update the frozen target models."""
        tau = self.config.TAU
        # Critic
        for param, target_param in zip(
            self.critic.parameters(), self.critic_target.parameters()
        ):
            target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)

        # Actor
        for param, target_param in zip(
            self.actor.parameters(), self.actor_target.parameters()
        ):
            target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)

    def predict_actions(self, states, **kwargs):
        """Predict next actions based on current policy."""
        states = torch.from_numpy(states).float().unsqueeze(0).to(device)

        # Set actor to eval mode
        self.actor.eval()

        actions = []
        with torch.no_grad():
            for state in states:
                action = self.actor(state)
                actions.append(action.detach().numpy())

        # Set actor to train mode
        self.actor.train()

        return np.array(actions).squeeze()

    def add_gaussian_noise(self, action, low, high):
        """Add Gaussian noise to action, and clip between low and high."""
        return (action + np.random.normal(0, self.expl_noise, size=action.shape)).clip(
            low, high
        )

    def _draw_minibatch(self):
        """Draw a minibatch in the replay buffer."""
        states, actions, rewards, next_states, done = zip(*self.memory.sample())

        states = torch.Tensor(states).to(device)
        actions = torch.Tensor(actions).to(device)
        rewards = torch.Tensor(rewards).unsqueeze(1).to(device)
        next_states = torch.Tensor(next_states).to(device)
        done = torch.Tensor(done).unsqueeze(1).to(device)

        return states, actions, rewards, next_states, done

    def save_model(self, path, **kwargs):
        """Save actor model weights."""
        torch.save(self.actor.state_dict(), path)
Example #34
 def test_401_drop_actor_unsuccessful(self):
     actor = Actor('ali', 30, 'M')
     actor.insert()
     res = self.client().delete('/actors/' + str(actor.id),
                                headers=settingup_auth(''))
     self.assertEqual(res.status_code, 401)
Example #35
    def __init__(self, env, nS, nA, config):
        self.seed = config.seed
        self.name = config.name
        self.nA = nA
        self.nS = nS
        self.num_agents = config.num_agents
        self.episodes = config.episodes
        self.tmax = config.tmax
        self.print_every = config.print_every
        self.update_every = config.UPDATE_EVERY
        self.SGD_epoch = config.SGD_epoch
        self.actor_path = config.actor_path
        self.critic_path = config.critic_path
        self.noise = GaussianNoise((self.num_agents, nA), config.episodes)
        # self.noise = OUnoise(nA,config.seed)
        self.winning_condition = config.winning_condition
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        # Hyperparams
        self.gamma = config.gamma
        self.buffer_size = config.buffer_size
        self.min_buffer_size = config.min_buffer_size
        self.batch_size = config.batch_size
        self.L2 = config.L2
        self.tau = config.TAU

        # For multi agent
        self.nO = self.num_agents * nS  # Observation space
        self.env = env
        self.R = ReplayBuffer(config.buffer_size, config.batch_size,
                              config.seed)

        # Instantiating Actor and Critic
        self.base_actor = Actor(self.seed, self.nS, self.nA)
        self.base_critic = Critic(self.seed, self.nO, self.nA)

        # Instantiate the desired number of agents and envs
        self.local_critics = [
            Critic(self.seed, self.nO, self.nA)
            for agent in range(self.num_agents)
        ]
        self.local_actors = [
            Actor(self.seed, self.nS, self.nA)
            for agent in range(self.num_agents)
        ]
        self.target_critics = [
            Critic(self.seed, self.nO, self.nA)
            for agent in range(self.num_agents)
        ]
        self.target_actors = [
            Actor(self.seed, self.nS, self.nA)
            for agent in range(self.num_agents)
        ]

        # Copy the weights from base agents to target and local
        for critic in self.local_critics:
            hard_update(self.base_critic, critic)
        for critic in self.target_critics:
            hard_update(self.base_critic, critic)
        for actor in self.local_actors:
            hard_update(self.base_actor, actor)
        for actor in self.target_actors:
            hard_update(self.base_actor, actor)

        # Instantiate optimizers
        self.critic_optimizers = [
            optim.Adam(self.local_critics[i].parameters(),
                       lr=1e-3,
                       weight_decay=self.L2) for i in range(self.num_agents)
        ]
        self.actor_optimizers = [
            optim.Adam(self.local_actors[i].parameters(), lr=1e-4)
            for i in range(self.num_agents)
        ]
Example #36
from collections import deque
import random
import torch
from torch import optim
from tqdm import tqdm
from env import Env
from models import Actor, Critic, create_target_network, update_target_network
from utils import plot

max_steps, update_start, update_interval, batch_size, discount, policy_delay, polyak_rate = 100000, 10000, 4, 128, 0.99, 2, 0.995
env = Env()
actor = Actor()
critic_1 = Critic(state_action=True)
critic_2 = Critic(state_action=True)
target_actor = create_target_network(actor)
target_critic_1 = create_target_network(critic_1)
target_critic_2 = create_target_network(critic_2)
actor_optimiser = optim.Adam(actor.parameters(), lr=1e-3)
critics_optimiser = optim.Adam(list(critic_1.parameters()) +
                               list(critic_2.parameters()),
                               lr=1e-3)
D = deque(maxlen=10000)

state, done, total_reward = env.reset(), False, 0
pbar = tqdm(range(1, max_steps + 1), unit_scale=1, smoothing=0)
for step in pbar:
    with torch.no_grad():
        if step < update_start:
            # To improve exploration take actions sampled from a uniform random distribution over actions at the start of training
            action = torch.tensor([[2 * random.random() - 1]])
        else:
Example #37
class Agent(object):
    '''
    Implementation of a DQN agent that interacts with and learns from the
    environment
    '''
    def __init__(self, state_size, action_size, rand_seed, meta_agent):
        '''Initialize a MetaAgent object.
        :param state_size: int. dimension of each state
        :param action_size: int. dimension of each action
        :param nb_agents: int. number of agents to use
        :param rand_seed: int. random seed
        :param memory: ReplayBuffer object.
        '''

        self.action_size = action_size
        self.__name__ = 'DDPG'

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size, rand_seed).to(DEVC)
        self.actor_target = Actor(state_size, action_size, rand_seed).to(DEVC)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size,
                                   meta_agent.nb_agents, rand_seed).to(DEVC)
        self.critic_target = Critic(state_size, action_size,
                                    meta_agent.nb_agents, rand_seed).to(DEVC)
        # NOTE: the decay corresponds to L2 regularization
        self.critic_optimizer = optim.Adam(
            self.critic_local.parameters(),
            lr=LR_CRITIC)  # , weight_decay=WEIGHT_DECAY)

        # Noise process
        self.noise = OUNoise(action_size, rand_seed)

        # Replay memory
        self.memory = meta_agent.memory

        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0

    def step(self, state, action, reward, next_state, done, others_states,
             others_actions, others_next_states):
        self.memory.add(state, action, reward, next_state, done, others_states,
                        others_actions, others_next_states)

        # Learn every UPDATE_EVERY time steps.
        self.t_step = (self.t_step + 1) % UPDATE_EVERY
        if self.t_step == 0:
            # If enough samples are available in memory, get random subset and learn
            if len(self.memory) > BATCH_SIZE:
                # source: Sample a random minibatch of N transitions from R
                experiences = self.memory.sample()
                self.learn(experiences, GAMMA)

    def act(self, states, add_noise=True):
        '''Returns actions for given states as per current policy.
        :param states: array_like. current states
        :param add_noise: Boolean. If should add noise to the action
        '''
        states = torch.from_numpy(states).float().to(DEVC)
        self.actor_local.eval()
        with torch.no_grad():
            actions = self.actor_local(states).cpu().data.numpy()
        self.actor_local.train()
        if add_noise:
            actions += self.noise.sample()
        return np.clip(actions, -1, 1)

    def reset(self):
        self.noise.reset()

    def learn(self, experiences, gamma):
        '''
        Update policy and value params using given batch of experience tuples.
        Q_targets = r + γ * critic_target(next_state, actor_target(next_state))
        where:
            actor_target(state) -> action
            critic_target(state, action) -> Q-value
        :param experiences: Tuple[torch.Tensor]. tuple of (s, a, r, s', done)
        :param gamma: float. discount factor
        '''
        (states, actions, rewards, next_states, dones, others_states,
         others_actions, others_next_states) = experiences
        # rewards_ = torch.clamp(rewards, min=-1., max=1.)
        rewards_ = rewards
        all_states = torch.cat((states, others_states), dim=1).to(DEVC)
        all_actions = torch.cat((actions, others_actions), dim=1).to(DEVC)
        all_next_states = torch.cat((next_states, others_next_states),
                                    dim=1).to(DEVC)

        # --------------------------- update critic ---------------------------
        # Get predicted next-state actions and Q values from target models
        l_all_next_actions = []
        l_all_next_actions.append(self.actor_target(states))
        l_all_next_actions.append(self.actor_target(others_states))
        all_next_actions = torch.cat(l_all_next_actions, dim=1).to(DEVC)

        Q_targets_next = self.critic_target(all_next_states, all_next_actions)
        # Compute Q targets for current states (y_i)
        Q_targets = rewards_ + (gamma * Q_targets_next * (1 - dones))
        # Compute critic loss: L = 1/N * SUM{(y_i - Q(s_i, a_i|θ^Q))^2}
        Q_expected = self.critic_local(all_states, all_actions)
        critic_loss = F.mse_loss(Q_expected, Q_targets)
        # Minimize the loss
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()

        # --------------------------- update actor ---------------------------
        # Compute actor loss
        this_actions_pred = self.actor_local(states)
        others_actions_pred = self.actor_local(others_states)
        others_actions_pred = others_actions_pred.detach()
        actions_pred = torch.cat((this_actions_pred, others_actions_pred),
                                 dim=1).to(DEVC)
        actor_loss = -self.critic_local(all_states, actions_pred).mean()
        # Minimize the loss
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # ---------------------- update target networks ----------------------
        # Update the critic target networks
        # Update the actor target networks
        self.soft_update(self.critic_local, self.critic_target, TAU)
        self.soft_update(self.actor_local, self.actor_target, TAU)

    def soft_update(self, local_model, target_model, tau):
        '''Soft update model parameters.
        θ_target = τ*θ_local + (1 - τ)*θ_target
        :param local_model: PyTorch model. weights will be copied from
        :param target_model: PyTorch model. weights will be copied to
        :param tau: float. interpolation parameter
        '''
        iter_params = zip(target_model.parameters(), local_model.parameters())
        for target_param, local_param in iter_params:
            tensor_aux = tau * local_param.data + (1.0 -
                                                   tau) * target_param.data
            target_param.data.copy_(tensor_aux)

Example #38
class TD3:
    def __init__(self,
                 env,
                 state_dim,
                 action_dim,
                 max_action,
                 gamma=0.99,
                 tau=0.005,
                 policy_noise=0.2,
                 noise_clip=0.5,
                 policy_freq=2):
        self.actor = Actor(state_dim, action_dim)
        self.actor_target = Actor(state_dim, action_dim)
        self.actor_target.load_state_dict(self.actor.state_dict())
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=1e-3)

        self.critic = Critic(state_dim, action_dim)
        self.critic_target = Critic(state_dim, action_dim)
        self.critic_target.load_state_dict(self.critic.state_dict())
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=1e-3)

        self.max_action = max_action
        self.gamma = gamma
        self.tau = tau
        self.policy_noise = policy_noise
        self.noise_clip = noise_clip
        self.policy_freq = policy_freq

        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        self.actor.to(self.device)
        self.actor_target.to(self.device)
        self.critic.to(self.device)
        self.critic_target.to(self.device)

        self.env = env
        self.total_it = 0

    def select_action(self, state, noise=0.1):
        action = self.actor(state.to(self.device)).data.cpu().numpy().flatten()
        if noise != 0:
            action = (action + np.random.normal(
                0, noise, size=self.env.action_space.shape[0]))

        return action.clip(self.env.action_space.low,
                           self.env.action_space.high)

    def train(self, replay_buffer, batch_size=128):
        self.total_it += 1

        states, states_, actions, rewards, terminal = replay_buffer.sample_buffer(
            batch_size)

        with torch.no_grad():
            noise = (torch.randn_like(actions.to(self.device)) *
                     self.policy_noise).clamp(-self.noise_clip,
                                              self.noise_clip)

            next_action = (self.actor_target(states_.to(self.device)) +
                           noise).clamp(-self.max_action, self.max_action)

            # compute the target Q value
            target_q1, target_q2 = self.critic_target(
                states_.to(self.device), next_action.to(self.device))
            target_q = torch.min(target_q1, target_q2)
            # target_q = rewards + terminal * self.gamma + target_q.cpu()
            # target_q = rewards + (terminal.reshape(256, 1) * self.gamma * target_q).detach()
            target_q = rewards + terminal * self.gamma * target_q[:, 0].cpu()

        # Get current Q value
        current_q1, current_q2 = self.critic(states.to(self.device),
                                             actions.to(self.device))

        # Compute critic loss
        critic_loss = F.mse_loss(current_q1[:, 0], target_q.to(
            self.device)) + F.mse_loss(current_q2[:, 0],
                                       target_q.to(self.device))

        # optimize the critic
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()

        # Delayed policy updates
        if self.total_it % self.policy_freq == 0:
            # Compute actor loss
            actor_loss = -self.critic.q1(states.to(
                self.device), self.actor(states.to(self.device))).mean()

            # Optimize the actor
            self.actor_optimizer.zero_grad()
            actor_loss.backward()
            self.actor_optimizer.step()

            # Update the frozen target models
            for param, target_param in zip(self.critic.parameters(),
                                           self.critic_target.parameters()):
                target_param.data.copy_(self.tau * param.data +
                                        (1 - self.tau) * target_param.data)

            for param, target_param in zip(self.actor.parameters(),
                                           self.actor_target.parameters()):
                target_param.data.copy_(self.tau * param.data +
                                        (1 - self.tau) * target_param.data)

    def save(self, filename):
        torch.save(self.critic.state_dict(), filename + "_critic")
        torch.save(self.critic_optimizer.state_dict(),
                   filename + "_critic_optimizer")
        torch.save(self.actor.state_dict(), filename + "_actor")
        torch.save(self.actor_optimizer.state_dict(),
                   filename + "_actor_optimizer")

    def load(self, filename):
        self.critic.load_state_dict(torch.load(filename + "_critic"))
        self.critic_optimizer.load_state_dict(
            torch.load(filename + "_critic_optimizer"))
        self.actor.load_state_dict(torch.load(filename + "_actor"))
        self.actor_optimizer.load_state_dict(
            torch.load(filename + "_actor_optimizer"))
Example #39
    def test_get_actors_filtered_by_two_id(self):
        actor_one = Actor(**AppTestCase.test_actor)
        actor_one.insert()
        actor_one_id = actor_one.id
        actor_two = Actor(**AppTestCase.test_actor)
        actor_two.insert()
        actor_two_id = actor_two.id
        actor_id_list = [actor_one_id, actor_two_id]

        res = self.client().get(f'/actors?id={actor_one_id}&id={actor_two_id}')
        data = json.loads(res.data)

        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        self.assertTrue(data['page'] == 1)
        self.assertTrue(data['total_actors'] == 2)
        self.assertTrue(data['actors'][0]['id'] in actor_id_list)
        self.assertTrue(data['actors'][1]['id'] in actor_id_list)

        actor_one.delete()
        actor_two.delete()
Example #40
class Agent():
    """Interacts with and learns from the environment."""
    def __init__(self, state_size, action_size, random_seed):
        """Initialize an Agent object.
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)
        self.epsilon = EPSILON

        ### DEFINE THE ACTOR NETWORK ###
        ### INFINITE STEP BOOTSTRAPPING, THEREFORE HIGH VARIANCE ###
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        ### DEFINE THE CRITIC NETWORK ###
        ### ONE STEP BOOTSTRAPPING, THEREFORE HIGH BIAS ###
        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        ### PROCESS TO CREATE NOISE ###
        self.noise = OUNoise(action_size, random_seed)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   random_seed)

    def step(self, state, action, reward, next_state, done, timestep):
        """Save experience in replay memory, and use random sample from buffer to learn."""
        # Save experience / reward
        self.memory.add(state, action, reward, next_state, done)

        # Learn at defined interval, if enough samples are available in memory
        if len(self.memory) > BATCH_SIZE and timestep % LEARN_EVERY == 0:
            for _ in range(LEARN_NUM):
                experiences = self.memory.sample()
                self.learn(experiences, GAMMA)

    def act(self, state, add_noise=True):
        """Returns actions for given state as per current policy."""
        state = torch.from_numpy(state).float().to(device)
        self.actor_local.eval()
        with torch.no_grad():
            action = self.actor_local(state).cpu().data.numpy()
        self.actor_local.train()
        if add_noise:
            action += self.epsilon * self.noise.sample()
        return np.clip(action, -1, 1)

    def reset(self):
        self.noise.reset()

    def learn(self, experiences, gamma):
        """Update policy and value parameters using given batch of experience tuples.
        Q_targets = r + γ * critic_target(next_state, actor_target(next_state))
        where:
            actor_target(state) -> action
            critic_target(state, action) -> Q-value
        Params
        ======
            experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples
            gamma (float): discount factor
        """
        states, actions, rewards, next_states, dones = experiences

        # ---------------------------- update critic ---------------------------- #
        # Get predicted next-state actions and Q values from target models
        actions_next = self.actor_target(next_states)
        Q_targets_next = self.critic_target(next_states, actions_next)
        # Compute Q targets for current states (y_i)
        Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))
        # Compute critic loss
        Q_expected = self.critic_local(states, actions)
        critic_loss = F.mse_loss(Q_expected, Q_targets)
        # Minimize the loss
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1)
        self.critic_optimizer.step()

        # ---------------------------- update actor ---------------------------- #
        # Compute actor loss
        actions_pred = self.actor_local(states)
        actor_loss = -self.critic_local(states, actions_pred).mean()
        # Minimize the loss
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # ----------------------- update target networks ----------------------- #
        self.soft_update(self.critic_local, self.critic_target, TAU)
        self.soft_update(self.actor_local, self.actor_target, TAU)

        # ---------------------------- update noise ---------------------------- #
        self.epsilon -= EPSILON_DECAY
        self.noise.reset()

    def soft_update(self, local_model, target_model, tau):
        """Soft update model parameters.
        θ_target = τ*θ_local + (1 - τ)*θ_target
        Params
        ======
            local_model: PyTorch model (weights will be copied from)
            target_model: PyTorch model (weights will be copied to)
            tau (float): interpolation parameter
        """
        for target_param, local_param in zip(target_model.parameters(),
                                             local_model.parameters()):
            target_param.data.copy_(tau * local_param.data +
                                    (1.0 - tau) * target_param.data)
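A minimal training-loop sketch for the Agent class above. It assumes the classic Gym API used elsewhere in this document (env.reset() returning an observation and env.step() returning a 4-tuple) and that the module-level hyperparameters the class references (EPSILON, GAMMA, TAU, LEARN_EVERY, LEARN_NUM, buffer sizes, and so on) are already defined; the environment name is a placeholder.
import gym

def train_ddpg(env_name="Pendulum-v1", n_episodes=200, max_t=300):
    # Hypothetical driver; Agent is the class defined above.
    env = gym.make(env_name)
    agent = Agent(state_size=env.observation_space.shape[0],
                  action_size=env.action_space.shape[0],
                  random_seed=2)
    scores = []
    for i_episode in range(1, n_episodes + 1):
        state = env.reset()
        agent.reset()                  # reset the OU noise process each episode
        score = 0.0
        for t in range(max_t):
            action = agent.act(state)  # noisy action from the local actor
            next_state, reward, done, _ = env.step(action)
            agent.step(state, action, reward, next_state, done, t)
            state = next_state
            score += reward
            if done:
                break
        scores.append(score)
    return scores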
Example #41
0
class PPO(BaseAgent):
    def __init__(self, config):
        super(PPO, self).__init__()
        self.config = config
        torch.manual_seed(self.config['seed'])
        np.random.seed(self.config['seed'])

        if self.config['experiment'][
                'orthogonal_initialization_and_layer_scaling']:
            weight_init_scheme = 'orthogonal'
        else:
            weight_init_scheme = 'normal'

        self.actor = Actor(
            device=self.config['device'],
            input_dim=self.config['env']['nS'],
            output_dim=self.config['env']['nA'],
            hidden_dims=self.config['model']['actor']['hidden_dims'],
            hidden_activation_fn=self.config['model']['actor']
            ['hidden_acivation_fn'],
            weight_init_scheme=weight_init_scheme)
        self.actor_optimizer = optim.Adam(
            self.actor.parameters(),
            lr=self.config['model']['actor']['lr'],
            betas=self.config['model']['actor']['betas'])

        self.critic = Critic(
            device=self.config['device'],
            input_dim=self.config['env']['nS'],
            hidden_dims=self.config['model']['critic']['hidden_dims'],
            hidden_activation_fn=self.config['model']['critic']
            ['hidden_acivation_fn'],
            weight_init_scheme=weight_init_scheme)
        self.critic_optimizer = optim.Adam(
            self.critic.parameters(),
            lr=self.config['model']['critic']['lr'],
            betas=self.config['model']['critic']['betas'])

        if self.config['train']['gail']:
            self.discriminator = Discriminator(
                device=self.config['device'],
                state_dim=self.config['env']['nS'],
                action_dim=self.config['env']['nA'],
                hidden_dims=self.config['model']['discriminator']
                ['hidden_dims'],
                hidden_activation_fn=self.config['model']['discriminator']
                ['hidden_acivation_fn'],
                weight_init_scheme=weight_init_scheme)
            self.discriminator_optimizer = optim.Adam(
                self.discriminator.parameters(),
                lr=self.config['model']['discriminator']['lr'],
                betas=self.config['model']['discriminator']['betas'])

        # [EXPERIMENT] - reward scaler: r / rs.std()
        if self.config['experiment']['reward_standardization']:
            self.reward_scaler = RewardScaler(
                gamma=self.config['train']['gamma'])

        # [EXPERIMENT] - observation scaler: (ob - ob.mean()) / (ob.std())
        if self.config['experiment']['observation_normalization']:
            self.observation_scaler = ObservationScaler()

    # train
    def train(self):
        """
        # initialize env, memory
        # foreach episode
        #   foreach timestep
        #     select action
        #     step action
        #     add exp to the memory
        #     if done or timeout or memory_full: update gae & tdlamret
        #     if memory is full
        #       bootstrap value
        #       optimize
        #       clear memory
        #     if done:
        #       wrapup episode
        #       break
        """
        writer_path = os.path.join('experiments', self.config['exp_name'],
                                   'runs')
        self.writer = SummaryWriter(writer_path)

        # Pretrain with BC
        if self.config['train']['bc']:
            bc_train_set, bc_valid_set = get_bc_dataset(
                self.config['train']['bc']['samples_exp_name'],
                self.config['train']['bc']['minimum_score'],
                self.config['train']['bc']['batch_size'],
                self.config['train']['bc']['demo_count'],
                self.config['train']['bc']['val_size'])

            if self.config['experiment']['observation_normalization']:
                use_obs_scaler = True
            else:
                use_obs_scaler = False

            self.actor = pretrain(self.actor,
                                  self.config['train']['bc']['lr'],
                                  self.config['train']['bc']['epochs'],
                                  bc_train_set,
                                  bc_valid_set,
                                  use_obs_scaler,
                                  writer=self.writer)

        # GAIL
        if self.config['train']['gail']:
            self.expert_dataset = get_gail_dataset(
                self.config['train']['gail']['samples_exp_name'],
                self.config['train']['gail']['minimum_score'],
                self.config['train']['gail']['n_samples'],
                self.config['train']['ppo']['memory_size'],
                self.config['train']['gail']['dstep'])

        self.best_score = 0

        # prepare env, memory, stuff
        env = self.init_env(self.config['env']['name'])
        env.seed(self.config['seed'])
        self.memory = PPOMemory(gamma=self.config['train']['gamma'],
                                tau=self.config['train']['gae']['tau'])
        score_queue = deque(maxlen=self.config['train']['average_interval'])
        length_queue = deque(maxlen=self.config['train']['average_interval'])
        if self.config['train']['gail']:
            irl_score_queue = deque(
                maxlen=self.config['train']['average_interval'])

        for episode in trange(1, self.config['train']['max_episodes'] + 1):
            self.episode = episode
            episode_score = 0
            if self.config['train']['gail']:
                irl_episode_score = 0

            # reset env
            state = env.reset()

            for t in range(1,
                           self.config['train']['max_steps_per_episode'] + 1):
                if self.episode % 100 == 0:
                    env.render()

                # [EXPERIMENT] - observation scaler: (ob - ob.mean()) / (ob.std())
                if self.config['experiment']['observation_normalization']:
                    state = self.observation_scaler(state, update=True)

                # select action & estimate value from the state
                with torch.no_grad():
                    state_tensor = torch.tensor(state).unsqueeze(
                        0).float()  # bsz = 1
                    action_tensor, logpa_tensor = self.actor.select_action(
                        state_tensor)
                    value_tensor = self.critic(state_tensor).squeeze(
                        1)  # don't need bsz dim

                # step action
                action = action_tensor.numpy()[0]  # single worker
                next_state, reward, done, _ = env.step(action)

                # update episode_score
                episode_score += reward

                # GAIL: get irl_reward
                if self.config['train']['gail']:
                    with torch.no_grad():
                        reward = self.discriminator.get_irl_reward(
                            state_tensor, action_tensor).detach()
                        irl_episode_score += reward

                # [EXPERIMENT] - reward scaler r / rs.std()
                if self.config['experiment']['reward_standardization']:
                    reward = self.reward_scaler(reward, update=True)

                # [EXPERIMENT] - reward clipping [-5, 5]
                if self.config['experiment']['reward_clipping']:
                    reward = np.clip(reward, -5, 5)

                # add experience to the memory
                self.memory.store(s=state,
                                  a=action,
                                  r=reward,
                                  v=value_tensor.item(),
                                  lp=logpa_tensor.item())

                # done or timeout or memory full
                # done => v = 0
                # timeout or memory full => v = critic(next_state)
                # update gae & return in the memory!!
                timeout = t == self.config['train']['max_steps_per_episode']
                time_to_optimize = len(
                    self.memory) == self.config['train']['ppo']['memory_size']
                if done or timeout or time_to_optimize:
                    if done:
                        # cuz the game is over, value of the next state is 0
                        v = 0
                    else:
                        # if not, estimate it with the critic
                        next_state_tensor = torch.tensor(next_state).unsqueeze(
                            0).float()  # bsz = 1
                        with torch.no_grad():
                            next_value_tensor = self.critic(
                                next_state_tensor).squeeze(1)
                        v = next_value_tensor.item()

                    # update gae & tdlamret
                    self.memory.finish_path(v)

                # if memory is full, optimize PPO
                if time_to_optimize:
                    self.optimize()

                if done:
                    score_queue.append(episode_score)
                    length_queue.append(t)
                    if self.config['train']['gail']:
                        irl_score_queue.append(irl_episode_score)
                    break

                # update state
                state = next_state

            avg_score = np.mean(score_queue)
            std_score = np.std(score_queue)
            avg_duration = np.mean(length_queue)
            self.writer.add_scalar("info/score", avg_score, self.episode)
            self.writer.add_scalar("info/duration", avg_duration, self.episode)

            if self.config['train']['gail']:
                avg_score = np.mean(irl_score_queue)
                self.writer.add_scalar("info/irl_score", avg_score,
                                       self.episode)

            if self.episode % 100 == 0:
                print("{} - score: {:.1f} +-{:.1f} \t duration: {}".format(
                    self.episode, avg_score, std_score, avg_duration))

            # game-solved condition
            # if avg_score >= self.config['train']['terminal_score']:
            #     print("game solved at ep {}".format(self.episode))
            #     self.save_weight(self.actor, self.config['exp_name'], "best")
            #     break
            if avg_score >= self.best_score and self.episode >= 200:
                print("found best model at episode: {}".format(self.episode))
                self.save_weight(self.actor, self.config['exp_name'], "best")
                self.best_score = avg_score

                # [EXPERIMENT] - observation scaler: (ob - ob.mean()) / (ob.std())
                if self.config['experiment']['observation_normalization']:
                    self.observation_scaler.save(self.config['exp_name'])

        self.save_weight(self.actor, self.config['exp_name'], "last")
        return self.best_score

    # optimize
    def optimize(self):
        data = self.prepare_data(self.memory.get())

        # gail
        if self.config['train']['gail']:
            self.optimize_gail(data)

        self.optimize_ppo(data)

    def prepare_data(self, data):
        states_tensor = torch.from_numpy(np.stack(
            data['states'])).float()  # bsz, 8
        actions_tensor = torch.tensor(data['actions']).long()  # bsz
        logpas_tensor = torch.tensor(data['logpas']).float()  # bsz
        tdlamret_tensor = torch.tensor(data['tdlamret']).float()  # bsz
        advants_tensor = torch.tensor(data['advants']).float()  # bsz
        values_tensor = torch.tensor(data['values']).float()  # bsz

        # normalize advant a.k.a atarg
        advants_tensor = (advants_tensor - advants_tensor.mean()) / (
            advants_tensor.std() + 1e-5)

        data_tensor = dict(states=states_tensor,
                           actions=actions_tensor,
                           logpas=logpas_tensor,
                           tdlamret=tdlamret_tensor,
                           advants=advants_tensor,
                           values=values_tensor)

        return data_tensor

    def ppo_iter(self, batch_size, ob, ac, oldpas, atarg, tdlamret,
                 vpredbefore):
        total_size = ob.size(0)
        indices = np.arange(total_size)
        np.random.shuffle(indices)
        n_batches = total_size // batch_size
        for nb in range(n_batches):
            ind = indices[batch_size * nb:batch_size * (nb + 1)]
            yield ob[ind], ac[ind], oldpas[ind], atarg[ind], tdlamret[
                ind], vpredbefore[ind]

    def optimize_gail(self, data):
        """
        https://github.com/openai/baselines/blob/master/baselines/gail/trpo_mpi.py
        bsz = learner_batch_size // d_step
        for each ob_batch, ac_batch in learner_dataset:
            get ob_expert, ac_expert from expert_dataset
            get learner_logit from D
            get expert_logit from D
            get learner loss vs. torch.ones()
            get expert loss vs. torch.zeros()
            update D
        """
        loss_fn = nn.BCELoss()
        D_losses = []
        learner_accuracies = []
        expert_accuracies = []

        learner_ob = data['states']
        learner_ac = data['actions']
        rub = torch.zeros_like(
            learner_ob)  # dummy tensor so ppo_iter() can be reused for the learner batches
        learner_iter = self.ppo_iter(self.expert_dataset.batch_size,
                                     learner_ob, learner_ac, rub, rub, rub,
                                     rub)
        for learner_ob_b, learner_ac_b, _, _, _, _ in learner_iter:
            expert_ob_b, expert_ac_b = self.expert_dataset.get_next_batch()
            if self.config['experiment']['observation_normalization']:
                expert_ob_b = self.observation_scaler(expert_ob_b,
                                                      update=False).float()

            learner_logit = self.discriminator.forward(learner_ob_b,
                                                       learner_ac_b)
            learner_prob = torch.sigmoid(learner_logit)

            expert_logit = self.discriminator.forward(expert_ob_b, expert_ac_b)
            expert_prob = torch.sigmoid(expert_logit)

            learner_loss = loss_fn(learner_prob, torch.ones_like(learner_prob))
            expert_loss = loss_fn(expert_prob, torch.zeros_like(expert_prob))

            loss = learner_loss + expert_loss
            D_losses.append(loss.item())

            self.discriminator_optimizer.zero_grad()
            loss.backward()
            self.discriminator_optimizer.step()

            learner_acc = ((learner_prob >= 0.5).float().mean().item())
            expert_acc = ((expert_prob < 0.5).float().mean().item())

            learner_accuracies.append(learner_acc)
            expert_accuracies.append(expert_acc)

        avg_d_loss = np.mean(D_losses)
        avg_learner_accuracy = np.mean(learner_accuracies)
        avg_expert_accuracy = np.mean(expert_accuracies)

        self.writer.add_scalar("info/discrim_loss", avg_d_loss, self.episode)
        self.writer.add_scalars("info/gail_accuracy", {
            'learner': avg_learner_accuracy,
            'expert': avg_expert_accuracy
        }, self.episode)

    def optimize_ppo(self, data):
        """
        https://github.com/openai/baselines/blob/master/baselines/ppo1/pposgd_simple.py line 164

        # get data from the memory
        # prepare dataloader
        # foreach optim_epochs
        #   foreach batch
        #     calculate loss and gradient
        #     update nn
        """

        ob = data['states']
        ac = data['actions']
        oldpas = data['logpas']
        atarg = data['advants']
        tdlamret = data['tdlamret']
        vpredbefore = data['values']

        # shorthand for the PPO clip range
        eps = self.config['train']['ppo']['clip_range']

        policy_losses = []
        entropy_losses = []
        value_losses = []

        # foreach policy_update_epochs
        for i in range(self.config['train']['ppo']['optim_epochs']):
            # foreach batch
            data_loader = self.ppo_iter(
                self.config['train']['ppo']['batch_size'], ob, ac, oldpas,
                atarg, tdlamret, vpredbefore)
            for batch in data_loader:
                ob_b, ac_b, old_logpas_b, atarg_b, vtarg_b, old_vpred_b = batch

                # policy loss
                cur_logpas, cur_entropies = self.actor.get_predictions(
                    ob_b, ac_b)
                ratio = torch.exp(cur_logpas - old_logpas_b)

                # clip ratio
                clipped_ratio = torch.clamp(ratio, 1. - eps, 1. + eps)

                # policy_loss
                surr1 = ratio * atarg_b

                if self.config['experiment']['policy_noclip']:
                    pol_surr = -surr1.mean()
                else:
                    surr2 = clipped_ratio * atarg_b
                    pol_surr = -torch.min(surr1, surr2).mean()

                # value_loss
                cur_vpred = self.critic(ob_b).squeeze(1)

                # [EXPERIMENT] - value clipping: clipped_value = old_values + (curr_values - old_values).clip(-eps, +eps)
                if self.config['experiment']['value_clipping']:
                    cur_vpred_clipped = old_vpred_b + (
                        cur_vpred - old_vpred_b).clamp(-eps, eps)
                    vloss1 = (cur_vpred - vtarg_b).pow(2)
                    vloss2 = (cur_vpred_clipped - vtarg_b).pow(2)
                    vf_loss = torch.max(vloss1, vloss2).mean()
                else:
                    # original value_loss
                    vf_loss = (cur_vpred - vtarg_b).pow(2).mean()

                # entropy_loss
                pol_entpen = -cur_entropies.mean()

                # total loss
                c1 = self.config['train']['ppo']['coef_vf']
                c2 = self.config['train']['ppo']['coef_entpen']

                # actor - backward
                self.actor_optimizer.zero_grad()
                policy_loss = pol_surr + c2 * pol_entpen
                policy_loss.backward()

                # [EXPERIMENT] - clipping gradient with max_norm=0.5
                if self.config['experiment']['clipping_gradient']:
                    nn.utils.clip_grad_norm_(self.actor.parameters(),
                                             max_norm=0.5)

                self.actor_optimizer.step()

                # critic - backward
                self.critic_optimizer.zero_grad()
                value_loss = c1 * vf_loss
                value_loss.backward()

                # [EXPERIMENT] - clipping gradient with max_norm=0.5
                if self.config['experiment']['clipping_gradient']:

                    nn.utils.clip_grad_norm_(self.critic.parameters(),
                                             max_norm=0.5)

                self.critic_optimizer.step()

                policy_losses.append(pol_surr.item())
                entropy_losses.append(pol_entpen.item())
                value_losses.append(vf_loss.item())

        avg_policy_loss = np.mean(policy_losses)
        avg_value_losses = np.mean(value_losses)
        avg_entropy_losses = np.mean(entropy_losses)

        self.writer.add_scalar("info/policy_loss", avg_policy_loss,
                               self.episode)
        self.writer.add_scalar("info/value_loss", avg_value_losses,
                               self.episode)
        self.writer.add_scalar("info/entropy_loss", avg_entropy_losses,
                               self.episode)

    # play
    def play(self,
             num_episodes=1,
             save_traj=False,
             seed=9999,
             record=False,
             save_result=False):

        # [EXPERIMENT] - observation scaler: (ob - ob.mean()) / (ob.std())
        if self.config['experiment']['observation_normalization']:
            self.observation_scaler.load(self.config['exp_name'])

        # load policy
        self.load_weight(self.actor, self.config['exp_name'])

        env = self.init_env(self.config['env']['name'])
        env.seed(seed)
        if record:
            from gym import wrappers
            rec_dir = os.path.join("experiments", self.config['exp_name'],
                                   "seed_{}".format(seed))
            env = wrappers.Monitor(env, rec_dir, force=True)
        scores, trajectories = [], []

        for episode in range(num_episodes):
            current_trajectory = []
            episode_score = 0

            # initialize env
            state = env.reset()

            while True:
                # env.render()

                # [EXPERIMENT] - observation scaler: (ob - ob.mean()) / (ob.std())
                if self.config['experiment']['observation_normalization']:
                    state = self.observation_scaler(state, update=False)

                # select greedy action
                with torch.no_grad():
                    action_tensor = self.actor.select_greedy_action(state)
                action = action_tensor.numpy()[0]  # single env

                current_trajectory.append((state, action))

                # run action
                next_state, reward, done, _ = env.step(action)

                # add reward
                episode_score += reward

                # update state
                state = next_state

                # game over condition
                if done:
                    scores.append(episode_score)
                    trajectories.append((current_trajectory, episode_score))
                    break

        avg_score = np.mean(scores)
        print("Average score {} on {} games".format(avg_score, num_episodes))
        if save_result:
            played_result_path = os.path.join("experiments",
                                              self.config['exp_name'], "runs",
                                              "play_score.pth")
            torch.save(scores, played_result_path)

        if save_traj:
            demo_dir = os.path.join("experiments", self.config['exp_name'],
                                    "demonstration")
            os.makedirs(demo_dir, exist_ok=True)
            torch.save(trajectories, os.path.join(demo_dir, "demo.pth"))
            print("saved {} trajectories.".format(num_episodes))

        env.close()
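The core of optimize_ppo() above is the clipped surrogate objective: the probability ratio exp(logp_new - logp_old) is clamped to [1 - eps, 1 + eps] and the per-sample minimum of the clipped and unclipped terms is averaged. A self-contained numeric sketch of just that computation follows; the toy tensors and eps value are illustrative assumptions.
import torch

eps = 0.2                                           # PPO clip range (assumed)
old_logpas = torch.tensor([-1.20, -0.70, -2.30])    # log-probs stored at rollout time
cur_logpas = torch.tensor([-1.00, -1.10, -2.25])    # log-probs under the current policy
advantages = torch.tensor([0.50, -0.30, 1.20])      # normalized advantage estimates

ratio = torch.exp(cur_logpas - old_logpas)          # pi_new(a|s) / pi_old(a|s)
clipped_ratio = torch.clamp(ratio, 1.0 - eps, 1.0 + eps)

surr1 = ratio * advantages
surr2 = clipped_ratio * advantages
policy_loss = -torch.min(surr1, surr2).mean()       # negated so gradient descent performs ascent
print(ratio, policy_loss)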
class D4PGAgent(Agent):
    """An advance D4PG agent with an option to run on a simpler DDPG mode.
    The agent uses a distributional value estimation when running on D4PG vs
    the traditional single value estimation when running on DDPG mode."""
    def __init__(self, params):
        """Initialize an Agent object."""

        self.params = params
        self.update_target_every = params['update_target_every']
        self.update_every = params['update_every']
        self.actor_update_every_multiplier = params[
            'actor_update_every_multiplier']
        self.update_intensity = params['update_intensity']
        self.gamma = params['gamma']
        self.action_size = params['actor_params']['action_size']
        self.num_agents = params['num_agents']
        self.num_atoms = params['critic_params']['num_atoms']
        self.v_min = params['critic_params']['v_min']
        self.v_max = params['critic_params']['v_max']
        self.update_target_type = params['update_target_type']
        self.device = params['device']
        self.name = params['name']
        self.lr_reduction_factor = params['lr_reduction_factor']
        self.tau = params['tau']
        self.d4pg = params['d4pg']

        # Distributes the number of atoms across the range of v min and max
        self.atoms = torch.linspace(self.v_min, self.v_max,
                                    self.num_atoms).to(self.device)

        # Initialize time step count
        self.t_step = 0

        # Active and Target Actor networks
        self.actor_active = Actor(params['actor_params']).to(device)
        self.actor_target = Actor(params['actor_params']).to(device)

        if self.d4pg:
            # Active and Target D4PG Critic networks
            self.critic_active = D4PGCritic(params['critic_params']).to(device)
            self.critic_target = D4PGCritic(params['critic_params']).to(device)
        else:
            # Active and Target Critic networks
            self.critic_active = Critic(params['critic_params']).to(device)
            self.critic_target = Critic(params['critic_params']).to(device)

        self.actor_optimizer = optim.Adam(self.actor_active.parameters(),
                                          lr=params['actor_params']['lr'])
        self.critic_optimizer = optim.Adam(self.critic_active.parameters(),
                                           lr=params['critic_params']['lr'])

        self.schedule_lr = params['schedule_lr']
        self.lr_steps = 0

        # Create learning rate schedulers if required to reduce the learning rate
        # depending on plateauing of scores
        if self.schedule_lr:
            self.actor_scheduler = ReduceLROnPlateau(
                self.actor_optimizer,
                mode='max',
                factor=params['lr_reduction_factor'],
                patience=params['lr_patience_factor'],
                verbose=False,
            )
            self.critic_scheduler = ReduceLROnPlateau(
                self.critic_optimizer,
                mode='max',
                factor=params['lr_reduction_factor'],
                patience=params['lr_patience_factor'],
                verbose=False,
            )

        print("\n################ ACTOR ################\n")
        print(self.actor_active)

        print("\n################ CRITIC ################\n")
        print(self.critic_active)

        # Initiate exploration parameters by adding noise to the actions
        self.noise = params['noise']

        # Replay memory
        self.memory = params['experience_replay']

    def act(self, states, add_noise=True, pretrain=False):
        """Returns actions for given state as per current policy."""

        # If pretraining is active, the agent returns random actions, encouraging
        # initial exploration of the state space early on
        if pretrain:
            actions = np.random.uniform(-1., 1.,
                                        (self.num_agents, self.action_size))

        else:
            with torch.no_grad():
                actions = self.actor_active(
                    states.to(device).float()).detach().to('cpu').numpy()
            if add_noise:
                noise = self.noise.create_noise(actions.shape)
                actions += noise

            actions = np.clip(actions, -1., 1.)

        return actions, self.noise.epsilon

    def step(self,
             states,
             actions,
             rewards,
             next_states,
             dones,
             pretrain=False):
        """Save experience in replay memory, and use random sample from buffer to learn."""

        self.memory.add((states, actions, rewards, next_states, dones))
        self.t_step += 1

        if not pretrain:
            return self.learn_()

        return None, None

    def learn_(self):
        "Learns from experience using a distributional value estimation when in D4PG mode"
        actor_loss = None
        critic_loss = None

        # If enough samples are available in memory and its time to learn, then learn!
        if self.memory.ready() and self.t_step % self.update_every == 0:

            # Learns multiple times with the same set of experience
            for _ in range(self.update_intensity):

                # Samples from the replay buffer which has calculated the n step returns in advance
                # Next state represents the state at the n'th step
                states, next_states, actions, rewards, dones = self.memory.sample(
                )

                if self.d4pg:
                    atoms = self.atoms.unsqueeze(0)

                    # Calculate log probability distribution using Zw with regards to stored actions
                    log_probs = self.critic_active(states, actions, log=True)

                    # Calculate the projected target distribution from the target actor and critic networks.
                    # Since backpropagation is not required, the tensors are detached for speed
                    target_dist = self._get_targets(rewards,
                                                    next_states).detach()

                    # The critic loss is calculated against the full weighted distribution instead of a
                    # single mean value. Cross-entropy loss is used because it is well suited to the
                    # categorical value distributions utilized in D4PG
                    critic_loss = -(target_dist * log_probs).sum(-1).mean()

                else:

                    # Get predicted next-state actions and Q values from target models
                    actions_next = self.actor_target(next_states)
                    Q_targets_next = self.critic_target(
                        next_states, actions_next).detach()
                    # Compute Q targets for current states (y_i)
                    Q_targets = rewards + (self.gamma * Q_targets_next *
                                           (1 - dones))
                    # Compute critic loss
                    Q_expected = self.critic_active(states, actions)
                    critic_loss = F.mse_loss(Q_expected, Q_targets)

                # Execute gradient descent for the critic
                self.critic_optimizer.zero_grad()
                critic_loss.backward()
                torch.nn.utils.clip_grad_norm_(self.critic_active.parameters(),
                                               1)
                self.critic_optimizer.step()
                critic_loss = critic_loss.item()

                # Update actor every x multiples of critic
                if self.t_step % (self.actor_update_every_multiplier *
                                  self.update_every) == 0:

                    if self.d4pg:
                        # Predicts the action for the actor networks loss calculation
                        predicted_action = self.actor_active(states)
                        # Predict the value distribution using the critic with regards to action predicted by actor
                        probs = self.critic_active(states, predicted_action)
                        # Multiply probabilities by atom values and sum across columns to get Q values
                        expected_reward = (probs * atoms).sum(-1)
                        # Calculate the actor network loss (Policy Gradient)
                        # Get the negative of the mean across the expected rewards to do gradient ascent
                        actor_loss = -expected_reward.mean()
                    else:
                        actions_pred = self.actor_active(states)
                        actor_loss = -self.critic_active(states,
                                                         actions_pred).mean()

                    # Execute gradient ascent for the actor
                    self.actor_optimizer.zero_grad()
                    actor_loss.backward()
                    self.actor_optimizer.step()
                    actor_loss = actor_loss.item()

        # Updates the target networks every n steps
        if self.t_step % self.update_target_every == 0:
            self._update_target_networks()

        # Returns the actor and critic losses to store on tensorboard
        return actor_loss, critic_loss

    def _get_targets(self, rewards, next_states):
        """
        Calculate Yᵢ from the target networks using the target actor and
        distributional critic networks
        """

        target_actions = self.actor_target(next_states)
        target_probs = self.critic_target(next_states, target_actions)

        # Project the categorical distribution
        projected_probs = self._get_value_distribution(rewards, target_probs)
        return projected_probs

    def _get_value_distribution(self, rewards, probs):
        """
        Returns the projected value distribution for the input state/action pair
        """

        delta_z = (self.v_max - self.v_min) / (self.num_atoms - 1)

        # Rewards were stored as the accumulated n-step discounted return, so add gamma^n times
        # the atom values to project the distribution's support
        projected_atoms = rewards.unsqueeze(
            -1
        ) + self.gamma**self.memory.rollout_length * self.atoms.unsqueeze(0)
        projected_atoms.clamp_(self.v_min, self.v_max)
        b = (projected_atoms - self.v_min) / delta_z

        # Floating point imprecision can leave b just above an integer (e.g. 99.000000001),
        # in which case ceil() rounds to 100 instead of 99 and corrupts the upper/lower bounds.
        # Rounding b to a fixed precision first avoids this without affecting the accuracy of
        # the bound calculation
        precision = 1
        b = torch.round(b * 10**precision) / 10**precision
        lower_bound = b.floor()
        upper_bound = b.ceil()

        m_lower = (upper_bound +
                   (lower_bound == upper_bound).float() - b) * probs
        m_upper = (b - lower_bound) * probs

        projected_probs = torch.tensor(np.zeros(probs.size())).to(self.device)

        for idx in range(probs.size(0)):
            projected_probs[idx].index_add_(0, lower_bound[idx].long(),
                                            m_lower[idx].double())
            projected_probs[idx].index_add_(0, upper_bound[idx].long(),
                                            m_upper[idx].double())
        return projected_probs.float()
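To make the projection in _get_value_distribution() concrete, here is a self-contained toy example with three atoms: each shifted support point r + gamma^n * z_j is clamped to [v_min, v_max] and its probability mass is split between the two neighbouring atoms in proportion to distance. All numbers are illustrative assumptions.
import torch

v_min, v_max, num_atoms = -1.0, 1.0, 3
atoms = torch.linspace(v_min, v_max, num_atoms)      # tensor([-1., 0., 1.])
delta_z = (v_max - v_min) / (num_atoms - 1)          # 1.0

gamma_n = 0.9                                        # gamma ** rollout_length (assumed)
reward = torch.tensor([0.5])                         # a single n-step return
probs = torch.tensor([[0.2, 0.5, 0.3]])              # target critic output for that sample

projected_atoms = (reward.unsqueeze(-1) + gamma_n * atoms.unsqueeze(0)).clamp_(v_min, v_max)
b = (projected_atoms - v_min) / delta_z              # fractional index of each shifted atom
lower, upper = b.floor(), b.ceil()

m_lower = (upper + (lower == upper).float() - b) * probs
m_upper = (b - lower) * probs

projected = torch.zeros_like(probs)
projected[0].index_add_(0, lower[0].long(), m_lower[0])
projected[0].index_add_(0, upper[0].long(), m_upper[0])
print(projected)   # mass redistributed over the fixed atoms; still sums to 1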
Example #44
0
def subscribe(tenant,
              actor_id,
              worker_id,
              api_server,
              client_id,
              client_secret,
              access_token,
              refresh_token,
              worker_ch):
    """
    Main loop for the Actor executor worker. Subscribes to the actor's inbox and executes actor
    containers when messages arrive. Also subscribes to the worker channel for future communications.
    :return:
    """
    logger.debug("Top of subscribe().")
    actor_ch = ActorMsgChannel(actor_id)

    try:
        leave_containers = Config.get('workers', 'leave_containers')
    except configparser.NoOptionError:
        logger.info("No leave_containers value configured.")
        leave_containers = False
    if hasattr(leave_containers, 'lower'):
        leave_containers = leave_containers.lower() == "true"
    logger.info("leave_containers: {}".format(leave_containers))

    try:
        mem_limit = Config.get('workers', 'mem_limit')
    except configparser.NoOptionError:
        logger.info("No mem_limit value configured.")
        mem_limit = "-1"
    mem_limit = str(mem_limit)

    try:
        max_cpus = Config.get('workers', 'max_cpus')
    except configparser.NoOptionError:
        logger.info("No max_cpus value configured.")
        max_cpus = "-1"

    logger.info("max_cpus: {}".format(max_cpus))

    ag = None
    if api_server and client_id and client_secret and access_token and refresh_token:
        logger.info("Creating agave client.")
        verify = get_tenant_verify(tenant)
        ag = Agave(api_server=api_server,
                   token=access_token,
                   refresh_token=refresh_token,
                   api_key=client_id,
                   api_secret=client_secret,
                   verify=verify)
    else:
        logger.info("Not creating agave client.")
    logger.info("Starting the process worker channel thread.")
    t = threading.Thread(target=process_worker_ch, args=(tenant, worker_ch, actor_id, worker_id, actor_ch, ag))
    t.start()
    logger.info("Worker subscribing to actor channel.")

    # keep track of whether we need to update the worker's status back to READY; otherwise, we
    # will hit redis with an UPDATE every time the subscription loop times out (i.e., every 2s)
    update_worker_status = True

    # shared global tracking whether this worker should keep running; shared between this thread and
    # the "worker channel processing" thread.
    global keep_running

    # main subscription loop -- processing messages from actor's mailbox
    while keep_running:
        if update_worker_status:
            Worker.update_worker_status(actor_id, worker_id, READY)
            update_worker_status = False
        try:
            msg, msg_obj = actor_ch.get_one()
        except channelpy.ChannelClosedException:
            logger.info("Channel closed, worker exiting...")
            keep_running = False
            sys.exit()
        logger.info("worker {} processing new msg.".format(worker_id))

        try:
            Worker.update_worker_status(actor_id, worker_id, BUSY)
        except Exception as e:
            logger.error("unexpected exception from call to update_worker_status. Nacking message."
                         "actor_id: {}; worker_id: {}; status: {}; exception: {}".format(actor_id,
                                                                                         worker_id,
                                                                                         BUSY,
                                                                                         e))
            msg_obj.nack(requeue=True)
            raise e
        update_worker_status = True
        logger.info("Received message {}. Starting actor container...".format(msg))
        # the msg object is a dictionary with an entry called message and an arbitrary
        # set of k:v pairs coming in from the query parameters.
        message = msg.pop('message', '')
        try:
            actor = Actor.from_db(actors_store[actor_id])
            execution_id = msg['_abaco_execution_id']
            content_type = msg['_abaco_Content_Type']
            mounts = actor.mounts
            logger.debug("actor mounts: {}".format(mounts))
        except Exception as e:
            logger.error("unexpected exception retrieving actor, execution, content-type, mounts. Nacking message."
                         "actor_id: {}; worker_id: {}; status: {}; exception: {}".format(actor_id,
                                                                                         worker_id,
                                                                                         BUSY,
                                                                                         e))
            msg_obj.nack(requeue=True)
            raise e

        # for results, create a socket in the configured directory.
        try:
            socket_host_path_dir = Config.get('workers', 'socket_host_path_dir')
        except (configparser.NoSectionError, configparser.NoOptionError) as e:
            logger.error("No socket_host_path configured. Cannot manage results data. Nacking message")
            Actor.set_status(actor_id, ERROR, msg="Abaco instance not configured for results data.")
            msg_obj.nack(requeue=True)
            raise e
        socket_host_path = '{}.sock'.format(os.path.join(socket_host_path_dir, worker_id, execution_id))
        logger.info("Create socket at path: {}".format(socket_host_path))
        # add the socket as a mount:
        mounts.append({'host_path': socket_host_path,
                       'container_path': '/_abaco_results.sock',
                       'format': 'ro'})
        # for binary data, create a fifo in the configured directory. The configured
        # fifo_host_path_dir is equal to the fifo path in the worker container:
        fifo_host_path = None
        if content_type == 'application/octet-stream':
            try:
                fifo_host_path_dir = Config.get('workers', 'fifo_host_path_dir')
            except (configparser.NoSectionError, configparser.NoOptionError) as e:
                logger.error("No fifo_host_path configured. Cannot manage binary data.")
                Actor.set_status(actor_id, ERROR, msg="Abaco instance not configured for binary data. Nacking message.")
                msg_obj.nack(requeue=True)
                raise e
            fifo_host_path = os.path.join(fifo_host_path_dir, worker_id, execution_id)
            try:
                os.mkfifo(fifo_host_path)
                logger.info("Created fifo at path: {}".format(fifo_host_path))
            except Exception as e:
                logger.error("Could not create fifo_path. Nacking message. Exception: {}".format(e))
                msg_obj.nack(requeue=True)
                raise e
            # add the fifo as a mount:
            mounts.append({'host_path': fifo_host_path,
                           'container_path': '/_abaco_binary_data',
                           'format': 'ro'})

        # the execution object was created by the controller, but we need to add the worker id to it now that we
        # know which worker will be working on the execution.
        logger.debug("Adding worker_id to execution.")
        try:
            Execution.add_worker_id(actor_id, execution_id, worker_id)
        except Exception as e:
            logger.error("Unexpected exception adding working_id to the Execution. Nacking message. Exception: {}".format(e))
            msg_obj.nack(requeue=True)
            raise e

        # privileged dictates whether the actor container runs in privileged mode and if docker daemon is mounted.
        privileged = False
        if type(actor['privileged']) == bool and actor['privileged']:
            privileged = True
        logger.debug("privileged: {}".format(privileged))

        # overlay resource limits if set on actor:
        if actor.mem_limit:
            mem_limit = actor.mem_limit
        if actor.max_cpus:
            max_cpus = actor.max_cpus

        # retrieve the default environment registered with the actor.
        environment = actor['default_environment']
        logger.debug("Actor default environment: {}".format(environment))

        # construct the user field from the actor's uid and gid:
        user = get_container_user(actor)
        logger.debug("Final user valiue: {}".format(user))
        # overlay the default_environment registered for the actor with the msg
        # dictionary
        environment.update(msg)
        environment['_abaco_access_token'] = ''
        environment['_abaco_actor_dbid'] = actor_id
        environment['_abaco_actor_id'] = actor.id
        environment['_abaco_worker_id'] = worker_id
        environment['_abaco_container_repo'] = actor.image
        environment['_abaco_actor_state'] = actor.state
        environment['_abaco_actor_name'] = actor.name or 'None'
        logger.debug("Overlayed environment: {}".format(environment))

        # if we have an agave client, get a fresh set of tokens:
        if ag:
            try:
                ag.token.refresh()
                token = ag.token.token_info['access_token']
                environment['_abaco_access_token'] = token
                logger.info("Refreshed the tokens. Passed {} to the environment.".format(token))
            except Exception as e:
                logger.error("Got an exception trying to get an access token. Stoping worker and nacking message. "
                             "Exception: {}".format(e))
                msg_obj.nack(requeue=True)
                raise e

        else:
            logger.info("Agave client `ag` is None -- not passing access token.")
        logger.info("Passing update environment: {}".format(environment))
        try:
            stats, logs, final_state, exit_code, start_time = execute_actor(actor_id,
                                                                            worker_id,
                                                                            execution_id,
                                                                            image,
                                                                            message,
                                                                            user,
                                                                            environment,
                                                                            privileged,
                                                                            mounts,
                                                                            leave_containers,
                                                                            fifo_host_path,
                                                                            socket_host_path,
                                                                            mem_limit,
                                                                            max_cpus)
        except DockerStartContainerError as e:
            logger.error("Worker {} got DockerStartContainerError: {} trying to start actor for execution {}."
                         "Placing message back on queue.".format(worker_id, e, execution_id))
            # if we failed to start the actor container, we leave the worker up and requeue the
            # original message by nacking it with requeue=True so it can be attempted again.
            msg_obj.nack(requeue=True)
            logger.debug('message requeued.')
            continue
        except DockerStopContainerError as e:
            logger.error("Worker {} was not able to stop actor for execution: {}; Exception: {}. "
                         "Putting the actor in error status and shutting down workers.".format(worker_id, execution_id, e))
            Actor.set_status(actor_id, ERROR, "Error executing container: {}".format(e))
            # since the error was with stopping the actor, we will consider this message "processed"; this choice
            # could be reconsidered/changed
            msg_obj.ack()
            shutdown_workers(actor_id, delete_actor_ch=False)
            # wait for the worker to be shut down
            time.sleep(600)
            break
        except Exception as e:
            logger.error("Worker {} got an unexpected exception trying to run actor for execution: {}."
                         "Putting the actor in error status and shutting down workers. Exception: {}; type: {}".format(worker_id, execution_id, e, type(e)))
            Actor.set_status(actor_id, ERROR, "Error executing container: {}".format(e))
            # the execute_actor function raises a DockerStartContainerError if it met an exception before starting the
            # actor container; if the container was started, then another exception should be raised. Therefore,
            # we can assume here that the container was at least started and we can ack the message.
            msg_obj.ack()
            shutdown_workers(actor_id, delete_actor_ch=False)
            # wait for the worker to be shut down
            time.sleep(600)
            break
        # ack the message
        msg_obj.ack()

        # Add the completed stats to the execution
        logger.info("Actor container finished successfully. Got stats object:{}".format(str(stats)))
        Execution.finalize_execution(actor_id, execution_id, COMPLETE, stats, final_state, exit_code, start_time)
        logger.info("Added execution: {}".format(execution_id))

        # Add the logs to the execution
        Execution.set_logs(execution_id, logs)
        logger.info("Added execution logs.")

        # Update the worker's last updated and last execution fields:
        try:
            Worker.update_worker_execution_time(actor_id, worker_id)
        except KeyError:
            # it is possible that this worker was sent a graceful shutdown command in the other thread
            # and that spawner has already removed this worker from the store.
            logger.info("worker {} got unexpected key error trying to update its execution time. "
                        "Worker better be shutting down! keep_running: {}".format(worker_id, keep_running))
            if keep_running:
                logger.error("worker couldn't update's its execution time but keep_running is still true!")

        logger.info("worker time stamps updated.")
Example #45
0
def main():
    order_book_id_number = 10
    toy_data = create_toy_data(order_book_ids_number=order_book_id_number,
                               feature_number=20,
                               start="2019-05-01",
                               end="2019-12-12",
                               frequency="D")
    env = PortfolioTradingGym(data_df=toy_data,
                              sequence_window=5,
                              add_cash=True)
    env = Numpy(env)
    env = ch.envs.Logger(env, interval=1000)
    env = ch.envs.Torch(env)
    env = ch.envs.Runner(env)

    # create net
    action_size = env.action_space.shape[0]
    number_asset, seq_window, features_number = env.observation_space.shape

    input_size = features_number

    actor = Actor(input_size=input_size,
                  hidden_size=50,
                  action_size=action_size)
    critic = Critic(input_size=input_size,
                    hidden_size=50,
                    action_size=action_size)

    target_actor = create_target_network(actor)
    target_critic = create_target_network(critic)
    actor_optimiser = optim.Adam(actor.parameters(), lr=LEARNING_RATE_ACTOR)
    critic_optimiser = optim.Adam(critic.parameters(), lr=LEARNING_RATE_CRITIC)
    replay = ch.ExperienceReplay()
    ou_noise = OrnsteinUhlenbeckNoise(mu=np.zeros(action_size))

    def get_action(state):
        action = actor(state)
        action = action + ou_noise()[0]
        return action

    def get_random_action(state):
        action = torch.softmax(torch.randn(action_size), dim=0)
        return action

    for step in range(1, MAX_STEPS + 1):
        with torch.no_grad():

            if step < UPDATE_START:
                replay += env.run(get_random_action, steps=1)
            else:
                replay += env.run(get_action, steps=1)

        replay = replay[-REPLAY_SIZE:]
        if step > UPDATE_START and step % UPDATE_INTERVAL == 0:
            sample = random.sample(replay, BATCH_SIZE)
            batch = ch.ExperienceReplay(sample)

            next_values = target_critic(batch.next_state(),
                                        target_actor(batch.next_state())).view(
                                            -1, 1)
            values = critic(batch.state(), batch.action()).view(-1, 1)
            rewards = ch.normalize(batch.reward())
            # rewards = batch.reward() / 100.0  # dividing instead of normalizing changes convergence a lot
            value_loss = ch.algorithms.ddpg.state_value_loss(
                values, next_values.detach(), rewards, batch.done(), DISCOUNT)
            critic_optimiser.zero_grad()
            value_loss.backward()
            critic_optimiser.step()

            # Update policy by one step of gradient ascent
            policy_loss = -critic(batch.state(), actor(batch.state())).mean()
            actor_optimiser.zero_grad()
            policy_loss.backward()
            actor_optimiser.step()

            # Update target networks
            ch.models.polyak_average(target_critic, critic, POLYAK_FACTOR)
            ch.models.polyak_average(target_actor, actor, POLYAK_FACTOR)
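The last two lines of the update step rely on ch.models.polyak_average to keep the target networks trailing the online networks. For readers without cherry installed, a plain-PyTorch soft update that performs the same style of interpolation (a sketch, not cherry's actual implementation) looks like this:

import copy

import torch


def polyak_update(target, source, factor):
    """Soft update: target <- factor * target + (1 - factor) * source."""
    with torch.no_grad():
        for t_param, s_param in zip(target.parameters(), source.parameters()):
            t_param.mul_(factor).add_(s_param, alpha=1.0 - factor)


# Throwaway usage example.
net = torch.nn.Linear(4, 2)
target_net = copy.deepcopy(net)
polyak_update(target_net, net, factor=0.995)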
Example #46
    def put(self, actor_id):
        logger.debug("top of PUT /actors/{}".format(actor_id))
        dbid = Actor.get_dbid(g.tenant, actor_id)
        try:
            actor = Actor.from_db(actors_store[dbid])
        except KeyError:
            logger.debug("did not find actor {} in store.".format(dbid))
            raise ResourceError(
                "No actor found with id: {}.".format(actor_id), 404)
        previous_image = actor.image
        previous_status = actor.status
        previous_owner = actor.owner
        args = self.validate_put(actor)
        logger.debug("PUT args validated successfully.")
        args['tenant'] = g.tenant
        # user can force an update by setting the force param:
        update_image = args.get('force')
        if not update_image and args['image'] == previous_image:
            logger.debug("new image is the same and force was false. not updating actor.")
            logger.debug("Setting status to the actor's previous status which is: {}".format(previous_status))
            args['status'] = previous_status
        else:
            update_image = True
            args['status'] = SUBMITTED
            logger.debug("new image is different. updating actor.")
        args['api_server'] = g.api_server

        # we do not allow a PUT to override the owner in case the PUT is issued by another user
        args['owner'] = previous_owner
        use_container_uid = args.get('use_container_uid')
        if Config.get('web', 'case') == 'camel':
            use_container_uid = args.get('useContainerUid')
        try:
            use_tas = Config.get('workers', 'use_tas_uid')
        except configparser.NoOptionError:
            logger.debug("no use_tas_uid config.")
            use_tas = False
        if hasattr(use_tas, 'lower'):
            use_tas = use_tas.lower() == 'true'
        else:
            logger.error("use_tas_uid configured but not as a string. use_tas_uid: {}".format(use_tas))
        logger.debug("use_tas={}. user_container_uid={}".format(use_tas, use_container_uid))
        if use_tas and not use_container_uid:
            uid, gid, tasdir = get_tas_data(g.user, g.tenant)
            if uid and gid:
                args['uid'] = uid
                args['gid'] = gid
            if tasdir:
                args['tasdir'] = tasdir
        args['mounts'] = get_all_mounts(args)
        args['last_update_time'] = get_current_utc_time()
        logger.debug("update args: {}".format(args))
        actor = Actor(**args)
        actors_store[actor.db_id] = actor.to_db()
        logger.info("updated actor {} stored in db.".format(actor_id))
        if update_image:
            worker_ids = [Worker.request_worker(tenant=g.tenant, actor_id=actor.db_id)]
            ch = CommandChannel()
            ch.put_cmd(actor_id=actor.db_id, worker_ids=worker_ids, image=actor.image, tenant=args['tenant'])
            ch.close()
            logger.debug("put new command on command channel to update actor.")
        # the PUT could have been issued by a user other than the original owner; if so, grant that user UPDATE permission on the actor:
        if not previous_owner == g.user:
            set_permission(g.user, actor.db_id, UPDATE)
        return ok(result=actor.display(),
                  msg="Actor updated successfully.")
Example #47
def run(seed, noise_type, layer_norm, **kwargs):
    """Configure things."""
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0: logger.set_level(logger.DISABLED)
    """Create Simulation envs."""
    env = PegintoHoles()
    """Create True envs"""
    # env = Env_robot_control()
    """Parse noise_type"""
    action_noise = None
    param_noise = None
    nb_actions = env.action_dim

    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(0.2) * np.ones(nb_actions))
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions),
                                             sigma=float(stddev) *
                                             np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError(
                'unknown noise type "{}"'.format(current_noise_type))
    """Configure components."""
    memory = Memory(limit=int(1e6),
                    action_shape=env.action_dim,
                    observation_shape=env.state_dim)
    critic = Critic(layer_norm=layer_norm)
    actor = Actor(nb_actions, layer_norm=layer_norm)
    """Seed everything to make things reproducible."""
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed,
                                                     logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)
    """Disable logging to avoid noise."""
    start_time = time.time()
    """Train the model"""
    training.train(env=env,
                   param_noise=param_noise,
                   action_noise=action_noise,
                   actor=actor,
                   critic=critic,
                   memory=memory,
                   **kwargs)
    """Eval the result"""
    logger.info('total runtime: {}s'.format(time.time() - start_time))
Example #48
    def post(self, actor_id):
        def get_hypermedia(actor, exc):
            return {'_links': {'self': '{}/actors/v2/{}/executions/{}'.format(actor.api_server, actor.id, exc),
                               'owner': '{}/profiles/v2/{}'.format(actor.api_server, actor.owner),
                               'messages': '{}/actors/v2/{}/messages'.format(actor.api_server, actor.id)},}

        logger.debug("top of POST /actors/{}/messages.".format(actor_id))
        dbid = Actor.get_dbid(g.tenant, actor_id)
        try:
            Actor.from_db(actors_store[dbid])
        except KeyError:
            logger.debug("did not find actor: {}.".format(actor_id))
            raise ResourceError("No actor found with id: {}.".format(actor_id), 404)
        args = self.validate_post()
        d = {}
        # build a dictionary of k:v pairs from the query parameters, and pass a single
        # additional object 'message' from within the post payload. Note that 'message'
        # need not be JSON data.
        logger.debug("POST body validated. actor: {}.".format(actor_id))
        for k, v in request.args.items():
            if k == 'message':
                continue
            d[k] = v
        logger.debug("extra fields added to message from query parameters: {}.".format(d))
        if hasattr(g, 'user'):
            d['_abaco_username'] = g.user
            logger.debug("_abaco_username: {} added to message.".format(g.user))
        if hasattr(g, 'api_server'):
            d['_abaco_api_server'] = g.api_server
            logger.debug("_abaco_api_server: {} added to message.".format(g.api_server))
        # if hasattr(g, 'jwt'):
        #     d['_abaco_jwt'] = g.jwt
        # if hasattr(g, 'jwt_server'):
        #     d['_abaco_jwt_server'] = g.jwt_server
        if hasattr(g, 'jwt_header_name'):
            d['_abaco_jwt_header_name'] = g.jwt_header_name
            logger.debug("abaco_jwt_header_name: {} added to message.".format(g.jwt_header_name))

        # create an execution
        exc = Execution.add_execution(dbid, {'cpu': 0,
                                             'io': 0,
                                             'runtime': 0,
                                             'status': SUBMITTED,
                                             'executor': g.user})
        logger.info("Execution {} added for actor {}".format(exc, actor_id))
        d['_abaco_execution_id'] = exc
        d['_abaco_Content_Type'] = args.get('_abaco_Content_Type', '')
        logger.debug("Final message dictionary: {}".format(d))
        ch = ActorMsgChannel(actor_id=dbid)
        ch.put_msg(message=args['message'], d=d)
        ch.close()
        logger.debug("Message added to actor inbox. id: {}.".format(actor_id))
        # make sure at least one worker is available
        actor = Actor.from_db(actors_store[dbid])
        actor.ensure_one_worker()
        logger.debug("ensure_one_worker() called. id: {}.".format(actor_id))
        if args.get('_abaco_Content_Type') == 'application/octet-stream':
            result = {'execution_id': exc, 'msg': 'binary - omitted'}
        else:
            result={'execution_id': exc, 'msg': args['message']}
        result.update(get_hypermedia(actor, exc))
        case = Config.get('web', 'case')
        if not case == 'camel':
            return ok(result)
        else:
            return ok(dict_to_camel(result))
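The handler returns either the raw result or dict_to_camel(result) depending on the web.case configuration. dict_to_camel is defined elsewhere; a hedged sketch of that kind of snake_case-to-camelCase key conversion could look like this (not the actual Abaco helper):

def to_camel(key):
    """Convert a snake_case key such as 'execution_id' to 'executionId'."""
    head, *rest = key.split('_')
    return head + ''.join(part.capitalize() for part in rest)


def dict_to_camel_sketch(d):
    """Shallow conversion of a result dictionary's keys to camelCase (sketch)."""
    return {to_camel(k): v for k, v in d.items()}


print(dict_to_camel_sketch({'execution_id': 'abc123', 'msg': 'hello'}))
# -> {'executionId': 'abc123', 'msg': 'hello'}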
Example #49
    parser.add_argument('--demo-length',type=int, default=sys.maxsize, help='number of demo episodes to run')
    parser.add_argument('--distance', action='store_true', help='shows model with the distance version of per')
    parser.add_argument('--impact', action='store_true', help='shows model with the impact version of per')
    args = parser.parse_args()
    if args.distance:
        model_path = os.path.join("models/HandManipulateBlock-v0/distance/" "model.pt")
    elif args.impact:
        model_path = os.path.join("models/HandManipulateBlock-v0/impact/","model.pt")
    else:
        model_path = os.path.join("models/HandManipulateBlock-v0/normal/","model.pt")

    env = gym.make('HandManipulateBlock-v0')
    env_params = get_params(env)
    
    mean_obs, std_obs, mean_g, std_g, model = torch.load(model_path, map_location=lambda storage, loc: storage)
    agent = Actor(env_params, 256)
    agent.load_state_dict(model)

    for __ in range(args.demo_length):
        state = env.reset()
        state = Normalize(state, mean_obs, std_obs, mean_g, std_g)
        for _ in range(env._max_episode_steps):
            env.render()
            with torch.no_grad():
                action = agent.forward(state)
            action = action.detach().numpy().squeeze()
            new_state, reward, _, info = env.step(action)
            new_state = Normalize(new_state, mean_obs, std_obs, mean_g, std_g)
            state = new_state
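Normalize is imported from the training code and is not shown in this excerpt. For a goal-conditioned environment such as HandManipulateBlock-v0, which returns a dict observation, a typical normalizer standardizes the observation and the desired goal with the saved statistics, clips the result, and concatenates the two parts; the helper below is an assumption about that behavior, not the real definition:

import numpy as np
import torch


def normalize_sketch(state, mean_obs, std_obs, mean_g, std_g, clip_range=5.0):
    """Hypothetical stand-in for the Normalize helper used in the demo loop above."""
    obs = np.clip((state['observation'] - mean_obs) / (std_obs + 1e-8),
                  -clip_range, clip_range)
    goal = np.clip((state['desired_goal'] - mean_g) / (std_g + 1e-8),
                   -clip_range, clip_range)
    # The real helper presumably also converts to a torch tensor for agent.forward().
    return torch.as_tensor(np.concatenate([obs, goal], axis=-1), dtype=torch.float32)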

Example #50
    def check_metrics(self, actor_ids):
        for actor_id in actor_ids:
            logger.debug("TOP OF CHECK METRICS")

            query = {
                'query': 'message_count_for_actor_{}'.format(actor_id.decode("utf-8").replace('-', '_')),
                'time': datetime.datetime.utcnow().isoformat() + "Z"
            }
            r = requests.get(PROMETHEUS_URL + '/api/v1/query', params=query)
            data = json.loads(r.text)['data']['result']

            change_rate = 0
            try:
                previous_data = last_metric[actor_id]
                try:
                    change_rate = int(data[0]['value'][1]) - int(previous_data[0]['value'][1])
                except (IndexError, KeyError, TypeError, ValueError):
                    logger.debug("Could not calculate change rate.")
            except KeyError:
                logger.info("No previous data yet for new actor {}".format(actor_id))

            last_metric.update({actor_id: data})
            # Add a worker if message count reaches a given number
            try:
                logger.debug("METRICS current message count: {}".format(data[0]['value'][1]))
                if int(data[0]['value'][1]) >= 1:
                    tenant, aid = actor_id.decode('utf8').split('_')
                    logger.debug('METRICS Attempting to create a new worker for {}'.format(actor_id))
                    try:
                        # create a worker & add to this actor
                        actor = Actor.from_db(actors_store[actor_id])
                        worker_ids = [Worker.request_worker(tenant=tenant, actor_id=aid)]
                        logger.info("New worker id: {}".format(worker_ids[0]))
                        ch = CommandChannel()
                        ch.put_cmd(actor_id=actor.db_id,
                                   worker_ids=worker_ids,
                                   image=actor.image,
                                   tenant=tenant,
                                   num=1,
                                   stop_existing=False)
                        ch.close()
                        logger.debug('METRICS Added worker successfully for {}'.format(actor_id))
                    except Exception as e:
                        logger.debug("METRICS - SOMETHING BROKE: {} - {} - {}".format(type(e), e, e.args))
                elif int(data[0]['value'][1]) <= 1:
                    logger.debug("METRICS made it to scale down block")
                    # Check the number of workers for this actor before deciding to scale down
                    workers = Worker.get_workers(actor_id)
                    logger.debug('METRICS NUMBER OF WORKERS: {}'.format(len(workers)))
                    try:
                        if len(workers) == 1:
                            logger.debug("METRICS only one worker, won't scale down")
                        else:
                            while len(workers) > 0:
                                logger.debug('METRICS made it STATUS check')
                                worker = workers.popitem()[1]
                                logger.debug('METRICS SCALE DOWN current worker: {}'.format(worker['status']))
                                # check status of the worker is ready
                                if worker['status'] == 'READY':
                                    logger.debug("METRICS I MADE IT")
                                    # scale down
                                    try:
                                        shutdown_worker(worker['id'])
                                        logger.debug('METRICS shut down worker {}'.format(worker['id']))
                                        continue
                                    except Exception as e:
                                        logger.debug('METRICS ERROR shutting down worker: {} - {} - {}'.format(type(e), e, e.args))

                    except IndexError:
                        logger.debug('METRICS only one worker found for actor {}. '
                                     'Will not scale down'.format(actor_id))
                    except Exception as e:
                        logger.debug("METRICS SCALE UP FAILED: {}".format(e))


            except Exception as e:
                logger.debug("METRICS - ANOTHER ERROR: {} - {} - {}".format(type(e), e, e.args))
Example #51
class Agent:
    def __init__(self, input_dims, alpha=0.001, beta=0.002, env=None, gamma=0.99,
                n_actions=2, max_size=1000000, tau=0.005, hd1=400, hd2=300, 
                batch_size=64, noise=0.1):
        self.gamma = gamma
        self.tau = tau
        self.batch_size = batch_size
        self.n_actions = n_actions
        self.noise = noise
        self.memory = MemoryBuffer(max_size)
        self.max_action = env.action_space.high[0]
        self.min_action = env.action_space.low[0]

        self.actor = Actor(n_actions=n_actions)
        self.critic = Critic()
        self.target_actor = Actor(n_actions=n_actions)
        self.target_critic = Critic()

        self.actor.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=alpha))
        self.critic.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=beta))
        self.target_actor.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=alpha))
        self.target_critic.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=beta))

        self.update_weights()

    def remember(self, state, action, reward, next_state, done):
        self.memory.add(state, action, reward, next_state, done)

    def train(self):
        cl, al = self.learn()
        if cl is not None:
            self.update_weights()

        return cl, al

    def update_weights(self, tau=None):
        if tau is None:
            tau = self.tau

        weights = []
        targets = self.target_actor.weights
        for i, weight in enumerate(self.actor.weights):
            weights.append(weight * tau + targets[i] * (1 - tau))
        self.target_actor.set_weights(weights)

        weights = []
        targets = self.target_critic.weights
        for i, weight in enumerate(self.critic.weights):
            weights.append(weight * tau + targets[i] * (1 - tau))
        self.target_critic.set_weights(weights)

    def choose_action(self, observation, evaluate=False):
        state = tf.convert_to_tensor([observation], dtype=tf.float32)
        actions = self.actor(state)

        if not evaluate:
            actions += tf.random.normal(shape=[self.n_actions], mean=0.0, stddev=self.noise)
        
        actions = tf.clip_by_value(actions, self.min_action, self.max_action)

        return actions[0]

    # @tf.function
    def learn(self):
        if len(self.memory) < self.batch_size:
            return None, None

        states, actions, rewards, next_states, done = self.memory.sample(self.batch_size)

        states = tf.convert_to_tensor(states, dtype=tf.float32)
        actions = tf.convert_to_tensor(actions, dtype=tf.float32)
        rewards = tf.convert_to_tensor(rewards, dtype=tf.float32)
        next_states = tf.convert_to_tensor(next_states, dtype=tf.float32)

        with tf.GradientTape() as tape:
            target_actions = self.target_actor(next_states)
            critic_value_ = tf.squeeze(self.target_critic(next_states, target_actions), 1)
            critic_value = tf.squeeze(self.critic(states, actions), 1)
            target = rewards + self.gamma * critic_value_ * (1 - done)
            critic_loss = tf.keras.losses.MSE(target, critic_value)

        critic_gradient = tape.gradient(critic_loss, self.critic.trainable_variables)
        self.critic.optimizer.apply_gradients(
            zip(critic_gradient, self.critic.trainable_variables)
        )

        with tf.GradientTape() as tape:
            new_policy_actions = self.actor(states)
            actor_loss = -self.critic(states, new_policy_actions)
            actor_loss = tf.math.reduce_mean(actor_loss)
            
        actor_gradient = tape.gradient(actor_loss, self.actor.trainable_variables)
        self.actor.optimizer.apply_gradients(
            zip(actor_gradient, self.actor.trainable_variables)
        )

        self.update_weights()

        return critic_loss, actor_loss
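A short driver loop for this Agent, for context. The environment name, episode count, and the gym step API used here are illustrative assumptions rather than part of the original listing:

import gym
import numpy as np

env = gym.make('Pendulum-v1')  # any continuous-action environment
agent = Agent(input_dims=env.observation_space.shape, env=env,
              n_actions=env.action_space.shape[0])

for episode in range(5):
    state, done, score = env.reset(), False, 0.0
    while not done:
        action = agent.choose_action(state)
        next_state, reward, done, _ = env.step(np.array(action))
        agent.remember(state, action, reward, next_state, done)
        agent.train()  # returns (None, None) until the buffer holds a full batch
        state, score = next_state, score + reward
    print('episode', episode, 'score', round(float(score), 2))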
Example #52
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    # Configure things.

    # Create envs.
    env = gym.make(env_id)
    env = bench.Monitor(env, os.path.join(logger.get_dir(), '0'))

    if evaluation:
        eval_env = gym.make(env_id)
        eval_env = bench.Monitor(eval_env,
                                 os.path.join(logger.get_dir(), 'gym_eval'))
        #env = bench.Monitor(env, None)
    else:
        eval_env = None

    # Parse noise_type
    action_noise = None
    param_noise = None
    nb_actions = env.action_space.shape[-1]
    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions),
                                             sigma=float(stddev) *
                                             np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError(
                'unknown noise type "{}"'.format(current_noise_type))

    # Configure components.
    memory = Memory(limit=int(1e6),
                    action_shape=env.action_space.shape,
                    observation_shape=env.observation_space.shape)
    critic = Critic(layer_norm=layer_norm)
    actor = Actor(nb_actions, layer_norm=layer_norm)

    # Seed everything to make things reproducible.
    logger.info('seed={}, logdir={}'.format(seed, logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    start_time = time.time()
    training.train(env=env,
                   eval_env=eval_env,
                   param_noise=param_noise,
                   action_noise=action_noise,
                   actor=actor,
                   critic=critic,
                   memory=memory,
                   **kwargs)
    env.close()
    if eval_env is not None:
        eval_env.close()
    logger.info('total runtime: {}s'.format(time.time() - start_time))
Example #53
from collections import deque
import random
import torch
from torch import optim
from tqdm import tqdm
from hyperparams import ACTION_NOISE, DISCOUNT, HIDDEN_SIZE, LEARNING_RATE, MAX_STEPS, POLICY_DELAY, POLYAK_FACTOR, REPLAY_SIZE, TARGET_ACTION_NOISE, TARGET_ACTION_NOISE_CLIP, UPDATE_INTERVAL, UPDATE_START
from hyperparams import OFF_POLICY_BATCH_SIZE as BATCH_SIZE
from env import Env
from models import Actor, Critic, create_target_network, update_target_network
from utils import plot

env = Env()
actor = Actor(HIDDEN_SIZE)
critic_1 = Critic(HIDDEN_SIZE, state_action=True)
critic_2 = Critic(HIDDEN_SIZE, state_action=True)
target_actor = create_target_network(actor)
target_critic_1 = create_target_network(critic_1)
target_critic_2 = create_target_network(critic_2)
actor_optimiser = optim.Adam(actor.parameters(), lr=LEARNING_RATE)
critics_optimiser = optim.Adam(list(critic_1.parameters()) +
                               list(critic_2.parameters()),
                               lr=LEARNING_RATE)
D = deque(maxlen=REPLAY_SIZE)

state, done, total_reward = env.reset(), False, 0
pbar = tqdm(range(1, MAX_STEPS + 1), unit_scale=1, smoothing=0)
for step in pbar:
    with torch.no_grad():
        if step < UPDATE_START:
            # To improve exploration take actions sampled from a uniform random distribution over actions at the start of training
            action = torch.tensor([[2 * random.random() - 1]])
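The excerpt is cut off here, but the comment above describes the standard TD3 warm-up: for the first UPDATE_START steps, actions are drawn uniformly from the action space to fill the replay buffer with diverse transitions, after which the actor (plus exploration noise) takes over. A compact sketch of that branching, with illustrative names for the pieces the excerpt omits:

import torch


def select_action(step, state, actor, update_start=10_000, action_noise=0.1):
    """Uniform random warm-up actions, then noisy policy actions (sketch)."""
    if step < update_start:
        # Single action dimension in [-1, 1], matching `2 * random.random() - 1` above.
        return 2 * torch.rand(1, 1) - 1
    with torch.no_grad():
        action = actor(state)
    action = action + action_noise * torch.randn_like(action)
    return action.clamp(-1, 1)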
Example #54
def main(worker_id, image):
    """
    Main function for the worker process.

    This function pulls the actor image, completes the initial handshake with the spawner,
    and then subscribes to the actor's message channel.
    """
    logger.info("Entering main() for worker: {}, image: {}".format(
        worker_id, image))
    spawner_worker_ch = SpawnerWorkerChannel(worker_id=worker_id)

    # first, attempt to pull image from docker hub:
    try:
        logger.info("Worker pulling image {}...".format(image))
        pull_image(image)
    except DockerError as e:
        # return a message to the spawner that there was an error pulling image and abort
        # this is not necessarily an error state: the user simply could have provided an
        # image name that does not exist in the registry. This is the first time we would
        # find that out.
        logger.info(
            "worker got a DockerError trying to pull image. Error: {}.".format(
                e))
        spawner_worker_ch.put({'status': 'error', 'msg': str(e)})
        raise e
    logger.info("Image {} pulled successfully.".format(image))

    # inform spawner that image pulled successfully and, simultaneously,
    # wait to receive message from spawner that it is time to subscribe to the actor channel
    logger.debug("Worker waiting on message from spawner...")
    result = spawner_worker_ch.put_sync({'status': 'ok'})
    logger.info(
        "Worker received reply from spawner. result: {}.".format(result))

    # should be OK to close the spawner_worker_ch on the worker side since spawner was first client
    # to open it.
    spawner_worker_ch.close()

    if result['status'] == 'error':
        # we do not expect to get an error response at this point. this needs investigation
        logger.error(
            "Worker received error message from spawner: {}. Quiting...".
            format(str(result)))
        raise WorkerException(str(result))

    actor_id = result.get('actor_id')
    tenant = result.get('tenant')
    logger.info(
        "Worker received ok from spawner. Message: {}, actor_id:{}".format(
            result, actor_id))
    api_server = None
    client_id = None
    client_secret = None
    access_token = None
    refresh_token = None
    if result.get('client') == 'yes':
        logger.info("Got client: yes, result: {}".format(result))
        api_server = result.get('api_server')
        client_id = result.get('client_id')
        client_secret = result.get('client_secret')
        access_token = result.get('access_token')
        refresh_token = result.get('refresh_token')
    else:
        logger.info("Did not get client:yes, got result:{}".format(result))
    try:
        Actor.set_status(actor_id, READY, status_message=" ")
    except KeyError:
        # it is possible the actor was already deleted during worker start up; if
        # so, the worker should have a stop message waiting for it. starting subscribe
        # as usual should allow this process to work as expected.
        pass
    logger.info("Actor status set to READY. subscribing to inbox.")
    worker_ch = WorkerChannel(worker_id=worker_id)
    subscribe(tenant, actor_id, worker_id, api_server, client_id,
              client_secret, access_token, refresh_token, worker_ch)
Example #55
    def __init__(self, state_dim, action_dim, max_action, memory, args):

        # misc
        self.criterion = nn.MSELoss()
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.max_action = max_action
        self.memory = memory
        self.n = args.n_actor

        # actor
        self.actors = [
            Actor(state_dim,
                  action_dim,
                  max_action,
                  layer_norm=args.layer_norm) for i in range(self.n)
        ]
        self.actors_target = [
            Actor(state_dim,
                  action_dim,
                  max_action,
                  layer_norm=args.layer_norm) for i in range(self.n)
        ]
        self.actors_optimizer = [
            torch.optim.Adam(self.actors[i].parameters(), lr=args.actor_lr)
            for i in range(self.n)
        ]

        for i in range(self.n):
            self.actors_target[i].load_state_dict(self.actors[i].state_dict())

        # critic
        self.critic = CriticTD3(state_dim,
                                action_dim,
                                layer_norm=args.layer_norm)
        self.critic_target = CriticTD3(state_dim,
                                       action_dim,
                                       layer_norm=args.layer_norm)
        self.critic_target.load_state_dict(self.critic.state_dict())
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 lr=args.critic_lr)

        # cuda
        if torch.cuda.is_available():
            for i in range(self.n):
                self.actors[i] = self.actors[i].cuda()
                self.actors_target[i] = self.actors_target[i].cuda()
            self.critic = self.critic.cuda()
            self.critic_target = self.critic_target.cuda()

        # shared memory
        for i in range(self.n):
            self.actors[i].share_memory()
            self.actors_target[i].share_memory()
        self.critic.share_memory()
        self.critic_target.share_memory()

        # hyper-parameters
        self.tau = args.tau
        self.discount = args.discount
        self.batch_size = args.batch_size
        self.policy_noise = args.policy_noise
        self.noise_clip = args.noise_clip
        self.policy_freq = args.policy_freq
Example #56
class DDPGAgent:
    def __init__(self, env, gamma, tau, buffer_maxlen, batch_size,
                 critic_learning_rate, actor_learning_rate, update_per_step,
                 seed):
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        # hyperparameters
        self.num_replay_updates_per_step = update_per_step
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau

        # initialize actor and critic networks
        self.critic = Critic(env.observation_space.shape[0],
                             env.action_space.shape[0], seed).to(self.device)
        self.critic_target = Critic(env.observation_space.shape[0],
                                    env.action_space.shape[0],
                                    seed).to(self.device)

        self.actor = Actor(env.observation_space.shape[0],
                           env.action_space.shape[0], seed).to(self.device)
        self.actor_target = Actor(env.observation_space.shape[0],
                                  env.action_space.shape[0],
                                  seed).to(self.device)

        # optimizers
        self.critic_optimizer = optim.Adam(self.critic.parameters(),
                                           lr=critic_learning_rate)
        self.actor_optimizer = optim.Adam(self.actor.parameters(),
                                          lr=actor_learning_rate)

        self.buffer = ReplayBuffer(buffer_maxlen, batch_size, seed)
        self.noise = OUNoise(env.action_space.shape[0])

    def get_action(self, state):
        state = torch.FloatTensor(state).to(self.device)
        self.actor.eval()
        with torch.no_grad():
            action = self.actor(state)
        self.actor.train()

        action = action.cpu().numpy()
        return action

    def step(self, state, action, reward, next_state, done):
        # Save experience in replay buffer
        self.buffer.add(state, action, reward, next_state, done)

        q_loss, policy_loss = None, None
        # If enough samples are available in buffer, get random subset and learn
        if len(self.buffer) >= self.batch_size:
            # update the network "num_replay_updates_per_step" times in each step
            for _ in range(self.num_replay_updates_per_step):
                experiences = self.buffer.sample()
                q_loss, policy_loss = self.learn(experiences)
                q_loss = q_loss.detach().item()
                policy_loss = policy_loss.detach().item()

        return q_loss, policy_loss

    def learn(self, experiences):
        """Updating actor and critic parameters based on sampled experiences from replay buffer."""
        states, actions, rewards, next_states, dones = experiences

        curr_Q = self.critic(states, actions)
        next_actions = self.actor_target(next_states).detach()
        next_Q = self.critic_target(next_states, next_actions).detach()
        target_Q = rewards + self.gamma * next_Q * (1 - dones)

        # losses
        q_loss = F.mse_loss(curr_Q, target_Q)
        policy_loss = -self.critic(states, self.actor(states)).mean()

        # update actor
        self.actor_optimizer.zero_grad()
        policy_loss.backward()
        self.actor_optimizer.step()

        # update critic
        self.critic_optimizer.zero_grad()
        q_loss.backward()
        self.critic_optimizer.step()

        # update target networks
        for target_param, param in zip(self.actor_target.parameters(),
                                       self.actor.parameters()):
            target_param.data.copy_(param.data * self.tau + target_param.data *
                                    (1.0 - self.tau))

        for target_param, param in zip(self.critic_target.parameters(),
                                       self.critic.parameters()):
            target_param.data.copy_(param.data * self.tau + target_param.data *
                                    (1.0 - self.tau))
        return q_loss, policy_loss
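For context, a hypothetical episode loop for this DDPGAgent; the environment, hyperparameter values, and the way the losses returned by step() are printed are assumptions added for illustration (exploration noise is omitted for brevity):

import gym

env = gym.make('Pendulum-v1')  # illustrative continuous-control environment
agent = DDPGAgent(env, gamma=0.99, tau=1e-3, buffer_maxlen=100000,
                  batch_size=64, critic_learning_rate=1e-3,
                  actor_learning_rate=1e-4, update_per_step=1, seed=0)

state, done, total_reward = env.reset(), False, 0.0
while not done:
    action = agent.get_action(state)
    next_state, reward, done, _ = env.step(action)
    q_loss, policy_loss = agent.step(state, action, reward, next_state, done)
    state, total_reward = next_state, total_reward + reward
print('return:', total_reward, 'last q_loss:', q_loss, 'last policy_loss:', policy_loss)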
Example #57
def subscribe(tenant, actor_id, worker_id, api_server, client_id,
              client_secret, access_token, refresh_token, worker_ch):
    """
    Main loop for the Actor executor worker. Subscribes to the actor's inbox and executes actor
    containers when messages arrive. Also subscribes to the worker channel for future communications.
    :return:
    """
    logger.debug("Top of subscribe().")
    actor_ch = ActorMsgChannel(actor_id)
    try:
        leave_containers = Config.get('workers', 'leave_containers')
    except configparser.NoOptionError:
        leave_containers = False
    ag = None
    if api_server and client_id and client_secret and access_token and refresh_token:
        logger.info("Creating agave client.")
        verify = get_tenant_verify(tenant)
        ag = Agave(api_server=api_server,
                   token=access_token,
                   refresh_token=refresh_token,
                   api_key=client_id,
                   api_secret=client_secret,
                   verify=verify)
    else:
        logger.info("Not creating agave client.")
    logger.info("Starting the process worker channel thread.")
    t = threading.Thread(target=process_worker_ch,
                         args=(tenant, worker_ch, actor_id, worker_id,
                               actor_ch, ag))
    t.start()
    logger.info("Worker subscribing to actor channel.")
    update_worker_status = True
    global keep_running
    while keep_running:
        if update_worker_status:
            Worker.update_worker_status(actor_id, worker_id, READY)
            update_worker_status = False
        try:
            msg = actor_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            continue
        except channelpy.ChannelClosedException:
            logger.info("Channel closed, worker exiting...")
            keep_running = False
            sys.exit()
        update_worker_status = True
        logger.info(
            "Received message {}. Starting actor container...".format(msg))
        # the msg object is a dictionary with an entry called message and an arbitrary
        # set of k:v pairs coming in from the query parameters.
        message = msg.pop('message', '')
        actor = Actor.from_db(actors_store[actor_id])
        execution_id = msg['_abaco_execution_id']
        content_type = msg['_abaco_Content_Type']
        mounts = actor.mounts
        logger.debug("actor mounts: {}".format(mounts))
        # for binary data, create a fifo in the configured directory. The configured
        # fifo_host_path_dir is equal to the fifo path in the worker container:
        fifo_host_path = None
        if content_type == 'application/octet-stream':
            try:
                fifo_host_path_dir = Config.get('workers',
                                                'fifo_host_path_dir')
            except (configparser.NoSectionError, configparser.NoOptionError):
                logger.error(
                    "No fifo_host_path configured. Cannot manage binary data.")
                Actor.set_status(
                    actor_id,
                    ERROR,
                    msg="Abaco instance not configured for binary data.")
                continue
            fifo_host_path = os.path.join(fifo_host_path_dir, worker_id,
                                          execution_id)
            logger.info("Create fifo at path: {}".format(fifo_host_path))
            try:
                os.mkfifo(fifo_host_path)
            except Exception as e:
                logger.error(
                    "Could not create fifo_path. Exception: {}".format(e))
                raise e
            # add the fifo as a mount:
            mounts.append({
                'host_path': fifo_host_path,
                'container_path': '/_abaco_binary_data',
                'format': 'ro'
            })

        # the execution object was created by the controller, but we need to add the worker id to it now that we
        # know which worker will be working on the execution.
        logger.debug("Adding worker_id to execution.")
        Execution.add_worker_id(actor_id, execution_id, worker_id)

        # privileged dictates whether the actor container runs in privileged mode and if docker daemon is mounted.
        privileged = False
        if type(actor['privileged']) == bool and actor['privileged']:
            privileged = True
        logger.debug("privileged: {}".format(privileged))

        # retrieve the default environment registered with the actor.
        environment = actor['default_environment']
        logger.debug("Actor default environment: {}".format(environment))

        # construct the user field from the actor's uid and gid:
        user = get_container_user(actor)
        logger.debug("Final user valiue: {}".format(user))
        # overlay the default_environment registered for the actor with the msg
        # dictionary
        environment.update(msg)
        environment['_abaco_access_token'] = ''
        environment['_abaco_actor_dbid'] = actor_id
        environment['_abaco_actor_id'] = actor.id
        environment['_abaco_actor_state'] = actor.state
        logger.debug("Overlayed environment: {}".format(environment))

        # if we have an agave client, get a fresh set of tokens:
        if ag:
            try:
                ag.token.refresh()
                token = ag.token.token_info['access_token']
                environment['_abaco_access_token'] = token
                logger.info(
                    "Refreshed the tokens. Passed {} to the environment.".
                    format(token))
            except Exception as e:
                logger.error(
                    "Got an exception trying to get an access token: {}".
                    format(e))
        else:
            logger.info(
                "Agave client `ag` is None -- not passing access token.")
        logger.info("Passing update environment: {}".format(environment))
        try:
            stats, logs, final_state, exit_code, start_time = execute_actor(
                actor_id, worker_id, worker_ch, image, message, user,
                environment, privileged, mounts, leave_containers,
                fifo_host_path)
        except DockerStartContainerError as e:
            logger.error("Got DockerStartContainerError: {}".format(e))
            Actor.set_status(actor_id, ERROR,
                             "Error executing container: {}".format(e))
            continue
        # Add the completed stats to the execution
        logger.info(
            "Actor container finished successfully. Got stats object:{}".
            format(str(stats)))
        Execution.finalize_execution(actor_id, execution_id, COMPLETE, stats,
                                     final_state, exit_code, start_time)
        logger.info("Added execution: {}".format(execution_id))

        # Add the logs to the execution
        Execution.set_logs(execution_id, logs)
        logger.info("Added execution logs.")

        # Update the worker's last updated and last execution fields:
        Worker.update_worker_execution_time(actor_id, worker_id)
        logger.info("worker time stamps updated.")
Example #58
class DDPG():
    """Interacts with and learns from the environment."""

    def __init__(self, state_size, action_size, random_seed, hyper, num_agents, memory):

        self.action_size = action_size
        self.num_agents  = num_agents
    
        # Actor Network (w/ Target Network)
        self.actor_local     = Actor(state_size, action_size, random_seed).to(device)
        self.actor_target    = Actor(state_size, action_size, random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=hyper['LR_ACTOR'])

        # Critic Network (w/ Target Network)
        self.critic_local     = Critic(state_size, action_size, num_agents, random_seed).to(device)
        self.critic_target    = Critic(state_size, action_size, num_agents, random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=hyper['LR_CRITIC']) #, weight_decay=hyper['WEIGHT_DECAY'])

        # Noise process
        self.noise = OUNoise(action_size, random_seed)
        self.t           = 0 
        self.memory      = memory

    def step(self, state, action, reward, next_state, done, others_states,others_actions, others_next_states):
        self.memory.add(state, action, reward, next_state, done, others_states, others_actions, others_next_states)
        self.t = (self.t + 1) % hyper['UPDATE_EVERY']
        if self.t == 0:
            if len(self.memory) > hyper['BATCH_SIZE']:
                experiences = self.memory.sample()
                self.learn(experiences, hyper['GAMMA'])

    def act(self, states, add_noise=True):
        states = torch.from_numpy(states).float().to(device)
        self.actor_local.eval()
        with torch.no_grad():
            actions = self.actor_local(states).cpu().data.numpy()
        self.actor_local.train()
        if add_noise:
            actions += self.noise.sample()
        return np.clip(actions, -1, 1)
    
    def reset(self):
        self.noise.reset()

    def learn(self, experiences, gamma):
        (states, actions, rewards, next_states, dones, others_states,
         others_actions, others_next_states) = experiences 
        rewards_ = rewards
        all_states = torch.cat((states, others_states), dim=1).to(device)
        all_actions = torch.cat((actions, others_actions), dim=1).to(device)
        all_next_states = torch.cat((next_states, others_next_states), dim=1).to(device)

        # --------------------------- update critic --------------------------- 
        l_all_next_actions = []
        l_all_next_actions.append(self.actor_target(states))
        l_all_next_actions.append(self.actor_target(others_states))
        all_next_actions = torch.cat(l_all_next_actions, dim=1).to(device)

        Q_targets_next = self.critic_target(all_next_states, all_next_actions) 
        Q_targets = rewards_ + (gamma * Q_targets_next * (1 - dones)) 
        Q_expected = self.critic_local(all_states, all_actions)
        critic_loss = F.mse_loss(Q_expected, Q_targets)
        # Minimize the loss
        self.critic_optimizer.zero_grad()
        critic_loss.backward() 
        # torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1)
        self.critic_optimizer.step()

        # --------------------------- update actor --------------------------- 
        this_actions_pred = self.actor_local(states)
        others_actions_pred = self.actor_local(others_states)
        others_actions_pred = others_actions_pred.detach()
        actions_pred = torch.cat((this_actions_pred, others_actions_pred), dim=1).to(device)
        actor_loss = -self.critic_local(all_states, actions_pred).mean()
        # Minimize the loss
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # ---------------------- update target networks ---------------------- 
        self.soft_update(self.critic_local, self.critic_target, hyper['TAU'])
        self.soft_update(self.actor_local, self.actor_target, hyper['TAU']) 
        
    def soft_update(self, local_model, target_model, tau): 
        iter_params = zip(target_model.parameters(), local_model.parameters())
        for target_param, local_param in iter_params:
            tensor_aux = tau*local_param.data + (1.0-tau)*target_param.data
            target_param.data.copy_(tensor_aux)
Example #59
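The tests below post sample_actor and sample_movie payloads that are defined elsewhere in the test module. Based on the model fields exercised in setUp (name/age/gender for Actor, title/release_date for Movie), hypothetical fixtures compatible with these tests might look like:

# Hypothetical module-level fixtures; the real values live outside this excerpt.
sample_actor = {
    "name": "Test Actor",
    "age": 30,
    "gender": "F",
}

sample_movie = {
    "title": "Test Movie",
    "release_date": "2021-01-15 12:00:00.000000",
}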
class TestCapstone(unittest.TestCase):
    def setUp(self):
        self.app = APP
        self.client = self.app.test_client
        database_name = "capstone_test"
        database_username = "******"
        database_password = "******"
        self.database_path = "postgresql://{}:{}@{}/{}".format(
            database_username,
            database_password,
            'localhost:5432',
            database_name)
        setup_db(self.app, self.database_path)

        with self.app.app_context():
            db.drop_all()
            db.create_all()

            self.executive_token = os.getenv("EXECUTIVE_TOKEN")
            self.director_token = os.getenv("DIRECTOR_TOKEN")
            self.assistant_token = os.getenv("ASSISTANT_TOKEN")

            self.existing_actor = Actor(name="Brad", age=45, gender="M")
            self.existing_actor.create()
            self.existing_movie = Movie(
                title="Once Upon",
                release_date="2019-10-04 19:09:33.77486")
            self.existing_movie.create()

    def tearDown(self):
        with self.app.app_context():
            db.session.rollback()
            db.session.close()

    def test_precreated_actor_exists(self):
        actor = Actor.query.filter_by(name="Brad", age=45, gender="M").first()
        self.assertIsNotNone(actor)

    def test_precreated_movie_exists(self):
        movie = Movie.query.filter_by(
            title="Once Upon",
            release_date="2019-10-04 19:09:33.77486").first()
        self.assertIsNotNone(movie)

    def test_assistant_should_get_all_actors(self):
        actor = Actor(name="Abls", age=123, gender="M")
        actor.create()
        res = self.client().get(
            '/actors',
            headers={
                "Authorization": "Bearer {}".format(
                    self.assistant_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        actors = Actor.query.all()
        self.assertEqual(len(data['actors']), len(actors))

    def test_director_should_get_all_actors(self):
        actor = Actor(name="Abls", age=123, gender="M")
        actor.create()
        res = self.client().get(
            '/actors',
            headers={
                "Authorization": "Bearer {}".format(
                    self.director_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        actors = Actor.query.all()
        self.assertEqual(len(data['actors']), len(actors))

    def test_executive_should_get_all_actors(self):
        actor = Actor(name="Abls", age=123, gender="M")
        actor.create()
        res = self.client().get(
            '/actors',
            headers={
                "Authorization": "Bearer {}".format(
                    self.executive_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        actors = Actor.query.all()
        self.assertEqual(len(data['actors']), len(actors))

    def test_assistant_should_get_all_movies(self):
        movie = Movie(
            title="Test Title",
            release_date="2012-04-23 18:25:43.511")
        movie.create()
        res = self.client().get(
            '/movies',
            headers={
                "Authorization": "Bearer {}".format(
                    self.assistant_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        movies = Movie.query.all()
        self.assertEqual(len(data['movies']), len(movies))

    def test_director_should_get_all_movies(self):
        movie = Movie(
            title="Test Title",
            release_date="2012-04-23 18:25:43.511")
        movie.create()
        res = self.client().get(
            '/movies',
            headers={
                "Authorization": "Bearer {}".format(
                    self.director_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        movies = Movie.query.all()
        self.assertEqual(len(data['movies']), len(movies))

    def test_executive_should_get_all_movies(self):
        movie = Movie(
            title="Test Title",
            release_date="2012-04-23 18:25:43.511")
        movie.create()
        res = self.client().get(
            '/movies',
            headers={
                "Authorization": "Bearer {}".format(
                    self.executive_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        movies = Movie.query.all()
        self.assertEqual(len(data['movies']), len(movies))

    def test_assistant_cant_create_actor(self):
        res = self.client().post(
            '/actors',
            headers={
                "Authorization": "Bearer {}".format(
                    self.assistant_token)},
            json=sample_actor)
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 401)
        self.assertFalse(data['success'])

    def test_director_should_create_actor(self):
        res = self.client().post(
            '/actors',
            headers={
                "Authorization": "Bearer {}".format(
                    self.director_token)},
            json=sample_actor)
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 201)
        self.assertTrue(data['success'])

    def test_executive_should_create_actor(self):
        res = self.client().post(
            '/actors',
            headers={
                "Authorization": "Bearer {}".format(
                    self.executive_token)},
            json=sample_actor)
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 201)
        self.assertTrue(data['success'])

        createdId = data['created']
        actor = Actor.query.get(createdId)
        self.assertIsNotNone(actor)
        self.assertEqual(actor.id, createdId)

    def test_incorrect_create_actor(self):
        res = self.client().post(
            '/actors',
            headers={
                "Authorization": "Bearer {}".format(
                    self.executive_token)})
        self.assertEqual(res.status_code, 400)
        data = json.loads(res.data)
        self.assertFalse(data['success'])

    def test_assistant_cant_create_movie(self):
        res = self.client().post(
            '/movies',
            headers={
                "Authorization": "Bearer {}".format(
                    self.assistant_token)},
            json=sample_movie)
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 401)
        self.assertFalse(data['success'])

    def test_director_cant_create_movie(self):
        res = self.client().post(
            '/movies',
            headers={
                "Authorization": "Bearer {}".format(
                    self.director_token)},
            json=sample_movie)
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 401)
        self.assertFalse(data['success'])

    def test_executive_should_create_movie(self):
        res = self.client().post(
            '/movies',
            headers={
                "Authorization": "Bearer {}".format(
                    self.executive_token)},
            json=sample_movie)
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 201)
        self.assertTrue(data['success'])

        createdId = data['created']
        movie = Movie.query.get(createdId)
        self.assertIsNotNone(movie)
        self.assertEqual(movie.id, createdId)

    def test_incorrect_create_movie(self):
        res = self.client().post(
            '/movies',
            headers={
                "Authorization": "Bearer {}".format(
                    self.executive_token)})
        self.assertEqual(res.status_code, 400)
        data = json.loads(res.data)
        self.assertFalse(data['success'])

    def test_assistant_cant_patch_actor(self):
        actor = Actor.query.filter_by(name="Brad", age=45, gender="M").first()
        res = self.client().patch(
            '/actors',
            headers={
                "Authorization": "Bearer {}".format(
                    self.assistant_token)},
            json=dict(
                id=actor.id))
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 401)
        self.assertFalse(data['success'])

    def test_director_should_patch_actor(self):
        actor = Actor.query.filter_by(name="Brad", age=45, gender="M").first()
        res = self.client().patch(
            '/actors',
            headers={
                "Authorization": "Bearer {}".format(
                    self.director_token)},
            json=dict(
                id=actor.id,
                name="NewName",
                age=22,
                gender="F"))
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])

        patched_actor = data['patched']
        self.assertEqual(actor.id, patched_actor.get('id'))
        self.assertEqual("NewName", patched_actor.get('name'))
        self.assertEqual(22, patched_actor.get('age'))
        self.assertEqual("F", patched_actor.get('gender'))

    def test_executive_should_patch_actor(self):
        actor = Actor.query.filter_by(name="Brad", age=45, gender="M").first()
        res = self.client().patch(
            '/actors',
            headers={
                "Authorization": "Bearer {}".format(
                    self.executive_token)},
            json=dict(
                id=actor.id,
                name="NewName",
                age=22,
                gender="F"))
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])

        patched_actor = data['patched']
        self.assertEqual(actor.id, patched_actor.get('id'))
        self.assertEqual("NewName", patched_actor.get('name'))
        self.assertEqual(22, patched_actor.get('age'))
        self.assertEqual("F", patched_actor.get('gender'))

    def test_assistant_cant_patch_movie(self):
        movie = Movie.query.filter_by(
            title="Once Upon",
            release_date="2019-10-04 19:09:33.77486").first()
        new_title = "New Title"
        new_release_date = "2020-11-04 19:09:33.77486"
        res = self.client().patch(
            '/movies',
            headers={
                "Authorization": "Bearer {}".format(
                    self.assistant_token)},
            json=dict(
                id=movie.id,
                title=new_title,
                release_date=new_release_date))
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 401)
        self.assertFalse(data['success'])

    def test_director_should_patch_movie(self):
        movie = Movie.query.filter_by(
            title="Once Upon",
            release_date="2019-10-04 19:09:33.774860").first()
        new_title = "New Title"
        new_release_date = "2020-11-04 19:09:33.774860"
        res = self.client().patch(
            '/movies',
            headers={
                "Authorization": "Bearer {}".format(
                    self.director_token)},
            json=dict(
                id=movie.id,
                title=new_title,
                release_date=new_release_date))
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        newMovie = Movie.query.get(movie.id)
        self.assertEqual(newMovie.title, new_title)
        self.assertEqual(newMovie.release_date.strftime(
            "%Y-%m-%d %H:%M:%S.%f"), new_release_date)

    def test_executive_should_patch_movie(self):
        movie = Movie.query.filter_by(
            title="Once Upon",
            release_date="2019-10-04 19:09:33.774860").first()
        new_title = "New Title"
        new_release_date = "2020-11-04 19:09:33.774860"
        res = self.client().patch(
            '/movies',
            headers={
                "Authorization": "Bearer {}".format(
                    self.executive_token)},
            json=dict(
                id=movie.id,
                title=new_title,
                release_date=new_release_date))
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertTrue(data['success'])
        newMovie = Movie.query.get(movie.id)
        self.assertEqual(newMovie.title, new_title)
        self.assertEqual(newMovie.release_date.strftime(
            "%Y-%m-%d %H:%M:%S.%f"), new_release_date)

    def test_assistant_cant_delete_actor(self):
        actor = Actor.query.filter_by(name="Brad", age=45, gender="M").first()
        self.assertIsNotNone(actor)
        res = self.client().delete('/actors/{}'.format(
            actor.id),
            headers={"Authorization": "Bearer {}".format(
                self.assistant_token)})
        self.assertEqual(res.status_code, 401)

    def test_director_should_delete_actor(self):
        actor = Actor.query.filter_by(name="Brad", age=45, gender="M").first()
        self.assertIsNotNone(actor)
        res = self.client().delete('/actors/{}'.format(
            actor.id),
            headers={"Authorization": "Bearer {}".format(
                self.director_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertEqual(data['deleted']['id'], actor.id)

    def test_executive_should_delete_actor(self):
        actor = Actor.query.filter_by(name="Brad", age=45, gender="M").first()
        self.assertIsNotNone(actor)
        res = self.client().delete('/actors/{}'.format(
            actor.id),
            headers={"Authorization": "Bearer {}".format(
                self.executive_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertEqual(data['deleted']['id'], actor.id)

    def test_assistant_cant_delete_movie(self):
        movie = Movie.query.filter_by(
            title="Once Upon",
            release_date="2019-10-04 19:09:33.77486").first()
        self.assertIsNotNone(movie)
        res = self.client().delete('/movies/{}'.format(
            movie.id),
            headers={"Authorization": "Bearer {}".format(
                self.assistant_token)})
        self.assertEqual(res.status_code, 401)

    def test_director_cant_delete_movie(self):
        movie = Movie.query.filter_by(
            title="Once Upon",
            release_date="2019-10-04 19:09:33.77486").first()
        self.assertIsNotNone(movie)
        res = self.client().delete('/movies/{}'.format(
            movie.id),
            headers={"Authorization": "Bearer {}".format(
                self.director_token)})
        self.assertEqual(res.status_code, 401)

    def test_executive_should_delete_movie(self):
        movie = Movie.query.filter_by(
            title="Once Upon",
            release_date="2019-10-04 19:09:33.77486").first()
        self.assertIsNotNone(movie)
        res = self.client().delete('/movies/{}'.format(
            movie.id),
            headers={"Authorization": "Bearer {}".format(
                self.executive_token)})
        data = json.loads(res.data)
        self.assertEqual(res.status_code, 200)
        self.assertEqual(data['deleted']['id'], movie.id)

    def test_nonexisting_route(self):
        res = self.client().get('/nonexisting')
        self.assertEqual(res.status_code, 404)
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from models import Actor, Role, ActorRole

# Create the SQLite database and all tables registered on the models' shared metadata.
# (A locally created declarative_base() would have empty metadata and create nothing,
# so the metadata attached to the imported ORM classes is used instead.)
engine = create_engine('sqlite:///actor_roles.db')
Actor.metadata.create_all(engine)

# Bind a session factory to the engine and open a working session.
Session = sessionmaker(bind=engine)
session = Session()
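
# For reference, a minimal sketch of what models.py is assumed to define for the
# many-to-many mapping used below. Only Actor, Role and ActorRole come from the
# import above; the table names, column names and backref are illustrative
# assumptions (and the sketch assumes Column, Integer, String, ForeignKey and
# relationship are imported from sqlalchemy / sqlalchemy.orm):
#
#     class Actor(Base):
#         __tablename__ = 'actors'
#         id = Column(Integer, primary_key=True)
#         name = Column(String, nullable=False)
#
#     class ActorRole(Base):
#         __tablename__ = 'actor_roles'
#         actor_id = Column(Integer, ForeignKey('actors.id'), primary_key=True)
#         role_id = Column(Integer, ForeignKey('roles.id'), primary_key=True)
#
#     class Role(Base):
#         __tablename__ = 'roles'
#         id = Column(Integer, primary_key=True)
#         character = Column(String, nullable=False)
#         actors = relationship('Actor', secondary='actor_roles', backref='roles')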

bale = Actor(name="Christian Bale")
hathaway = Actor(name="Anne Hathaway")
pfeiffer = Actor(name="Michelle Pfeiffer")
keaton = Actor(name="Michael Keaton")
arnett = Actor(name="Will Arnett")

batman = Role(character="Batman")
catwoman = Role(character="Catwoman")
burry = Role(character="Dr. Michael Burry")
american_psycho = Role(character="Patrick Bateman")

batman.actors.append(bale)
batman.actors.append(keaton)
batman.actors.append(arnett)
catwoman.actors.append(pfeiffer)
catwoman.actors.append(hathaway)