Example #1
def learn():
    while len(Chromosome.all()) < 30:
        Chromosome().create()
    test_set = Match.test_set()
    if not test_set:
        print('There are no matches to learn from')
        return
    agent_pool = [Agent(c) for c in Chromosome.all()]
    reserve = []
    n = 1
    with Pool(4) as pool:
        while True:
            try:
                print('Starting Generation {}'.format(n))
                start = datetime.now()
                for n2, agent in enumerate(agent_pool):
                    print('Running Agent {}'.format(n2))
                    guesses = pool.map(agent.guess, test_set)
                    agent.right = guesses.count(True)
                    agent.wrong = guesses.count(False)
                    agent.chromosome.agent_score = agent.accuracy
                    agent.chromosome.save()
                agent_pool.extend(reserve)
                agent_pool.sort(key=get_score, reverse=True)
                print('Generation took {}'.format(datetime.now() - start))
                print('Best Agent: {}% accurate'.format(agent_pool[0].accuracy * 100))
                Chromosome.delete_many(ids=[agent.chromosome.id for agent in agent_pool[15:]])
                reserve = agent_pool[:15]  # keep the un-mutated top agents in reserve
                agent_pool = [Agent(agent.chromosome.mutate()) for agent in agent_pool[:15]]  # mutate the top half
                n += 1
            except KeyboardInterrupt:
                Chromosome.clean()  # keep best 15
                break
    print('Finished learning')
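
The sort above relies on a get_score helper that is not shown in this snippet; a minimal sketch, assuming it simply ranks agents by their accuracy on the test set:

def get_score(agent):
    # Hypothetical helper: order agents by accuracy so the fittest come first.
    return agent.accuracy
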
Example #2
 def test_agent_collisions2(self):
     world = World(5, 5)
     # agent moving north
     agentN = Agent(0)
     agentN.direction = Direction.NORTH
     agentN.move = True
     # agent moving south
     agentS = Agent(1)
     agentS.direction = Direction.SOUTH
     agentS.move = True
     # agent moving east
     agentE = Agent(2)
     agentE.direction = Direction.EAST
     agentE.move = True
     # agent moving west
     agentW = Agent(3)
     agentW.direction = Direction.WEST
     agentW.move = True
     # setup
     world.agents[(2, 0)] = agentN
     world.agents[(2, 4)] = agentS
     world.agents[(1, 1)] = agentE
     world.agents[(3, 1)] = agentW
     # update & test
     world.update()
     self.assertEqual(world.agents[(2, 1)], agentN,
                      '(1) world incorrectly updated north agent')
     self.assertEqual(world.agents[(2, 3)], agentS,
                      '(1) world incorrectly updated south agent')
     self.assertEqual(world.agents[(1, 1)], agentE,
                      '(1) world incorrectly updated east agent')
     self.assertEqual(world.agents[(3, 1)], agentW,
                      '(1) world incorrectly updated west agent')
     world.update()
     self.assertEqual(world.agents[(2, 2)], agentN,
                      '(2) world incorrectly updated north agent')
     self.assertEqual(world.agents[(2, 3)], agentS,
                      '(2) world incorrectly updated south agent')
     self.assertEqual(world.agents[(2, 1)], agentE,
                      '(2) world incorrectly updated east agent')
     self.assertEqual(world.agents[(3, 1)], agentW,
                      '(2) world incorrectly updated west agent')
     world.update()
     self.assertEqual(world.agents[(2, 2)], agentN,
                      '(3) world incorrectly updated north agent')
     self.assertEqual(world.agents[(2, 3)], agentS,
                      '(3) world incorrectly updated south agent')
     self.assertEqual(world.agents[(2, 1)], agentE,
                      '(3) world incorrectly updated east agent')
     self.assertEqual(world.agents[(3, 1)], agentW,
                      '(3) world incorrectly updated west agent')
     self.assertEqual(len(world.agents), 4,
                      'World has incorrect number of agents')
Example #3
def main():
    # Initialize world
    world = World(5, 5)
    world.agents[(0,0)] = Agent()
    world.agents[(0,0)].move = True
    world.agents[(0,0)].direction = Direction.EAST
    world.agents[(3,3)] = Agent()
    world.agents[(3,3)].move = True
    print('0', world)
    # Run simulation
    for t in range(1,10):
        world.update()
        print(t, world)
Example #4
 def test_agent_move_east(self):
     world = World(5, 10)
     agent = Agent(0)
     agent.direction = Direction.EAST
     agent.move = True
     world.agents[(0, 0)] = agent
     self.assertEqual(world.agents[(0, 0)], agent,
                      '(0) world incorrectly set agent position')
     world.update()
     self.assertTrue((0, 0) not in world.agents,
                     'agent not moved from previous location')
     self.assertEqual(world.agents[(1, 0)], agent,
                      '(1) world incorrectly updated to move agent East')
     world.update()
     self.assertEqual(world.agents[(2, 0)], agent,
                      '(2) world incorrectly updated to move agent East')
     world.update()
     self.assertEqual(world.agents[(3, 0)], agent,
                      '(3) world incorrectly updated to move agent East')
     world.update()
     self.assertEqual(world.agents[(4, 0)], agent,
                      '(4) world incorrectly updated to move agent East')
     world.update()
     self.assertEqual(
         world.agents[(0, 0)], agent,
         '(5) world incorrectly updated to move agent East (wrap)')
     self.assertEqual(len(world.agents), 1,
                      'World has incorrect number of agents')
Example #5
def get_command(agent_id):
    agent = Agent.query.get(agent_id)
    if not agent:
        agent = Agent(agent_id)
        db.session.add(agent)
        db.session.commit()
    # Report basic info about the agent
    info = request.json
    if info:
        if 'platform' in info:
            agent.operating_system = info['platform']
        if 'hostname' in info:
            agent.hostname = info['hostname']
        if 'username' in info:
            agent.username = info['username']
    agent.last_online = datetime.now()
    agent.remote_ip = request.remote_addr
    agent.geolocation = geolocation(agent.remote_ip)
    db.session.commit()
    # Return pending commands for the agent
    cmd_to_run = ''
    cmd = agent.commands.order_by(Command.timestamp.desc()).first()
    if cmd:
        cmd_to_run = cmd.cmdline
        db.session.delete(cmd)
        db.session.commit()
    return cmd_to_run
Example #6
def add_new_agent(agent_name):
    """
    валидация и запись нового контрагента в БД
    agent_name (str): имя контрагента из формы на стр. agent_add.html
    """

    agent = session.query(Agent).filter_by(name=agent_name).all()

    # check whether the counterparty already exists in the database
    if agent:
        eel.alert_message(f'Counterparty "{agent[0].name}" already exists')
        print(f'Counterparty "{agent[0].name}" already exists')
        return False

    # validate the counterparty name
    if agent_name:
        print(agent_name)
        new_agent = Agent(name=agent_name)
        session.add(new_agent)
        session.commit()
        new_agent_id = session.query(Agent).filter_by(
            name=agent_name).first().id
        eel.alert_message(
            f'Added new counterparty: {agent_name} with id: {new_agent_id}')
        print(f'Added new counterparty: {agent_name} with id: {new_agent_id}')
    else:
        eel.alert_message(f'Invalid agent name: {agent_name}')
        print(f'Invalid agent name: {agent_name}')
        return False
    return True
Example #7
 def test_agent_move_south(self):
     world = World(10, 5)
     agent = Agent(0)
     agent.direction = Direction.SOUTH
     agent.move = True
     world.agents[(0, 4)] = agent
     self.assertEqual(world.agents[(0, 4)], agent,
                      '(0) world incorrectly set agent position')
     world.update()
     self.assertTrue((0, 4) not in world.agents,
                     'agent not moved from previous location')
     self.assertEqual(world.agents[(0, 3)], agent,
                      '(1) world incorrectly updated to move agent South')
     world.update()
     self.assertEqual(world.agents[(0, 2)], agent,
                      '(2) world incorrectly updated to move agent South')
     world.update()
     self.assertEqual(world.agents[(0, 1)], agent,
                      '(3) world incorrectly updated to move agent South')
     world.update()
     self.assertEqual(world.agents[(0, 0)], agent,
                      '(4) world incorrectly updated to move agent South')
     world.update()
     self.assertEqual(
         world.agents[(0, 4)], agent,
         '(5) world incorrectly updated to move agent South (wrap)')
     self.assertEqual(len(world.agents), 1,
                      'World has incorrect number of agents')
Example #8
def main(
        env_name, n_epoch, learning_rate, gamma, n_hidden,
        seed_val=0, max_steps=1000
):
    '''train an a2c network some gym env'''
    # define env
    env = gym.make(env_name)
    env.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    # define agent
    state_dim, n_actions, action_space_type = get_env_info(env)
    agent = Agent(state_dim, n_hidden, n_actions)
    optimizer = torch.optim.Adam(agent.parameters(), lr=learning_rate)
    # train
    log_step = np.zeros((n_epoch,))
    log_return = np.zeros((n_epoch,))
    log_loss_v = np.zeros((n_epoch,))
    log_loss_p = np.zeros((n_epoch,))
    for i in range(n_epoch):
        cumulative_reward, step, probs, rewards, values = run(
            agent, env, gamma=gamma, max_steps=max_steps
        )
        # update weights
        returns = compute_returns(rewards, gamma=gamma, normalize=True)
        loss_policy, loss_value = compute_a2c_loss(probs, values, returns)
        loss = loss_policy + loss_value
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # log message
        log_step[i] = step
        log_return[i] = cumulative_reward
        log_loss_v[i] = loss_value.item()
        log_loss_p[i] = loss_policy.item()
        if np.mod(i, 10) == 0:
            print(
                'Epoch : %.3d | R: %.2f, steps: %4d | L: pi: %.2f, V: %.2f' %
                (i, log_return[i], log_step[i], log_loss_p[i], log_loss_v[i])
            )

    # save weights
    # ckpt_fname = f'../log/agent-{env_name}-{n_epoch}.pth'
    ckpt_fname = f'../log/agent-{env_name}.pth'
    torch.save(agent.state_dict(), ckpt_fname)

    '''show learning curve: return, steps'''
    f, axes = plt.subplots(2, 1, figsize=(7, 7), sharex=True)
    axes[0].plot(log_return)
    axes[1].plot(log_step)
    axes[0].set_title(f'Learning curve: {env_name}')
    axes[0].set_ylabel('Return')
    axes[1].set_ylabel('#steps')
    axes[1].set_xlabel('Epoch')
    sns.despine()
    f.tight_layout()
    f.savefig(f'../figs/lc-{env_name}.png', dpi=120)
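
The compute_returns helper called in the training loop above is not part of this snippet; a minimal sketch of discounted, optionally normalized returns under that assumption:

import torch

def compute_returns(rewards, gamma=0.99, normalize=False):
    # Walk the rewards backwards, accumulating the discounted sum.
    returns, running = [], 0.0
    for r in reversed(rewards):
        running = r + gamma * running
        returns.insert(0, running)
    returns = torch.tensor(returns)
    if normalize:
        # Z-score the returns to stabilize the policy-gradient update.
        returns = (returns - returns.mean()) / (returns.std() + 1e-8)
    return returns
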
Example #9
def main(env_name, n_hidden):
    '''render the performance of a saved ckpt'''
    # define env and agent
    env = gym.make(env_name).env
    state_dim, n_actions, action_space_type = get_env_info(env)
    agent = Agent(state_dim, n_hidden, n_actions)
    agent.load_state_dict(torch.load(f'../log/agent-{env_name}.pth'))
    agent.eval()
    cumulative_reward, step, probs, rewards, values = run(agent,
                                                          env,
                                                          render=True)
Example #10
def upload():
    to_load = {}
    with open('/tmp/svrdata.pkl', 'rb') as f:
        to_load = cPickle.load(f)
    for d in to_load.get('pos_to_put', []):
        try:
            agent = Agent.select(Agent.q.AgentName == d['name'])[0]
        except IndexError:
            try:
                agent = Agent(**d['data'])
                # because its 'id' is non-standard,
                # SQLObject reports Not Found
            except SQLObjectNotFound:
                agent = Agent.select(Agent.q.AgentName == d['name'])[0]

        # Check whether the table holding the logs for
        # this station already exists
        try:
            rs = conn.queryAll("SELECT SamplingDate FROM %s \
                LIMIT 0, 1" % (agent.table_name))
        except:
            rs = conn.queryAll("CREATE TABLE %s \
                LIKE tpl_agent" % agent.table_name)
        print agent.AgentName
        for l in d['logs']:
            print '\t', l['SamplingDate'], l['SamplingTime']
            sql = "SELECT COUNT(*) FROM %s \
               WHERE SamplingDate='%s' AND \
               SamplingTime='%s'" % (agent.table_name, l['SamplingDate'],
                                     l['SamplingTime'])
            rs = conn.queryAll(sql)
            print '\t\trs[0][0]:', rs[0][0]
            if rs[0][0] == 0:

                sql = "INSERT INTO %s (RID, ReceivedDate, \
                    ReceivedTime, DataType, StatusPort, \
                   SamplingDate, SamplingTime, Temperature, \
                   Humidity, Rain, Rain1, Rain2, Rain3, \
                   Rain4, WLevel, Wlevel1, WLevel2, \
                    WLevel3, WLevel4, up_since, sq) VALUES (%s, '%s', \
                    '%s', %s, '%s', '%s', '%s', %s, %s, \
                    %s, %s, %s, %s, %s, %s, %s, %s, %s, \
                    %s, '%s', %s)" % (
                    agent.table_name, l['RID'], l['ReceivedDate'],
                    l['ReceivedTime'], l['DataType'], l['StatusPort'],
                    l['SamplingDate'], l['SamplingTime'], l['Temperature'],
                    l['Humidity'], l['Rain'], l['Rain1'], l['Rain2'],
                    l['Rain3'], l['Rain4'], l['WLevel'], l['WLevel1'],
                    l['WLevel2'], l['WLevel3'], l['WLevel4'], l['up_since'],
                    l['sq'])
                rs = conn.query(sql)
                print '\tsaved:', l['SamplingDate'], l['SamplingTime']
                del l
Example #11
    def create_new_agent():
        '''
        Create a new Agent
        ---
        tags:
          - Agents
        parameters:
          - name: Agent
            in: body
            schema:
              type: object
              properties:
                name:
                    type: string
                email:
                    type: string
                phone_number:
                    type: string

        responses:
          201:
            description: Returns the new Agent
        '''
        try:
            body = request.get_json()

            if body is None:
                abort(400)

            user_input = {
                "name": body.get('name', None),
                "phone_number": body.get('phone_number', None),
                "email": body.get('email', None)
            }
            agent = Agent(name=user_input['name'],
                          phone_number=user_input['phone_number'],
                          email=user_input['email'])

            db.session.add(agent)
            db.session.commit()

            return jsonify({
                "success": True,
                "result": agent.format(),
                "timestamp": time.time()
            }), 201
        except AuthError:
            abort(401)
        except Exception as err:
            db.session.rollback()
            return jsonify({"success": False, "error": str(err)}), 500
        finally:
            db.session.close()
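
A minimal client call for the endpoint above, assuming it is registered at a hypothetical /agents route on a local development server:

import requests

# The URL and payload values below are illustrative assumptions, not part of the original snippet.
resp = requests.post(
    'http://localhost:5000/agents',
    json={'name': 'Jane Doe', 'email': 'jane@example.com', 'phone_number': '555-0100'},
)
print(resp.status_code, resp.json())
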
Example #12
def add_agent():
    """добавление нового агента"""

    name = input('Enter the counterparty name: ')
    q = session.query(Agent).filter_by(name=name).all()

    # check whether the agent already exists in the database
    if q:
        print(q[0], 'already exists.')
    else:
        new_agent = Agent(name=name)
        session.add(new_agent)
        session.commit()
    return
Example #13
def get_command(agent_id):
    agent = Agent.query.get(agent_id)
    if not agent:
        agent = Agent(agent_id)
        db.session.add(agent)
        db.session.commit()
    # Report basic info about the agent
    info = request.json
    if info:
        if 'platform' in info:
            platform = info['platform']
            platform = str(platform)
            agent.operating_system = base64.b16decode(platform)
        if 'hostname' in info:
            hostname = info['hostname']
            hostname = str(hostname)
            agent.hostname = base64.b16decode(hostname)
        if 'username' in info:
            username = info['username']
            username = str(username)
            agent.username = base64.b16decode(username)
        if 'cpu' in info:
            cpu = info['cpu']
            cpu = str(cpu)
            agent.cpu = base64.b16decode(cpu)
        if 'gpu' in info:
            gpu = info['gpu']
            gpu = str(gpu)
            agent.gpu = base64.b16decode(gpu)
        if 'memory' in info:
            agent.memory = base64.b16decode(info['memory'])
            agent.memory = int(agent.memory) / 2.**30
            agent.memory = ('% 6.2f' % agent.memory) + " GB"
            agent.memory = str(agent.memory)
    agent.last_online = datetime.now()
    agent.remote_ip = request.remote_addr
    agent.geolocation = geolocation(agent.remote_ip)
    db.session.commit()
    # Return pending commands for the agent
    cmd_to_run = ''
    cmd = agent.commands.order_by(Command.timestamp.desc()).first()
    if cmd:
        cmd_to_run = cmd.cmdline
        db.session.delete(cmd)
        db.session.commit()
    return cmd_to_run
Example #14
 def initialize_agent(self, world, i, team, nn_model=None, nn_weights=None):
     agent = Agent(i)
     agent.move = bool(random.getrandbits(1))
     agent.direction = Direction(random.randrange(4))
     agent.team = team
     agent.recurrent_memory = np.array(self.recurrent_nodes * [0.0])
     if nn_model:
         agent.model = nn_model
     else:
         agent.model = self.initialize_agent_nn()
         if nn_weights:
             agent.model.set_weights(nn_weights)
     x = random.randrange(world.width)
     y = random.randrange(world.height)
     while (x, y) in world.agents:
         x = random.randrange(world.width)
         y = random.randrange(world.height)
     world.agents[(x, y)] = agent
     return (x, y)
Example #15
    def save(self, *args, **kwargs):
        """
        Create the new user. If no username is supplied (may be hidden
        via ``ACCOUNTS_PROFILE_FORM_EXCLUDE_FIELDS`` or
        ``ACCOUNTS_NO_USERNAME``), we generate a unique username, so
        that if profile pages are enabled, we still have something to
        use as the profile's slug.
        """
        account_type = self.cleaned_data['type']
        username, email, password1, password2 = (
            self.cleaned_data['username'], self.cleaned_data['email'],
            self.cleaned_data['password1'], self.cleaned_data['password2'])
        print 'user type:{} name:{} email:{}'.format(account_type, username,
                                                     email)

        if password1 != password2:
            raise forms.ValidationError(_("Password not match"))

        try:
            existing_user = get_object_or_404(User, username=username)
            print '1' * 20
            if existing_user:
                raise forms.ValidationError(_("Username already exists"))
        except Http404 as e:
            print e

        try:
            print '2' * 20
            existing_user = get_object_or_404(User, email=email)
            print 'existing_user:{} email:{}'.format(existing_user, email)
            if existing_user:
                raise forms.ValidationError(_("Email already exists"))
        except Http404 as e:
            print e

        new_user = User.objects.create_user(username, email, password1)
        user = authenticate(username=username, password=password1)

        if int(account_type) == 1:
            agent_group = Group.objects.get(name='AgentGroup')
            print agent_group
            new_user.groups.add(agent_group)
            new_user.save()

            agent = Agent(user=new_user,
                          username=username,
                          email=email,
                          password=password1,
                          signup_flag=False)
            agent.save()
        elif int(account_type) == 2:
            customer_group = Group.objects.get(name='CustomerGroup')
            print customer_group
            new_user.groups.add(customer_group)
            new_user.save()

            customer = Customer(user=new_user,
                                username=username,
                                email=email,
                                password=password1,
                                signup_flag=False)
            customer.save()
        else:
            print 'incorrect account_type:{}'.format(account_type)
        return user
Example #16
def train(args):
    chrome_driver_path = args.chrome_driver_path
    checkpoint_path = args.checkpoint_path
    nb_actions = args.nb_actions
    initial_epsilon = args.initial_epsilon
    epsilon = initial_epsilon
    final_epsilon = args.final_epsilon
    gamma = args.gamma
    nb_memory = args.nb_memory
    nb_expolre = args.nb_expolre
    is_debug = args.is_debug
    batch_size = args.batch_size
    nb_observation = args.nb_observation
    desired_fps = args.desired_fps
    is_cuda = True if args.use_cuda and torch.cuda.is_available() else False
    log_frequency = args.log_frequency
    save_frequency = args.save_frequency
    ratio_of_win = args.ratio_of_win
    if args.exploiting:
        nb_observation = -1
        epsilon = final_epsilon

    seed = 22
    np.random.seed(seed)
    memory = deque()
    env = DinoSeleniumEnv(chrome_driver_path, speed=args.game_speed)
    agent = Agent(env)
    game_state = GameState(agent, debug=is_debug)
    qnetwork = QNetwork(nb_actions)
    if is_cuda:
        qnetwork.cuda()
    optimizer = torch.optim.Adam(qnetwork.parameters(), 1e-4)
    tmp_param = next(qnetwork.parameters())
    try:
        m = torch.load(checkpoint_path)
        qnetwork.load_state_dict(m["qnetwork"])
        optimizer.load_state_dict(m["optimizer"])
    except:
        logger.warn("No model found in {}".format(checkpoint_path))
    loss_fcn = torch.nn.MSELoss()
    action_indx = 0  # do nothing as the first action
    screen, reward, is_gameover, score = game_state.get_state(action_indx)
    current_state = np.expand_dims(screen, 0)
    # [IMAGE_CHANNELS,IMAGE_WIDTH,IMAGE_HEIGHT]
    current_state = np.tile(current_state, (IMAGE_CHANNELS, 1, 1))
    initial_state = current_state

    t = 0
    last_time = 0
    sum_scores = 0
    total_loss = 0
    max_score = 0
    qvalues = np.array([0, 0])
    lost_action = []
    win_actions = []
    action_random = 0
    action_greedy = 0
    episodes = 0
    nb_episodes = 0
    if not args.exploiting:
        try:
            t, memory, epsilon, nb_episodes = pickle.load(open(
                "cache.p", "rb"))
        except:
            logger.warn("Could not load cache file! Starting from scratch.")
    try:
        while True:
            qnetwork.eval()
            if np.random.random() < epsilon:  # epsilon greedy
                action_indx = np.random.randint(nb_actions)
                action_random += 1
            else:
                action_greedy += 1
                tensor = torch.from_numpy(current_state).float().unsqueeze(0)
                with torch.no_grad():
                    qvalues = qnetwork(tensor).squeeze()
                _, action_indx = qvalues.max(-1)
                action_indx = action_indx.item()
            if epsilon > final_epsilon and t > nb_observation:
                epsilon -= (initial_epsilon - final_epsilon) / nb_expolre
            screen, reward, is_gameover, score = game_state.get_state(
                action_indx)
            if is_gameover:
                episodes += 1
                nb_episodes += 1
                lost_action.append(action_indx)
                sum_scores += score
            else:
                win_actions.append(action_indx)
            if score > max_score:
                max_score = score
            if last_time:
                fps = 1 / (time.time() - last_time)
                if fps > desired_fps:
                    time.sleep(1 / desired_fps - 1 / fps)
            if last_time and t % log_frequency == 0:
                logger.info('fps: {0}'.format(1 / (time.time() - last_time)))
            last_time = time.time()
            screen = np.expand_dims(screen, 0)
            next_state = np.append(screen,
                                   current_state[:IMAGE_CHANNELS - 1, :, :],
                                   axis=0)
            if not args.exploiting and (is_gameover
                                        or np.random.random() < ratio_of_win):
                memory.append((current_state, action_indx, reward, next_state,
                               is_gameover))
            if len(memory) > nb_memory:
                memory.popleft()
            if nb_observation > 0 and t > nb_observation:
                indxes = np.random.choice(len(memory),
                                          batch_size,
                                          replace=False)
                minibatch = [memory[b] for b in indxes]
                inputs = tmp_param.new(batch_size, IMAGE_CHANNELS, IMAGE_WIDTH,
                                       IMAGE_HEIGHT).zero_()
                targets = tmp_param.new(batch_size, nb_actions).zero_()
                for i, (state_t, action_t, reward_t, state_t1,
                        is_gameover_t1) in enumerate(minibatch):
                    inputs[i] = torch.from_numpy(state_t).float()
                    tensor = inputs[i].unsqueeze(0)
                    with torch.no_grad():
                        qvalues = qnetwork(tensor).squeeze()
                    targets[i] = qvalues
                    if is_gameover_t1:
                        assert reward_t == -1
                        targets[i, action_t] = reward_t
                    else:
                        tensor = torch.from_numpy(state_t1).float().unsqueeze(
                            0)
                        with torch.no_grad():
                            qvalues = qnetwork(tensor).squeeze()
                        qvalues = qvalues.cpu().numpy()
                        targets[i, action_t] = reward_t + gamma * qvalues.max()
                qnetwork.train()
                qnetwork.zero_grad()
                q_values = qnetwork(inputs)
                loss = loss_fcn(q_values, targets)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            current_state = initial_state if is_gameover else next_state
            t += 1
            if t % log_frequency == 0:
                logger.info(
                    "For t {}: mean score is {} max score is {} mean loss: {} number of episode: {}"
                    .format(t, sum_scores / (episodes + 0.1), max_score,
                            total_loss / 1000, episodes))
                logger.info(
                    "t: {} action_index: {} reward: {} max qvalue: {} total number of eposodes so far: {}"
                    .format(t, action_indx, reward, qvalues.max(),
                            nb_episodes))
                tmp = np.array(lost_action)
                dnc = (tmp == 0).sum()
                logger.info(
                    "Lost actions do_nothing: {} jump: {} length of memory {}".
                    format(dnc,
                           len(tmp) - dnc, len(memory)))
                tmp = np.array(win_actions)
                dnc = (tmp == 0).sum()
                logger.info("Win actions do_nothing: {} jump: {}".format(
                    dnc,
                    len(tmp) - dnc))
                logger.info("Greedy action {} Random action {}".format(
                    action_greedy, action_random))
                action_greedy = 0
                action_random = 0
                lost_action = []
                win_actions = []
                if episodes != 0:
                    sum_scores = 0
                total_loss = 0
                episodes = 0
            if t % save_frequency == 0 and not args.exploiting:
                env.pause_game()
                with open("cache.p", "wb") as fh:
                    pickle.dump((t, memory, epsilon, nb_episodes), fh)
                gc.collect()
                torch.save(
                    {
                        "qnetwork": qnetwork.state_dict(),
                        "optimizer": optimizer.state_dict()
                    }, checkpoint_path)
                env.resume_game()
    except KeyboardInterrupt:
        if not args.exploiting:
            torch.save(
                {
                    "qnetwork": qnetwork.state_dict(),
                    "optimizer": optimizer.state_dict()
                }, checkpoint_path)
            with open("cache.p", "wb") as fh:
                pickle.dump((t, memory, epsilon, nb_episodes), fh)
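Example #17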
def ddpg(env: UnityEnvironment,
         agent=None,
         n_episodes=500,
         max_t=2000,
         eps_start=1.0,
         eps_end=0.01,
         eps_decay=0.995,
         seed=0,
         target=30.0):
    """
    Deep Deterministic Policy Gradients
    
    Params
    ======
        env (UnityEnvironment): environment
        agent (Agent): agent responsible for controlling the actions (if None, a new agent is created)
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of steps in each episode
        eps_start (float): starting value of epsilon, used to attenuate the noise applied to the action
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
        seed (int): random seed
        target (float): average score at which the model is saved and training stops

    Return
    ======
        scores: list of scores (over all agents) for each episode
        agent (Agent): trained agent
    """
    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # get size of action and state
    action_size = brain.vector_action_space_size
    state_size = len(env_info.vector_observations[0])

    if not agent:  # create the agent if no agent is provided
        agent = Agent(state_size=state_size,
                      action_size=action_size,
                      random_seed=seed)
    agent.network_summary()  # Print Networks Info (actor and critic)

    all_scores = []  # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start  # initialize epsilon

    print('*** Starting Training ***')
    for i_episode in tqdm(range(1, n_episodes + 1)):
        scores = run_single_episode(env,
                                    brain_name,
                                    agent,
                                    train_mode=True,
                                    epsilon=eps,
                                    max_t=max_t)
        scores_window.append(scores)  # save most recent score to the window
        all_scores.append(scores)  # save most recent score
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon

        print(score_log(i_episode, scores_window, scores), end="")
        # Save the model each 100 or if the target is achieved
        if i_episode % 100 == 0 or np.mean(scores_window) >= target:
            print(score_log(i_episode, scores_window, scores) + '\tSaved!')
            agent.save()
            if np.mean(scores_window) >= target:
                break  # Finish the execution if the target is achieved

    return all_scores, agent
Example #18
    def import_input(self):
        print('import_input running')
        to_load = {}
        with open('/tmp/svrdata.pkl', 'rb') as f:
            to_load = pickle.load(f)
        for d in to_load.get('pos_to_put', []):
            try:
                agent = Agent.select(Agent.q.AgentName == d['name'])[0]
            except IndexError:
                try:
                    agent = Agent(**d['data'])
                    # because its 'id' is non-standard,
                    # SQLObject reports Not Found
                except SQLObjectNotFound:
                    agent = Agent.select(
                        Agent.q.AgentName == d['name'])[0]
            # Check whether the table holding the logs for
            # this station already exists
            print('incoming:', agent.table_name)
            try:
                rs = conn.queryAll("SELECT SamplingDate FROM %s \
                                   LIMIT 0, 1" % (agent.table_name))
            except:
                try:
                    rs = conn.queryAll("CREATE TABLE %s \
                                   LIKE tpl_agent" % agent.table_name)
                except:
                    pass
            for l in d['logs']:
                sql = "SELECT COUNT(*) FROM %s \
                    WHERE SamplingDate='%s' AND \
                    SamplingTime='%s'" % (agent.table_name,
                                          l['SamplingDate'],
                                          l['SamplingTime'])
                rs = conn.queryAll(sql)
                if rs[0][0] == 0:

                    sql = "INSERT INTO %s (RID, ReceivedDate, \
                        ReceivedTime, DataType, StatusPort, \
                        SamplingDate, SamplingTime, Temperature, \
                        Humidity, Rain, Rain1, Rain2, Rain3, \
                        Rain4, WLevel, Wlevel1, WLevel2, \
                        WLevel3, WLevel4, up_since, sq) VALUES (%s, '%s', \
                        '%s', %s, '%s', '%s', '%s', %s, %s, \
                        %s, %s, %s, %s, %s, %s, %s, %s, %s, \
                        %s, '%s', %s)" % (agent.table_name, l['RID'],
                                l['ReceivedDate'],
                                l['ReceivedTime'],
                                l['DataType'],
                                l['StatusPort'],
                                l['SamplingDate'],
                                l['SamplingTime'],
                                l['Temperature'],
                                l['Humidity'],
                                l['Rain'],
                                l['Rain1'],
                                l['Rain2'],
                                l['Rain3'],
                                l['Rain4'],
                                l['WLevel'],
                                l['WLevel1'],
                                l['WLevel2'],
                                l['WLevel3'],
                                l['WLevel4'],
                                l['up_since'],
                                l['sq']) 
                    rs = conn.query(sql)
                    l = None

            try:
                new_pos_data = d['data']
                for k in new_pos_data.keys():
                    setattr(agent, k, new_pos_data[k])
            except:
                pass
        # POS to Del
        for d in to_load.get('pos_to_del', []):
            try:
                agent = Agent.select(
                    Agent.q.AgentName == d['AgentName'])[0]
                agent.destroySelf()
            except:
                pass
        return "Ok"
def dqn(env: UnityEnvironment,
        n_episodes=2000,
        max_t=1000,
        eps_start=1.0,
        eps_end=0.01,
        eps_decay=0.995,
        seed=0,
        save_threshold=13.0):
    """
    Deep Q-Learning Training
    
    Params
    ======
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
        seed (int): random seed
        save_threshold (float): average score at which the model checkpoint is saved
    """
    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # get size of action and state
    action_size = brain.vector_action_space_size
    state_size = len(env_info.vector_observations[0])

    # create the agent
    agent = Agent(state_size=state_size, action_size=action_size, seed=seed)

    scores = []  # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start  # initialize epsilon
    for i_episode in tqdm(range(1, n_episodes + 1)):
        score = run_single_episode(env,
                                   brain_name,
                                   agent,
                                   max_t,
                                   eps,
                                   train_mode=True)
        scores_window.append(score)  # save most recent score
        scores.append(score)  # save most recent score
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon

        print(score_log(i_episode, scores_window), end="")
        if i_episode % 100 == 0:
            if np.mean(
                    scores_window
            ) >= save_threshold:  # save if avg score is higher than the threshold
                print(score_log(i_episode, scores_window) + '\tSaved!')
                torch.save(
                    agent.qnetwork_local.state_dict(),
                    os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 '..', 'results', 'checkpoint.pth'))
                # break
            else:
                print(score_log(i_episode, scores_window))

    return scores, agent
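Example #20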
from datetime import datetime
import os

import pandas as pd

from models import FreeEnergyBarrier, Agent

if __name__ == '__main__':
    X = 200
    Y = 200
    LR = 0.003
    GAMMA = 0.95
    BATCH_SIZE = 30000
    MAX_MEM = 300000
    NUMBER_OF_ACTIONS = 4
    EPSILON = 1.0
    env = FreeEnergyBarrier(X, Y)
    agent = Agent(gamma=GAMMA, epsilon=EPSILON, batch_size=BATCH_SIZE, max_mem_size=MAX_MEM,
                  n_actions=NUMBER_OF_ACTIONS, eps_end=0.01, input_dims=[2], lr=LR)
    scores, eps_history = [], []
    n_runs = 10
    df = pd.DataFrame()
    for i in range(n_runs):
        score = 0
        done = False
        observation = env.reset()
        run = []
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.store_transition(observation, action, reward, observation_, done)
            agent.learn()
            observation = observation_
Example #21
 def put(self):
     agent = Agent(**json.decode(self.request.body))
     agent.put()
Example #22
    def POST(self):
        auth = web.ctx.env.get('HTTP_AUTHORIZATION')
        authreq = False
        allowed = [(u.username, u.password) for u in Authuser.select()]
        if auth is None:
            authreq = True
        else:
            auth = re.sub('^Basic', '', auth)
            username, password = base64.decodestring(auth).split(':')
            password = md5(password).hexdigest()
            auth = None
            if (username, password) in allowed:
                x = web.input()
                of = open('/tmp/svrdata.pkl', 'wb')
                of.write(x['svr_data'])
                of.close()
                to_load = {}
                with open('/tmp/svrdata.pkl', 'rb') as f:
                    to_load = cPickle.load(f)
                for d in to_load.get('pos_to_put', []):
                    try:
                        agent = Agent.select(Agent.q.AgentName == d['name'])[0]
                    except IndexError:
                        try:
                            agent = Agent(**d['data'])
                            # because its 'id' is non-standard,
                            # SQLObject reports Not Found
                        except SQLObjectNotFound:
                            agent = Agent.select(
                                Agent.q.AgentName == d['name'])[0]
                    # Check whether the table holding the logs for
                    # this station already exists
                    try:
                        rs = conn.queryAll("SELECT SamplingDate FROM %s \
                                           LIMIT 0, 1" % (agent.table_name))
                    except:
                        rs = conn.queryAll("CREATE TABLE %s \
                                           LIKE tpl_agent" % agent.table_name)
                    for l in d['logs']:
                        sql = "SELECT COUNT(*) FROM %s \
                            WHERE SamplingDate='%s' AND \
                            SamplingTime='%s'" % (agent.table_name,
                                                  l['SamplingDate'],
                                                  l['SamplingTime'])
                        rs = conn.queryAll(sql)
                        if rs[0][0] == 0:

                            sql = "INSERT INTO %s (RID, ReceivedDate, \
                                ReceivedTime, DataType, StatusPort, \
                                SamplingDate, SamplingTime, Temperature, \
                                Humidity, Rain, Rain1, Rain2, Rain3, \
                                Rain4, WLevel, Wlevel1, WLevel2, \
                                WLevel3, WLevel4, up_since, sq) VALUES (%s, '%s', \
                                '%s', %s, '%s', '%s', '%s', %s, %s, \
                                %s, %s, %s, %s, %s, %s, %s, %s, %s, \
                                %s, '%s', %s)" % (
                                agent.table_name, l['RID'], l['ReceivedDate'],
                                l['ReceivedTime'], l['DataType'],
                                l['StatusPort'], l['SamplingDate'],
                                l['SamplingTime'], l['Temperature'],
                                l['Humidity'], l['Rain'], l['Rain1'],
                                l['Rain2'], l['Rain3'], l['Rain4'],
                                l['WLevel'], l['WLevel1'], l['WLevel2'],
                                l['WLevel3'], l['WLevel4'], l['up_since'],
                                l['sq'])
                            rs = conn.query(sql)
                            l = None

                    try:
                        new_pos_data = d['data']
                        for k in new_pos_data.keys():
                            setattr(agent, k, new_pos_data[k])
                    except:
                        pass
                # POS to Del
                for d in to_load.get('pos_to_del', []):
                    try:
                        agent = Agent.select(
                            Agent.q.AgentName == d['AgentName'])[0]
                        agent.destroySelf()
                    except:
                        pass
                return "Ok"
            else:
                authreq = True
        if authreq:
            web.header('WWW-Authenticate', 'Basic realm="incoming"')
            web.ctx.status = '401 unauthorized'
            return """<html>
Example #23
    p_rm_ob_enc=p_rm_ob_enc, p_rm_ob_rcl=p_rm_ob_rcl,
    n_hidden=n_hidden, n_hidden_dec=n_hidden_dec,
    lr=learning_rate, eta=eta, cmpt=cmpt,
)
# init env
task = SequenceLearning(
    n_param=p.env.n_param, n_branch=p.env.n_branch, pad_len=p.env.pad_len,
    p_rm_ob_enc=p.env.p_rm_ob_enc, p_rm_ob_rcl=p.env.p_rm_ob_rcl,
    def_path=p.env.def_path, def_prob=p.env.def_prob, def_tps=p.env.def_tps,
    similarity_cap_lag=p.n_event_remember,
    similarity_max=similarity_max, similarity_min=similarity_min,
)
# init agent
agent = Agent(
    input_dim=task.x_dim, output_dim=p.a_dim,
    rnn_hidden_dim=p.net.n_hidden, dec_hidden_dim=p.net.n_hidden_dec,
    dict_len=p.net.dict_len, cmpt=p.net.cmpt
)

optimizer_sup = torch.optim.Adam(agent.parameters(), lr=p.net.lr)
scheduler_sup = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer_sup, factor=1 / 2, patience=30, threshold=1e-3, min_lr=1e-8,
    verbose=True)

optimizer_rl = torch.optim.Adam(agent.parameters(), lr=p.net.lr)
scheduler_rl = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer_rl, factor=1 / 2, patience=30, threshold=1e-3, min_lr=1e-8,
    verbose=True)


# create logging dirs
Example #24
 def test_agent_initialization(self):
     agent = Agent(0)
     self.assertEqual(agent.direction, Direction.NORTH,
                      'incorrect initial agent direction')
     self.assertEqual(agent.move, False, 'incorrect initial agent move')
Example #25
task = SequenceLearning(
    n_param=p.env.n_param,
    n_branch=p.env.n_branch,
    p_rm_ob_enc=p_test,
    p_rm_ob_rcl=p_test,
    similarity_cap_lag=p.n_event_remember,
    similarity_max=similarity_max_test,
    similarity_min=similarity_min_test,
)
x_dim = task.x_dim
if attach_cond != 0:
    x_dim += 1
# load the agent back
agent = Agent(input_dim=x_dim,
              output_dim=p.a_dim,
              rnn_hidden_dim=p.net.n_hidden,
              dec_hidden_dim=p.net.n_hidden_dec,
              dict_len=p.net.dict_len)
agent, optimizer = load_ckpt(epoch_load, log_subpath['ckpts'], agent)

# test the model
np.random.seed(seed)
torch.manual_seed(seed)
[results, metrics, XY] = run_tz(agent,
                                optimizer,
                                task,
                                p,
                                n_examples_test,
                                supervised=False,
                                learning=False,
                                get_data=True,
Example #26
 def test_agent_gridlock2(self):
     world = World(3, 3)
     # agent moving north
     agentN = Agent(0)
     agentN.direction = Direction.NORTH
     agentN.move = True
     # agent moving north2
     agentN2 = Agent(1)
     agentN2.direction = Direction.NORTH
     agentN2.move = True
     # agent moving north3
     agentN3 = Agent(2)
     agentN3.direction = Direction.NORTH
     agentN3.move = True
     # agent moving south
     agentS = Agent(3)
     agentS.direction = Direction.SOUTH
     agentS.move = True
     # agent moving east
     agentE = Agent(4)
     agentE.direction = Direction.EAST
     agentE.move = True
     # agent moving west
     agentW = Agent(5)
     agentW.direction = Direction.WEST
     agentW.move = True
     # agent moving west2
     agentW2 = Agent(6)
     agentW2.direction = Direction.WEST
     agentW2.move = True
     # setup
     world.agents[(1, 1)] = agentN
     world.agents[(0, 0)] = agentN2
     world.agents[(2, 0)] = agentN3
     world.agents[(0, 2)] = agentS
     world.agents[(0, 1)] = agentE
     world.agents[(1, 2)] = agentW
     world.agents[(2, 1)] = agentW2
     # update & test
     world.update()
     self.assertEqual(world.agents[(1, 1)], agentN,
                      '(1) world incorrectly updated north agent')
     self.assertEqual(world.agents[(0, 0)], agentN2,
                      '(1) world incorrectly updated north2 agent')
     self.assertEqual(world.agents[(2, 0)], agentN3,
                      '(1) world incorrectly updated north3 agent')
     self.assertEqual(world.agents[(0, 2)], agentS,
                      '(1) world incorrectly updated south agent')
     self.assertEqual(world.agents[(0, 1)], agentE,
                      '(1) world incorrectly updated east agent')
     self.assertEqual(world.agents[(1, 2)], agentW,
                      '(1) world incorrectly updated west agent')
     self.assertEqual(world.agents[(2, 1)], agentW2,
                      '(1) world incorrectly updated west2 agent')
     world.update()
     self.assertEqual(world.agents[(1, 1)], agentN,
                      '(2) world incorrectly updated north agent')
     self.assertEqual(world.agents[(0, 0)], agentN2,
                      '(2) world incorrectly updated north2 agent')
     self.assertEqual(world.agents[(2, 0)], agentN3,
                      '(2) world incorrectly updated north3 agent')
     self.assertEqual(world.agents[(0, 2)], agentS,
                      '(2) world incorrectly updated south agent')
     self.assertEqual(world.agents[(0, 1)], agentE,
                      '(2) world incorrectly updated east agent')
     self.assertEqual(world.agents[(1, 2)], agentW,
                      '(2) world incorrectly updated west agent')
     self.assertEqual(world.agents[(2, 1)], agentW2,
                      '(2) world incorrectly updated west2 agent')
     self.assertEqual(len(world.agents), 7,
                      'World has incorrect number of agents')