def test_ip_reputation_feature(self):
    """
    This method validates that the IPs are updated correctly and that the scores have the proper values.
    """
    self.init()
    self.redis.delete('AI:metadata:[email protected]')
    for from_ipaddress in ['189.20.3.1', '200.20.3.100', '200.20.3.100', '201.20.230.10']:
        metadata = self.redis.hgetall('AI:metadata:[email protected]')
        feat = Features(**self.features)
        if not metadata:
            # Here we feed redis for the first time with metadata. Will be executed only once
            tmetadata = ThemisMetaData(**self.METADATA_ITENS)
            self.redis.hmset('AI:metadata:[email protected]', tmetadata.as_redis)
            # Merge the global features into the metadata
            tmetadata.update_features(**feat.as_dict)
        else:
            tmetadata = ThemisMetaData(**metadata)
        policy = Policy(self.redis)
        pdata = policy.getpolicy('default')
        milter_object = '*****@*****.**'
        milter_subject = 'Subject does matter'
        # Enable feeder feature
        feat.feederFeaturesEnabled = True
        feat.tmetadata = tmetadata
        feat.call_feeders(self.redis, pdata, milter_object, from_ipaddress, milter_subject)
    score_object = self.redis.zrange(feat.ipReputationFeatureGroupByNamespace, 0, -1, withscores=True)[0][1]
    self.assertTrue(len(self.redis.smembers(feat.ipReputationFeatureNamespace)) == 3 and score_object == 3)
def main(env_name, num_episodes, render, VideoSave, gamma, lam, kl_targ, batch_size): killer = GracefulKiller() env, obs_dim, act_dim = init_gym(env_name, render) obs_dim += 1 # add 1 to obs dimension for time step feature (see run_episode()) now = datetime.utcnow().strftime("%b-%d_%H-%M-%S") # create unique directories logger = Logger(logname=env_name, now=now) #aigym_path = os.path.join('/tmp', env_name, now) #env = wrappers.Monitor(env, aigym_path, force=True) scaler = Scaler(obs_dim, env_name) scaler.resume() val_func = NNValueFunction(obs_dim, env_name) policy = Policy(obs_dim, act_dim, kl_targ, env_name) episode = 0 capture = False while episode < num_episodes: if VideoSave and not capture: env.ScreenCapture(5) capture = True trajectories = run_policy(env, policy, scaler, logger, episodes=batch_size) episode += len(trajectories) if killer.kill_now: if input('Terminate training (y/[n])? ') == 'y': break killer.kill_now = False logger.close() policy.close_sess() val_func.close_sess()
def featureMCControl(mdp, epsilon, alpha, iterNum, maxWalkLen=100, echoSE=False):
    """
    qFunc = g_t
    """
    InitParameter = 0.1
    if echoSE:
        squareErrors = []
    policy = Policy(mdp)
    for i in range(len(policy.parameters)):
        policy.parameters[i] = InitParameter
    for _ in range(iterNum):
        if echoSE:
            squareErrors.append(getSquareErrorPolicy(policy))
        states, sFeatures, actions, rewards = [], [], [], []
        state = random.choice(mdp.states)
        sFeature = mdp.getFeature(state)
        isTerminal = False
        count = 0
        while not isTerminal and count < maxWalkLen:
            action = policy.epsilonGreedy(sFeature, epsilon)
            isTerminal, nextState, reward, nextSFeature = mdp.transform(state, action)
            states.append(state)
            sFeatures.append(sFeature)
            rewards.append(reward)
            actions.append(action)
            state = nextState
            sFeature = nextSFeature
            count += 1
        g = 0.0
        for i in range(len(states) - 1, -1, -1):
            g *= mdp.gamma
            g += rewards[i]
        for i in range(len(states)):
            policy.update(sFeatures[i], actions[i], g, alpha)
            g -= rewards[i]
            g /= mdp.gamma
    if echoSE:
        return policy, squareErrors
    else:
        return policy
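The return bookkeeping in the two inner loops above is easy to misread: the backward pass leaves `g` equal to the discounted return of the whole walk, and the forward pass peels one reward off and divides by `gamma` so that each `policy.update` call sees the return from its own time step. A minimal standalone check of that identity (not part of the original code):

import numpy as np

rewards = [1.0, 0.0, 2.0]
gamma = 0.9

g = 0.0
for r in reversed(rewards):   # backward pass: g ends up equal to G_0
    g = r + gamma * g

returns = []
for r in rewards:             # forward pass, mirroring the update loop
    returns.append(g)
    g = (g - r) / gamma       # G_{i+1} = (G_i - r_i) / gamma

expected = [sum(gamma ** k * r for k, r in enumerate(rewards[i:])) for i in range(len(rewards))]
assert np.allclose(returns, expected)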
def main(env_name, num_episodes, render, gamma, lam, kl_targ, batch_size): """ Main training loop Args: env_name: OpenAI Gym environment name, e.g. 'Hopper-v1' num_episodes: maximum number of episodes to run gamma: reward discount factor (float) lam: lambda from Generalized Advantage Estimate kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new) batch_size: number of episodes per policy training batch """ killer = GracefulKiller() env, obs_dim, act_dim = init_gym(env_name, render) obs_dim += 1 # add 1 to obs dimension for time step feature (see run_episode()) now = datetime.utcnow().strftime("%b-%d_%H-%M-%S") # create unique directories logger = Logger(logname=env_name, now=now) scaler = Scaler(obs_dim, env_name) val_func = NNValueFunction(obs_dim, env_name) policy = Policy(obs_dim, act_dim, kl_targ, env_name) # run a few episodes of untrained policy to initialize scaler: run_policy(env, policy, scaler, logger, episodes=5) episode = 0 #capture = False while episode < num_episodes: trajectories = run_policy(env, policy, scaler, logger, episodes=batch_size) episode += len(trajectories) """if episode > 600 and not capture: env.ScreenCapture(5) capture = True""" add_value(trajectories, val_func) # add estimated values to episodes add_disc_sum_rew(trajectories, gamma) # calculated discounted sum of Rs add_gae(trajectories, gamma, lam) # calculate advantage # concatenate all episodes into single NumPy arrays observes, actions, advantages, disc_sum_rew = build_train_set(trajectories) # add various stats to training log: log_batch_stats(observes, actions, advantages, disc_sum_rew, logger, episode) policy.update(observes, actions, advantages, logger) # update policy val_func.fit(observes, disc_sum_rew, logger) # update value function logger.write(display=True) # write logger results to file and stdout scaler.save() if killer.kill_now: if input('Terminate training (y/[n])? ') == 'y': break killer.kill_now = False logger.close() policy.close_sess() val_func.close_sess()
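For reference, `add_disc_sum_rew` and `add_gae` in this training loop correspond to the usual discounted-return and Generalized Advantage Estimation quantities. The sketch below spells those formulas out; it is an assumption about what the helpers compute, not their actual source:

import numpy as np

def discounted_cumsum(x, discount):
    # out[t] = x[t] + discount * x[t+1] + discount^2 * x[t+2] + ...
    out = np.zeros_like(x)
    running = 0.0
    for t in reversed(range(len(x))):
        running = x[t] + discount * running
        out[t] = running
    return out

rewards = np.array([1.0, 1.0, 1.0])
values = np.array([0.5, 0.4, 0.3])
gamma, lam = 0.995, 0.98

disc_sum_rew = discounted_cumsum(rewards, gamma)                 # value-function targets
deltas = rewards + gamma * np.append(values[1:], 0.0) - values   # one-step TD residuals
advantages = discounted_cumsum(deltas, gamma * lam)              # GAE(gamma, lambda)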
def __init__(self, name, init_x, init_y, ch, color):
    self.name = name
    _fn = "dot%s" % self.name
    self.backup = basic.BackUp(_fn)
    self.chr = ch
    self.color = color
    self.V = (0., 0., 0., 0.,)
    """Set of individuals in the group """
    self.P = []
    """Gender counts """
    self.male = 0
    self.female = 0
    """Number of pregnancies """
    self.mating = 0
    self.policy = Policy()
    self.X = init_x + self.policy._func()
    self.Y = init_y + self.policy._func()
    self.load()
def main(): webStr = None queryString = None opts, args = getopt.getopt(sys.argv[1:], "i:q:", ["input", "query"]) for o, a in opts: if o == "-i": webStr = a elif o == "-q": queryString = a if webStr is None or queryString is None: print "Incorrect usage" sys.exit(-1) xsb = XSB() try: webStr = webStr.replace("<newline>", "\n") polStr = "\n".join([l for l in webStr.split("\n") if ":-" in l]) policy = Policy.fromString(escapeCharacters(polStr)) query = Atom.fromElements(Grammar.parseAtom(escapeCharacters(queryString))) policy.processPolicy() policy.checkQuery(query) xsb.loadPolicy(policy) print xsb.query(query) xsb.close() except Exception as e: print "Error:", e xsb.close() sys.exit(-1)
def __init__(self, policy, number_of_tasks=None):
    self.__policy_list = Policy()
    self.__policy = policy
    self.__by_priority = any(self.__policy == pol for pol in self.__policy_list.uses_priority())
    self.__setted_sort = self.__by_priority or policy == self.__policy_list.sjf
    self.__all = []
    self.__ready = []
    self.__finished = 0
    self.__number_of_tasks = number_of_tasks
def lspi(maxiter, epsilon, samples, basis, discount, initial_policy):
    """
    Runs the LSPI algorithm
    """
    iteration = -1
    distance = float('inf')
    policy = initial_policy
    all_policies = [initial_policy]

    while (iteration < maxiter) and (distance > epsilon):
        # print the number of iterations
        iteration = iteration + 1
        print('============================')
        print('LSPI iteration: %i' % iteration)
        if iteration == 0:
            firsttime = 1
        else:
            firsttime = 0

        policy = Policy(policy=policy)
        policy.weights = lstdq(samples, all_policies[iteration], policy)[0]

        diff = policy.weights - all_policies[iteration].weights
        LMAXnorm = LA.norm(diff, np.inf)
        L2norm = LA.norm(diff)
        distance = L2norm

        all_policies.append(policy)

    print('================================')
    if distance > epsilon:
        print('LSPI finished in %i iterations WITHOUT convergence to a fixed point' % iteration)
    else:
        print('LSPI converged in %i iterations' % iteration)
    print()
    print('weights')
    print(policy.weights)
    print()
    return policy, all_policies
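`lstdq` is only called above, not shown. The conventional LSTDQ solve it refers to (Lagoudakis & Parr) builds a linear system from the samples and returns the weight vector, roughly as in this hedged sketch; the simplified signature, the `basis(s, a)` feature map, and the `policy.best_action` accessor are illustrative assumptions, not the referenced implementation:

import numpy as np

def lstdq_sketch(samples, basis, discount, policy):
    """samples: list of (s, a, r, s_next) tuples; basis(s, a) -> feature vector phi."""
    k = len(basis(*samples[0][:2]))
    A = np.zeros((k, k))
    b = np.zeros(k)
    for s, a, r, s_next in samples:
        phi = basis(s, a)
        phi_next = basis(s_next, policy.best_action(s_next))   # greedy action of the evaluated policy (assumed accessor)
        A += np.outer(phi, phi - discount * phi_next)
        b += phi * r
    return np.linalg.solve(A + 1e-6 * np.eye(k), b)             # small ridge term for numerical stability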
def init(self): with open('../config/config.yaml') as f: _, config_features, _ = yaml.load_all(f) self.features = config_features self.redis = redis.StrictRedis('localhost') #self.redis.flushdb() self.redis.set_response_callback('HGETALL', self.hgetall_custom_callback) self.policy = Policy(self.redis) self.add_default_policy()
def main(): bellogFilename = None queryString = None datalogFilename = None opts, args = getopt.getopt(sys.argv[1:], 'i:q:o:', ['input', 'query']) for o, a in opts: if o == '-i': bellogFilename = a elif o == '-q': queryString = a elif o == '-o': datalogFilename = a if bellogFilename is None and (queryString is None or datalogFilename is None): print 'Usage: python', sys.argv[0], '-i <BelLog file> -q <query> [-o <Datalog filename>]' sys.exit(-1) fileStr = open(bellogFilename, 'r').read().strip() polStr = '\n'.join([l for l in fileStr.split('\n') if ':-' in l]) try: policy = Policy.fromString(escapeCharacters(polStr)) if queryString is not None: query = Atom.fromElements(Grammar.parseAtom(escapeCharacters(queryString))) policy.processPolicy() except Exception as e: print 'Error parsing the policy:', e sys.exit(-1) if queryString is not None: xsb = XSB() try: policy.checkQuery(query) xsb.loadPolicy(policy) print 'Query', queryString, ':', xsb.query(query) xsb.close() except Exception as e: print 'Error loading the policy:', e sys.exit(-1) xsb.close() if datalogFilename is not None: if os.path.isfile(datalogFilename): msg = 'Override ' + datalogFilename + '?' shall = True if raw_input("%s (y/N) " % msg).lower() == 'y' else False if not shall: sys.exit(-1) outFile = open(datalogFilename, 'w') for rule in policy.rules: for datalogRule in rule.toDatalogRules(): outFile.write(datalogRule + '.\n') for datalogRule in XSB.STATIC_RULES: outFile.write(datalogRule + '.\n') outFile.close()
def value_iteration(mdp, gamma=0.9, epsilon=0.0001): states = mdp.states actions = mdp.actions policy = Policy(mdp.states, mdp.actions) Vcurrent = np.zeros(len(mdp.states)) Vprevious = None fix_point = False while not fix_point: Vprevious = deepcopy(Vcurrent) for fromstate in states: values = [] for action in mdp.actions: value = 0. for tostate in mdp.get_neighbors(fromstate): p = mdp.get_probability(action, fromstate, tostate) r = mdp.get_reward(action, fromstate, tostate) v = Vprevious[tostate] value += p * (r + gamma * v) values.append(value) Vcurrent[fromstate] = max(values) del values fix_point = np.linalg.norm(Vcurrent - Vprevious, np.inf) < epsilon for fromstate in states: values = [] for action in actions: value = 0. for tostate in mdp.get_neighbors(fromstate): p = mdp.get_probability(action, fromstate, tostate) r = mdp.get_reward(action, fromstate, tostate) v = Vcurrent[tostate] value += p * (r + gamma * v) values.append(value) acts = np.argwhere(values == np.amax(values)).flatten().tolist() for a in acts: policy.set_probability(1. / len(acts), fromstate, a) for a in [ac for ac in actions if ac not in acts]: policy.set_probability(0., fromstate, a) del values return Vcurrent, policy
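The same Bellman optimality backup can be written in a few vectorized lines when the model is available as dense arrays. This is an illustrative alternative; the `P[a, s, s']` / `R[a, s, s']` array layout is an assumption and not the `mdp` interface used above:

import numpy as np

def value_iteration_dense(P, R, gamma=0.9, epsilon=1e-4):
    n_actions, n_states, _ = P.shape
    V = np.zeros(n_states)
    while True:
        # Q[a, s] = sum_s' P[a, s, s'] * (R[a, s, s'] + gamma * V[s'])
        Q = np.einsum('ast,ast->as', P, R + gamma * V[None, None, :])
        V_new = Q.max(axis=0)
        if np.linalg.norm(V_new - V, np.inf) < epsilon:
            return V_new, Q.argmax(axis=0)   # values and one greedy (deterministic) policy
        V = V_new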
def read(self): try: users_fd = open(self.users_file, "r") policies_fd = open(self.policies_file, "r") except IOError as e: self.log(e, "E") return False for line in users_fd.readlines(): l = line.strip().encode().split(b":") # TODO: move this check somewhere else if len(l) > 2: self.log("More than one ':' while parsing users file?", "E") return False if b"group" in l[0][0:5]: self.key_manager.add_group(l[0].split(b" ")[1]) for u in l[1].strip().split(b" "): self.key_manager.add_group_member(u, l[0].split(b" ")[1]) else: self.key_manager.add_user(l[0].strip(), l[1].strip()) users_fd.close() cnt = 0 for line in policies_fd.readlines(): cnt += 1 tokens = line.strip().encode().split(b" ") policy = Policy() policy.parameters = [] prev_token = None for token in tokens: if prev_token == b'-u': policy.user = token elif prev_token == b'-g': policy.group = token elif prev_token == b'-p': policy.parameters.append(token) prev_token = token policy.script = tokens[-1] if not self.policy_manager.add_policy(policy): self.log(" File %s line %d" % (config["path_to_policies"], cnt), "E") policies_fd.close()
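To make the flag handling concrete, here is the shape of a policies-file line this loop would accept (the field values are made up for illustration):

#   -u alice -g admins -p ARG1 -p ARG2 run_backup.sh
#
# parsed as: policy.user = b'alice', policy.group = b'admins',
#            policy.parameters = [b'ARG1', b'ARG2'], policy.script = b'run_backup.sh'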
class BotPlayer(Player):
    """Player when played by the program.

    The action to take for a hand is decided according to a policy.
    At first, the policy is based on an MDP.
    """

    def __init__(self, policy, dealer):
        super(BotPlayer, self).__init__()
        self.policy = Policy(policy)
        self.dealer = dealer

    def choose_action(self):
        # Policy determines choice
        choice = self.policy.action(self.hand_value, self.dealer.cards[0])
        return choice
def test_subject_feature_reset_time(self): """ This method validates if the subject feature is reseted properly. """ self.init() self.redis.delete('AI:metadata:[email protected]') for index in range(0, 3): metadata = self.redis.hgetall('AI:metadata:[email protected]') feat = Features(**self.features) if not metadata: # Here we feed redis for the first time with metadata. Will be executed only one once tmetadata = ThemisMetaData(**self.METADATA_ITENS) self.redis.hmset('AI:metadata:[email protected]', tmetadata.as_redis) # We update tmetadata.update_features(**feat.as_dict) else: tmetadata = ThemisMetaData(**metadata) policy = Policy(self.redis) pdata = policy.getpolicy('default') milter_object = '*****@*****.**' from_ipaddress = '189.10.21.1' milter_subject = 'Repeated Subject' # Enable feeder feature feat.feederFeaturesEnabled = True # 0.5 second in hour pdata.subjectprobation = 0.000138888889 feat.tmetadata = tmetadata feat.call_feeders(self.redis, pdata, milter_object, from_ipaddress, milter_subject) tmetadata = ThemisMetaData(**self.redis.hgetall('AI:metadata:[email protected]')) #print tmetadata.subject_repeated_count # If time sleep is 0.1 the subject_repeated_count will be two (2) time.sleep(0.5) # Must return 1 because the subject is repeated and always reseted to 1 score_milter_object = self.redis.zrange('AI:subjectReputationFeature:groupby', 0, -1, withscores=True)[0][1] self.assertTrue(tmetadata.subject_repeated_count == 1 and score_milter_object == 1)
def test_subject_feature_count(self): """ This method validates if the subject feature is counted properly, generating 3 requests with the same subject, then changing it to validate if the count is correct """ self.init() self.redis.delete('AI:metadata:[email protected]') for index in range(0, 5): metadata = self.redis.hgetall('AI:metadata:[email protected]') feat = Features(**self.features) if not metadata: # Here we feed redis for the first time with metadata. Will be executed only one once tmetadata = ThemisMetaData(**self.METADATA_ITENS) self.redis.hmset('AI:metadata:[email protected]', tmetadata.as_redis) # We update tmetadata.update_features(**feat.as_dict) else: tmetadata = ThemisMetaData(**metadata) policy = Policy(self.redis) pdata = policy.getpolicy('default') milter_object = '*****@*****.**' from_ipaddress = '189.10.21.1' if index == 4: milter_subject = 'Must NOT Update Subject Count, because subject title is different at last loop' else: milter_subject = 'Repeated Subject until index is 3' # Enable feeder feature feat.feederFeaturesEnabled = True feat.tmetadata = tmetadata feat.call_feeders(self.redis, pdata, milter_object, from_ipaddress, milter_subject) tmetadata = ThemisMetaData(**self.redis.hgetall('AI:metadata:[email protected]')) # Must be null because at the index 4 the subject is changed, so the namespace is deleted by this object score_milter_object = self.redis.zrange('AI:subjectReputationFeature:groupby', 0, -1, withscores=True) # Should assert True with 3 because at the index 4 I change the subject name self.assertTrue(tmetadata.subject_repeated_count == 3 and not score_milter_object)
def main(env_name, num_episodes, gamma, lam, kl_targ, batch_size): """ Main training loop Args: env_name: OpenAI Gym environment name, e.g. 'Hopper-v1' num_episodes: maximum number of episodes to run gamma: reward discount factor (float) lam: lambda from Generalized Advantage Estimate kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new) batch_size: number of episodes per policy training batch """ killer = GracefulKiller() #TODO Change init_gym for one of my functions env, obs_dim, act_dim = init_gym(env_name) obs_dim += 1 # add 1 to obs dimension for time step feature (see run_episode()) now = datetime.utcnow().strftime("%b-%d_%H:%M:%S").replace( ":", "_") # create unique directories logger = Logger(logname=env_name, now=now) aigym_path = os.path.join('/tmp', env_name, now) #Change wrappers.Monitor for a class of mine that controls de simulation #Creo que el wrapper no sirve de nada para mi ejemplo #env = wrappers.Monitor(env, aigym_path, force=True) scaler = Scaler(obs_dim) val_func = NNValueFunction(obs_dim) policy = Policy(obs_dim, act_dim, kl_targ) # run a few episodes of untrained policy to initialize scaler: run_policy(env, policy, scaler, logger, episodes=5) episode = 0 while episode < num_episodes: trajectories = run_policy(env, policy, scaler, logger, episodes=batch_size) episode += len(trajectories) add_value(trajectories, val_func) # add estimated values to episodes add_disc_sum_rew(trajectories, gamma) # calculated discounted sum of Rs add_gae(trajectories, gamma, lam) # calculate advantage # concatenate all episodes into single NumPy arrays observes, actions, advantages, disc_sum_rew = build_train_set( trajectories) # add various stats to training log: log_batch_stats(observes, actions, advantages, disc_sum_rew, logger, episode) policy.update(observes, actions, advantages, logger) # update policy val_func.fit(observes, disc_sum_rew, logger) # update value function logger.write(display=True) # write logger results to file and stdout if killer.kill_now: if input('Terminate training (y/[n])? ') == 'y': break killer.kill_now = False logger.close() policy.close_sess() val_func.close_sess()
def policy_iteration(self, policy=None):
    w = self.world
    if policy == None:
        policy = Policy.build_deterministic([0] * w.width * w.height)
    while True:
        old_policy = policy
        vs = self.iterative_policy_evaluation(policy, 20)
        qvs = self.qvs_from_vs(vs)
        policy = self.qvs_to_policy(qvs)
        if policy == old_policy:
            break
    return policy
def main(env_name, num_episodes, gamma, lam, kl_targ, batch_size, hid1_mult, policy_logvar, clipping_range): """ Main training loop Args: env_name: OpenAI Gym environment name, e.g. 'Hopper-v1' num_episodes: maximum number of episodes to run gamma: reward discount factor (float) lam: lambda from Generalized Advantage Estimate kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new) batch_size: number of episodes per policy training batch hid1_mult: hid1 size for policy and value_f (mutliplier of obs dimension) policy_logvar: natural log of initial policy variance """ killer = GracefulKiller() env, obs_dim, act_dim = init_gym(env_name) obs_dim += 1 # add 1 to obs dimension for time step feature (see run_episode()) now = datetime.utcnow().strftime( "%b-%d_%H:%M:%S") # create unique directories logger = Logger(logname=env_name, now=now) scaler = Scaler(obs_dim) val_func = NNValueFunction(obs_dim, hid1_mult) policy = Policy(obs_dim, act_dim, kl_targ, hid1_mult, policy_logvar, clipping_range) # run a few episodes of untrained policy to initialize scaler: run_policy(env, policy, scaler, logger, episodes=5) episode = 0 while episode < num_episodes: # trajectories = run_policy(env, policy, scaler, logger, episodes=batch_size) # episode += len(trajectories) # add_value(trajectories, val_func) # add estimated values to episodes # add_disc_sum_rew(trajectories, gamma) # calculated discounted sum of Rs # add_gae(trajectories, gamma, lam) # calculate advantage # # concatenate all episodes into single NumPy arrays # observes, actions, advantages, disc_sum_rew = build_train_set(trajectories) # # add various stats to training log: # log_batch_stats(observes, actions, advantages, disc_sum_rew, logger, episode) # policy.update(observes, actions, advantages, logger) # update policy # val_func.fit(observes, disc_sum_rew, logger) # update value function # logger.write(display=True) # write logger results to file and stdout if killer.kill_now: if input('Terminate training (y/[n])? ') == 'y': break killer.kill_now = False if episode % 100 == 0: policy.save_sess() logger.close() policy.close_sess() val_func.close_sess()
def run_cartpole():
    env = gym.make('CartPole-v0')
    obs_dim = np.prod(env.observation_space.shape)
    n_actions = env.action_space.n
    policy_hidden_dim = 256
    policy = Policy(obs_dim, policy_hidden_dim, n_actions)
    exp = Experiment(policy, None, env, exp_name="cartpole_basic",
                     train_model=False, calc_inf_gain=False)
    exp.train()
def value_prediction(env: EnvWithModel, pi: Policy, initV: np.array, theta: float) -> Tuple[np.array, np.array]: """ inp: env: environment with model information, i.e. you know transition dynamics and reward function pi: policy initV: initial V(s); numpy array shape of [nS,] theta: exit criteria return: V: $v_\pi$ function; numpy array shape of [nS] Q: $q_\pi$ function; numpy array shape of [nS,nA] """ n_s = env.spec.nS n_a = env.spec.nA trans_mat = env.TD reward_matrix = env.R delta = theta v = initV q = np.zeros((n_s, n_a)) while delta >= theta: delta = 0 for s in range(n_s): current_state_val = v[s] result = 0 for a in range(n_a): trans = trans_mat[s][a] sum_val = 0 for i in range(len(trans)): next_state = i prob = trans[i] sum_val += (prob * (reward_matrix[s][a][next_state] + (env.spec.gamma * v[next_state]))) q[s][a] = sum_val result += pi.action_prob(s, a) * sum_val v[s] = result delta = max(delta, abs(v[s] - current_state_val)) V = v Q = q return V, Q
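An equivalent synchronous (Jacobi-style) sweep can be written with dense arrays; both it and the in-place loop above converge to the same v_pi. This sketch assumes the policy's action probabilities have been materialized into a `pi_mat[s, a]` array, which is not part of the `Policy` interface used above:

import numpy as np

def value_prediction_dense(TD, R, pi_mat, gamma, initV, theta):
    V = initV.astype(float).copy()
    while True:
        # Q[s, a] = sum_s' TD[s, a, s'] * (R[s, a, s'] + gamma * V[s'])
        Q = np.einsum('sat,sat->sa', TD, R + gamma * V[None, None, :])
        V_new = np.einsum('sa,sa->s', pi_mat, Q)
        if np.max(np.abs(V_new - V)) < theta:
            return V_new, Q
        V = V_new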
def parseJsonDict(self, jsonReport): """ Parses the given 'jsonReport' according to the parameters set in the constructor of this ReportParser and returns a Report object. 'jsonReport' is expected to be a Python dict object with attribute names and values corresponding to the definition of CSP violation reports. If 'jsonReport' cannot be parsed because it is syntactically invalid (or empty), Report.INVALID() will be returned. Depending on the configuration of this ReportParser object, some attributes will be parsed to replace their plain string values with a more high-level object representation. """ # replace names renamedReport = dict(map(lambda (key, val): (self._replaceName(key), val), jsonReport.iteritems())) # convert data in report convertedReport = {} deferredSelfURIs = set([]) # all key names that have URIs that are exactly 'self' (handle after parsing everything else) for (key, value) in renamedReport.iteritems(): if key in self._uriKeys: if value.lower().strip() == "self": deferredSelfURIs.add(key) continue else: value = self._uriParser.parse(value) elif key in self._directiveKeys: value = self._directiveParser.parse(value) elif key in self._policyKeys: value = self._policyParser.parse(value) if value in (URI.INVALID(), Directive.INVALID(), Policy.INVALID()): if self._strict: return Report.INVALID() else: continue convertedReport[key] = value # handle deferred parsing of 'self' URIs (they represent the document-uri) for key in deferredSelfURIs: if "document-uri" in self._uriKeys and "document-uri" in convertedReport: convertedReport[key] = convertedReport["document-uri"] elif self._strict: return Report.INVALID() for requiredKey in self._requiredKeys: if not requiredKey in convertedReport: return Report.INVALID() return Report(convertedReport)
def test_tile():
    env = gym.make("MountainCar-v0")
    gamma = 1.
    policy = Policy()
    V = ValueFunctionWithTile(env.observation_space.low,
                              env.observation_space.high,
                              num_tilings=10,
                              tile_width=np.array([.45, .035]))
    semi_gradient_n_step_td(env, 1., policy, 10, 0.01, V, 1000)
    Vs = [V(s) for s in testing_states]
    print(Vs)
    assert np.allclose(Vs, correct_values, 1e-2, 3), \
        f'{correct_values} != {Vs}, but it might be due to stochasticity'
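The tile widths in this test line up with MountainCar-v0's observation bounds: a quarter of each dimension's range, so every one of the 10 tilings covers the space with roughly 4 tiles per dimension.

import numpy as np

low = np.array([-1.2, -0.07])    # MountainCar-v0 position / velocity lower bounds
high = np.array([0.6, 0.07])
print((high - low) / 4)           # -> [0.45  0.035], matching tile_width above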
def handle(self):
    # self.request is the TCP socket connected to the client
    self.data = self.request.recv(128).strip()
    data = eval(self.data)
    if data.has_key('host'):
        Reg.regitsger(data)
        return
    plugin_id = data['id']
    MyQueue.heartbeat_queue.put(data)
    print u"connection from plugin:", plugin_id
    ret_list = Policy.if_update(plugin_id)
    print u"ret_list:", ret_list, plugin_id
    strlist = string.join(ret_list, ":")
    self.request.sendall(strlist)
    print u"sendall:", ret_list, plugin_id
def set_policy(self, policy):
    """
    Change the current execution policy.
    If the given name is not a known policy, it will remain unchanged.
    """
    data = {"status": "OK"}
    orig_policy = self.policy
    self.policy = Policy.factory(policy)
    if self.policy is None:
        data["status"] = "KO"
        data["message"] = "Policy {} is not a known policy".format(policy)
        self.error_log("set_policy", "Policy {} is not a known policy".format(policy))
        self.policy = orig_policy
    return json.dumps(data).encode()
class FinalModel:
    def __init__(self, env):
        # Load your Model here
        self.sess = tf.Session()
        self.saver = tf.train.import_meta_graph('policy_model/test.meta')
        self.saver.restore(self.sess, tf.train.latest_checkpoint('policy_model/'))
        self.action_size = env.action_space.shape[0]
        self.policy = Policy(env.observation_space.shape[0] + 1, self.action_size, 0.003, 10, -1.0, None)

    def get_action(self, state):
        # change to your model
        obs = state.astype(np.float32).reshape((1, -1))
        obs = np.append(obs, [[0]], axis=1)  # add time step feature
        action = self.policy.sample(obs).reshape((1, -1)).astype(np.float32)
        return np.squeeze(action, axis=0)
def initAgent(self):
    # micro-loan business simulation environment instance
    env = SimulationEnv()
    # feature transformer instance to convert numerous outputs of environment into simple numeric variables understood by the RL agent
    ft = FeatureTransformer(env)
    # value function model instance - the brain of the RL agent. Approximates value of each action in every state of environment
    lr = 0.0001  # learning rate defines how adaptive the value function is to new input
    model = Model(env, ft, lr)
    # environment model instance - the planning center of the agent. Predicts future environment states based on the current one
    env_model = EnvironmentModel(env, lr)
    # policy instance - includes different kinds of behaviors the agent can use to interact with the environment
    policy = Policy(env)
    # the agent instance - the guy that uses all of the above in order to optimize whatever you need
    eps = 1  # exploration rate defines how much randomness to use to explore the environment
    gamma = 0.95  # discounting rate defines how quickly the agent forgets its previous experience
    agent = Agent(env, model, env_model, policy, eps, gamma, gamma)
    return agent
def mutation(mutate_pop, p=0.05):
    policy = mutate_pop[0]
    new_policy = Policy(policy.shape, policy.hidden_units, policy.num_actions, policy.game)
    for i in range(len(policy.W)):
        w = np.zeros((policy.W[i].shape[0], policy.W[i].shape[1]))
        b = np.zeros(policy.B[i].shape[0])
        for j in range(len(policy.W[i])):
            for k in range(len(policy.W[i][j])):
                w[j][k] = policy.W[i][j][k] + p * np.random.normal()
        for j in range(len(policy.B[i])):
            b[j] = policy.B[i][j] + p * np.random.normal()
        new_policy.W.append(w)
        new_policy.B.append(b)
    return new_policy
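The element-wise loops above can be collapsed with numpy's vectorized sampling. A behavior-equivalent sketch (it reuses the `Policy` constructor signature from the snippet, so it is not self-contained):

import numpy as np

def mutation_vectorized(mutate_pop, p=0.05):
    policy = mutate_pop[0]
    new_policy = Policy(policy.shape, policy.hidden_units, policy.num_actions, policy.game)
    for W, B in zip(policy.W, policy.B):
        # perturb every weight and bias independently with Gaussian noise of scale p
        new_policy.W.append(W + p * np.random.normal(size=W.shape))
        new_policy.B.append(B + p * np.random.normal(size=B.shape))
    return new_policy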
def main(env_name, num_episodes, gamma, lam, kl_targ, batch_size, TestNote):
    """ Main training loop

    Args:
        env_name: OpenAI Gym environment name, e.g. 'Hopper-v1'
        num_episodes: maximum number of episodes to run
        gamma: reward discount factor (float)
        lam: lambda from Generalized Advantage Estimate
        kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new)
        batch_size: number of episodes per policy training batch
    """
    print('Testing Period:\n')
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    killer = GracefulKiller()
    env, obs_dim, act_dim = init_gym(env_name)
    obs_dim += 1  # add 1 to obs dimension for time step feature (see run_episode())
    env.set_goals(0)
    now = datetime.now().strftime("%b-%d_%H:%M:%S")  # create unique directories (utcnow is Greenwich time, so use now instead)
    testname = now + '-' + TestNote
    logger = Logger(logname=env_name, now=testname)
    aigym_path = os.path.join('log-Test-files', env_name, testname)
    env = wrappers.Monitor(env, aigym_path, force=True)
    scaler = Scaler(obs_dim)
    val_func = NNValueFunction(obs_dim)
    policy = Policy(obs_dim, act_dim, kl_targ)
    # load pretrained weights instead of running untrained episodes to initialize the scaler
    policy.load_model('/home/drl/PycharmProjects/warker_test/log-files/My3LineDirect-v1/Jan-10_07:51:34-A003-SpecGoal-itr15000-g0ExpNo5/checkpoint/My3LineDirect-v1-15000.ckpt')
    episode = 0
    observes, actions, rewards, unscaled_obs, states_x, states_y = rollout(env, policy, scaler,
                                                                           max_path_length=batch_size,
                                                                           animate=True)
    tmp = np.vstack((rewards, states_x, states_y))
    tmp1 = np.transpose(tmp)
    data = np.concatenate((observes, actions, tmp1), axis=1)
    trajectory = {}
    for j in range(data.shape[0]):
        for i in range(data.shape[1]):
            trajectory[i] = data[j][i]
        logger.log(trajectory)
        logger.write(display=False)
    logger.close()
    policy.close_sess()
    val_func.close_sess()
    print('End time:\n')
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
def test(rank, shared_model, counter): with open('log.txt', 'w'): pass env_name = 'Pong-v0' env = gym.make(env_name) env.seed(rank) torch.manual_seed(rank) policy = Policy() policy.eval() policy.cpu() start_time = time.time() while True: policy.load_state_dict(shared_model.state_dict()) h = rnn_model.init_() state = env.reset() reward_sum = 0 episode_length = 0 while True: env.render() a, h = wow(state, h, policy, vae_model, rnn_model) state, reward, done, _ = env.step(a) reward_sum += reward if done: string = "Time {}, num steps {}, FPS {:.0f}, episode reward {}, episode length {}".format( time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start_time)), counter.value, counter.value / (time.time() - start_time), reward_sum, episode_length) print(string) with open('log.txt', 'a') as f: f.write(string + '\n') time.sleep(5) break
def value_prediction(env:EnvWithModel, pi:Policy, initV:np.array, theta:float) -> Tuple[np.array,np.array]: """ inp: env: environment with model information, i.e. you know transition dynamics and reward function pi: policy initV: initial V(s); numpy array shape of [nS,] theta: exit criteria return: V: $v_\pi$ function; numpy array shape of [nS] Q: $q_\pi$ function; numpy array shape of [nS,nA] """ ##################### # TODO: Implement Value Prediction Algorithm (Hint: Sutton Book p.75) ##################### nS = env.spec.nS nA = env.spec.nA gamma = env.spec.gamma TD = env.TD R = env.R V = initV Q = np.zeros((nS,nA)) while True: delta = 0; for i in range(nS): prevVal = V[i] action_sum_temp = 0 for j in range(nA): act_pr = pi.action_prob(i,j) action_sum_temp = action_sum_temp + act_pr * sum(TD[i,j,:] * (R[i,j,:] + gamma*V)) V[i] = action_sum_temp delta = max(delta, abs(V[i]-prevVal) ) if delta<theta: break for s in range(nS): for a in range(nA): Q[s,a] = sum(TD[s,a,:] * (R[s,a,:] + gamma*V)) return V, Q
def optimal(self, taxes=(), entitlement=(), militaristic=False):
    return Policy(
        self.name + "_optimal",
        taxes,
        self.proImm,
        self.prolgbt,
        self.proWar,
        entitlement,
        (
            self.race["white"],
            self.race["black"],
            self.race["amind"],
            self.race["asian"],
            self.race["hawaii"],
            self.race["other"],
        ),
        self.proWar,
        militaristic,
    )
def invade(botnet, network, printing=False): """ Let the botnet invade once the network. :param botnet: a (learning) botnet, set up on this network, and which must have been cleared beforehand :param network: the network to invade :param printing: if True, prints all the details about the invasions :return: a list of all (action, result), the total reward received, and the expected reward of the induced policy (which is constructed by taking the successful actions) """ actions = [] reward = 0 policy = [] t = 0 while not botnet.state.is_full(): action = botnet.choose_action() if printing: print("Action ", t) print("Remaining nodes = %s" % botnet.state.nb_remaining()) print("Attack %s!" % action) success = network.attempt_hijacking(botnet.state, action) if success: policy.append(action) if printing: print("Success\n") else: if printing: print("Failure\n") immediate_reward = network.immediate_reward(botnet.state, action) if success and botnet.state.add(action).is_full(): immediate_reward += botnet.gamma * network.final_reward( botnet.gamma) reward += botnet.time_factor * immediate_reward botnet.receive_reward(action, success, immediate_reward) actions.append((action, success)) t += 1 return actions, reward, Policy(network, policy).expected_reward(botnet.gamma)
def value_prediction(env:EnvWithModel, pi:Policy, initV:np.array, theta:float) -> Tuple[np.array,np.array]: nA = env.spec.nA # Number of actions nS = env.spec.nS # Number of states V = np.zeros(nS) R = env.R # Reward function T = env.TD # State transition function gamma = env.spec.gamma # Gamma """ Iteratively sweep through all states, """ while True: delta = 0 for s in range(nS): value = 0 for a in range(nA): prob = pi.action_prob(s, a) for sp in range(nS): value += (prob * (T[s, a, sp] * (R[s, a, sp] + gamma * initV[sp]))) V[s] = value delta = max(delta, abs(initV[s] - V[s])) """ Check for convergence """ if delta < theta: break initV = V.copy() # Must make an explicit copy """ With the values function having converged, we can now extract the optimal action probabilities for each state, using the Bellman optimality equation. """ Q = np.zeros(shape=(nS, nA)) for s in range(nS): for a in range(nA): for sp in range(nS): Q[s, a] += T[s,a,sp] * (R[s,a,sp] + gamma * V[sp]) return V, Q
def main(env_name, num_episodes, render, gamma, lam, kl_targ, batch_size): """ Main training loop Args: env_name: OpenAI Gym environment name, e.g. 'Hopper-v1' num_episodes: maximum number of episodes to run gamma: reward discount factor (float) lam: lambda from Generalized Advantage Estimate kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new) batch_size: number of episodes per policy training batch """ killer = GracefulKiller() env, obs_dim, act_dim = init_gym(env_name, render) if time_state: obs_dim += 1 # add 1 to obs dimension for time step feature (see run_episode()) now = datetime.utcnow().strftime("%b-%d_%H-%M-%S") # create unique directories logger = Logger(logname=env_name, now=now) scaler = Scaler(obs_dim, env_name) val_func = NNValueFunction(obs_dim, env_name) policy = Policy(obs_dim, act_dim, kl_targ, env_name) # run a few episodes of untrained policy to initialize scaler: run_policy(env, policy, scaler, logger, episodes=5) episode = 0 #capture = False while episode < num_episodes: trajectories = run_policy(env, policy, scaler, logger, episodes=batch_size) episode += len(trajectories) """if episode > 600 and not capture: env.ScreenCapture(5) capture = True""" add_value(trajectories, val_func) # add estimated values to episodes add_disc_sum_rew(trajectories, gamma) # calculated discounted sum of Rs add_gae(trajectories, gamma, lam) # calculate advantage # concatenate all episodes into single NumPy arrays observes, actions, advantages, disc_sum_rew = build_train_set(trajectories) # add various stats to training log: log_batch_stats(observes, actions, advantages, disc_sum_rew, logger, episode) policy.update(observes, actions, advantages, logger) # update policy val_func.fit(observes, disc_sum_rew, logger) # update value function logger.write(display=True) # write logger results to file and stdout scaler.save() if killer.kill_now: if input('Terminate training (y/[n])? ') == 'y': break killer.kill_now = False logger.close() policy.close_sess() val_func.close_sess()
def main(num_episodes, gamma, lam, kl_targ, batch_size, hid1_mult, policy_logvar): """ Main training loop Args: env_name: OpenAI Gym environment name, e.g. 'Hopper-v1' num_episodes: maximum number of episodes to run gamma: reward discount factor (float) lam: lambda from Generalized Advantage Estimate kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new) batch_size: number of episodes per policy training batch hid1_mult: hid1 size for policy and value_f (mutliplier of obs dimension) policy_logvar: natural log of initial policy variance """ killer = GracefulKiller() env, obs_dim, act_dim = init_gym() env_name = "retro" obs_dim += 1 # add 1 to obs dimension for time step feature (see run_episode()) #obs_dim = 215041 obs_dim = 7057 now = datetime.utcnow().strftime( "%b-%d_%H:%M:%S") # create unique directories #aigym_path = os.path.join('/tmp', env_name, now) #env = wrappers.Monitor(env, aigym_path, force=True) scaler = Scaler(obs_dim) val_func = NNValueFunction(obs_dim, hid1_mult) policy = Policy(obs_dim, act_dim, kl_targ, hid1_mult, policy_logvar) # run a few episodes of untrained policy to initialize scaler: run_policy(env, policy, scaler, episodes=5) episode = 0 while episode < num_episodes: trajectories = run_policy(env, policy, scaler, episodes=batch_size) episode += len(trajectories) add_value(trajectories, val_func) # add estimated values to episodes add_disc_sum_rew(trajectories, gamma) # calculated discounted sum of Rs add_gae(trajectories, gamma, lam) # calculate advantage # concatenate all episodes into single NumPy arrays observes, actions, advantages, disc_sum_rew = build_train_set( trajectories) # add various stats to training log: log_batch_stats(observes, actions, advantages, disc_sum_rew, episode) policy.update(observes, actions, advantages) # update policy val_func.fit(observes, disc_sum_rew) # update value function if killer.kill_now: if input('Terminate training (y/[n])? ') == 'y': break killer.kill_now = False policy.close_sess() val_func.close_sess()
def train_and_eval(cfg: BaseConfig): if cfg.path is None: print('cfg.path is None, so FasterAutoAugment is not used') policy = None else: path = Path(hydra.utils.get_original_cwd()) / cfg.path assert path.exists() policy_weight = torch.load(path, map_location='cpu') policy = Policy.faster_auto_augment_policy( num_chunks=cfg.model.num_chunks, **policy_weight['policy_kwargs']) policy.load_state_dict(policy_weight['policy']) train_loader, test_loader, num_classes = DATASET_REGISTRY(cfg.data.name)( batch_size=cfg.data.batch_size, drop_last=True, download=cfg.data.download, return_num_classes=True, norm=[ transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ], num_workers=4) model = MODEL_REGISTRY(cfg.model.name)(num_classes) optimizer = optim.SGD(cfg.optim.lr, momentum=cfg.optim.momentum, weight_decay=cfg.optim.weight_decay, nesterov=cfg.optim.nesterov) scheduler = lr_scheduler.CosineAnnealingWithWarmup( cfg.optim.epochs, cfg.optim.scheduler.mul, cfg.optim.scheduler.warmup) tqdm = callbacks.TQDMReporter(range(cfg.optim.epochs)) c = [callbacks.LossCallback(), callbacks.AccuracyCallback(), tqdm] with EvalTrainer(model, optimizer, F.cross_entropy, callbacks=c, scheduler=scheduler, policy=policy, cfg=cfg.model, use_cuda_nonblocking=True) as trainer: for _ in tqdm: trainer.train(train_loader) trainer.test(test_loader) print(f"Min. Error Rate: {1 - max(c[1].history['test']):.3f}")
def main(): RESTORE_MIMIC = False TRAIN = False EVALUATE_MIMIC = False VISUALIZE_MIMIC = True VISUALIZE_MASTER = False # Open AI gym environment env, obs_dim, act_dim = init_gym('Hopper-v1') # Make master policy which was trained by R masterpolicy = Policy(obs_dim, act_dim, 0.003, stochastic_policy=False) masterpolicy.restore_weights() scaler = Scaler(obs_dim) if VISUALIZE_MASTER: masterpolicy.visualize(env) print("Loaded masterpolicy. ") mimicpolicy = ReactivePolicy(obs_dim, act_dim) if RESTORE_MIMIC: mimicpolicy.restore_weights() if TRAIN: # Train the mimic by master t1 = time.time() train_mimic(mimicpolicy, masterpolicy, env, scaler, n_episodes=25, batchsize=32) print("Training time taken: {}".format(time.time() - t1)) mimicpolicy.save_weights() print("Saved mimic weights") if EVALUATE_MIMIC: # Evaluate the policy by MSE print("Starting evaluation...") mse = evaluate_mimic(mimicpolicy, masterpolicy, scaler, env, 10) print("Average MSE of mimic: {}".format(mse)) if VISUALIZE_MIMIC: # Visualise the policy print("Visualizing policy") mimicpolicy.visualize(env)
def value_prediction(env: EnvWithModel, pi: Policy, initV: np.array, theta: float) -> Tuple[np.array, np.array]: """ inputs: env: environment with model information, i.e. you know transition dynamics and reward function pi: policy initV: initial V(s); numpy array shape of [nS,] theta: exit criteria return: V: $v_\pi$ function; numpy array shape of [nS] Q: $q_\pi$ function; numpy array shape of [nS,nA] """ ##################### # TODO: Implement Value Prediction Algorithm (Hint: Sutton Book p.75) ##################### V = initV.copy() #Arbitrary except terminal states must be 0 delta = theta while delta >= theta: delta = 0 for s in range(env.spec.nS): v = V[s] #action probabilities: pi.action_prob(s) is an array (size nA) #transition dynamics: env.TD[state,action,state_t+1] is an array of probabilities (size nS) #rewards: env.R[state,action,state_t+1] update = 0 for a in range(env.spec.nA): #Note below after action_prob the extra zero is because the array is wrapped in another array since array[None] #puts the array in another array. Though this should always happen since action_prob shouldn't be passed an action update += pi.action_prob(s, a) * np.sum( env.TD[s, a, :] * (env.R[s, a, :] + env.spec.gamma * V)) V[s] = update delta = max(delta, np.abs(v - V[s])) Q = np.zeros((env.spec.nS, env.spec.nA)) for s in range(env.spec.nS): for a in range(env.spec.nA): Q[s, a] = np.sum(env.TD[s, a, :] * (env.R[s, a, :] + env.spec.gamma * V)) #Note: summing over the actions in pi(a|s)*Q(s,a) gives V(s) return V, Q
def test_botnet(botnet, network, nb_trials, window_size=1, real_rewards=False, induced_rewards=False, policy_rewards=True, show=False): """ Plots the expected reward of the induced policy over trainings, and prints the expected reward of the computed policy. :param botnet: :param network: :param nb_trials: :param window_size: :param real_rewards: whether to plot the real rewards received during the training :param induced_rewards: whether to plot the expected rewards of the induced policy :param policy_rewards: whether to plot the expected rewards of the full-exploitation policy :param show: if True, shows the results :return: """ rewards, expected, policy = train(botnet, network, nb_trials) if real_rewards: plot_with_legend(list(range(nb_trials)), soften(rewards, window_size), legend=botnet.type + " real") if induced_rewards: plot_with_legend(list(range(nb_trials)), soften(expected, window_size), legend=botnet.type + " induced") if policy_rewards: plot_with_legend(list(range(nb_trials)), soften(policy, window_size), legend=botnet.type + " policy") print(botnet.compute_policy()) print(botnet.type, Policy(network, botnet.compute_policy()).expected_reward(botnet.gamma), sep='\t') if show: show_with_legend()
def value_prediction(env:EnvWithModel, pi:Policy, initV:np.array, theta:float) -> Tuple[np.array,np.array]: """ inp: env: environment with model information, i.e. you know transition dynamics and reward function pi: policy initV: initial V(s); numpy array shape of [nS,] theta: exit criteria return: V: $v_\pi$ function; numpy array shape of [nS] Q: $q_\pi$ function; numpy array shape of [nS,nA] """ ##################### # TODO: Implement Value Prediction Algorithm (Hint: Sutton Book p.75) ##################### num_states = env.spec.nS num_actions = env.spec.nA V = np.array(initV) Q = np.zeros((num_states,num_actions)) R = env.R TD = env.TD change = theta + 1 while change > theta: change = 0 for i in range(num_states): old_v = V[i] new_v = 0 for j in range(num_actions): sum_a = 0 for k in range(num_states): sum_a += TD[i,j,k]*(R[i,j,k]+env.spec.gamma*V[k]) new_v += pi.action_prob(i,j)*sum_a change = max(change, abs(new_v-old_v)) V[i] = new_v for i in range(num_states): for j in range(num_actions): for k in range(num_states): Q[i][j] += TD[i,j,k]*(R[i,j,k]+env.spec.gamma*V[k]) return V, Q
def main(num_episodes, gamma, lam, kl_targ, batch_size, env_name="Hopper-v2"): """ Main training loop Args: env_name: OpenAI Gym environment name, e.g. 'Hopper-v1' num_episodes: maximum number of episodes to run gamma: reward discount factor (float) lam: lambda from Generalized Advantage Estimate kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new) batch_size: number of episodes per policy training batch """ killer = GracefulKiller() env, obs_dim, act_dim = init_gym(env_name) obs_dim += 1 # add 1 to obs dimension for time step feature (see run_episode()) now = (datetime.datetime.utcnow() - datetime.timedelta(hours=4)).strftime("%b-%d_%H:%M:%S") # create dictionaries based on ets time logger = Logger(logname=env_name, now=now) plotter = Plot(plotname=env_name+"-Fig", now=now) aigym_path = os.path.join('/tmp', env_name, now) # env = wrappers.Monitor(env, aigym_path, force=True) # recording, dir?? scaler = Scaler(obs_dim) # obs_dim=377 val_func = NNValueFunction(obs_dim) policy = Policy(obs_dim, act_dim, kl_targ) # kl target=0.003 by default # run a few episodes of untrained policy to initialize scaler: run_policy(env, policy, scaler, logger, plotter, episodes=5, plot=False) episode = 0 while episode < num_episodes: trajectories = run_policy(env, policy, scaler, logger, plotter, episodes=batch_size) episode += len(trajectories) # length of trajectories equals batch size which by default is 20 plotter.updateEpisodes(episode) add_value(trajectories, val_func) # add estimated values to episodes add_disc_sum_rew(trajectories, gamma) # calculated discounted sum of Rs add_gae(trajectories, gamma, lam) # calculate advantage # concatenate all episodes into single NumPy arrays observes, actions, advantages, disc_sum_rew = build_train_set(trajectories) # add various stats to training log: log_batch_stats(observes, actions, advantages, disc_sum_rew, logger, episode) policy.update(observes, actions, advantages, logger, plotter) # update policy val_func.fit(observes, disc_sum_rew, logger) # update value function logger.write(display=True) # write logger results to file and stdout if killer.kill_now: if input('Terminate training (y/[n])? ') == 'y': break killer.kill_now = False logger.close() plotter.plot() # plt.show() policy.close_sess() val_func.close_sess()
def worker_policy_usr(args, manager, config):
    init_logging_handler(args.log_dir, '_policy_usr')
    if args.config == 'multiwoz':
        print("MultiWoz Agent Usr")
        agent = Policy(None, args, manager, config, 0, 'usr', True)
    elif args.config == 'dstcsgds':
        print("DSTC Agent Usr")
        agent = DSTCPolicy(None, args, manager, config, 0, 'usr', True)
    else:
        raise NotImplementedError('Policy usr of the dataset {} not implemented'.format(args.config))

    best = float('inf')
    for e in range(2):
        agent.imitating(e)
        best = agent.imit_test(e, best)
class FinalModel:
    def __init__(self, env):
        # Load your Model here
        self.action_size = env.action_space.shape[0]
        self.policy = Policy(env.observation_space.shape[0] + 1, self.action_size, 0.003, 10, -1.0, None)
        self.saver = tf.train.import_meta_graph('weights/50score/100000_episodes.meta')
        self.saver.restore(self.policy.sess, tf.train.latest_checkpoint('weights/50score/'))

    def get_action(self, state):
        # scale, offset = Scaler.get()
        # scale[-1] = 1.0
        # offset[-1] = 0.0
        # change to your model
        obs = state.astype(np.float32).reshape((1, -1))
        obs = np.append(obs, [[0]], axis=1)  # add time step feature
        action = self.policy.sample(obs).reshape((1, -1)).astype(np.float32)
        return np.squeeze(action, axis=0)
def run_cartpole_expl():
    env = gym.make('CartPole-v0')
    obs_dim = np.prod(env.observation_space.shape)
    act_dim = np.prod(env.action_space.shape)
    n_actions = env.action_space.n
    policy_hidden_dim = 256
    policy = Policy(obs_dim, policy_hidden_dim, n_actions)
    input_dim = int(obs_dim + act_dim)
    output_dim = int(obs_dim)
    hidden_dim = 64
    model = BNN(input_dim, hidden_dim, output_dim)
    exp = Experiment(policy, model, env, exp_name="cartpole_expl",
                     train_model=True, calc_inf_gain=True)
    exp.train()
def __init__(self, env, sess, horizon, epsilon, learning_rate_policy, learning_rate_value, gamma, lam, logger):
    self.env = env
    self.sess = sess
    self.horizon = horizon
    self.epsilon = epsilon
    self.learning_rate_policy = learning_rate_policy
    self.learning_rate_value = learning_rate_value
    self.gamma = gamma
    self.lam = lam
    self.logger = logger
    self.observation_space = env.observation_space.shape[0]
    self.action_space = env.action_space.shape[0]
    self.policy = Policy(self.observation_space, self.action_space, self.epsilon, self.learning_rate_policy)
    self.value_function = Value_function(self.observation_space, self.learning_rate_value)
    self.replay_memory = ReplayMemory(self.horizon, self.observation_space, self.action_space)
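If `epsilon` here is the PPO clipping parameter (an assumption based on how it is passed into `Policy` alongside the policy learning rate), the surrogate objective the policy network typically maximizes is the clipped probability-ratio term:

import numpy as np

def ppo_clipped_objective(new_logp, old_logp, advantages, epsilon):
    ratio = np.exp(new_logp - old_logp)                       # pi_new(a|s) / pi_old(a|s)
    clipped = np.clip(ratio, 1.0 - epsilon, 1.0 + epsilon)
    return np.mean(np.minimum(ratio * advantages, clipped * advantages))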
def featureQLearning(mdp, epsilon, alpha, iterNum, maxWalkLen=100, echoSE=False): """ qFunc = r + max_{a'}(\gamma * q(\hat{s'}, a')) """ InitParameter = 0.1 if echoSE: squareErrors = [] policy = Policy(mdp) for i in range(len(policy.parameters)): policy.parameters[i] = InitParameter for _ in range(iterNum): if echoSE: squareErrors.append(getSquareErrorPolicy(policy)) state = random.choice(mdp.states) sFeature = mdp.getFeature(state) action = random.choice(mdp.actions) isTerminal = False count = 0 while not isTerminal and count < maxWalkLen: isTerminal, nextState, reward, nextSFeature = mdp.transform(state, action) maxQ = -1.0 for nextAction in mdp.actions: q = policy.qFunc(nextSFeature, nextAction) if maxQ < q: maxQ = q policy.update(sFeature, action, reward + mdp.gamma * maxQ, alpha) action = policy.epsilonGreedy(nextSFeature, epsilon) state = nextState sFeature = nextSFeature count += 1 if echoSE: return policy, squareErrors else: return policy
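The `policy.update(sFeature, action, target, alpha)` call above pairs naturally with a semi-gradient step on a linear Q-function. The helper below spells that step out; it is an illustrative assumption about what the `Policy` class does internally, not its actual code:

import numpy as np

def linear_q_update(theta, phi, action, target, alpha):
    """theta: (n_actions, n_features) weight matrix; q(s, a) = theta[a] . phi."""
    q = theta[action] @ phi
    theta[action] += alpha * (target - q) * phi   # target = r + gamma * max_a' q(s', a') as built in the loop above
    return theta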
def policySARSA(mdp, epsilon, alpha, iterNum, maxWalkLen=100, echoSE=False): """ Actor-Critic: actor update the policy, critic update the value. """ InitParameter = 0.1 if echoSE: squareErrors = [] policy = SoftmaxPolicy(mdp) valuePolicy = Policy(mdp) for i in range(len(policy.parameters)): policy.parameters[i] = InitParameter valuePolicy.parameters[i] = 0.0 for _ in range(iterNum): if echoSE: squareErrors.append(getSquareErrorPolicy(valuePolicy)) state = random.choice(mdp.states) sFeature = mdp.getFeature(state) action = random.choice(mdp.actions) isTerminal = False count = 0 while not isTerminal and count < maxWalkLen: isTerminal, nextState, reward, nextSFeature = mdp.transform(state, action) nextAction = policy.epsilonGreedy(nextSFeature, epsilon) valuePolicy.update(sFeature, action, reward + mdp.gamma * valuePolicy.qFunc(nextSFeature, nextAction), alpha) policy.update(sFeature, action, valuePolicy.qFunc(sFeature, action), alpha) sFeature = nextSFeature action = nextAction count += 1 if echoSE: return policy, squareErrors else: return policy
def savedPolicies(self):
    from policy import Policy
    return Policy.query(ancestor=self.key).filter(Policy.onSyllabus == False).fetch()
self.u=tf.tanh(sampleNormal(mu,tf.exp(logsigma)),name="u") self.policy_vars = [v for v in tf.all_variables() if v.name.startswith(vs.name)] print([v.name for v in self.policy_vars]) def eval(self,sess,x): return sess.run(self.u,{self.x:x}) def set_reward(self,r): # set objectie to minimize tensor -R self.reward = r # scalar self.buildTrain(1e-4) self.buildSummaries() def buildTrain(self,learning_rate): with tf.variable_scope("Optimizer"): optimizer=tf.train.AdamOptimizer(learning_rate, beta1=0.1, beta2=0.1) # beta2=0.1 # maximize reward self.train_op=optimizer.minimize(-self.reward, var_list=self.policy_vars) def buildSummaries(self): tf.scalar_summary("R", self.reward) self.all_summaries = tf.merge_all_summaries() def update(self,sess,feed_dict, write_summary=False): fetches=[self.reward,self.train_op] if write_summary: fetches.append(self.all_summaries) return sess.run(fetches,feed_dict) Policy.register(PlanePolicy)
def policies(self):
    from policy import Policy
    return Policy.query(ancestor=self.key).fetch()
#self.u=tf.tanh(sampleNormal(mu,tf.exp(logsigma)),name="u") self.policy_vars = [v for v in tf.all_variables() if v.name.startswith(vs.name)] print([v.name for v in self.policy_vars]) def eval(self,sess,x): return sess.run(self.u,{self.x:x}) def set_reward(self,r): # set objectie to minimize tensor -R self.reward = r # scalar self.buildTrain(1e-4) self.buildSummaries() def buildTrain(self,learning_rate): with tf.variable_scope("Optimizer"): optimizer=tf.train.AdamOptimizer(learning_rate, beta1=0.1, beta2=0.1) # beta2=0.1 # maximize reward self.train_op=optimizer.minimize(-self.reward, var_list=self.policy_vars) def buildSummaries(self): tf.scalar_summary("R", self.reward) self.all_summaries = tf.merge_all_summaries() def update(self,sess,feed_dict, write_summary=False): fetches=[self.reward,self.train_op] if write_summary: fetches.append(self.all_summaries) return sess.run(fetches,feed_dict) Policy.register(VisuomotorPolicy)
#!/bin/env python

from policy import Policy
import numpy as np


class RandomPolicy(Policy):
    '''
    random policy - for benchmarking E2C model on a 'static' dataset
    '''

    def __init__(self, batch_size, x_dim, u_dim):
        super(RandomPolicy, self).__init__(batch_size, x_dim, u_dim)

    def eval(self, sess, x):
        return np.random.uniform(low=-1., high=1., size=[x.shape[0], self.u_dim])  # np.random.randn(self.u_dim)


Policy.register(RandomPolicy)
def eval(self,sess,x): # ergodicity: if np.random.rand() < .1: return np.random.uniform(low=-1.,high=1.,size=self.u_dim) else: return sess.run(self.u,{self.x:x}) def set_reward(self,r): # set objectie to minimize tensor -R self.reward = r # scalar self.buildTrain(1e-4) self.buildSummaries() def buildTrain(self,learning_rate): with tf.variable_scope("Optimizer"): optimizer=tf.train.AdamOptimizer(learning_rate, beta1=0.1, beta2=0.1) # beta2=0.1 # maximize reward self.train_op=optimizer.minimize(-self.reward, var_list=self.policy_vars) def buildSummaries(self): tf.scalar_summary("R", self.reward) self.all_summaries = tf.merge_all_summaries() def update(self,sess,feed_dict, write_summary=False): fetches=[self.reward,self.train_op] if write_summary: fetches.append(self.all_summaries) return sess.run(fetches,feed_dict) Policy.register(SimplePolicy)
class Obj: """ 群体: """ def __init__(self, name, init_x, init_y, ch, color): self.name = name _fn = "dot%s" % self.name self.backup = basic.BackUp(_fn) self.chr = ch self.color = color self.V = (0., 0., 0., 0.,) """群体的个人集合 """ self.P = [] """性别情况 """ self.male = 0 self.female = 0 """怀孕人数 """ self.mating = 0 self.policy = Policy() self.X = init_x + self.policy._func() self.Y = init_y + self.policy._func() self.load() def move(self): self.V = self.policy.getPolicy() self.X = self.X + (self.V[0]-self.V[2]) self.Y = self.Y + (self.V[1]-self.V[3]) def getPosition(self): return int(self.X), int(self.Y), self.chr, self.color def save(self): _data = { "x": self.X, "y": self.Y, "ch": self.chr, "c": self.color, "persons": [] } for _p in self.P: _data['persons'].append(_p.show_name()) _p.save() self.backup.save(_data) def load(self): _info = self.backup.load() if _info is not None: self.X = _info["x"] self.Y = _info["y"] self.chr = _info["ch"] self.color = _info["c"] for _name in _info["persons"]: _p = resource.Ps(_name) if _p.show_sex() == 'Male': self.male += 1 else: self.female += 1 self.P.append(_p) else: """初值:每个群体100人 """ for _i in range(100): """个体姓名 """ _name = "%s-%s" % (self.name, uuid.uuid4()) _p = resource.Ps(_name) if _p.show_sex() == 'Male': self.male += 1 else: self.female += 1 self.P.append(_p) def time_scale(self, ts): """ 时标处理器 :param ts: 时标 :return: 存活个数,需求总量,男性个数,女性个数,怀孕人数 """ _alive = 0 _requirment = 0 _male = [] _female = [] self.mating = 0 for _p in self.P: if ts % 24 == 0: _requirment += _p.life_one_day() """收集满足交配条件的个人 """ if _p.can_mating("Male"): _male.append(_p) if _p.can_mating("Female"): _female.append(_p) else: _requirment += _p.show() if _p.alive(): _alive += 1 if len(_male) > 0 and len(_female) > 0: """若有满足交配条件的两性,则允许交配 """ for _m in _male: _m.mating('Male') for _f in _female: if _f.mating('Female'): self.mating += 0 for _p in self.P: if _p.is_mating(): self.mating += 1 if _p.birth(): """新生一代 """ _name = "%s-%s" % (self.name, uuid.uuid4()) _p = resource.Ps(_name) if _p.show_sex() == 'Male': self.male += 1 else: self.female += 1 self.P.append(_p) return _alive, _requirment, self.male, self.female, self.mating
def __init__(self, policy, dealer):
    super(BotPlayer, self).__init__()
    self.policy = Policy(policy)
    self.dealer = dealer
import time
import unittest
from itertools import izip

import redis
import yaml

# Assumes the project-local Features, ThemisMetaData, Policy and PolicyData
# classes are importable here.


class FeaturesTestCase(unittest.TestCase):
    """ Tests for the Features class """

    FEATURES_CUSTOM_CALLBACK = {
        'statisticsFeature': bool,
        'evaluateByRecipientFeature': bool,
        'policiesByServerPoolFeature': bool,
        'messagesBySecFeature': bool,
        'messagesBySecStoreDays': int,
        'feederFeaturesEnabled': bool,
        'featuresByServerPool': bool,
        'learnFeature': bool,
        'learnPredictSafeValue': float,
        'learnOnlyOnce': bool,
        'learnEscalationValue': float,
        'learnBlockMinutes': float,
        'rateReputationFeature': bool,
        'rateReputationBlockHitValue': int,
        'rateReputationDecreaseValue': float,
        'rateReputationIncreaseMinutes': float,
        'ipReputationFeature': bool,
        'ipReputationHitValue': int,
        'ipReputationDecreaseValue': float,
        'ipReputationIncreaseMinutes': float,
        'subjectReputationFeature': bool,
        'subjectReputationHitValue': int,
        'subjectReputationDecreaseValue': float,
        'subjectReputationIncreaseMinutes': float,
        'global_custom_block': float,
    }

    DEFAULT_POLICY_PARAMS = {
        'enable': 'TRUE',
        'type': 'regular',
        'priority': 5,
        'jailby': 'Sender:user@domain+',
        'jailheader': 'X-Themis-Quarantine',
        'jailaction': 'monitor',
        'replydata': 'Limit reached. Blocking for %s second(s)',
        'countrcpt': 'FALSE',
        'stophere': 'FALSE',
        'requestsmon': 'FALSE',
        'subjectprobation': 0.5,
        'ipprobation': 0.5,
        'blockprobation': 0.5,
        'countsentprobation': 1
    }

    METADATA_ITENS = {
        'learnFeature': True,
        'learnPredictSafeValue': 10,
        'learnEscalationValue': 1.0,
        'learningBlueMode': True,
        'learningRedMode': False,
        'blue_creation_date': time.time(),
        'last_update': 0,
        'predictBy': 'BLUE',
        'ip_reputation_lastupdate': 0,
        'subject_lastupdate': 0,
        'last_subject': 0,
        'sentmessages_lastupdate': 0,
        'manual_block': False,
        'bypass': True,
        'subject_repeated_count': 0,
        'block_count': 0
    }

    def pairs_to_dict_typed(self, response, type_info):
        it = iter(response)
        result = {}
        for key, value in izip(it, it):
            if key in type_info:
                try:
                    value = type_info[key](value)
                except:
                    # if for some reason the value can't be coerced, just use
                    # the string value
                    pass
            result[key] = value
        return result

    def hgetall_custom_callback(self, response):
        data = dict(ThemisMetaData.METADATA_CUSTOM_CALLBACK.items() + self.FEATURES_CUSTOM_CALLBACK.items())
        return response and self.pairs_to_dict_typed(response, data) or {}

    def init(self):
        with open('../config/config.yaml') as f:
            _, config_features, _ = yaml.load_all(f)
        self.features = config_features
        self.redis = redis.StrictRedis('localhost')
        #self.redis.flushdb()
        self.redis.set_response_callback('HGETALL', self.hgetall_custom_callback)
        self.policy = Policy(self.redis)
        self.add_default_policy()

    def add_default_policy(self):
        default_policy = {'Source': 'any', 'Destination': 'any', 'JailSpec': '1:1000'}
        for default_key, value in self.DEFAULT_POLICY_PARAMS.items():
            if default_key not in default_policy.keys():
                default_policy[default_key] = value
        default_policy['policy_name'] = 'default'
        pdata = PolicyData(**default_policy)
        try:
            self.policy.delete('default')
        except ValueError:
            pass
        self.policy.setpolicy(pdata)

    def test_store_global_features(self):
        """
        This method tests a config file with several key features. It must be
        inserted into redis, then fetched back with the proper types.
        """
        self.init()
        # convert global config features to a Features object
        global_features = Features(**self.features)
        # store global config features in redis
        self.redis.hmset('config:themis:features', global_features.as_redis)
        # fetch the global features back from redis
        global_features = Features(**self.redis.hgetall('config:themis:features'))
        # if we got this far it MUST be TRUE, the validations are made in the Features object
        self.assertTrue(True)

    def test_themis_metadata_override(self):
        """ This method checks if the metadata overrides the global features. """
        self.init()
        tmetadata = ThemisMetaData(**self.METADATA_ITENS)
        # Custom features for ThemisMetaData
        tmetadata.messagesBySecStoreDays = 500
        tmetadata.learnTimeFrameValue = '104:1000'
        tmetadata.ipReputationFeature = True
        tmetadata.learnOnlyOnce = False
        # Include all the global features in the ThemisMetaData, this will not override what is already set
        tmetadata.update_features(**Features(**self.features).as_dict)
        assertResult = False
        # Custom features MUST NOT be overridden
        for key, value in tmetadata.__dict__.items():
            if key == 'messagesBySecStoreDays' and value == 500:
                assertResult = True
            elif key == 'learnTimeFrameValue' and value == '104:1000':
                assertResult = True
            elif key == 'ipReputationFeature' and value is True:
                assertResult = True
            elif key == 'learnOnlyOnce' and value is False:
                assertResult = True
        self.assertTrue(assertResult)

    def test_subject_feature_count(self):
        """
        This method validates that the subject feature is counted properly by
        generating 3 requests with the same subject, then changing the subject
        to validate that the count is correct.
        """
        self.init()
        self.redis.delete('AI:metadata:[email protected]')
        for index in range(0, 5):
            metadata = self.redis.hgetall('AI:metadata:[email protected]')
            feat = Features(**self.features)
            if not metadata:
                # Here we feed redis for the first time with metadata. Will be executed only once
                tmetadata = ThemisMetaData(**self.METADATA_ITENS)
                self.redis.hmset('AI:metadata:[email protected]', tmetadata.as_redis)
                # We update
                tmetadata.update_features(**feat.as_dict)
            else:
                tmetadata = ThemisMetaData(**metadata)
            policy = Policy(self.redis)
            pdata = policy.getpolicy('default')
            milter_object = '*****@*****.**'
            from_ipaddress = '189.10.21.1'
            if index == 4:
                milter_subject = 'Must NOT Update Subject Count, because subject title is different at last loop'
            else:
                milter_subject = 'Repeated Subject until index is 3'
            # Enable feeder feature
            feat.feederFeaturesEnabled = True
            feat.tmetadata = tmetadata
            feat.call_feeders(self.redis, pdata, milter_object, from_ipaddress, milter_subject)
        tmetadata = ThemisMetaData(**self.redis.hgetall('AI:metadata:[email protected]'))
        # Must be empty because at index 4 the subject is changed, so the namespace is deleted by this object
        score_milter_object = self.redis.zrange('AI:subjectReputationFeature:groupby', 0, -1, withscores=True)
        # Should assert True with 3 because at index 4 the subject name changes
        self.assertTrue(tmetadata.subject_repeated_count == 3 and not score_milter_object)

    # NOTE: It is not necessary to test the behavior of all the other features
    # because they share the same implementation
    def test_subject_feature_reset_time(self):
        """ This method validates that the subject feature is reset properly. """
        self.init()
        self.redis.delete('AI:metadata:[email protected]')
        for index in range(0, 3):
            metadata = self.redis.hgetall('AI:metadata:[email protected]')
            feat = Features(**self.features)
            if not metadata:
                # Here we feed redis for the first time with metadata. Will be executed only once
                tmetadata = ThemisMetaData(**self.METADATA_ITENS)
                self.redis.hmset('AI:metadata:[email protected]', tmetadata.as_redis)
                # We update
                tmetadata.update_features(**feat.as_dict)
            else:
                tmetadata = ThemisMetaData(**metadata)
            policy = Policy(self.redis)
            pdata = policy.getpolicy('default')
            milter_object = '*****@*****.**'
            from_ipaddress = '189.10.21.1'
            milter_subject = 'Repeated Subject'
            # Enable feeder feature
            feat.feederFeaturesEnabled = True
            # 0.5 second expressed in hours
            pdata.subjectprobation = 0.000138888889
            feat.tmetadata = tmetadata
            feat.call_feeders(self.redis, pdata, milter_object, from_ipaddress, milter_subject)
            tmetadata = ThemisMetaData(**self.redis.hgetall('AI:metadata:[email protected]'))
            #print tmetadata.subject_repeated_count
            # If the sleep is 0.1 the subject_repeated_count will be two (2)
            time.sleep(0.5)
        # Must return 1 because the subject is repeated and always reset to 1
        score_milter_object = self.redis.zrange('AI:subjectReputationFeature:groupby', 0, -1, withscores=True)[0][1]
        self.assertTrue(tmetadata.subject_repeated_count == 1 and score_milter_object == 1)

    def test_ip_reputation_feature(self):
        """
        This method validates that the ips are updated correctly, and also
        that the scores have the proper values.
        """
        self.init()
        self.redis.delete('AI:metadata:[email protected]')
        for from_ipaddress in ['189.20.3.1', '200.20.3.100', '200.20.3.100', '201.20.230.10']:
            metadata = self.redis.hgetall('AI:metadata:[email protected]')
            feat = Features(**self.features)
            if not metadata:
                # Here we feed redis for the first time with metadata. Will be executed only once
                tmetadata = ThemisMetaData(**self.METADATA_ITENS)
                self.redis.hmset('AI:metadata:[email protected]', tmetadata.as_redis)
                # We update
                tmetadata.update_features(**feat.as_dict)
            else:
                tmetadata = ThemisMetaData(**metadata)
            policy = Policy(self.redis)
            pdata = policy.getpolicy('default')
            milter_object = '*****@*****.**'
            milter_subject = 'Subject does matter'
            # Enable feeder feature
            feat.feederFeaturesEnabled = True
            feat.tmetadata = tmetadata
            feat.call_feeders(self.redis, pdata, milter_object, from_ipaddress, milter_subject)
        score_object = self.redis.zrange(feat.ipReputationFeatureGroupByNamespace, 0, -1, withscores=True)[0][1]
        self.assertTrue(len(self.redis.smembers(feat.ipReputationFeatureNamespace)) == 3 and score_object == 3)
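# Not part of the original snippet: a minimal entry point so the test case above
# can be run directly. It assumes a local redis instance and the
# ../config/config.yaml file referenced by init() are available.
if __name__ == '__main__':
    unittest.main()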
# Assumes the project-local Policy and Status definitions are importable here.
class TaskQueueManager():

    def __init__(self, policy, number_of_tasks=None):
        self.__policy_list = Policy()
        self.__policy = policy
        self.__by_priority = any(self.__policy == pol for pol in self.__policy_list.uses_priority())
        self.__setted_sort = self.__by_priority or policy == self.__policy_list.sjf
        self.__all = []
        self.__ready = []
        self.__finished = 0
        self.__number_of_tasks = number_of_tasks

    def new_task(self, task):
        if isinstance(task, list):
            self.__all += task
        else:
            self.__all.append(task)

    def put_on_ready(self, task):
        index = self.__all.index(task)
        self.__all[index].status = Status.waiting
        self.__ready.append(index)
        # Sort the ready list according to the scheduling policy
        self.__sort()

    def put_on_finished(self, task):
        index = self.__all.index(task)
        self.__all[index].status = Status.finished
        self.__finished += 1

    def placed_on_processor(self, task):
        index = self.__all.index(task)
        self.__all[index].status = Status.running
        try:
            self.__ready.remove(index)
        except ValueError:
            pass

    def get_next_task(self, amount=1):
        task_to_run = []
        for i in range(0, amount):
            if self.__ready:
                index = self.__ready.pop(0)
                self.__all[index].status = Status.running
                task_to_run.append(self.__all[index])
        return task_to_run

    def there_task(self):
        if self.__number_of_tasks is None:
            return True
        else:
            return self.__finished < self.__number_of_tasks

    def increase_wait_time(self):
        if self.__policy == self.__policy_list.rrta:
            for i in self.__ready:
                self.__all[i].wait_time += 1
                self.__all[i].priority += 1  # task aging
        else:
            for i in self.__ready:
                self.__all[i].wait_time += 1

    def get_task_list(self):
        return self.__all

    def get_ready_list(self):
        ready_list = []
        for index in self.__ready:
            ready_list.append(self.__all[index])
        return ready_list

    def __sort(self):
        'Sort the ready list according to the scheduling policy'
        if self.__setted_sort:
            if self.__by_priority:
                self.__ready.sort(key=self.__sort_by_priority(self.__all), reverse=True)
            else:
                self.__ready.sort(key=self.__sort_by_duration(self.__all))

    def __sort_by_priority(self, task_list):
        'Used to sort "self.__ready" by priority of the items of "self.__all"'
        class K(object):
            def __init__(self, obj, *args):
                self.obj = obj
            def __lt__(self, other):
                return task_list[self.obj].priority < task_list[other.obj].priority
            def __gt__(self, other):
                return task_list[self.obj].priority > task_list[other.obj].priority
        return K

    def __sort_by_duration(self, task_list):
        'Used to sort "self.__ready" by time_required of the items of "self.__all"'
        class K(object):
            def __init__(self, obj, *args):
                self.obj = obj
            def __lt__(self, other):
                return task_list[self.obj].time_required < task_list[other.obj].time_required
            def __gt__(self, other):
                return task_list[self.obj].time_required > task_list[other.obj].time_required
        return K
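# Minimal usage sketch (not part of the original source). It assumes the same
# module also provides Policy, Status, and a Task type with `status`, `priority`,
# `time_required`, and `wait_time` attributes; `Task` and its constructor are
# hypothetical, while `Policy().sjf` and `uses_priority()` come from the class above.
policies = Policy()
manager = TaskQueueManager(policies.sjf, number_of_tasks=3)
manager.new_task([Task(time_required=5), Task(time_required=2), Task(time_required=9)])
for task in manager.get_task_list():
    manager.put_on_ready(task)              # ready queue is kept sorted by time_required (SJF)
while manager.there_task():
    for task in manager.get_next_task(amount=1):
        manager.put_on_finished(task)       # shortest job finishes first
    manager.increase_wait_time()            # remaining ready tasks accumulate wait time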
# Assumes the project-local Graph and Policy classes are importable here.
class ReBAC(object):

    def __init__(self, relationship_graph_dict=None):
        """ Initializes the relationship graph """
        # avoid a shared mutable default argument
        if relationship_graph_dict is None:
            relationship_graph_dict = {}
        #print relationship_graph_dict
        self.relationship_graph = Graph(relationship_graph_dict)
        self.policy = Policy()

    def user_list(self):
        """ Returns the users of the relationship graph """
        return self.relationship_graph.list_nodes()

    def relationships(self):
        """ Returns the relationships among the users in the relationship graph """
        return self.relationship_graph.list_edges()

    def create_user(self, username):
        """ If "username" does not exist in the relationship graph, add it as a
        new node; otherwise nothing has to be done """
        if self.relationship_graph.create_node(username):
            print(username + " is created successfully")
        else:
            print(username + " already exists")

    def compute_all_paths(self, source_user, target_user, paths=None):
        """ Returns all paths between two existing users """
        # avoid a shared mutable default argument
        if paths is None:
            paths = []
        return self.relationship_graph.find_all_paths(source_user, target_user, paths)

    def add_relationship(self, username1, username2):
        """ Add a relationship between two existing users; if either user does
        not exist, report an error and return False """
        if self.check_user_existence(username1) and self.check_user_existence(username2):
            if self.check_policy('add_relationship', username1, username2):
                if self.relationship_graph.add_edge(username1, username2):
                    print("relationship successfully created between " + username1 + " and " + username2)
                    return True
            else:
                print("Policy doesn't authorize creating a relationship between " + username1 + " and " + username2)
                return False
        else:
            return False

    def check_policy(self, action_type, source_user, target_user):
        """ Check policies for the different actions """
        paths = self.compute_all_paths(source_user, target_user)
        return self.policy.check_policy(action_type, source_user, target_user, paths)

    def delete_relationship(self, username1, username2):
        """ Delete the relationship between two existing users; if either or
        both users do not exist, report an error and return False """
        if self.check_user_existence(username1) and self.check_user_existence(username2):
            if self.relationship_graph.check_edge(username1, username2):
                if self.check_policy("delete_relationship", username1, username2):
                    if self.relationship_graph.delete_edge(username1, username2):
                        print("Relationship successfully deleted between " + username1 + " and " + username2)
                        return True
                else:
                    print(username1 + " is not authorized to delete relationship with " + username2)
                    return False
            else:
                print(username1 + " doesn't have any relationship with " + username2)
                return False
        else:
            return False

    def access(self, username1, username2):
        """ Check whether a user is allowed to access another user """
        if self.check_user_existence(username1) and self.check_user_existence(username2):
            if self.check_policy("access", username1, username2):
                print(username1 + " is allowed to access " + username2)
                return True
            else:
                print(username1 + " is not authorized to access " + username2)
                return False
        else:
            return False

    def check_user_existence(self, username):
        """ Check the existence of a user """
        if self.relationship_graph.check_node_existence(username):
            return True
        else:
            print("User doesn't exist")
            return False
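# Minimal usage sketch (not part of the original source). It assumes the Graph
# and Policy classes used by ReBAC are importable from the same module and that
# the configured policy authorizes 'add_relationship' and 'access' between the
# users involved.
rebac = ReBAC()
rebac.create_user("alice")
rebac.create_user("bob")
rebac.add_relationship("alice", "bob")   # checked against the 'add_relationship' policy
rebac.access("alice", "bob")             # prints the decision and returns True/False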