from timings import Timings

t1 = Timings(title = "collections", setup = "import collections; s = collections.deque()",
                                    statement = "s.appendleft(100)")
t2 = Timings(title = "lists",       setup = "s = []", 
                                    statement = "s.insert(0,100)")

Timings.titles()
t1.run(100000)
t2.run(100000)
t1.run(200000)
t2.run(200000)
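# The timings module used throughout these examples is not included in this
# listing.  As a rough sketch (the column layout and exact method names are
# assumptions), the benchmark interface the examples rely on can be built on the
# standard timeit module.  Note that the game code further down also uses a
# no-argument Timings() as an accumulator with add()/print()/reset(), which this
# sketch does not cover.
import timeit


class Timings:
    def __init__(self, title, setup, statement):
        self.title = title
        self.setup = setup
        self.statement = statement

    @classmethod
    def titles(cls):
        # Header row matching the columns printed by run().
        print("%-20s %12s %12s" % ("title", "iterations", "seconds"))

    def run(self, iterations):
        # Run `setup` once, then execute `statement` `iterations` times and report the total time.
        elapsed = timeit.timeit(stmt=self.statement, setup=self.setup, number=iterations)
        print("%-20s %12d %12.4f" % (self.title, iterations, elapsed))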
from timings import Timings

Timings.titles()
t1 = Timings(title="class",
             setup="from myprogram import Person",
             statement="p = Person('Susan',25,'London')")
t2 = Timings(title="slots",
             setup="from myprogram import PersonSlots",
             statement="p = PersonSlots('Susan',25,'London')")
t3 = Timings(
    title="dict",
    setup="pass",
    statement="p = {'name' : 'Susan', 'age' : 25, 'address' : 'London' }")

t4 = Timings(
    title="class",
    setup="from myprogram import Person\np = Person('Susan',25,'London')",
    statement="s = p.getDetails()")
t5 = Timings(
    title="slots",
    setup=
    "from myprogram import PersonSlots\np = PersonSlots('Susan',25,'London')",
    statement="s = p.getDetails()")
t6 = Timings(
    title="dict",
    setup="p = {'name' : 'Susan', 'age' : 25, 'address' : 'London' }",
    statement="s = p['name'] + ',' + str(p['age']) + ',' + p['address']")

print "\n*** Creating instances ***"
t1.run(10000000)
t2.run(10000000)
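# myprogram is not shown here either.  A plausible minimal version of the two
# classes being compared (the attribute names follow the constructor calls above;
# the getDetails() format is an assumption matching the dict example):
class Person:
    def __init__(self, name, age, address):
        self.name = name
        self.age = age
        self.address = address

    def getDetails(self):
        return self.name + ',' + str(self.age) + ',' + self.address


class PersonSlots:
    # __slots__ replaces the per-instance __dict__ with fixed attribute storage,
    # which is the difference the "slots" timings measure.
    __slots__ = ('name', 'age', 'address')

    def __init__(self, name, age, address):
        self.name = name
        self.age = age
        self.address = address

    def getDetails(self):
        return self.name + ',' + str(self.age) + ',' + self.address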
# Example 3
from timings import Timings

t1 = Timings(title="compiled regex",
             setup=('import re'
                    '\n'
                    'text = "This line contains the numbers 8.73 and 4.67"'
                    '\n'
                    'numberPattern = r"\d+\.\d+"'
                    '\n'
                    'pattern = re.compile(numberPattern)'),
             statement="result = pattern.search(text)")
t2 = Timings(title="uncompiled regex",
             setup=('import re'
                    '\n'
                    'text = "This line contains the numbers 8.73 and 4.67"'
                    '\n'
                    'numberPattern = r"\d+\.\d+"'),
             statement="result = re.search(numberPattern, text)")

Timings.titles()
t1.run(1000000)
t2.run(1000000)
# Example 6
from timings import Timings
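# Note: xrange exists only in Python 2; under Python 3 both of these would use range.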

t1 = Timings(title = "using xrange", setup = "total = 0", 
                                     statement = "for x in xrange(100 * 1000 * 1000): total = total + x")
t2 = Timings(title = "using range",  setup = "total = 0", 
                                     statement = "for x in range(100 * 1000 * 1000): total = total + x")

Timings.titles()
t1.run(1)
t2.run(1)

class ApplesGame:
    def __init__(self, parameters: dict, session: tf.Session):

        self.session = session
        self.model_type = parameters.get('model_type', 3)
        self.width = parameters.get('width', 1000)
        self.height = parameters.get('height', 1000)
        self.n_apples = parameters.get('n_apples', 100)
        self.n_eyes = parameters.get('n_eyes', 9)
        self.stop = parameters.get('stop', 10000)
        self.speed = parameters.get('speed', 3)
        self.gamma = parameters.get('gamma', .9)
        self.action_buffer = parameters.get('action_buffer', 128)
        self.memory_size = parameters.get('memory_size', 1024 * 10)
        self.eye_length = parameters.get('eye_length', 200)
        self.sensor_levels = parameters.get('sensor_levels', 10)
        self.auto = parameters.get('auto', True)
        self.score_buffer = parameters.get('score_buffer', 10240)
        self.exploration = parameters.get('exploration', .10)
        self.label = parameters.get('label', 'no_label')

        self.repeat = False

        self.layer_size = parameters.get('layer_size')
        self.n_layers = parameters.get('n_layers')
        self.block_size = parameters.get('block_size', 128)
        self.learn_rate = parameters.get('learn_rate', 0.00001)
        self.finished = False
        self.res_blocks = parameters.get('res_blocks', 4)
        self.res_layers = parameters.get('res_layers', 3)
        self.dropout = parameters.get('dropout', False)
        self.layer_norm = parameters.get('layer_norm', False)
        self.tensorboard_dir = parameters.get('tensorboard_dir', "tf-logs")
        self.sensor_width = parameters.get('sensor_width', pi / 2)

        self.board_width = self.width
        self.board_height = self.height / 2
        self.timings = Timings()
        self.status = 'starting'
        self.snap()

        self.ship = Ship(XYPoint(self.board_width / 2, self.board_height / 2), 0,
                         self.board_width, self.board_height, self.n_eyes,
                         self.sensor_levels, self.eye_length, self.sensor_width)
        self.red = 'red'
        self.green = 'green'
        self.up = False
        self.right = False
        self.left = False
        self.score = 0
        self.peak_score = 0
        self.sensor = SensorMap(self.ship)
        self.step = 0
        self.n_actions = 3

        self.model = self.build_model()
        self.policy = PolicyGradientOptimizer(self.model, self.sensor.size, 3, self.memory_size, self.action_buffer,
                                              self.gamma, self.exploration, self.timings)
        self.scores = collections.deque()
        self.reset_apples()

        self.adjust_score(0)
        self.sensor.update(self.apples.values())
        self.next_state = []

    def snap(self):
        # Copy the current source files into a per-run snapshot directory so the
        # exact code that produced this run can be recovered later.
        import os
        snapdir = "snapshots/" + self.label
        print("snapdir is %s" % snapdir)
        os.system("mkdir -p " + snapdir)
        os.system("cp *.py %s" % snapdir)

    def reset_apples(self):
        self.apples = {}
        for i in range(self.n_apples):
            self.add_apple(True)
            self.add_apple(False)

    def build_model(self) -> QModel:
        return QModel(self.session,
                      self.label,
                      self.n_actions,
                      self.sensor.size,
                      self.layer_size,
                      self.dropout,
                      self.res_blocks,
                      self.res_layers,
                      self.n_layers,
                      self.layer_norm,
                      self.learn_rate,
                      self.tensorboard_dir)


    def add_apple(self, is_red: bool):
        xy = XYPoint(randrange(self.board_width), randrange(self.board_height))
        while (xy.x, xy.y) in self.apples:
            xy = XYPoint(randrange(self.board_width), randrange(self.board_height))

        apple = Apple(xy, 10, is_red)
        self.apples[(apple.xy.x, apple.xy.y)] = apple

    def update(self, dt):
        pass


    def manual_step(self):
        action = -1
        if self.up:
            action = 0
        if self.right:
            action = 1
        if self.left:
            action = 2

        if not self.repeat:
            self.up = self.right = self.left = False

        if action != -1:
            self.take_action(action)
            state = self.sensor.as_input()
            action, next_state = self.policy.get_action(state)
            self.next_state = next_state

    def take_action(self, action: int) -> float:
        reward = 0
        if action == 0:
            reward += self.ship.move_forward()
        elif action == 1:
            reward += self.ship.move_right()
        elif action == 2:
            reward += self.ship.move_left()

        t = time.time()
        reward += self.check_for_collision()
        self.timings.add("check_for_collision", time.time() - t)

        t = time.time()
        self.sensor.update(self.apples.values())
        self.timings.add("ship.update_screen", time.time() - t)

        self.step += 1
        self.adjust_score(reward)
        self.model.add_score(self.score_delta(), self.step)
        self.move_random_apple()

        sd = self.score_delta()
        self.status = "score_delta %5.0f peak_score %4.0f memory_size %6d step %6d speed %3d" % (
            sd, self.peak_score, len(self.policy.memory), self.step, self.speed)
        return reward

    def move_random_apple(self):
        # With probability ~1/10000 per apple per step, remove an apple and spawn
        # a replacement at a new random location.  Iterate over a copy because the
        # dict is modified inside the loop.
        for apple in list(self.apples.values()):
            if random.randrange(0, 10000) == 4:
                del self.apples[(apple.xy.x, apple.xy.y)]
                self.add_apple(apple.ripe())

    def auto_step(self):
        state = self.sensor.as_input()

        # the policy used to return a random action if it was not ready but that
        # does not work well when recovering from a saved game.
        if self.policy.ready():
            action, next_state = self.policy.get_action(state)
        else:
            # randint() is *totally* broken.  things like this should return an element
            # where min <= x < max, i.e. in the range [min, max) not [min, max].  blech.
            action, next_state = random.randint(0, self.n_actions - 1), []

        self.next_state = next_state

        reward = self.take_action(action)

        self.policy.add_action(state, action, reward)
        self.policy.train(1, self.block_size)


    def adjust_score(self, amount: float):
        self.score += amount
        self.scores.appendleft(self.score)
        while len(self.scores) > self.score_buffer:
            self.scores.pop()
        sd = self.score_delta()

        if (sd > self.peak_score):
            self.peak_score = sd

        self.status = "score_delta %5.0f peak_score %4.0f score %6.0f, memory_size %6d step %6d speed %3d" % (
            sd, self.peak_score, self.score, len(self.policy.memory), self.step, self.speed)



    def check_for_collision(self) -> int:
        reward = 0
        # Iterate over a copy since eaten apples are removed from the dict in the loop body.
        for apple in list(self.apples.values()):
            if self.ship.can_eat(apple):
                del self.apples[(apple.xy.x, apple.xy.y)]
                if apple.red:
                    reward += 1
                else:
                    reward -= 1
                self.add_apple(apple.ripe())
        return reward


    def score_delta(self) -> int:
        # Score gained over the sliding window of recent scores kept in self.scores.
        if len(self.scores) == 0:
            return self.score
        else:
            return self.score - self.scores[-1]

    def finish(self):
        self.model.close_session()
        score_delta = self.score_delta()
        self.status = "%s: final_score %6.0f score_delta %6.0f peak %6.0f step %6.0f" % (self.label, self.score, score_delta, self.peak_score, self.step)
        print(self.status)

        self.finished = True
        self.policy.timings.print()
        sys.stdout.flush()


    def run_no_window(self):
        start = time.time()
        for i in range(self.stop):
            self.auto_step()
            if i > 0 and i % 1000 == 0:
                now = time.time()
                print("%6.2f %s" % (now - start, self.status))
                start = now

        self.finish()
# Example 8
from timings import Timings

t1 = Timings(title="attribute lookup",
             setup="from myprogram import Point; p = Point(15, 33)",
             statement="distance = p.x * p.y")
t2 = Timings(
    title="using locals",
    setup="from myprogram import Point; p = Point(15, 33); x = p.x; y = p.y",
    statement="distance = x * y")

Timings.titles()
t1.run(1000000)
t2.run(1000000)
t1.run(5000000)
t2.run(5000000)
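# Point from myprogram is not shown; any simple attribute holder exhibits the
# lookup cost being measured.  An assumed minimal definition:
class Point:
    def __init__(self, x, y):
        self.x = x
        self.y = y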
def main():
    # This makes a simple Atari Pong game.  The "-ram-" in the name means we observe
    # the 128 bytes of Atari RAM rather than the rendered pixels.
    p = gym.make('Pong-ram-v0')
    p._max_episode_steps = 1000000

    # Set up a few things: we need the initial state so we can figure out its size and
    # make the neural network's input match it.
    initial_state = p.reset()
    state_size: int = initial_state.size
    n_actions: int = p.action_space.n

    # set up a few initial things about tensorflow.
    session = tf.Session()
    global_step_tensor = tf.Variable(0, trainable=False, name='global_step')
    increment_global_step = global_step_tensor.assign_add(1)

    # find an empty place to store output for tensorboard (a web app to look at results.)
    run_counter = 0
    tensorboard_dir = "../tensorboard/pong-simple-%03d" % run_counter
    while os.path.exists(tensorboard_dir):
        run_counter += 1
        tensorboard_dir = "../tensorboard/pong-simple-%03d" % run_counter

    print("tensorboard_dir: %s" % tensorboard_dir)

    # build a simple model.
    model = QModel(session,
                   "pong-model",
                   n_actions=n_actions,
                   state_size=state_size,
                   layer_size=512,
                   dropout=True,
                   res_blocks=0,
                   res_layers=0,
                   layers=3,
                   layer_norm=True,
                   learn_rate=.00001,
                   tensorboard_dir=tensorboard_dir)

    # this is a little widget to keep track of how much time we spend doing things
    timings = Timings()
    # this is the thing that tracks the State-Action-Reward tuples and trains the network.
    policy_optimizer = PolicyGradientOptimizer(model=model,
                                               input_size=state_size,
                                               n_actions=n_actions,
                                               max_memory_size=1024 * 1024,
                                               action_buffer_size=102400,
                                               gamma=.999,
                                               exploration=.05,
                                               timings=timings)

    # this allows us to restart from wherever we left off.  These checkpoints can also be used by
    # "saved_pong.py" to watch how the game is currently playing.
    saver = tf.train.Saver()
    checkpoint_file = "../checkpoints/pong-simple/ckpt"
    saved_checkpoint_path = tf.train.latest_checkpoint(
        "../checkpoints/pong-simple/")
    from_saved_model = False
    if saved_checkpoint_path is not None:
        print("restoring from %s" % saved_checkpoint_path)
        saver.restore(session, saved_checkpoint_path)
        from_saved_model = True

    # this global_step tensor is really just a counter for the current step but I want to be able to
    # recover it after a restart so I store it and increment it within tensorflow.
    global_step = tf.train.global_step(session, global_step_tensor)
    training_started = True

    # this is the outermost loop.  it will never exit.  each new game will start at the top of this loop.
    while True:
        initial_state = p.reset()
        done = False
        summed_rewards = 0
        raw_score = 0
        steps = 0.0
        state = initial_state.reshape([
            1, 128
        ])  # this starts out as a vector and needs to be a [1x128] matrix.
        # this loop runs one game.  At the end of the game the step function returns done=True
        while not done:
            steps += 1.0

            # uncomment this line to watch the pong games play as it learns.  This will slow things down, a lot.
            # p.render()

            # if we have loaded a saved model or if we have begun training then we can use the model to choose
            # an action.  Otherwise, pick one at random.
            if from_saved_model or policy_optimizer.ready():
                action, _ = policy_optimizer.get_action(state)
            else:
                action = p.action_space.sample()

            new_state, reward, done, info = p.step(action)
            raw_score += reward
            # Using just the reward from the game wasn't very effective.  It did not learn to play well.
            # after thinking about it I considered a situation where the game plays a long point and loses at the end.
            # It may have properly returned the ball 10 or 20 times but those *all* get told that they messed up.
            # I decided that only the actions before missing the ball should get a negative reward and everything else
            # should be positive.  This worked great.  Since it takes about 50 frames (steps) for the ball to go from
            # the left edge to the right edge, I put in a rule that if it loses the point, the last 50 frames get a
            # negative reward and everything else gets +.5.
            #
            # Winning points get +1 for all frames.
            if reward < 0:
                count = 0
                for sar in policy_optimizer.action_buffer:
                    if (count < 50):
                        sar.add_reward(-1)
                    else:
                        sar.add_reward(.5)
                    policy_optimizer.add_to_memory(sar)
                    count += 1
                policy_optimizer.action_buffer.clear()
            elif reward > 0:
                policy_optimizer.add_action(state, action,
                                            reward + steps / 400)
                policy_optimizer.flush_action_buffer()
            elif reward == 0:
                policy_optimizer.add_action(state, action, reward)

            # This is where the magic happens.  randomly select 32 of the actions we've taken and
            # see what we can learn about them.
            policy_optimizer.train(1, 32)

            state = new_state.reshape([1, 128])
            summed_rewards += reward

        # increment the global step and save our progress after each complete game (not point).
        session.run(increment_global_step)
        global_step = tf.train.global_step(session, global_step_tensor)
        print(
            "iteration %6d steps %6d, training_score: % 6.2f raw_score % 6.2f"
            % (global_step, steps, summed_rewards, raw_score))
        saver.save(session, checkpoint_file, global_step=global_step)

        # this logs things to tensorboard
        model.add_score(steps, global_step)
        # if we are a multiple of 10 steps print out the timings.  I know what these are by now
        # but it's interesting to see.
        if ((global_step + 1) % 10 == 0):
            timings.print()
            timings.reset()
from timings import Timings
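# Note: the 'c' array typecode used below exists only in Python 2 (it was removed in Python 3).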

t1 = Timings(title="+=",
             setup="s = ''",
             statement="for i in range(100000000): s += 'a'")
t2 = Timings(title="array",
             setup="import array",
             statement="s = ''.join(array.array('c', ['a']*100000000))")
t3 = Timings(title="comprehensions",
             setup="pass",
             statement="s = ''.join(['a' for n in range(100000000)])")

Timings.titles()
t1.run(1)
t2.run(1)
t3.run(1)
from timings import Timings


def listComprehension(n):
    total = 0
    X = [x * x for x in range(n)]
    for x in X:
        total += x


def generatorExpression(n):
    total = 0
    Y = (y * y for y in range(n))
    for y in Y:
        total += y


t1 = Timings(title="lists",
             setup="from __main__ import listComprehension",
             statement="listComprehension(100000000)")

t2 = Timings(title="generators",
             setup="from __main__ import generatorExpression",
             statement="generatorExpression(100000000)")

Timings.titles()
t1.run(1)
t2.run(1)
# Example 12
from timings import Timings

t1 = Timings(title = "import", setup = "import math", 
                               statement = "math.sqrt(50.0)")
t2 = Timings(title = "from",   setup = "from math import sqrt", 
                               statement = "sqrt(50.0)")

Timings.titles()
t1.run(100000)
t2.run(100000)
t1.run(500000)
t2.run(500000)