def foo():
    """Open region 'a' and, per proxied item, open region 'b' twice.

    The list ``[1, 2, 3]`` is iterated through ``iter_proxy`` under the
    label 'iter'; each iteration enters and leaves two consecutive,
    empty 'b' regions.
    """
    with region('a'):
        for _ in iter_proxy([1, 2, 3], 'iter'):
            # Two back-to-back regions sharing the same name.
            with region('b'):
                pass
            with region('b'):
                pass
def foo():
    """Sleep outside and inside region "a", then sleep per proxied item.

    Sleeps 0.02 s before entering region "a" and 0.02 s right after
    entering it.  Each value produced by ``slow_iter`` (wrapped by
    ``iter_proxy`` under the label "iter") is then used as a sleep
    duration inside region "b".
    """
    time.sleep(0.02)
    with region("a"):
        time.sleep(0.02)
        for delay in iter_proxy(slow_iter([0.1, 0.2, 0.3]), "iter"):
            with region("b"):
                time.sleep(delay)
def foo():
    """Open region "a" and, per proxied item, open region "b" twice.

    The list ``[1, 2, 3]`` is iterated through ``iter_proxy`` under the
    label "iter"; each iteration enters and leaves two consecutive,
    empty "b" regions.
    """
    with region("a"):
        for _ in iter_proxy([1, 2, 3], "iter"):
            # Two back-to-back regions sharing the same name.
            with region("b"):
                pass
            with region("b"):
                pass
def foo():
    """Sleep outside and inside region 'a', then sleep per proxied item.

    Sleeps 0.02 s before entering region 'a' and 0.02 s right after
    entering it.  Each value produced by ``slow_iter`` (wrapped by
    ``iter_proxy`` under the label 'iter') is then used as a sleep
    duration inside region 'b'.
    """
    time.sleep(0.02)
    with region('a'):
        time.sleep(0.02)
        for delay in iter_proxy(slow_iter([0.1, 0.2, 0.3]), 'iter'):
            with region('b'):
                time.sleep(delay)
def call(self, x):
    """Pass ``x`` through ``layer1``, ``layer2`` and ``out_layer`` in turn.

    The whole pass runs inside the region 'NN' (created with
    ``asglobal=True``); each layer call is wrapped in its own nested
    region.

    :param x: Input handed to ``self.layer1``.
    :return: The value returned by ``self.out_layer``.
    """
    with rp.region('NN', asglobal=True):
        with rp.region('layer 1'):
            after_first = self.layer1(x)
        with rp.region('layer 2'):
            after_second = self.layer2(after_first)
        with rp.region('out layer'):
            output = self.out_layer(after_second)
    return output
def bar():
    """Demonstrate several ways of marking profiled regions.

    Builds an array inside region 'init', copies it inside a second
    region that reuses the name 'init', adds each proxied item to the
    array inside region 'loop', and finally computes the result inside
    a region created without an explicit name.

    :return: ``np.sum(a * b)`` where ``b`` is the copy of ``a`` taken
        before the loop modified ``a`` in place.
    """
    with region('init'):  # measure execution time of the next block
        a = np.arange(1000000)
        print('A initialized')

    with region('init'):  # Join region (same name as the block above)
        # ndarray.copy() accepts only 'C', 'F', 'A' or 'K' as ``order``.
        # The empty string passed here before is rejected by current
        # NumPy, so rely on the default (C order) instead.
        b = a.copy()

    with region('loop'):
        # measure time to retrieve next element
        for x in iter_proxy(slow_iter([1, 2, 3, 4]), 'iter'):
            a += x

    with region():  # autoname region
        return np.sum(a * b)
def test_with_fake_timer(monkeypatch, use_cython):
    """Integration test driven by a fake, fully deterministic clock.

    The clock is a mock that returns 0, 1, 2, ... on successive calls,
    so the reported rows can be compared for exact equality.
    """
    report_columns = [cols.name, cols.total_us, cols.total_inner_us,
                      cols.count]
    silent_reporter = SilentReporter(report_columns)

    # Each call to the fake clock yields the next integer.
    fake_clock = mock.Mock()
    fake_clock.side_effect = list(range(100))

    @func()
    def foo():
        with region('a'):
            for _ in iter_proxy([1, 2, 3], 'iter'):
                with region('b'):
                    pass
                with region('b'):
                    pass

    with fresh_region_profiler(monkeypatch):
        install_profiler(
            reporter=silent_reporter,
            timer_cls=lambda: get_timer_cls(use_cython)(fake_clock))
        foo()
        with region('x'):
            pass
        foo()

    expected = [
        ['name', 'total_us', 'total_inner_us', 'count'],
        [RegionProfiler.ROOT_NODE_NAME, '54000000', '5000000', '1'],
        ['foo()', '48000000', '4000000', '2'],
        ['a', '44000000', '26000000', '2'],
        ['b', '12000000', '12000000', '12'],
        ['iter', '6000000', '6000000', '6'],
        ['x', '1000000', '1000000', '1'],
    ]
    assert silent_reporter.rows == expected
def _getitem(buckets, key):
    """Return the value stored under ``key`` in ``buckets``.

    The bucket is located with ``PMap._get_bucket`` and scanned for a
    pair whose first element equals ``key``.  The lookup runs inside the
    region "_getitem()" (created with ``asglobal=True``).

    :raises KeyError: if the bucket is empty/missing or holds no match.
    """
    with rp.region("_getitem()", asglobal=True):
        _index, bucket = PMap._get_bucket(buckets, key)
        # A falsy bucket is treated as having no entries.
        for candidate, value in bucket or ():
            if candidate == key:
                return value
        raise KeyError(key)
def _contains(buckets, key):
    """Tell whether ``key`` is present in ``buckets``.

    The bucket is located with ``PMap._get_bucket``; the result is True
    when any pair in it has a first element equal to ``key``.  The check
    runs inside the region "_contains()" (created with
    ``asglobal=True``).
    """
    with rp.region("_contains()", asglobal=True):
        _, bucket = PMap._get_bucket(buckets, key)
        if not bucket:
            return False
        return any(k == key for k, _ in bucket)
def test_with_real_timer(monkeypatch, use_cython):
    """Integration test that profiles real ``time.sleep`` calls.

    Because wall-clock timings vary, measured totals are only required
    to fall inside a tolerance band around the expected values.
    """
    report_columns = [cols.name, cols.total_us, cols.total_inner_us,
                      cols.count]
    reporter = SilentReporter(report_columns)

    def slow_iter(iterable):
        """Yield items of ``iterable``, sleeping 0.1 s before each one."""
        for item in iterable:
            time.sleep(0.1)
            yield item

    @func()
    def foo():
        time.sleep(0.02)
        with region("a"):
            time.sleep(0.02)
            for delay in iter_proxy(slow_iter([0.1, 0.2, 0.3]), "iter"):
                with region("b"):
                    time.sleep(delay)

    with fresh_region_profiler(monkeypatch):
        install_profiler(reporter)
        foo()
        with region("x"):
            time.sleep(0.5)
        foo()

    # Columns: name, total, inner total, call count.
    expected: List[List[Any]] = [
        [RegionProfiler.ROOT_NODE_NAME, 2380000, 0, "1"],
        ["foo()", 1880000, 40000, "2"],
        ["a", 1840000, 40000, "2"],
        ["b", 1200000, 1200000, "6"],
        ["iter", 600000, 600000, "6"],
        ["x", 500000, 500000, "1"],
    ]

    # (fresh_region_profiler calls dump_profiler)
    # The first reported row is skipped; presumably it is the column
    # header -- confirm against the reporter.
    rows = reporter.rows[1:]  # type: ignore[index]

    lower = 0.99
    upper = 1.03
    upper_delta = 5000

    def within_tolerance(measured, reference):
        """Check ``measured`` against the band around ``reference``."""
        return (reference * lower
                <= int(measured)
                <= reference * upper + upper_delta)

    assert len(rows) == len(expected)
    print(rows)
    for position, (actual, wanted) in enumerate(zip(rows, expected)):
        assert actual[0] == wanted[0]
        assert actual[3] == wanted[3]
        if position == 0:
            # The first row only has a lower bound on its total.
            assert int(actual[1]) > wanted[1]
        else:
            assert within_tolerance(actual[1], wanted[1])
            assert within_tolerance(actual[2], wanted[2])
def test_with_global_regions(monkeypatch, use_cython):
    """Integration test for regions created with ``asglobal=True``.

    Uses a mock clock returning 0, 1, 2, ... on successive calls so the
    reported rows can be compared for exact equality.
    """
    report_columns = [cols.name, cols.total_us, cols.total_inner_us,
                      cols.count]
    silent_reporter = SilentReporter(report_columns)

    # Each call to the fake clock yields the next integer.
    fake_clock = mock.Mock()
    fake_clock.side_effect = list(range(100))

    @func(asglobal=True)
    def bar():
        with region("a"):
            with region("bar_global", asglobal=True):
                for _ in iter_proxy([1, 2, 3], "iter", asglobal=True):
                    pass

    @func()
    def foo():
        with region("a"):
            for _ in iter_proxy([1, 2, 3], "iter"):
                with region("b"):
                    pass
                with region("b"):
                    pass
            # Called once per foo() invocation, still inside region "a".
            bar()

    with fresh_region_profiler(monkeypatch):
        install_profiler(
            reporter=silent_reporter,
            timer_cls=lambda: get_timer_cls(use_cython)(fake_clock))
        foo()
        with region("x"):
            pass
        foo()

    expected = [
        ["name", "total_us", "total_inner_us", "count"],
        [RegionProfiler.ROOT_NODE_NAME, "84000000", "0", "1"],
        ["foo()", "78000000", "4000000", "2"],
        ["a", "74000000", "56000000", "2"],
        ["b", "12000000", "12000000", "12"],
        ["iter", "6000000", "6000000", "6"],
        ["bar()", "28000000", "4000000", "2"],
        ["a", "24000000", "24000000", "2"],
        ["bar_global", "20000000", "20000000", "2"],
        ["iter", "6000000", "6000000", "6"],
        ["x", "1000000", "1000000", "1"],
    ]
    assert silent_reporter.rows == expected
def test_with_real_timer(monkeypatch, use_cython):
    """Integration test that profiles real ``time.sleep`` calls.

    Because wall-clock timings vary, measured totals are only required
    to fall inside a tolerance band around the expected values.
    """
    report_columns = [cols.name, cols.total_us, cols.total_inner_us,
                      cols.count]
    reporter = SilentReporter(report_columns)

    def slow_iter(iterable):
        """Yield items of ``iterable``, sleeping 0.1 s before each one."""
        for item in iterable:
            time.sleep(0.1)
            yield item

    @func()
    def foo():
        time.sleep(0.02)
        with region('a'):
            time.sleep(0.02)
            for delay in iter_proxy(slow_iter([0.1, 0.2, 0.3]), 'iter'):
                with region('b'):
                    time.sleep(delay)

    with fresh_region_profiler(monkeypatch):
        install_profiler(reporter)
        foo()
        with region('x'):
            time.sleep(0.5)
        foo()

    # Columns: name, total, inner total, call count.
    expected = [
        [RegionProfiler.ROOT_NODE_NAME, 2380000, 0, '1'],
        ['foo()', 1880000, 40000, '2'],
        ['a', 1840000, 40000, '2'],
        ['b', 1200000, 1200000, '6'],
        ['iter', 600000, 600000, '6'],
        ['x', 500000, 500000, '1'],
    ]

    # The first reported row is skipped; presumably it is the column
    # header -- confirm against the reporter.
    rows = reporter.rows[1:]

    lower = 0.99
    upper = 1.03
    upper_delta = 5000

    def within_tolerance(measured, reference):
        """Check ``measured`` against the band around ``reference``."""
        return (reference * lower
                <= int(measured)
                <= reference * upper + upper_delta)

    assert len(rows) == len(expected)
    print(rows)
    for position, (actual, wanted) in enumerate(zip(rows, expected)):
        assert actual[0] == wanted[0]
        assert actual[3] == wanted[3]
        if position == 0:
            # The first row only has a lower bound on its total.
            assert int(actual[1]) > wanted[1]
        else:
            assert within_tolerance(actual[1], wanted[1])
            assert within_tolerance(actual[2], wanted[2])
def create(cls, kwargs, _factory_fields=None, ignore_extra=False):
    """
    Factory method. Builds a new PRecord of the current type from the values
    given in kwargs. If kwargs is already an instance of cls, it is returned
    unchanged.

    :param kwargs: Field values keyed by field name, or an instance of cls.
    :param _factory_fields: Forwarded unchanged to the constructor.
    :param ignore_extra: A boolean which when set to True drops every key of
                         kwargs that is not one of the fields of the PRecord
                         before the record is constructed.
    """
    with rp.region("create()"):
        if isinstance(kwargs, cls):
            return kwargs

        field_values = kwargs
        if ignore_extra:
            # Keep only the entries whose key is a declared field.
            field_values = {}
            for field_name in cls._precord_fields:
                if field_name in kwargs:
                    field_values[field_name] = kwargs[field_name]

        return cls(_factory_fields=_factory_fields,
                   _ignore_extra=ignore_extra,
                   **field_values)
def main(p):
    """Estimate the per-region overhead of the profiler and print it.

    Runs ``fact`` under one, two and three nested regions and
    ``timed_fact`` without an explicit region, ``reps`` times each.
    It then reads the accumulated totals from the profiler tree rooted
    at ``p.root`` and derives several overhead estimates from the
    differences between nesting levels.  Their min/avg/max are printed.

    In the trailing comments, F and r presumably denote the time of the
    measured work and the overhead of one region respectively -- confirm.

    :param p: Profiler object exposing ``root.children``.
    """
    reps = 30
    loop_reps = 100
    recursion_depth = 100

    for _ in range(reps):
        # One region around the call.
        for _ in range(loop_reps):
            with rp.region('a'):
                x = fact(recursion_depth)

        # Two nested regions around the call.
        for _ in range(loop_reps):
            with rp.region('b1'):
                with rp.region('b2'):
                    x = fact(recursion_depth)

        # Three nested regions around the call.
        for _ in range(loop_reps):
            with rp.region('c1'):
                with rp.region('c2'):
                    with rp.region('c3'):
                        x = fact(recursion_depth)

        for _ in range(loop_reps):
            x = timed_fact(recursion_depth)

    top_level = p.root.children
    node_b1 = top_level['b1']
    node_c1 = top_level['c1']
    node_c2 = node_c1.children['c2']

    inner_a = top_level['a'].stats.total / reps  # F + r * loop_reps
    inner_b1 = node_b1.stats.total / reps  # F + r * loop_reps * 2
    inner_b2 = node_b1.children['b2'].stats.total / reps  # F + r * loop_reps
    inner_c1 = node_c1.stats.total / reps  # F + r * loop_reps * 3
    inner_c2 = node_c2.stats.total / reps  # F + r * loop_reps * 2
    inner_c3 = node_c2.children['c3'].stats.total / reps  # F + r * loop_reps
    # F + r * loop_reps * recursion
    func = top_level['timed_fact()'].stats.total / reps

    overhead = SeqStats()
    overhead.add((inner_b1 - inner_b2) / loop_reps)
    overhead.add((inner_b1 - inner_a) / loop_reps)
    overhead.add((inner_c2 - inner_c3) / loop_reps)
    overhead.add((inner_c1 - inner_c2) / loop_reps)
    overhead.add((func - inner_a) / (loop_reps * (recursion_depth - 1)))

    summary = 'Region overhead:\n\t{} .. {} .. {}'.format(
        pretty_print_time(overhead.min),
        pretty_print_time(overhead.avg),
        pretty_print_time(overhead.max))
    print(summary)
def foo():
    """Iterate over ``[1, 2, 3]`` inside a region, giving no explicit names.

    Both ``region()`` and ``iter_proxy()`` are called without a name
    argument; how they get labelled is decided by the profiler
    (presumably from the call site -- confirm).
    """
    with region():
        for i in iter_proxy([1, 2, 3]):
            pass
def main(): mnist = fetch_mnist() # Using TF Dataset to split data into batches dataset = tf.data.Dataset.from_tensor_slices( (mnist.train.images, mnist.train.labels)) dataset = dataset.repeat().batch(batch_size).prefetch(batch_size) dataset_iter = tfe.Iterator(dataset) # Create NN neural_net = NeuralNet() # SGD Optimizer optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) # Compute gradients grad = tfe.implicit_gradients(loss_fn) # Training average_loss = 0. average_acc = 0. with rp.region('train'): for step in range(num_steps): # Iterate through the dataset with rp.region('fetch_next'): d = dataset_iter.next() # Images x_batch = d[0] # Labels y_batch = tf.cast(d[1], dtype=tf.int64) with rp.region('forward'): # Compute the batch loss batch_loss = loss_fn(neural_net, x_batch, y_batch) average_loss += batch_loss # Compute the batch accuracy batch_accuracy = accuracy_fn(neural_net, x_batch, y_batch) average_acc += batch_accuracy if step == 0: # Display the initial cost, before optimizing print("Initial loss= {:.9f}".format(average_loss)) with rp.region('backward'): # Update the variables following gradients info optimizer.apply_gradients(grad(neural_net, x_batch, y_batch)) # Display info if (step + 1) % display_step == 0 or step == 0: if step > 0: average_loss /= display_step average_acc /= display_step print("Step:", '%04d' % (step + 1), " loss=", "{:.9f}".format(average_loss), " accuracy=", "{:.4f}".format(average_acc)) average_loss = 0. average_acc = 0. # Evaluate model on the test image set testX = mnist.test.images testY = mnist.test.labels with rp.region('test'): test_acc = accuracy_fn(neural_net, testX, testY) print("Testset Accuracy: {:.4f}".format(test_acc))
def bar():
    """Iterate over ``[1, 2, 3]`` inside two nested regions.

    Region "a" is entered first, then "bar_global" (created with
    ``asglobal=True``).  The list is iterated through ``iter_proxy``
    under the label "iter", also with ``asglobal=True``.
    """
    # A multi-item ``with`` enters the managers left to right, exactly
    # like two nested ``with`` statements.
    with region("a"), region("bar_global", asglobal=True):
        for _ in iter_proxy([1, 2, 3], "iter", asglobal=True):
            pass
def foo(n):
    """Exercise the profiler with fixed and varying region names.

    Everything runs inside a region named ``n``:

    * 'sleep' -- a single 0.5 s sleep;
    * 'static_loop' -- 1000 iterations of 'outer' > 'inner';
    * 'dynamic_loop' -- 1000 iterations of 'outer' > 'inner<k>', where
      ``k`` cycles through 0..9;
    * 'static_loop' again, with the same body as before.

    :param n: Name passed to the outermost region.
    """
    with rp.region(n):
        with rp.region('sleep'):
            sleep(0.5)

        with rp.region('static_loop'):
            for _ in range(1000):
                with rp.region('outer'), rp.region('inner'):
                    pass

        with rp.region('dynamic_loop'):
            for step in range(1000):
                # The inner region name changes from iteration to iteration.
                with rp.region('outer'), rp.region('inner' + str(step % 10)):
                    pass

        with rp.region('static_loop'):
            for _ in range(1000):
                with rp.region('outer'), rp.region('inner'):
                    pass
def _get_bucket(buckets, key):
    """Locate the bucket that ``key`` hashes to.

    The position is ``hash(key)`` modulo ``len(buckets)``.  The lookup
    runs inside the region "_get_bucket()".

    :return: A ``(index, bucket)`` tuple.
    """
    with rp.region("_get_bucket()"):
        slot = hash(key) % len(buckets)
        return slot, buckets[slot]
def bar():
    """Iterate over ``[1, 2, 3]`` inside two nested regions.

    Region 'a' is entered first, then 'bar_global' (created with
    ``asglobal=True``).  The list is iterated through ``iter_proxy``
    under the label 'iter', also with ``asglobal=True``.
    """
    # A multi-item ``with`` enters the managers left to right, exactly
    # like two nested ``with`` statements.
    with region('a'), region('bar_global', asglobal=True):
        for _ in iter_proxy([1, 2, 3], 'iter', asglobal=True):
            pass
def demo_global_context_mgr():
    """Sleep for one second inside a region labelled by ``func_name()``."""
    region_label = func_name()
    with rp.region(region_label):
        time.sleep(1)