Example #1
    def test_composite_reader_builder(self):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)
        num_srcs = 3
        names = ["src_{}".format(i) for i in range(num_srcs)]
        size = 100
        offsets = [i * size for i in range(num_srcs)]
        src_ds_builders = [
            TestReaderBuilder(offset=offset, size=size, name=name)
            for (name, offset) in zip(names, offsets)
        ]

        # Make an identically-sized empty destination dataset
        dst_ds_schema = schema.Struct(
            *[(name, src_ds_builder.schema())
              for name, src_ds_builder in zip(names, src_ds_builders)])
        dst_ds = make_destination_dataset(ws, dst_ds_schema)

        with TaskGroup() as tg:
            reader_builder = CompositeReaderBuilder(names, src_ds_builders)
            reader_builder.setup(ws=ws)
            pipe(reader_builder.new_reader(),
                 dst_ds.writer(),
                 num_runtime_threads=3)
        session.run(tg)

        for name, offset in zip(names, offsets):
            written_data = sorted(
                ws.fetch_blob(str(dst_ds.content()[name].label())))
            npt.assert_array_equal(range(offset, offset + size), written_data,
                                   "name: {}".format(name))
Example #2
    def test_composite_reader_builder(self):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)
        num_srcs = 3
        names = ["src_{}".format(i) for i in range(num_srcs)]
        size = 100
        offsets = [i * size for i in range(num_srcs)]
        src_ds_builders = [
            TestReaderBuilder(offset=offset, size=size, name=name)
            for (name, offset) in zip(names, offsets)
        ]

        # Make an identically-sized empty destination dataset
        dst_ds_schema = schema.Struct(
            *[
                (name, src_ds_builder.schema())
                for name, src_ds_builder in zip(names, src_ds_builders)
            ]
        )
        dst_ds = make_destination_dataset(ws, dst_ds_schema)

        with TaskGroup() as tg:
            reader_builder = CompositeReaderBuilder(
                names, src_ds_builders)
            reader_builder.setup(ws=ws)
            pipe(reader_builder.new_reader(), dst_ds.writer(),
                 num_runtime_threads=3)
        session.run(tg)

        for name, offset in zip(names, offsets):
            written_data = sorted(
                ws.fetch_blob(str(dst_ds.content()[name].label())))
            npt.assert_array_equal(range(offset, offset + size), written_data,
                                   "name: {}".format(name))
Example #3
    def test_composite_reader(self):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)
        num_srcs = 3
        names = ["src_{}".format(i) for i in range(num_srcs)]
        size = 100
        offsets = [i * size for i in range(num_srcs)]
        src_dses = [make_source_dataset(ws, offset=offset, size=size, name=name)
                    for (name, offset) in zip(names, offsets)]

        data = [ws.fetch_blob(str(src.field_blobs[0])) for src in src_dses]
        # Sanity check we didn't overwrite anything
        for d, offset in zip(data, offsets):
            npt.assert_array_equal(d, range(offset, offset + size))

        # Make an identically-sized empty destination dataset
        dst_ds_schema = schema.Struct(
            *[
                (name, src_ds.content().clone_schema())
                for name, src_ds in zip(names, src_dses)
            ]
        )
        dst_ds = make_destination_dataset(ws, dst_ds_schema)

        with TaskGroup() as tg:
            reader = CompositeReader(names,
                                     [src_ds.reader() for src_ds in src_dses])
            pipe(reader, dst_ds.writer(), num_runtime_threads=3)
        session.run(tg)

        for i in range(num_srcs):
            written_data = sorted(
                ws.fetch_blob(str(dst_ds.content()[names[i]].label())))
            npt.assert_array_equal(data[i], written_data, "i: {}".format(i))
Example #4
    def test_composite_reader(self):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)
        num_srcs = 3
        names = ["src_{}".format(i) for i in range(num_srcs)]
        size = 100
        offsets = [i * size for i in range(num_srcs)]
        src_dses = [
            make_source_dataset(ws, offset=offset, size=size, name=name)
            for (name, offset) in zip(names, offsets)
        ]

        data = [ws.fetch_blob(str(src.field_blobs[0])) for src in src_dses]
        # Sanity check we didn't overwrite anything
        for d, offset in zip(data, offsets):
            npt.assert_array_equal(d, range(offset, offset + size))

        # Make an identically-sized empty destination dataset
        dst_ds_schema = schema.Struct(
            *[(name, src_ds.content().clone_schema())
              for name, src_ds in zip(names, src_dses)])
        dst_ds = make_destination_dataset(ws, dst_ds_schema)

        with TaskGroup() as tg:
            reader = CompositeReader(names,
                                     [src_ds.reader() for src_ds in src_dses])
            pipe(reader, dst_ds.writer(), num_runtime_threads=3)
        session.run(tg)

        for i in range(num_srcs):
            written_data = sorted(
                ws.fetch_blob(str(dst_ds.content()[names[i]].label())))
            npt.assert_array_equal(data[i], written_data, "i: {}".format(i))
Example #5
    def test_reader_with_limit(self):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)

        """ 1. feed full dataset """
        src_init = core.Net('src_init')
        src_values = Struct(('label', np.array(range(100))))
        src_blobs = NewRecord(src_init, src_values)
        src_ds = Dataset(src_blobs)
        FeedRecord(src_blobs, src_values, ws)
        ws.run(src_init)

        """ 2. Read with limit smaller than size of dataset """
        dst_init = core.Net('dst_init')
        dst_ds = Dataset(src_values.clone_schema())
        dst_ds.init_empty(dst_init)
        ws.run(dst_init)

        with TaskGroup() as tg:
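            # num_iter=10 limits the pipeline to 10 of the 100 source records;
            # the assertions below check that exactly range(10) was written.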
            reader = ReaderWithLimit(src_ds.reader(), num_iter=10)
            pipe(reader, dst_ds.writer(), num_threads=8)
        session.run(tg)
        self.assertFalse(ws.blobs[str(reader.data_finished())].fetch())
        self.assertEquals(
            sorted(ws.blobs[str(dst_ds.content().label())].fetch()),
            list(range(10)))

        """ 3. Read with limit larger than size of dataset """
        ws.run(dst_init)
        with TaskGroup() as tg:
            reader = ReaderWithLimit(src_ds.reader(), num_iter=110)
            pipe(reader, dst_ds.writer(), num_threads=8)
        session.run(tg)
        self.assertEquals(
            sorted(ws.blobs[str(dst_ds.content().label())].fetch()),
            list(range(100)))
        self.assertTrue(ws.blobs[str(reader.data_finished())].fetch())
Example #6
    def test_dequeue_many(self):
        init_net = core.Net('init')
        N = 17
        NUM_DEQUEUE_RECORDS = 3
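        # With N = 17 records dequeued in batches of NUM_DEQUEUE_RECORDS = 3,
        # the pipeline should perform ceil(17 / 3) = 6 dequeues; `counter`
        # below tracks this and is checked at the end of the test.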
        src_values = Struct(
            ('uid', np.array(range(N))),
            ('value', 0.1 * np.array(range(N))))
        expected_dst = Struct(
            ('uid', 2 * np.array(range(N))),
            ('value', np.array(N * [0.0])))

        with core.NameScope('init'):
            src_blobs = NewRecord(init_net, src_values)
            dst_blobs = InitEmptyRecord(init_net, src_values.clone_schema())
            counter = init_net.Const(0)
            ONE = init_net.Const(1)

        def proc1(rec):
            with core.NameScope('proc1'):
                out = NewRecord(ops, rec)
            ops.Add([rec.uid(), rec.uid()], [out.uid()])
            out.value.set(blob=rec.value(), unsafe=True)
            return out

        def proc2(rec):
            with core.NameScope('proc2'):
                out = NewRecord(ops, rec)
            out.uid.set(blob=rec.uid(), unsafe=True)
            ops.Sub([rec.value(), rec.value()], [out.value()])
            ops.Add([counter, ONE], [counter])
            return out

        src_ds = Dataset(src_blobs)
        dst_ds = Dataset(dst_blobs)

        with TaskGroup() as tg:
            out1 = pipe(
                src_ds.reader(),
                output=Queue(
                    capacity=11, num_dequeue_records=NUM_DEQUEUE_RECORDS),
                processor=proc1)
            out2 = pipe(out1, processor=proc2)
            pipe(out2, dst_ds.writer())

        ws = workspace.C.Workspace()
        FeedRecord(src_blobs, src_values, ws)
        session = LocalSession(ws)
        session.run(init_net)
        session.run(tg)
        output = FetchRecord(dst_blobs, ws=ws)
        num_dequeues = ws.blobs[str(counter)].fetch()

        self.assertEquals(
            num_dequeues, int(math.ceil(float(N) / NUM_DEQUEUE_RECORDS)))

        for a, b in zip(output.field_blobs(), expected_dst.field_blobs()):
            np.testing.assert_array_equal(a, b)
Example #7
    def test_reader_with_limit(self):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)

        """ 1. feed full dataset """
        src_ds = init_dataset(ws)

        """ 2. Read with limit smaller than size of dataset """
        dst_init = core.Net('dst_init')
        with core.NameScope('dst'):
            dst_ds = Dataset(src_ds.content().clone_schema())
            dst_ds.init_empty(dst_init)
        ws.run(dst_init)

        # WorkspaceType.GLOBAL is required because we are fetching
        # reader.data_finished() after the TaskGroup finishes.
        with TaskGroup(workspace_type=WorkspaceType.GLOBAL) as tg:
            reader = ReaderWithLimit(src_ds.reader(), num_iter=10)
            pipe(reader, dst_ds.writer(), num_threads=8)
        session.run(tg)

        self.assertFalse(ws.blobs[str(reader.data_finished())].fetch())
        self.assertEquals(
            sorted(ws.blobs[str(dst_ds.content().label())].fetch()),
            list(range(10))
        )

        """ 3. Read with limit larger than size of dataset """
        ws.run(dst_init)
        with TaskGroup(workspace_type=WorkspaceType.GLOBAL) as tg:
            reader = ReaderWithLimit(src_ds.reader(), num_iter=110)
            pipe(reader, dst_ds.writer(), num_runtime_threads=8)
        session.run(tg)
        self.assertEquals(
            sorted(ws.blobs[str(dst_ds.content().label())].fetch()),
            list(range(100))
        )
        self.assertTrue(ws.blobs[str(reader.data_finished())].fetch())

        """ 4. Read without counter """
        ws.run(dst_init)
        with TaskGroup(workspace_type=WorkspaceType.GLOBAL) as tg:
            reader = ReaderWithLimit(src_ds.reader(), num_iter=None)
            pipe(reader, dst_ds.writer(), num_threads=8)
        session.run(tg)
        self.assertEquals(
            sorted(ws.blobs[str(dst_ds.content().label())].fetch()),
            list(range(100))
        )
        self.assertTrue(ws.blobs[str(reader.data_finished())].fetch())

        """ 5. Read using the same reader without resetting workspace """
        session.run(tg)
        self.assertEquals(
            sorted(ws.blobs[str(dst_ds.content().label())].fetch()),
            sorted(list(range(100)) * 2)
        )
Example #8
    def test_load_model_from_checkpoints(self):
        try:
            tmpdir = tempfile.mkdtemp()

            for node_id in range(3):
                ws = workspace.C.Workspace()
                session = LocalSession(ws)
                checkpoint = MultiNodeCheckpointManager(tmpdir, 'minidb')
                with Job() as job:
                    build_pipeline(node_id)
                compiled_job = job.compile(LocalSession)
                job_runner = JobRunner(compiled_job, checkpoint)
                num_epochs = job_runner(session)
                self.assertEquals(num_epochs, len(EXPECTED_TOTALS))

                # There are 12 global blobs after finishing up the job runner.
                # (only blobs on init_group are checkpointed)
                self.assertEquals(len(ws.blobs), 12)

            ws = workspace.C.Workspace()
            session = LocalSession(ws)
            self.assertEquals(len(ws.blobs), 0)
            model_blob_names = [
                'reader:1/task/GivenTensorInt64Fill:0',
                'reader:2/task/GivenTensorInt64Fill:0'
            ]
            checkpoint = MultiNodeCheckpointManager(tmpdir, 'minidb')
            with Job() as job:
                for node_id in range(3):
                    build_pipeline(node_id)
            compiled_job = job.compile(LocalSession)
            job_runner = JobRunner(compiled_job, checkpoint)
            job_runner.load_blobs_from_checkpoints(blob_names=model_blob_names,
                                                   epoch=1,
                                                   session=session)

            # Check that we can successfully load from checkpoints of epochs
            # 1 to 4, but not epoch 5.
            for epoch in range(1, 5):
                self.assertTrue(
                    job_runner.load_blobs_from_checkpoints(
                        blob_names=model_blob_names,
                        epoch=epoch,
                        session=session))
                # Check that all the model blobs are loaded.
                for blob_name in model_blob_names:
                    self.assertTrue(ws.has_blob(blob_name))
                    self.assertEquals(ws.fetch_blob(blob_name),
                                      np.array([EXPECTED_TOTALS[epoch - 1]]))
            self.assertFalse(
                job_runner.load_blobs_from_checkpoints(
                    blob_names=model_blob_names, epoch=5, session=session))

        finally:
            shutil.rmtree(tmpdir)
Example #9
    def test_runtime_threads(self):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)
        src_ds = init_dataset(ws)
        totals = [None] * 3

        def proc(rec):
            # executed once
            with ops.task_init():
                counter1 = ops.CreateCounter([], ['global_counter'])
                counter2 = ops.CreateCounter([], ['global_counter2'])
                counter3 = ops.CreateCounter([], ['global_counter3'])
            # executed once per thread
            with ops.task_instance_init():
                task_counter = ops.CreateCounter([], ['task_counter'])
            # executed on each iteration
            ops.CountUp(counter1)
            ops.CountUp(task_counter)
            # executed once per thread
            with ops.task_instance_exit():
                with ops.loop(ops.RetrieveCount(task_counter)):
                    ops.CountUp(counter2)
                ops.CountUp(counter3)
            # executed once
            with ops.task_exit():
                totals[0] = final_output(ops.RetrieveCount(counter1))
                totals[1] = final_output(ops.RetrieveCount(counter2))
                totals[2] = final_output(ops.RetrieveCount(counter3))
            return rec

        # Read full data set from original reader
        with TaskGroup() as tg:
            pipe(src_ds.reader(), num_runtime_threads=8, processor=proc)
        session.run(tg)
        self.assertEqual(totals[0].fetch(), 100)
        self.assertEqual(totals[1].fetch(), 100)
        self.assertEqual(totals[2].fetch(), 8)

        # Read with a count-limited reader
        with TaskGroup() as tg:
            q1 = pipe(src_ds.reader(), num_runtime_threads=2)
            q2 = pipe(
                ReaderWithLimit(q1.reader(), num_iter=25),
                num_runtime_threads=3)
            pipe(q2, processor=proc, num_runtime_threads=6)
        session.run(tg)
        self.assertEqual(totals[0].fetch(), 25)
        self.assertEqual(totals[1].fetch(), 25)
        self.assertEqual(totals[2].fetch(), 6)
Example #10
    def test_reuse_checkpoint_manager(self):
        """
        A simple test that ensures we can reuse a MultiNodeCheckpointManager
        object.
        """
        try:
            tmpdir = tempfile.mkdtemp()
            ws = workspace.C.Workspace()
            session = LocalSession(ws)
            checkpoint = MultiNodeCheckpointManager(tmpdir, 'minidb')

            with Job() as job:
                outputs = build_pipeline(node_id=0)
            output_fetcher = Task(step=core.Net('empty'), outputs=outputs)
            compiled_job = job.compile(LocalSession)

            def fetch_total(session):
                session.run(output_fetcher)
                return output_fetcher.outputs()[0].fetch()

            num_epochs = JobRunner(compiled_job, checkpoint)(session)
            for initial_epoch in range(1, num_epochs + 1):
                JobRunner(
                    compiled_job,
                    checkpoint, resume_from_epoch=initial_epoch)(session)
                self.assertEquals(fetch_total(session), EXPECTED_TOTALS[-1])

        finally:
            shutil.rmtree(tmpdir)
Example #11
    def test_local_session(self):
        init_net = core.Net('init')
        src_values = Struct(
            ('uid', np.array([1, 2, 6])),
            ('value', np.array([1.4, 1.6, 1.7])))
        expected_dst = Struct(
            ('uid', np.array([2, 4, 12])),
            ('value', np.array([0.0, 0.0, 0.0])))

        with core.NameScope('init'):
            src_blobs = NewRecord(init_net, src_values)
            dst_blobs = InitEmptyRecord(init_net, src_values.clone_schema())

        def proc1(rec):
            net = core.Net('proc1')
            with core.NameScope('proc1'):
                out = NewRecord(net, rec)
            net.Add([rec.uid(), rec.uid()], [out.uid()])
            out.value.set(blob=rec.value(), unsafe=True)
            return [net], out

        def proc2(rec):
            net = core.Net('proc2')
            with core.NameScope('proc2'):
                out = NewRecord(net, rec)
            out.uid.set(blob=rec.uid(), unsafe=True)
            net.Sub([rec.value(), rec.value()], [out.value()])
            return [net], out

        src_ds = Dataset(src_blobs)
        dst_ds = Dataset(dst_blobs)

        with TaskGroup() as tg:
            out1 = pipe(src_ds.reader(), processor=proc1)
            out2 = pipe(out1, processor=proc2)
            pipe(out2, dst_ds.writer())

        ws = workspace.C.Workspace()
        FeedRecord(src_blobs, src_values, ws)
        session = LocalSession(ws)
        session.run(init_net)
        session.run(tg)
        output = FetchRecord(dst_blobs, ws=ws)

        for a, b in zip(output.field_blobs(), expected_dst.field_blobs()):
            np.testing.assert_array_equal(a, b)
Example #12
 def test_loops(self):
     with Task() as task:
         out_actual = self._actual_loop()
     with LocalSession() as session:
         session.run(task)
         expected = self._expected_loop()
         actual = [o.fetch() for o in out_actual]
         for e, a in zip(expected, actual):
             self.assertEquals(e, a)
Example #13
    def test_load_model_from_checkpoints(self):
        try:
            tmpdir = tempfile.mkdtemp()

            for node_id in range(3):
                ws = workspace.C.Workspace()
                session = LocalSession(ws)
                checkpoint = MultiNodeCheckpointManager(tmpdir, 'minidb')
                with Job() as job:
                    build_pipeline(node_id)
                compiled_job = job.compile(LocalSession)
                job_runner = JobRunner(compiled_job, checkpoint)
                num_epochs = job_runner(session)
                self.assertEquals(num_epochs, len(EXPECTED_TOTALS))

                # There are 16 blobs after finishing up the job runner.
                self.assertEquals(len(ws.blobs), 16)

            ws = workspace.C.Workspace()
            session = LocalSession(ws)
            self.assertEquals(len(ws.blobs), 0)
            model_blob_names = [
                'reader:1/task/GivenTensorInt64Fill:0',
                'reader:2/task/GivenTensorInt64Fill:0'
            ]
            checkpoint = MultiNodeCheckpointManager(tmpdir, 'minidb')
            with Job() as job:
                for node_id in range(3):
                    build_pipeline(node_id)
            compiled_job = job.compile(LocalSession)
            job_runner = JobRunner(compiled_job, checkpoint)
            job_runner.load_blobs_from_checkpoints(blob_names=model_blob_names,
                                                   epoch=1,
                                                   session=session)
            # In addition to the two model blobs, we also have 3 output blobs
            # and one runnable blob. So there are 6 blobs in total.
            self.assertEquals(len(ws.blobs), 6)
            # Check that all the model blobs are loaded.
            for blob_name in model_blob_names:
                self.assertTrue(ws.has_blob(blob_name))
                self.assertEquals(ws.fetch_blob(blob_name), np.array([103]))

        finally:
            shutil.rmtree(tmpdir)
Example #14
 def test_net_multi_use(self):
     with Task() as task:
         total = ops.Const(0)
         net = Net('my_net')
         net.Add([total, net.Const(1)], [total])
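         # Registering the same net twice runs it twice within the task,
         # so `total` accumulates to 2 by the time it is fetched.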
         ops.net(net)
         ops.net(net)
         result = final_output(total)
     with LocalSession() as session:
         session.run(task)
         self.assertEquals(2, result.fetch())
Example #15
    def _test_limit_reader_init_shared(self, size):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)

        # Make source dataset
        src_ds = make_source_dataset(ws, size=size)

        # Make an identically-sized empty destination Dataset
        dst_ds = make_destination_dataset(ws, src_ds.content().clone_schema())

        return ws, session, src_ds, dst_ds
Example #16
    def test_reader_with_limit(self):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)
        """ 1. feed full dataset """
        src_init = core.Net('src_init')
        src_values = Struct(('label', np.array(range(100))))
        src_blobs = NewRecord(src_init, src_values)
        src_ds = Dataset(src_blobs)
        FeedRecord(src_blobs, src_values, ws)
        ws.run(src_init)
        """ 2. Read with limit smaller than size of dataset """
        dst_init = core.Net('dst_init')
        dst_ds = Dataset(src_values.clone_schema())
        dst_ds.init_empty(dst_init)
        ws.run(dst_init)

        with TaskGroup() as tg:
            reader = ReaderWithLimit(src_ds.reader(), num_iter=10)
            pipe(reader, dst_ds.writer(), num_threads=8)
        session.run(tg)
        self.assertFalse(ws.blobs[str(reader.data_finished())].fetch())
        self.assertEquals(
            sorted(ws.blobs[str(dst_ds.content().label())].fetch()),
            list(range(10)))
        """ 3. Read with limit larger than size of dataset """
        ws.run(dst_init)
        with TaskGroup() as tg:
            reader = ReaderWithLimit(src_ds.reader(), num_iter=110)
            pipe(reader, dst_ds.writer(), num_threads=8)
        session.run(tg)
        self.assertEquals(
            sorted(ws.blobs[str(dst_ds.content().label())].fetch()),
            list(range(100)))
        self.assertTrue(ws.blobs[str(reader.data_finished())].fetch())
Example #17
    def test_db_file_reader(self):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)
        db_path = self._make_temp_path()

        # Build a cache DB file.
        cached_reader = CachedReader(
            self._build_source_reader(ws, 100),
            db_path=db_path,
            db_type='LevelDB',
        )
        build_cache_step = cached_reader.build_cache_step()
        session.run(build_cache_step)

        # Read data from cache DB file.
        db_file_reader = DBFileReader(
            db_path=db_path,
            db_type='LevelDB',
        )
        data = self._read_all_data(ws, db_file_reader, session)
        self.assertEqual(sorted(data), list(range(100)))

        self._delete_path(db_path)
Example #18
    def test_dequeue_many(self):
        init_net = core.Net('init')
        N = 17
        NUM_DEQUEUE_RECORDS = 3
        src_values = Struct(
            ('uid', np.array(range(N))),
            ('value', 0.1 * np.array(range(N))))
        expected_dst = Struct(
            ('uid', 2 * np.array(range(N))),
            ('value', np.array(N * [0.0])))

        with core.NameScope('init'):
            src_blobs = NewRecord(init_net, src_values)
            dst_blobs = InitEmptyRecord(init_net, src_values.clone_schema())
            counter = init_net.Const(0)
            ONE = init_net.Const(1)

        def proc1(rec):
            with core.NameScope('proc1'):
                out = NewRecord(ops, rec)
            ops.Add([rec.uid(), rec.uid()], [out.uid()])
            out.value.set(blob=rec.value(), unsafe=True)
            return out

        def proc2(rec):
            with core.NameScope('proc2'):
                out = NewRecord(ops, rec)
            out.uid.set(blob=rec.uid(), unsafe=True)
            ops.Sub([rec.value(), rec.value()], [out.value()])
            ops.Add([counter, ONE], [counter])
            return out

        src_ds = Dataset(src_blobs)
        dst_ds = Dataset(dst_blobs)

        with TaskGroup() as tg:
            out1 = pipe(
                src_ds.reader(),
                output=Queue(
                    capacity=11, num_dequeue_records=NUM_DEQUEUE_RECORDS),
                processor=proc1)
            out2 = pipe(out1, processor=proc2)
            pipe(out2, dst_ds.writer())

        ws = workspace.C.Workspace()
        FeedRecord(src_blobs, src_values, ws)
        session = LocalSession(ws)
        session.run(init_net)
        session.run(tg)
        output = FetchRecord(dst_blobs, ws=ws)
        num_dequeues = ws.blobs[str(counter)].fetch()

        self.assertEquals(
            num_dequeues, int(math.ceil(float(N) / NUM_DEQUEUE_RECORDS)))

        for a, b in zip(output.field_blobs(), expected_dst.field_blobs()):
            np.testing.assert_array_equal(a, b)
Example #19
    def test_db_file_reader(self):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)
        db_path = self._make_temp_path()

        # Build a cache DB file.
        cached_reader = CachedReader(
            self._build_source_reader(ws, 100),
            db_path=db_path,
            db_type='LevelDB',
        )
        build_cache_step = cached_reader.build_cache_step()
        session.run(build_cache_step)

        # Read data from cache DB file.
        workspace.ResetWorkspace()
        db_file_reader = DBFileReader(
            db_path=db_path,
            db_type='LevelDB',
        )
        data = self._read_all_data(ws, db_file_reader, session)
        self.assertEqual(sorted(data), list(range(100)))

        self._delete_path(db_path)
Example #20
    def _test_limit_reader_init_shared(self, size):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)

        # Build test dataset
        src_ds = init_dataset(ws, size=size)

        # Create an identically-sized empty destination dataset
        dst_init = core.Net('dst_init')
        with core.NameScope('dst'):
            dst_ds = Dataset(src_ds.content().clone_schema())
            dst_ds.init_empty(dst_init)
        ws.run(dst_init)

        return ws, session, src_ds, dst_init, dst_ds
Example #21
 def test_multi_instance_python_op(self):
     """
     When task instances are created at runtime, C++ concurrently creates
     multiple instances of operators in C++, and concurrently destroys them
     once the task is finished. This means that the destructor of PythonOp
     will be called concurrently, so the GIL must be acquired. This
     test exercises this condition.
     """
     with Task(num_instances=64) as task:
         with ops.loop(4):
             ops.Python((python_op_builder, [], {}))([], [])
     with LocalSession() as session:
         PythonOpStats.num_instances = 0
         PythonOpStats.num_calls = 0
         session.run(task)
         self.assertEquals(PythonOpStats.num_instances, 64)
         self.assertEquals(PythonOpStats.num_calls, 256)
Example #22
    def test_cached_reader(self):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)
        db_path = self._make_temp_path()

        # Read data for the first time.
        cached_reader1 = CachedReader(
            self._build_source_reader(ws, 100),
            db_path,
        )
        build_cache_step = cached_reader1.build_cache_step()
        session.run(build_cache_step)

        data = self._read_all_data(ws, cached_reader1, session)
        self.assertEqual(sorted(data), list(range(100)))

        # Read data from cache.
        workspace.ResetWorkspace()
        cached_reader2 = CachedReader(
            self._build_source_reader(ws, 200),
            db_path,
        )
        build_cache_step = cached_reader2.build_cache_step()
        session.run(build_cache_step)

        data = self._read_all_data(ws, cached_reader2, session)
        self.assertEqual(sorted(data), list(range(100)))

        self._delete_path(db_path)

        # We removed the cache, so we expect to receive data from the
        # original reader.
        workspace.ResetWorkspace()
        cached_reader3 = CachedReader(
            self._build_source_reader(ws, 300),
            db_path,
        )
        build_cache_step = cached_reader3.build_cache_step()
        session.run(build_cache_step)

        data = self._read_all_data(ws, cached_reader3, session)
        self.assertEqual(sorted(data), list(range(300)))

        self._delete_path(db_path)
Example #23
    def test_cached_reader(self):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)

        def build_source_reader(size):
            src_ds = make_source_dataset(ws, size)
            return src_ds.reader()

        # Make a temp file path as cache_path
        with tempfile.NamedTemporaryFile(delete=False) as f:
            cache_path = f.name
            f.close()
            os.remove(cache_path)

        # Read data for the first time.
        cached_reader1 = CachedReader(build_source_reader(100))
        init_step = cached_reader1.build_cache(cache_path)
        session.run(init_step)

        data = read_all_data(ws, cached_reader1, session)
        self.assertEqual(sorted(data), list(range(100)))

        # Read data from cache.
        workspace.ResetWorkspace()
        cached_reader2 = CachedReader(build_source_reader(200))
        init_step = cached_reader2.build_cache(cache_path)
        session.run(init_step)

        data = read_all_data(ws, cached_reader2, session)
        self.assertEqual(sorted(data), list(range(100)))

        shutil.rmtree(cache_path)

        # We removed the cache, so we expect to receive data from the
        # original reader.
        workspace.ResetWorkspace()
        cached_reader3 = CachedReader(build_source_reader(300))
        init_step = cached_reader3.build_cache(cache_path)
        session.run(init_step)

        data = read_all_data(ws, cached_reader3, session)
        self.assertEqual(sorted(data), list(range(300)))

        shutil.rmtree(cache_path)
Example #24
    def test_download_group_simple(self):
        """
        A simple test that ensures we have download task group
        executed between epoch_group and exit_group.
        """
        model = model_helper.ModelHelper(name="test_model")
        download_net = core.Net("download_net")

        for name in ["input1", "input2", "output", "download_result"]:
            model.param_init_net.ConstantFill(
                [], [name], shape=[8], value=1.0, run_once=0)
        model.net.Add(["input1", "input2"], ["output"])
        download_net.Copy(["output"], ["download_result"])

        # All blob values are initialized to 1.0; after download_net executes,
        # we expect the download result to match the training result.
        with Job() as job:
            with Node("trainer:0"):
                with job.init_group:
                    Task(step=model.param_init_net)
                with job.epoch_group:
                    with Task():
                        with ops.loop(1):
                            ops.net(model.net)
                with job.download_group:
                    Task(step=download_net)

                epoch_limiter(job, 1)

        ws = workspace.C.Workspace()
        session = LocalSession(ws)
        job_runner = JobRunner(job)
        job_runner.train(session)

        expected_result = np.full(8, 2.0).astype(np.float32)
        self.assertTrue(
            np.array_equal(expected_result, ws.fetch_blob("output")))
        self.assertTrue(
            np.array_equal(expected_result, ws.fetch_blob("download_result")))
Example #25
    def test_runtime_threads(self):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)
        src_ds = init_dataset(ws)
        totals = [None] * 3

        def proc(rec):
            # executed once
            with ops.task_init():
                counter1 = ops.CreateCounter([], ['global_counter'])
                counter2 = ops.CreateCounter([], ['global_counter2'])
                counter3 = ops.CreateCounter([], ['global_counter3'])
            # executed once per thread
            with ops.task_instance_init():
                task_counter = ops.CreateCounter([], ['task_counter'])
            # executed on each iteration
            ops.CountUp(counter1)
            ops.CountUp(task_counter)
            # executed once per thread
            with ops.task_instance_exit():
                with ops.loop(ops.RetrieveCount(task_counter)):
                    ops.CountUp(counter2)
                ops.CountUp(counter3)
            # executed once
            with ops.task_exit():
                totals[0] = final_output(ops.RetrieveCount(counter1))
                totals[1] = final_output(ops.RetrieveCount(counter2))
                totals[2] = final_output(ops.RetrieveCount(counter3))
            return rec

        """ 1. Feed full dataset """
        with TaskGroup() as tg:
            pipe(src_ds.reader(), num_runtime_threads=8, processor=proc)
        session.run(tg)
        self.assertEquals(totals[0].fetch(), 100)
        self.assertEquals(totals[1].fetch(), 100)
        self.assertEquals(totals[2].fetch(), 8)

        """ 2. Add a few steps in between """
        with TaskGroup() as tg:
            q1 = pipe(src_ds.reader(), num_runtime_threads=2)
            q2 = pipe(
                ReaderWithLimit(q1.reader(), num_iter=25),
                num_runtime_threads=3)
            pipe(q2, processor=proc, num_runtime_threads=6)
        session.run(tg)
        self.assertEquals(totals[0].fetch(), 25)
        self.assertEquals(totals[1].fetch(), 25)
        self.assertEquals(totals[2].fetch(), 6)
Example #26
    def test_cached_reader(self):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)

        def build_source_reader(size):
            src_ds = init_dataset(ws, size)
            return src_ds.reader()

        with tempfile.NamedTemporaryFile(delete=False) as f:
            path = f.name
            f.close()
            os.remove(path)

            """ 1. Read data for the first time. """
            cached_reader1 = CachedReader(build_source_reader(100))
            init_step = cached_reader1.build_cache(path)
            session.run(init_step)

            data = read_all_data(ws, cached_reader1, session)
            self.assertEqual(sorted(data), list(range(100)))

            """ 2. Read data from cache. """
            workspace.ResetWorkspace()
            cached_reader2 = CachedReader(build_source_reader(200))
            init_step = cached_reader2.build_cache(path)
            session.run(init_step)

            data = read_all_data(ws, cached_reader2, session)
            self.assertEqual(sorted(data), list(range(100)))

            shutil.rmtree(path)

            """ 3. We removed cache so we expect to receive data from original
            reader. """
            workspace.ResetWorkspace()
            cached_reader3 = CachedReader(build_source_reader(300))
            init_step = cached_reader3.build_cache(path)
            session.run(init_step)

            data = read_all_data(ws, cached_reader3, session)
            self.assertEqual(sorted(data), list(range(300)))

            shutil.rmtree(path)
Example #27
    def test_multi_instance(self):
        NUM_INSTANCES = 10
        NUM_ITERS = 15
        with TaskGroup() as tg:
            with Task(num_instances=NUM_INSTANCES):
                with ops.task_init():
                    counter1 = ops.CreateCounter([], ['global_counter'])
                    counter2 = ops.CreateCounter([], ['global_counter2'])
                    counter3 = ops.CreateCounter([], ['global_counter3'])
                # Both task_counter and local_counter should be thread-local.
                with ops.task_instance_init():
                    task_counter = ops.CreateCounter([], ['task_counter'])
                local_counter = ops.CreateCounter([], ['local_counter'])
                with ops.loop(NUM_ITERS):
                    ops.CountUp(counter1)
                    ops.CountUp(task_counter)
                    ops.CountUp(local_counter)
                # gather sum of squares of local counters to make sure that
                # each local counter counted exactly up to NUM_ITERS, and
                # that there was no false sharing of counter instances.
                with ops.task_instance_exit():
                    count2 = ops.RetrieveCount(task_counter)
                    with ops.loop(ops.Mul([count2, count2])):
                        ops.CountUp(counter2)
                # This should have the same effect as the above
                count3 = ops.RetrieveCount(local_counter)
                with ops.loop(ops.Mul([count3, count3])):
                    ops.CountUp(counter3)
                # The code below will only run once
                with ops.task_exit():
                    total1 = final_output(ops.RetrieveCount(counter1))
                    total2 = final_output(ops.RetrieveCount(counter2))
                    total3 = final_output(ops.RetrieveCount(counter3))

        with LocalSession() as session:
            session.run(tg)
            self.assertEquals(total1.fetch(), NUM_INSTANCES * NUM_ITERS)
            self.assertEquals(total2.fetch(), NUM_INSTANCES * (NUM_ITERS**2))
            self.assertEquals(total3.fetch(), NUM_INSTANCES * (NUM_ITERS**2))
Example #28
    def test_local_session(self):
        init_net = core.Net('init')
        src_values = Struct(
            ('uid', np.array([1, 2, 6])),
            ('value', np.array([1.4, 1.6, 1.7])))
        expected_dst = Struct(
            ('uid', np.array([2, 4, 12])),
            ('value', np.array([0.0, 0.0, 0.0])))

        with core.NameScope('init'):
            src_blobs = NewRecord(init_net, src_values)
            dst_blobs = InitEmptyRecord(init_net, src_values.clone_schema())

        def proc1(rec):
            net = core.Net('proc1')
            with core.NameScope('proc1'):
                out = NewRecord(net, rec)
            net.Add([rec.uid(), rec.uid()], [out.uid()])
            out.value.set(blob=rec.value(), unsafe=True)
            return [net], out

        def proc2(rec):
            net = core.Net('proc2')
            with core.NameScope('proc2'):
                out = NewRecord(net, rec)
            out.uid.set(blob=rec.uid(), unsafe=True)
            net.Sub([rec.value(), rec.value()], [out.value()])
            return [net], out

        src_ds = Dataset(src_blobs)
        dst_ds = Dataset(dst_blobs)

        with TaskGroup() as tg:
            out1 = pipe(src_ds.reader(), processor=proc1)
            out2 = pipe(out1, processor=proc2)
            pipe(out2, dst_ds.writer())

        ws = workspace.C.Workspace()
        FeedRecord(src_blobs, src_values, ws)
        session = LocalSession(ws)
        session.run(init_net)
        session.run(tg)
        output = FetchRecord(dst_blobs, ws=ws)

        for a, b in zip(output.field_blobs(), expected_dst.field_blobs()):
            np.testing.assert_array_equal(a, b)
Example #29
    def test_upload_checkpoint(self):
        try:
            tmpdir = tempfile.mkdtemp()
            upload_dir = os.path.join(tmpdir, "upload")
            os.mkdir(upload_dir)
            num_nodes = 3

            # The uploaded files do not exist yet.
            for node_id in range(num_nodes):
                node_name = 'trainer_%d' % node_id
                upload_path = os.path.join(upload_dir, node_name)
                self.assertFalse(os.path.exists(upload_path))

            # Create and run the job runner.
            for node_id in range(3):
                ws = workspace.C.Workspace()
                session = LocalSession(ws)
                checkpoint = MultiNodeCheckpointManager(tmpdir, 'minidb')
                with Cluster():
                    with Job() as job:
                        build_pipeline(node_id)
                    compiled_job = job.compile(LocalSession)
                    local_upload_builder = UploadToLocalFile(upload_dir)
                    job_runner = JobRunner(
                        compiled_job,
                        checkpoint,
                        upload_task_group_builder=local_upload_builder)
                    num_epochs = job_runner(session)
                    self.assertEquals(num_epochs, len(EXPECTED_TOTALS))

            # The uploaded files should exist now.
            for node_id in range(num_nodes):
                node_name = 'trainer_%d' % node_id
                upload_path = os.path.join(upload_dir, node_name)
                self.assertTrue(os.path.exists(upload_path))

        finally:
            shutil.rmtree(tmpdir)
Example #30
 def test_setup(self):
     with Task() as task:
         with ops.task_init():
             one = ops.Const(1)
         two = ops.Add([one, one])
         with ops.task_init():
             three = ops.Const(3)
         accum = ops.Add([two, three])
         # here, accum should be 5
         with ops.task_exit():
             # here, accum should be 6, since this executes after lines below
             seven_1 = ops.Add([accum, one])
         six = ops.Add([accum, one])
         ops.Add([accum, one], [accum])
         seven_2 = ops.Add([accum, one])
         o6 = final_output(six)
         o7_1 = final_output(seven_1)
         o7_2 = final_output(seven_2)
     with LocalSession() as session:
         session.run(task)
         self.assertEquals(o6.fetch(), 6)
         self.assertEquals(o7_1.fetch(), 7)
         self.assertEquals(o7_2.fetch(), 7)
Example #31
    def test_cached_reader(self):
        ws = workspace.C.Workspace()
        session = LocalSession(ws)
        db_path = self._make_temp_path()

        # Read data for the first time.
        cached_reader1 = CachedReader(
            self._build_source_reader(ws, 100), db_path,
        )
        build_cache_step = cached_reader1.build_cache_step()
        session.run(build_cache_step)

        data = self._read_all_data(ws, cached_reader1, session)
        self.assertEqual(sorted(data), list(range(100)))

        # Read data from cache.
        workspace.ResetWorkspace()
        cached_reader2 = CachedReader(
            self._build_source_reader(ws, 200), db_path,
        )
        build_cache_step = cached_reader2.build_cache_step()
        session.run(build_cache_step)

        data = self._read_all_data(ws, cached_reader2, session)
        self.assertEqual(sorted(data), list(range(100)))

        self._delete_path(db_path)

        # We removed the cache, so we expect to receive data from the
        # original reader.
        workspace.ResetWorkspace()
        cached_reader3 = CachedReader(
            self._build_source_reader(ws, 300), db_path,
        )
        build_cache_step = cached_reader3.build_cache_step()
        session.run(build_cache_step)

        data = self._read_all_data(ws, cached_reader3, session)
        self.assertEqual(sorted(data), list(range(300)))

        self._delete_path(db_path)
Example #32
    def test_ckpt_save_failure(self):
        num_nodes = 3
        # The goal of this test is to ensure that the job runs
        # successfully even if saving a checkpoint fails.
        # Hence tmpdir is a nonexistent directory, to emulate a failure
        # while saving checkpoints.
        tmpdir = "/tmp/path_does_not_exist/"

        # Check the saving checkpoint failure does not cause job failure
        workspace.ResetWorkspace()
        for node_id in range(num_nodes):
            ws = workspace.C.Workspace()
            session = LocalSession(ws)
            checkpoint = MultiNodeCheckpointManager(tmpdir, 'minidb')
            with Cluster():
                with Job() as job:
                    build_pipeline(node_id)
                compiled_job = job.compile(LocalSession)
                job_runner = JobRunner(compiled_job, checkpoint)
                num_epochs = job_runner(session)
            # Make sure all epochs are executed even though saving the
            # checkpoint failed; a checkpoint save failure should not cause
            # job failure.
            self.assertEquals(num_epochs, len(EXPECTED_TOTALS))
Example #33
 def builder():
     ws = workspace.C.Workspace()
     session = LocalSession(ws)
     snapshot = SnapshotManager(tmp.name, 'minidb')
     return session, snapshot
Example #34
 def builder():
     ws = workspace.C.Workspace()
     session = LocalSession(ws)
     snapshot = MultiNodeSnapshotManager(tmpdir, 'minidb')
     return session, snapshot
Example #35
    def test_ckpt_name_and_load_model_from_ckpts(self):
        try:
            num_nodes = 3
            tmpdir = tempfile.mkdtemp()
            # First, check if the checkpoint name generation mechanism is
            # correct.
            checkpoint = MultiNodeCheckpointManager(tmpdir, 'minidb')
            with Cluster():
                with Job() as job:
                    for node_id in range(num_nodes):
                        build_pipeline(node_id)
                compiled_job = job.compile(LocalSession)
                checkpoint.init(compiled_job.nodes_to_checkpoint())

                for node_id in range(num_nodes):
                    epoch = 5
                    node_name = 'trainer_%d' % node_id
                    expected_db_name = tmpdir + '/' + node_name + '.5'
                    self.assertEquals(
                        checkpoint.get_ckpt_db_name(node_name, epoch),
                        expected_db_name)
            shutil.rmtree(tmpdir)

            # Next, check mechanism to load model from checkpoints.
            tmpdir = tempfile.mkdtemp()
            workspace.ResetWorkspace()
            for node_id in range(num_nodes):
                ws = workspace.C.Workspace()
                session = LocalSession(ws)
                checkpoint = MultiNodeCheckpointManager(tmpdir, 'minidb')
                with Cluster():
                    with Job() as job:
                        build_pipeline(node_id)
                    compiled_job = job.compile(LocalSession)
                    job_runner = JobRunner(compiled_job, checkpoint)
                    num_epochs = job_runner(session)
                self.assertEquals(num_epochs, len(EXPECTED_TOTALS))

                # There are 12 global blobs after finishing up the job runner.
                # (only blobs on init_group are checkpointed)
                self.assertEquals(len(ws.blobs), 12)

            ws = workspace.C.Workspace()
            session = LocalSession(ws)
            self.assertEquals(len(ws.blobs), 0)
            model_blob_names = ['trainer_1/task_2/GivenTensorInt64Fill:0',
                                'trainer_2/task_2/GivenTensorInt64Fill:0']
            checkpoint = MultiNodeCheckpointManager(tmpdir, 'minidb')
            with Cluster():
                with Job() as job:
                    for node_id in range(num_nodes):
                        build_pipeline(node_id)
                compiled_job = job.compile(LocalSession)
                job_runner = JobRunner(compiled_job, checkpoint)
                job_runner.load_blobs_from_checkpoints(
                    blob_names=model_blob_names, epoch=1, session=session)

                # Check that we can successfully load from checkpoints of epochs
                # 1 to 4, but not epoch 5.
                for epoch in range(1, 5):
                    self.assertTrue(
                        job_runner.load_blobs_from_checkpoints(
                            blob_names=model_blob_names, epoch=epoch,
                            session=session))
                    # Check that all the model blobs are loaded.
                    for blob_name in model_blob_names:
                        self.assertTrue(ws.has_blob(blob_name))
                        self.assertEquals(
                            ws.fetch_blob(blob_name),
                            np.array([EXPECTED_TOTALS[epoch - 1]]))
                self.assertFalse(
                    job_runner.load_blobs_from_checkpoints(
                        blob_names=model_blob_names, epoch=5, session=session))

        finally:
            shutil.rmtree(tmpdir)
Example #36
 def builder():
     ws = workspace.C.Workspace()
     session = LocalSession(ws)
     checkpoint = MultiNodeCheckpointManager(tmpdir, 'minidb')
     return session, checkpoint
Example #37
 def builder():
     ws = workspace.C.Workspace()
     session = LocalSession(ws)
     checkpoint = CheckpointManager(tmp.name, 'minidb')
     return session, checkpoint