示例#1
0
    def test_flow_add_set(self):
        """Rewire a built flow in two different ways and compare the results.

        Builds a five-service flow, derives a scorer-based flow from it once
        via ``remove`` + re-adding and once via ``set``, asserts both results
        are equal, and finally checks that a dumped-and-reloaded copy compares
        equal as well.
        """
        base = (
            Flow(check_version=False, route_table=True)
            .add_preprocessor(name='prep',
                              yaml_path='SentSplitPreprocessor',
                              replicas=4)
            .add_encoder(yaml_path='PyTorchTransformers', replicas=3)
            .add_indexer(name='vec_idx', yaml_path='NumpyIndexer', replicas=2)
            .add_indexer(name='doc_idx',
                         yaml_path='DictIndexer',
                         recv_from='prep',
                         replicas=2)
            .add_router(name='sync_barrier',
                        yaml_path='BaseReduceRouter',
                        num_part=2,
                        recv_from=['vec_idx', 'doc_idx'])
            .build(backend=None)
        )

        print(base.to_url())
        single_prep = base.set('prep', replicas=1).build(backend=None)
        print(single_prep.to_url())

        # First route to the query flow: drop the barrier and the doc indexer,
        # then append a scorer followed by a fresh doc indexer.
        trimmed = (base.remove('sync_barrier')
                   .remove('doc_idx')
                   .set_last_service('vec_idx'))
        score_yaml = os.path.join(self.dirname, 'yaml/flow-score.yml')
        via_remove = (
            trimmed
            .add_router('scorer', yaml_path=score_yaml)
            .add_indexer('doc_idx', yaml_path='DictIndexer', replicas=2)
            .build(backend=None)
        )

        print(via_remove.to_url())

        # Second route: keep 'doc_idx' but re-point it at the scorer through
        # ``set``, then remove the barrier.
        via_set = (
            base.set_last_service('vec_idx')
            .add_router('scorer', yaml_path=score_yaml)
            .set('doc_idx',
                 recv_from='scorer',
                 yaml_path='DictIndexer',
                 replicas=2,
                 clear_old_attr=True)
            .remove('sync_barrier')
            .set_last_service('doc_idx')
            .build(backend=None)
        )

        print(via_set.to_url())

        self.assertEqual(via_remove, via_set)

        # Appending one more service must make the flows compare unequal.
        extended = via_set.add_router('dummy', yaml_path='BaseRouter')
        self.assertNotEqual(via_remove, extended)

        print(via_remove.to_python_code())
        print(base.to_python_code())

        # Round-trip through dump/load and expect an equal flow back.
        via_remove.dump(self.dump_flow_path)
        reloaded = Flow.load(self.dump_flow_path)
        self.assertEqual(via_remove, reloaded)

        print(via_remove.to_swarm_yaml())
示例#2
0
 def test_query_flow_plot(self):
     """Assemble a five-service query flow and print the URL of its built form."""
     query_flow = (
         Flow(check_version=False, route_table=False)
         .add(gfs.Preprocessor,
              name='prep',
              yaml_path='SentSplitPreprocessor',
              replicas=2)
         .add(gfs.Encoder,
              yaml_path=os.path.join(self.dirname,
                                     'yaml/flow-transformer.yml'),
              replicas=3)
         .add(gfs.Indexer,
              name='vec_idx',
              yaml_path=os.path.join(self.dirname, 'yaml/flow-vecindex.yml'),
              replicas=4)
         .add(gfs.Router,
              name='scorer',
              yaml_path=os.path.join(self.dirname, 'yaml/flow-score.yml'))
         .add(gfs.Indexer,
              name='doc_idx',
              yaml_path=os.path.join(self.dirname, 'yaml/flow-dictindex.yml'))
     )
     # backend=None: the flow is only built, not entered as a context.
     built = query_flow.build(backend=None)
     print(built.to_url())
示例#3
0
    def _test_index_flow(self):
        """Index ``self.test_file`` on the thread backend and check outputs.

        The three ``*_bin`` paths must not exist before the run and must all
        exist after the flow context has exited.
        """
        for bin_path in (self.indexer1_bin, self.indexer2_bin,
                         self.encoder_bin):
            self.assertFalse(os.path.exists(bin_path))

        index_flow = (
            Flow(check_version=False, route_table=False)
            .add(gfs.Preprocessor,
                 name='prep',
                 yaml_path='SentSplitPreprocessor')
            .add(gfs.Encoder, yaml_path='yaml/flow-transformer.yml')
            .add(gfs.Indexer,
                 name='vec_idx',
                 yaml_path='yaml/flow-vecindex.yml')
            .add(gfs.Indexer,
                 name='doc_idx',
                 yaml_path='yaml/flow-dictindex.yml',
                 service_in='prep')
            .add(gfs.Router,
                 name='sync_barrier',
                 yaml_path='BaseReduceRouter',
                 num_part=2,
                 service_in=['vec_idx', 'doc_idx'])
        )

        with index_flow.build(backend='thread') as running:
            running.index(txt_file=self.test_file, batch_size=20)

        for bin_path in (self.indexer1_bin, self.indexer2_bin,
                         self.encoder_bin):
            self.assertTrue(os.path.exists(bin_path))
示例#4
0
    def _test_index_flow(self, backend):
        """Index ``self.test_file`` on the given backend and check outputs.

        :param backend: forwarded unchanged to ``Flow.build``.

        All three ``*_bin`` paths must be absent before the run; afterwards
        only the two indexer paths are checked for existence.
        """
        for bin_path in (self.indexer1_bin, self.indexer2_bin,
                         self.encoder_bin):
            self.assertFalse(os.path.exists(bin_path))

        index_flow = (
            Flow(check_version=False, route_table=False)
            .add_preprocessor(name='prep', yaml_path='SentSplitPreprocessor')
            .add_encoder(yaml_path=os.path.join(self.dirname,
                                                'yaml/flow-transformer.yml'),
                         replicas=3)
            .add_indexer(name='vec_idx',
                         yaml_path=os.path.join(self.dirname,
                                                'yaml/flow-vecindex.yml'))
            .add_indexer(name='doc_idx',
                         yaml_path=os.path.join(self.dirname,
                                                'yaml/flow-dictindex.yml'),
                         recv_from='prep')
            .add_router(name='sync_barrier',
                        yaml_path='BaseReduceRouter',
                        num_part=2,
                        recv_from=['vec_idx', 'doc_idx'])
        )

        with index_flow.build(backend=backend) as running:
            running.index(txt_file=self.test_file, batch_size=20)

        # Only the indexer outputs are verified here (not the encoder one).
        for bin_path in (self.indexer1_bin, self.indexer2_bin):
            self.assertTrue(os.path.exists(bin_path))
示例#5
0
 def test_flow_replica_pot(self):
     """Print the mermaid text and both URL layouts of a replicated flow."""
     built = (
         Flow(check_version=False, route_table=True)
         .add(gfs.Preprocessor,
              name='prep',
              yaml_path='SentSplitPreprocessor',
              replicas=4)
         .add(gfs.Encoder, yaml_path='PyTorchTransformers', replicas=3)
         .add(gfs.Indexer,
              name='vec_idx',
              yaml_path='NumpyIndexer',
              replicas=2)
         .add(gfs.Indexer,
              name='doc_idx',
              yaml_path='DictIndexer',
              recv_from='prep',
              replicas=2)
         .add(gfs.Router,
              name='sync_barrier',
              yaml_path='BaseReduceRouter',
              num_part=2,
              recv_from=['vec_idx', 'doc_idx'])
         .build(backend=None)
     )
     print(built.to_mermaid())
     # Same flow rendered with left_right off, then on.
     print(built.to_url(left_right=False))
     print(built.to_url(left_right=True))
示例#6
0
    def _test_query_flow(self, backend):
        """Query the flow with the first three lines of ``self.test_file``.

        :param backend: forwarded unchanged to ``Flow.build``.

        Each of the three lines is encoded to bytes and the resulting list is
        passed to ``query`` as ``bytes_gen``.
        """
        # Function-scope import so the method does not depend on the module
        # header already importing itertools.
        from itertools import islice

        flow = (
            Flow(check_version=False, route_table=False)
            .add(gfs.Preprocessor,
                 name='prep',
                 yaml_path='SentSplitPreprocessor')
            .add(gfs.Encoder,
                 yaml_path=os.path.join(self.dirname,
                                        'yaml/flow-transformer.yml'),
                 replicas=3)
            .add(gfs.Indexer,
                 name='vec_idx',
                 yaml_path=os.path.join(self.dirname,
                                        'yaml/flow-vecindex.yml'))
            .add(gfs.Router,
                 name='scorer',
                 yaml_path=os.path.join(self.dirname, 'yaml/flow-score.yml'))
            .add(gfs.Indexer,
                 name='doc_idx',
                 yaml_path=os.path.join(self.dirname,
                                        'yaml/flow-dictindex.yml'))
        )

        with flow.build(backend=backend) as f, open(self.test_file,
                                                    encoding='utf8') as fp:
            # Only three lines are needed: stop reading after them instead of
            # decoding/encoding the whole file and slicing the list afterwards.
            f.query(bytes_gen=[v.encode() for v in islice(fp, 3)])
示例#7
0
 def test_flow3(self):
     """Print edges and mermaid text for two routers wired to the frontend.

     ``r0`` is declared with ``send_to=Flow.Frontend`` and ``r1`` with
     ``recv_from=Flow.Frontend``.
     """
     built = (
         Flow(check_version=False, route_table=True)
         .add_router(name='r0', send_to=Flow.Frontend, yaml_path='BaseRouter')
         .add_router(name='r1',
                     recv_from=Flow.Frontend,
                     yaml_path='BaseRouter')
         .build(backend=None)
     )
     print(built._service_edges)
     print(built.to_mermaid())
示例#8
0
 def test_flow3(self):
     """Print edges and mermaid text for two routers wired to the frontend.

     ``r0`` is declared with ``service_out=gfs.Frontend`` and ``r1`` with
     ``service_in=gfs.Frontend``.
     """
     built = (
         Flow(check_version=False, route_table=True)
         .add(gfs.Router,
              name='r0',
              service_out=gfs.Frontend,
              yaml_path='BaseRouter')
         .add(gfs.Router,
              name='r1',
              service_in=gfs.Frontend,
              yaml_path='BaseRouter')
         .build(backend=None)
     )
     print(built._service_edges)
     print(built.to_mermaid())
示例#9
0
 def test_flow2(self):
     """Chain eight ``BaseRouter`` services, build, and print the graph."""
     chain = Flow(check_version=False, route_table=True)
     # Each ``add`` returns the flow to continue from, so re-bind every time.
     for _ in range(8):
         chain = chain.add(gfs.Router, yaml_path='BaseRouter')
     built = chain.build(backend=None)
     print(built._service_edges)
     print(built.to_mermaid())
示例#10
0
 def test_flow5(self):
     """Build a flow whose two indexers join at a reduce router; print it."""
     built = (
         Flow(check_version=False, route_table=True)
         .add_preprocessor(name='prep', yaml_path='SentSplitPreprocessor')
         .add_encoder(yaml_path='PyTorchTransformers')
         .add_indexer(name='vec_idx', yaml_path='NumpyIndexer')
         .add_indexer(name='doc_idx',
                      yaml_path='DictIndexer',
                      recv_from='prep')
         .add_router(name='sync_barrier',
                     yaml_path='BaseReduceRouter',
                     num_part=2,
                     recv_from=['vec_idx', 'doc_idx'])
         .build(backend=None)
     )
     print(built._service_edges)
     print(built.to_mermaid())
示例#11
0
    def test_flow1(self):
        """Grow two flows from a shared start and build them separately.

        The flow derived first builds fine after one extra router; after both
        flows receive another router, the first-named one still builds while
        building the other is expected to raise ``FlowTopologyError``.
        """
        start = Flow(check_version=False, route_table=True)
        f = start.add(gfs.Router, yaml_path='BaseRouter')
        g = f.add(gfs.Router, yaml_path='BaseRouter')

        print('f: %r g: %r' % (f, g))
        g.build()
        print(g.to_mermaid())

        # Extend both flows by one more router each.
        f = f.add(gfs.Router, yaml_path='BaseRouter')
        g = g.add(gfs.Router, yaml_path='BaseRouter')

        print('f: %r g: %r' % (f, g))
        f.build()
        print(f.to_mermaid())
        self.assertRaises(FlowTopologyError, g.build)
示例#12
0
    def test_flow1(self):
        """Grow two flows from a shared start and build both of them.

        Each flow is extended with ``add_router`` and built; the test passes
        as long as none of the ``build`` calls raises.
        """
        start = Flow(check_version=False, route_table=True)
        f = start.add_router(yaml_path='BaseRouter')
        g = f.add_router(yaml_path='BaseRouter')

        print('f: %r g: %r' % (f, g))
        g.build()
        print(g.to_mermaid())

        # Extend both flows by one more router each.
        f = f.add_router(yaml_path='BaseRouter')
        g = g.add_router(yaml_path='BaseRouter')

        print('f: %r g: %r' % (f, g))
        f.build()
        print(f.to_mermaid())
        g.build()
示例#13
0
    def test_flow1_ctx(self):
        """Use a single-router flow as a context manager on the process backend.

        First runs ``index`` and ``train`` inside the context. Then checks
        that a flow modified inside the context raises
        ``FlowBuildLevelMismatch`` when ``index`` is called on it. Finally
        prints the mermaid text of the flow built without a backend.
        """
        flow = (Flow(check_version=False,
                     route_table=False).add_router(yaml_path='BaseRouter'))

        # The flow reads ``self.test_file`` itself via ``txt_file``; no extra
        # file handle needs to be opened alongside the flow context.
        with flow(backend='process', copy_flow=True) as f:
            f.index(txt_file=self.test_file, batch_size=4)
            f.train(txt_file=self.test_file, batch_size=4)

        with flow(backend='process', copy_flow=True) as f:
            # Changing the flow inside the context must make requests fail.
            f = f.add_router(yaml_path='BaseRouter')
            self.assertRaises(FlowBuildLevelMismatch,
                              f.index,
                              txt_file=self.test_file,
                              batch_size=4)

        print(flow.build(backend=None).to_mermaid())
示例#14
0
    def _test_query_flow(self):
        """Run ``query`` over ``self.test_file`` on the thread backend."""
        query_flow = (
            Flow(check_version=False, route_table=False)
            .add(gfs.Preprocessor,
                 name='prep',
                 yaml_path='SentSplitPreprocessor')
            .add(gfs.Encoder, yaml_path='yaml/flow-transformer.yml')
            .add(gfs.Indexer,
                 name='vec_idx',
                 yaml_path='yaml/flow-vecindex.yml')
            .add(gfs.Router, name='scorer', yaml_path='yaml/flow-score.yml')
            .add(gfs.Indexer,
                 name='doc_idx',
                 yaml_path='yaml/flow-dictindex.yml')
        )

        with query_flow.build(backend='thread') as running:
            running.query(txt_file=self.test_file)
示例#15
0
import os

from gnes.flow import Flow

from helper import read_flowers

os.environ['TEST_WORKDIR'] = '/tmp/gnes-flow-demo'

# This demo does not implement incremental indexing, so remove any existing
# index before running `python index.py`:
# $ rm $TEST_WORKDIR/*.bin

# Index flow: 'prep' feeds both the encoder -> 'vec_idx' branch and 'doc_idx';
# the 'sync' router receives from both indexers.
flow = (
    Flow(check_version=False, ctrl_with_ipc=True)
    .add_preprocessor(name='prep', yaml_path='yaml/prep.yml', replicas=5)
    .add_encoder(yaml_path='yaml/incep.yml', replicas=6)
    .add_indexer(name='vec_idx', yaml_path='yaml/vec.yml')
    .add_indexer(name='doc_idx', yaml_path='yaml/doc.yml', recv_from='prep')
    .add_router(name='sync',
                yaml_path='BaseReduceRouter',
                num_part=2,
                recv_from=['vec_idx', 'doc_idx'])
)

# Print a URL for checking what the flow looks like.
print(flow.build(backend=None).to_url())

# Run the indexing on the process backend, 64 items per batch.
with flow(backend='process') as fl:
    fl.index(bytes_gen=read_flowers(), batch_size=64)
示例#16
0
 def test_flow1_ctx_empty(self):
     f = (Flow(check_version=False,
               route_table=True).add(gfs.Router, yaml_path='BaseRouter'))
     with f(backend='process'):
         pass
示例#17
0
import os

from gnes.flow import Flow

from helper import read_flowers, bytes2ndarray

os.environ['TEST_WORKDIR'] = '/tmp/gnes-flow-demo'

# Query flow: every service is appended after the previous one.
flow = (
    Flow(check_version=False)
    .add_preprocessor(name='prep', yaml_path='yaml/prep.yml')
    .add_encoder(yaml_path='yaml/incep.yml')
    .add_indexer(name='vec_idx', yaml_path='yaml/vec.yml')
    .add_router(name='scorer', yaml_path='yaml/score.yml')
    .add_indexer(name='doc_idx', yaml_path='yaml/doc.yml')
)

# Print a URL for checking what the flow looks like.
print(flow.build(backend=None).to_url())

# Settings used by the query code that follows.
num_q = 20
topk = 10
sample_rate = 0.05

# do the query
results = []
with flow.build(backend='process') as fl:
    for q, r in fl.query(bytes_gen=read_flowers(sample_rate)):
        q_img = q.search.query.raw_bytes
        r_imgs = [k.doc.raw_bytes for k in r.search.topk_results]
        r_scores = [k.score.value for k in r.search.topk_results]
        results.append((q_img, r_imgs, r_scores))
        if len(results) > num_q: