def test_flow_add_set(self):
    """Build an index flow, then derive two equivalent query flows from it.

    Exercises remove/set/set_last_service editing, flow equality,
    dump/load round-tripping, and the various export formats.
    """
    # Index flow: two indexers fed in parallel, joined by a reduce barrier.
    f = (Flow(check_version=False, route_table=True)
         .add_preprocessor(name='prep',
                           yaml_path='SentSplitPreprocessor',
                           replicas=4)
         .add_encoder(yaml_path='PyTorchTransformers', replicas=3)
         .add_indexer(name='vec_idx', yaml_path='NumpyIndexer', replicas=2)
         .add_indexer(name='doc_idx', yaml_path='DictIndexer',
                      recv_from='prep', replicas=2)
         .add_router(name='sync_barrier', yaml_path='BaseReduceRouter',
                     num_part=2, recv_from=['vec_idx', 'doc_idx'])
         .build(backend=None))
    print(f.to_url())
    print(f.set('prep', replicas=1).build(backend=None).to_url())

    # Query flow #1: strip the barrier and doc_idx, then append a scorer
    # and a fresh doc_idx at the tail.
    f1 = (f.remove('sync_barrier')
          .remove('doc_idx')
          .set_last_service('vec_idx')
          .add_router('scorer',
                      yaml_path=os.path.join(self.dirname,
                                             'yaml/flow-score.yml'))
          .add_indexer('doc_idx', yaml_path='DictIndexer', replicas=2)
          .build(backend=None))
    print(f1.to_url())

    # Query flow #2: same topology reached by editing doc_idx in place
    # instead of removing and re-adding it.
    f2 = (f.set_last_service('vec_idx')
          .add_router('scorer',
                      yaml_path=os.path.join(self.dirname,
                                             'yaml/flow-score.yml'))
          .set('doc_idx', recv_from='scorer', yaml_path='DictIndexer',
               replicas=2, clear_old_attr=True)
          .remove('sync_barrier')
          .set_last_service('doc_idx')
          .build(backend=None))
    print(f2.to_url())

    self.assertEqual(f1, f2)
    self.assertNotEqual(f1, f2.add_router('dummy', yaml_path='BaseRouter'))
    print(f1.to_python_code())
    print(f.to_python_code())

    # A dump/load round trip must preserve flow equality.
    f1.dump(self.dump_flow_path)
    f3 = Flow.load(self.dump_flow_path)
    self.assertEqual(f1, f3)
    print(f1.to_swarm_yaml())
def test_query_flow_plot(self):
    """Assemble a query flow with the generic ``add`` API and print its URL."""
    yml = lambda p: os.path.join(self.dirname, p)  # shorthand for fixture paths
    flow = (Flow(check_version=False, route_table=False)
            .add(gfs.Preprocessor, name='prep',
                 yaml_path='SentSplitPreprocessor', replicas=2)
            .add(gfs.Encoder,
                 yaml_path=yml('yaml/flow-transformer.yml'), replicas=3)
            .add(gfs.Indexer, name='vec_idx',
                 yaml_path=yml('yaml/flow-vecindex.yml'), replicas=4)
            .add(gfs.Router, name='scorer',
                 yaml_path=yml('yaml/flow-score.yml'))
            .add(gfs.Indexer, name='doc_idx',
                 yaml_path=yml('yaml/flow-dictindex.yml')))
    # backend=None builds the topology only; no services are started.
    print(flow.build(backend=None).to_url())
def _test_index_flow(self):
    """Run an index flow on the thread backend and check the dump files appear.

    Uses the ``service_in`` keyword of the generic ``add`` API.
    """
    dump_files = [self.indexer1_bin, self.indexer2_bin, self.encoder_bin]
    for path in dump_files:
        self.assertFalse(os.path.exists(path))

    flow = (Flow(check_version=False, route_table=False)
            .add(gfs.Preprocessor, name='prep',
                 yaml_path='SentSplitPreprocessor')
            .add(gfs.Encoder, yaml_path='yaml/flow-transformer.yml')
            .add(gfs.Indexer, name='vec_idx',
                 yaml_path='yaml/flow-vecindex.yml')
            .add(gfs.Indexer, name='doc_idx',
                 yaml_path='yaml/flow-dictindex.yml', service_in='prep')
            .add(gfs.Router, name='sync_barrier',
                 yaml_path='BaseReduceRouter', num_part=2,
                 service_in=['vec_idx', 'doc_idx']))

    with flow.build(backend='thread') as f:
        f.index(txt_file=self.test_file, batch_size=20)

    # Indexing should have persisted all three components to disk.
    for path in dump_files:
        self.assertTrue(os.path.exists(path))
def _test_index_flow(self, backend):
    """Run an index flow on the given backend and check the dumps appear.

    :param backend: execution backend passed to ``flow.build``
                    (e.g. 'thread' or 'process').
    """
    all_bins = [self.indexer1_bin, self.indexer2_bin, self.encoder_bin]
    for path in all_bins:
        self.assertFalse(os.path.exists(path))

    flow = (Flow(check_version=False, route_table=False)
            .add_preprocessor(name='prep', yaml_path='SentSplitPreprocessor')
            .add_encoder(yaml_path=os.path.join(
                self.dirname, 'yaml/flow-transformer.yml'), replicas=3)
            .add_indexer(name='vec_idx', yaml_path=os.path.join(
                self.dirname, 'yaml/flow-vecindex.yml'))
            .add_indexer(name='doc_idx', yaml_path=os.path.join(
                self.dirname, 'yaml/flow-dictindex.yml'), recv_from='prep')
            .add_router(name='sync_barrier', yaml_path='BaseReduceRouter',
                        num_part=2, recv_from=['vec_idx', 'doc_idx']))

    with flow.build(backend=backend) as f:
        f.index(txt_file=self.test_file, batch_size=20)

    # NOTE(review): only the two indexer dumps are asserted here, unlike the
    # pre-check above which also covers encoder_bin — confirm whether the
    # replicated encoder is expected to dump on this backend.
    for path in [self.indexer1_bin, self.indexer2_bin]:
        self.assertTrue(os.path.exists(path))
def test_flow_replica_pot(self):
    """Build a replicated flow and print its mermaid/URL renderings."""
    f = (Flow(check_version=False, route_table=True)
         .add(gfs.Preprocessor, name='prep',
              yaml_path='SentSplitPreprocessor', replicas=4)
         .add(gfs.Encoder, yaml_path='PyTorchTransformers', replicas=3)
         .add(gfs.Indexer, name='vec_idx',
              yaml_path='NumpyIndexer', replicas=2)
         .add(gfs.Indexer, name='doc_idx', yaml_path='DictIndexer',
              recv_from='prep', replicas=2)
         .add(gfs.Router, name='sync_barrier', yaml_path='BaseReduceRouter',
              num_part=2, recv_from=['vec_idx', 'doc_idx'])
         .build(backend=None))
    # Render the topology in both orientations.
    print(f.to_mermaid())
    print(f.to_url(left_right=False))
    print(f.to_url(left_right=True))
def _test_query_flow(self, backend):
    """Run a query flow on the given backend with the first three test lines.

    :param backend: execution backend passed to ``flow.build``.
    """
    yml = lambda p: os.path.join(self.dirname, p)  # fixture-path shorthand
    flow = (Flow(check_version=False, route_table=False)
            .add(gfs.Preprocessor, name='prep',
                 yaml_path='SentSplitPreprocessor')
            .add(gfs.Encoder,
                 yaml_path=yml('yaml/flow-transformer.yml'), replicas=3)
            .add(gfs.Indexer, name='vec_idx',
                 yaml_path=yml('yaml/flow-vecindex.yml'))
            .add(gfs.Router, name='scorer',
                 yaml_path=yml('yaml/flow-score.yml'))
            .add(gfs.Indexer, name='doc_idx',
                 yaml_path=yml('yaml/flow-dictindex.yml')))

    with flow.build(backend=backend) as f, \
            open(self.test_file, encoding='utf8') as fp:
        # Query with the first three lines of the test file, utf-8 encoded.
        f.query(bytes_gen=[v.encode() for v in fp][:3])
def test_flow3(self):
    """Wire two routers to the frontend explicitly via send_to/recv_from."""
    f = (Flow(check_version=False, route_table=True)
         .add_router(name='r0', send_to=Flow.Frontend,
                     yaml_path='BaseRouter')
         .add_router(name='r1', recv_from=Flow.Frontend,
                     yaml_path='BaseRouter')
         .build(backend=None))
    print(f._service_edges)
    print(f.to_mermaid())
def test_flow3(self):
    """Wire two routers to the frontend via service_out/service_in."""
    f = (Flow(check_version=False, route_table=True)
         .add(gfs.Router, name='r0', service_out=gfs.Frontend,
              yaml_path='BaseRouter')
         .add(gfs.Router, name='r1', service_in=gfs.Frontend,
              yaml_path='BaseRouter')
         .build(backend=None))
    print(f._service_edges)
    print(f.to_mermaid())
def test_flow2(self):
    """Chain eight identical routers and inspect the resulting edges."""
    f = Flow(check_version=False, route_table=True)
    # Each .add returns a new flow, so rebind on every step — equivalent
    # to the fluent eight-call chain.
    for _ in range(8):
        f = f.add(gfs.Router, yaml_path='BaseRouter')
    f = f.build(backend=None)
    print(f._service_edges)
    print(f.to_mermaid())
def test_flow5(self):
    """Build a parallel index topology with the typed add_* helpers."""
    f = (Flow(check_version=False, route_table=True)
         .add_preprocessor(name='prep', yaml_path='SentSplitPreprocessor')
         .add_encoder(yaml_path='PyTorchTransformers')
         .add_indexer(name='vec_idx', yaml_path='NumpyIndexer')
         .add_indexer(name='doc_idx', yaml_path='DictIndexer',
                      recv_from='prep')
         .add_router(name='sync_barrier', yaml_path='BaseReduceRouter',
                     num_part=2, recv_from=['vec_idx', 'doc_idx'])
         .build(backend=None))
    print(f._service_edges)
    print(f.to_mermaid())
def test_flow1(self):
    """Check that flows fork on ``add`` and stale branches fail to build."""
    f = Flow(check_version=False, route_table=True).add(
        gfs.Router, yaml_path='BaseRouter')
    # g branches off f; the two flows are now independent objects.
    g = f.add(gfs.Router, yaml_path='BaseRouter')
    print('f: %r g: %r' % (f, g))
    g.build()
    print(g.to_mermaid())

    # Extend both branches once more.
    f = f.add(gfs.Router, yaml_path='BaseRouter')
    g = g.add(gfs.Router, yaml_path='BaseRouter')
    print('f: %r g: %r' % (f, g))
    f.build()
    print(f.to_mermaid())
    # g was extended after being built, so rebuilding it must fail.
    self.assertRaises(FlowTopologyError, g.build)
def test_flow1(self):
    """Check that flows fork on ``add_router`` and build independently."""
    f = Flow(check_version=False,
             route_table=True).add_router(yaml_path='BaseRouter')
    # g branches off f; the two flows are now independent objects.
    g = f.add_router(yaml_path='BaseRouter')
    print('f: %r g: %r' % (f, g))
    g.build()
    print(g.to_mermaid())

    # Extend both branches once more.
    f = f.add_router(yaml_path='BaseRouter')
    g = g.add_router(yaml_path='BaseRouter')
    print('f: %r g: %r' % (f, g))
    f.build()
    print(f.to_mermaid())
    # NOTE(review): unlike the gfs-API variant of this test, the result of
    # this build is not asserted — confirm whether it is expected to raise.
    g.build()
def test_flow1_ctx(self):
    """Run a one-router flow as a context manager and verify that mutating
    a built flow invalidates its build level.

    Fix: the first ``with`` also opened ``self.test_file`` into an ``fp``
    handle that was never read — a dead resource acquisition, removed.
    """
    flow = Flow(check_version=False,
                route_table=False).add_router(yaml_path='BaseRouter')

    # copy_flow=True runs against a copy, leaving `flow` itself unbuilt.
    with flow(backend='process', copy_flow=True) as f:
        f.index(txt_file=self.test_file, batch_size=4)
        f.train(txt_file=self.test_file, batch_size=4)

    with flow(backend='process', copy_flow=True) as f:
        # Changing the flow after it has been built must fail on use.
        f = f.add_router(yaml_path='BaseRouter')
        self.assertRaises(FlowBuildLevelMismatch, f.index,
                          txt_file=self.test_file, batch_size=4)

    print(flow.build(backend=None).to_mermaid())
def _test_query_flow(self):
    """Run a query flow on the thread backend against the test text file."""
    flow = (Flow(check_version=False, route_table=False)
            .add(gfs.Preprocessor, name='prep',
                 yaml_path='SentSplitPreprocessor')
            .add(gfs.Encoder, yaml_path='yaml/flow-transformer.yml')
            .add(gfs.Indexer, name='vec_idx',
                 yaml_path='yaml/flow-vecindex.yml')
            .add(gfs.Router, name='scorer',
                 yaml_path='yaml/flow-score.yml')
            .add(gfs.Indexer, name='doc_idx',
                 yaml_path='yaml/flow-dictindex.yml'))

    with flow.build(backend='thread') as f:
        f.query(txt_file=self.test_file)
import os

from gnes.flow import Flow
from helper import read_flowers

os.environ['TEST_WORKDIR'] = '/tmp/gnes-flow-demo'

# This demo does not support incremental indexing, so clear any existing
# index before running `python index.py`:
#   $ rm $TEST_WORKDIR/*.bin
flow = (Flow(check_version=False, ctrl_with_ipc=True)
        .add_preprocessor(name='prep', yaml_path='yaml/prep.yml', replicas=5)
        .add_encoder(yaml_path='yaml/incep.yml', replicas=6)
        .add_indexer(name='vec_idx', yaml_path='yaml/vec.yml')
        .add_indexer(name='doc_idx', yaml_path='yaml/doc.yml',
                     recv_from='prep')
        .add_router(name='sync', yaml_path='BaseReduceRouter',
                    num_part=2, recv_from=['vec_idx', 'doc_idx']))

# Print a shareable rendering of the flow topology.
print(flow.build(backend=None).to_url())

# Index all images with the process backend, 64 documents per batch.
with flow(backend='process') as fl:
    fl.index(bytes_gen=read_flowers(), batch_size=64)
def test_flow1_ctx_empty(self):
    """A one-router flow should start and stop cleanly with no work done."""
    f = Flow(check_version=False, route_table=True).add(
        gfs.Router, yaml_path='BaseRouter')
    with f(backend='process'):
        pass
import os from gnes.flow import Flow from helper import read_flowers, bytes2ndarray os.environ['TEST_WORKDIR'] = '/tmp/gnes-flow-demo' flow = (Flow(check_version=False).add_preprocessor( name='prep', yaml_path='yaml/prep.yml').add_encoder( yaml_path='yaml/incep.yml').add_indexer( name='vec_idx', yaml_path='yaml/vec.yml').add_router( name='scorer', yaml_path='yaml/score.yml').add_indexer( name='doc_idx', yaml_path='yaml/doc.yml')) # checkout how the flow looks like print(flow.build(backend=None).to_url()) num_q = 20 topk = 10 sample_rate = 0.05 # do the query results = [] with flow.build(backend='process') as fl: for q, r in fl.query(bytes_gen=read_flowers(sample_rate)): q_img = q.search.query.raw_bytes r_imgs = [k.doc.raw_bytes for k in r.search.topk_results] r_scores = [k.score.value for k in r.search.topk_results] results.append((q_img, r_imgs, r_scores)) if len(results) > num_q: