def sample_remote_indexes():
    '''
    Demo: build a SimIndexCollection whose shards are remote index servers.

    Two index servers are launched as daemon child processes on ports 9000
    and 9001, wrapped in RemoteSimIndex proxies and added as shards.  A few
    urls are then indexed and the result of a sample query is pretty-printed.

    The servers are always shut down on the way out, including when
    indexing or querying raises, so a failed run does not leave them
    holding their ports.
    '''
    print()
    print("SimIndexCollection with remote backend indexes")

    ports = [9000 + i for i in range(2)]
    processes = []
    for port in ports:
        process = Process(target=sim_server.start_sim_index_server,
                          kwargs={'port': port, 'logRequests': False})
        process.daemon = True
        processes.append(process)

    # Only processes that were actually started can be terminated, so keep
    # track of them separately from the ones that were merely constructed.
    started = []
    try:
        for process in processes:
            process.start()
            started.append(process)
        print("Waiting for servers to start")
        time.sleep(1)

        remote_index_coll = SimIndexCollection()
        for port in ports:
            remote_index_coll.add_shards(
                RemoteSimIndex("http://localhost:{}/RPC2".format(port)))
        remote_index_coll.set_query_scorer('tfidf')
        remote_index_coll.index_urls('http://www.stanford.edu/',
                                     'http://www.berkeley.edu',
                                     'http://www.ucla.edu',
                                     'http://www.mit.edu')
        pprint(remote_index_coll.query('stanford university'))
    finally:
        for process in started:
            process.terminate()
        # Reap the children; the timeout keeps a stuck child from hanging
        # the caller.
        for process in started:
            process.join(1)
def sample_sim_index_collection():
    '''
    Demo: shard a SimIndexCollection over two in-memory indexes.

    Creates two MemorySimIndex shards, adds them to a fresh collection,
    selects the 'tfidf' query scorer, indexes four urls and pretty-prints
    the result of querying for 'stanford university'.
    '''
    print()
    print("SimIndexCollection: build a collection, index some urls, and query it")

    # Shards are created before the collection, as in the original ordering.
    shards = tuple(MemorySimIndex() for _ in range(2))
    collection = SimIndexCollection()
    collection.add_shards(*shards)
    collection.set_query_scorer('tfidf')

    seed_urls = (
        'http://www.stanford.edu/',
        'http://www.berkeley.edu',
        'http://www.ucla.edu',
        'http://www.mit.edu',
    )
    collection.index_urls(*seed_urls)

    hits = collection.query('stanford university')
    pprint(hits)
def sample_sim_index_collection():
    '''
    Walk through the basic SimIndexCollection workflow.

    Steps: build two MemorySimIndex backends, register them as shards of a
    new collection, switch the scorer to 'tfidf', index a handful of
    university home pages, then pretty-print the response to a sample query.
    '''
    banner = "SimIndexCollection: build a collection, index some urls, and query it"
    print()
    print(banner)

    first_shard = MemorySimIndex()
    second_shard = MemorySimIndex()

    sharded_index = SimIndexCollection()
    sharded_index.add_shards(first_shard, second_shard)
    sharded_index.set_query_scorer('tfidf')
    sharded_index.index_urls(
        'http://www.stanford.edu/',
        'http://www.berkeley.edu',
        'http://www.ucla.edu',
        'http://www.mit.edu',
    )

    pprint(sharded_index.query('stanford university'))
class SimIndexCollectionTest(SimIndexTest, unittest.TestCase):
    '''
    Runs the shared SimIndex interface tests against a SimIndexCollection.

    The interface-level tests are inherited from SimIndexTest so the same
    test code can be reused for every SimIndex implementation; only tests
    for APIs that the parent class does not cover belong here.
    '''

    def setUp(self):
        '''Build a collection with two in-memory shards, then run the parent setUp.'''
        print("SimIndexCollectionTest")
        self.sim_index = SimIndexCollection()
        # Each shard is registered with its own add_shards() call.
        for _ in range(2):
            shard = MemorySimIndex()
            self.sim_index.add_shards(shard)
        super(SimIndexCollectionTest, self).setUp()

    def tearDown(self):
        '''Intentionally a no-op: this override performs no cleanup.'''
def sample_remote_indexes():
    '''
    Demo: query a SimIndexCollection backed by remote index servers.

    Starts two index servers as daemon child processes (ports 9000 and
    9001), adds a RemoteSimIndex proxy for each as a shard of a new
    collection, indexes four urls and pretty-prints a sample query result.

    Server shutdown happens in a ``finally`` clause so the child processes
    are terminated even if indexing or querying fails part-way through.
    '''
    print()
    print("SimIndexCollection with remote backend indexes")

    processes = []
    for i in range(2):
        port = 9000 + i
        process = Process(target=sim_server.start_sim_index_server,
                          kwargs={
                              'port': port,
                              'logRequests': False
                          })
        process.daemon = True
        processes.append(process)

    # terminate() is only valid on a started process, so record which ones
    # got that far in case start() itself fails for a later server.
    running = []
    try:
        for process in processes:
            process.start()
            running.append(process)
        print("Waiting for servers to start")
        time.sleep(1)

        remote_index_coll = SimIndexCollection()
        for i in range(2):
            port = 9000 + i
            remote_index_coll.add_shards(
                RemoteSimIndex("http://localhost:{}/RPC2".format(port)))
        remote_index_coll.set_query_scorer('tfidf')
        remote_index_coll.index_urls('http://www.stanford.edu/',
                                     'http://www.berkeley.edu',
                                     'http://www.ucla.edu',
                                     'http://www.mit.edu')
        pprint(remote_index_coll.query('stanford university'))
    finally:
        for process in running:
            process.terminate()
        # Bounded join so the children are reaped without risking a hang.
        for process in running:
            process.join(1)
class SimIndexRemoteCollectionTest(SimIndexTest, unittest.TestCase):
    '''
    Runs the shared SimIndex interface tests against a tree of remote
    index servers.

    The interface-level tests are inherited from SimIndexTest so the same
    test code can be reused for every SimIndex implementation; only tests
    for APIs that the parent class does not cover belong here.
    '''

    # Server processes started by setUp(); replaced by a fresh list per test.
    processes = None

    def _spawn_server(self, port, **extra_kwargs):
        '''
        Start a daemon index-server process on `port` and record it.

        Any `extra_kwargs` are passed to the server entry point alongside
        `port` and `logRequests=False`.
        '''
        kwargs = {'port': port, 'logRequests': False}
        kwargs.update(extra_kwargs)
        process = Process(target=sim_server.start_sim_index_server,
                          kwargs=kwargs)
        process.daemon = True
        process.start()
        self.processes.append(process)

    def _stop_servers(self):
        '''Terminate and reap every server process recorded so far.'''
        started = self.processes or []
        for process in started:
            process.terminate()
        # Join with a timeout so the children are reaped (and their sockets
        # closed) without letting a stuck child hang the test run.
        for process in started:
            process.join(1)

    def setUp(self):
        '''
        Start the server tree, build the root collection, then run the
        parent setUp.

        If anything fails part-way through, the servers already started
        are stopped before the error propagates, because unittest does not
        call tearDown() when setUp() raises.
        '''
        # setUpClass() may be more efficient for spinning up the servers,
        # but this way is more robust (since we'll start each test from a
        # clean slate). Otherwise we'd need clear() functionality added.
        print("SimIndexRemoteCollectionTest")

        # We will create a collection tree of the form:
        #
        #            Root
        #           /    \
        #          A      B
        #         /\      /\
        #        1  2    3  4
        self.processes = []
        try:
            # start leaves
            for i in range(4):
                self._spawn_server(9100 + i)
            print("Waiting for leaf servers to start")
            time.sleep(0.1)

            # Leaves 1,2 go under A (index 0); leaves 3,4 under B (index 1).
            leaf_nodes = [[], []]
            for i in range(4):
                port = 9100 + i
                leaf_nodes[i // 2].append(RemoteSimIndex(
                    "http://localhost:{}/RPC2".format(port)))

            # start interior nodes (A, B)
            for i in range(2):
                self._spawn_server(9200 + i,
                                   backends=leaf_nodes[i],
                                   root=False)
            print("Waiting for intermediate servers to start")
            time.sleep(0.1)

            interior_nodes = []
            for i in range(2):
                port = 9200 + i
                interior_nodes.append(
                    RemoteSimIndex("http://localhost:{}/RPC2".format(port)))

            # root node
            self.sim_index = SimIndexCollection(root=True)
            self.sim_index.add_shards(*interior_nodes)

            super(SimIndexRemoteCollectionTest, self).setUp()
        except Exception:
            self._stop_servers()
            raise

    def tearDown(self):
        '''Shut down every server process started by setUp().'''
        self._stop_servers()
        # Short grace period retained from the original teardown.
        time.sleep(0.1)