def setUp(self):
    """Create a SimIndexCollection backed by two in-memory shards.

    The collection is stored on ``self.sim_index``; the parent class's
    ``setUp`` runs afterwards.
    """
    print("SimIndexCollectionTest")
    collection = SimIndexCollection()
    self.sim_index = collection
    # Shards are registered one call at a time, two in total.
    for _ in range(2):
        collection.add_shards(MemorySimIndex())
    super(SimIndexCollectionTest, self).setUp()
def sample_sim_index_collection():
    """Sample: build a SimIndexCollection, index some urls, and query it.

    Two MemorySimIndex shards are added to the collection, the 'tfidf'
    query scorer is selected, four urls are indexed, and the result of
    querying for 'stanford university' is pretty-printed.
    """
    print()
    banner = "SimIndexCollection: build a collection, index some urls, and query it"
    print(banner)

    # Shards are constructed before the collection that will own them.
    shards = [MemorySimIndex() for _ in range(2)]
    collection = SimIndexCollection()
    collection.add_shards(*shards)
    collection.set_query_scorer('tfidf')

    urls = (
        'http://www.stanford.edu/',
        'http://www.berkeley.edu',
        'http://www.ucla.edu',
        'http://www.mit.edu',
    )
    collection.index_urls(*urls)

    results = collection.query('stanford university')
    pprint(results)
def sample_remote_indexes():
    """Sample: query a SimIndexCollection whose shards are remote servers.

    Two daemon processes running ``sim_server.start_sim_index_server`` are
    started on ports 9000 and 9001. A collection is then built from
    RemoteSimIndex proxies pointing at them, four urls are indexed with the
    'tfidf' scorer selected, and the result of querying for
    'stanford university' is pretty-printed.

    The server processes are always terminated and joined before this
    function returns, including when indexing or querying raises, so the
    ports are not left occupied by orphaned servers.
    """
    print()
    print("SimIndexCollection with remote backend indexes")

    ports = [9000 + i for i in range(2)]
    # Only processes that were actually started are recorded here, so the
    # cleanup below never calls terminate() on an unstarted Process.
    processes = []
    try:
        for port in ports:
            process = Process(
                target=sim_server.start_sim_index_server,
                kwargs={'port': port, 'logRequests': False},
            )
            process.daemon = True
            process.start()
            processes.append(process)

        print("Waiting for servers to start")
        time.sleep(1)

        remote_index_coll = SimIndexCollection()
        for port in ports:
            remote_index_coll.add_shards(
                RemoteSimIndex("http://localhost:{}/RPC2".format(port)))
        remote_index_coll.set_query_scorer('tfidf')
        remote_index_coll.index_urls('http://www.stanford.edu/',
                                     'http://www.berkeley.edu',
                                     'http://www.ucla.edu',
                                     'http://www.mit.edu')
        pprint(remote_index_coll.query('stanford university'))
    finally:
        # Signal every server first, then reap them, so shutdown of the
        # servers overlaps instead of happening one at a time.
        for process in processes:
            process.terminate()
        for process in processes:
            process.join()
def setUp(self):
    # setUpClass() might be cheaper for spinning up the servers, but doing
    # it here is more robust: every test starts from a clean slate. Sharing
    # servers between tests would need clear() functionality to be added.
    print("SimIndexRemoteCollectionTest")

    # The collection tree assembled below has the form:
    #
    #          Root
    #         /    \
    #        A      B
    #       / \    / \
    #      1   2  3   4

    def rpc_url(port):
        # XML-RPC endpoint of a server listening on the given local port.
        return "http://localhost:{}/RPC2".format(port)

    def launch(**server_kwargs):
        # Start one daemonized server process and record it on the fixture.
        worker = Process(target=sim_server.start_sim_index_server,
                         kwargs=server_kwargs)
        worker.daemon = True
        worker.start()
        self.processes.append(worker)

    self.processes = []
    leaf_ports = [9100 + n for n in range(4)]
    interior_ports = [9200 + n for n in range(2)]

    # Leaves (1-4).
    for port in leaf_ports:
        launch(port=port, logRequests=False)
    print("Waiting for leaf servers to start")
    time.sleep(0.1)

    # Consecutive pairs of leaves become the backends of one interior node.
    leaf_nodes = [
        [RemoteSimIndex(rpc_url(port))
         for port in leaf_ports[first:first + 2]]
        for first in (0, 2)
    ]

    # Interior nodes (A, B).
    for port, backends in zip(interior_ports, leaf_nodes):
        launch(port=port, backends=backends, root=False, logRequests=False)
    print("Waiting for intermediate servers to start")
    time.sleep(0.1)

    interior_nodes = [RemoteSimIndex(rpc_url(port))
                      for port in interior_ports]

    # Root node lives in the test process itself.
    self.sim_index = SimIndexCollection(root=True)
    self.sim_index.add_shards(*interior_nodes)
    super(SimIndexRemoteCollectionTest, self).setUp()