示例#1
0
def sample_remote_indexes():
    """Demo: query a SimIndexCollection whose shards are remote index servers.

    Starts two server processes (ports 9000 and 9001) running
    ``sim_server.start_sim_index_server``, wraps each one in a
    ``RemoteSimIndex``, adds them as shards of a ``SimIndexCollection``,
    indexes four URLs and pretty-prints the result of one query.

    The server processes are terminated on the way out, including when
    indexing or querying raises.
    """
    print()
    print("SimIndexCollection with remote backend indexes")

    ports = [9000 + i for i in range(2)]

    # Only processes that were actually started are tracked, so the cleanup
    # below never calls terminate() on a process that was never launched.
    processes = []
    try:
        for port in ports:
            process = Process(target=sim_server.start_sim_index_server,
                              kwargs={'port': port, 'logRequests': False})
            process.daemon = True
            process.start()
            processes.append(process)

        # Fixed pause to give the servers time to come up before connecting.
        print("Waiting for servers to start")
        time.sleep(1)

        remote_index_coll = SimIndexCollection()
        for port in ports:
            remote_index_coll.add_shards(
                RemoteSimIndex("http://localhost:{}/RPC2".format(port)))

        remote_index_coll.set_query_scorer('tfidf')

        remote_index_coll.index_urls('http://www.stanford.edu/',
                                     'http://www.berkeley.edu',
                                     'http://www.ucla.edu',
                                     'http://www.mit.edu')

        pprint(remote_index_coll.query('stanford university'))
    finally:
        # Always stop the servers; otherwise a failure above would leave them
        # running and holding their ports.
        for process in processes:
            process.terminate()
示例#2
0
def sample_sim_index_collection():
    """Demo: shard a SimIndexCollection over two MemorySimIndex instances.

    Builds the collection, selects the 'tfidf' query scorer, indexes four
    URLs and pretty-prints the result of a single query.
    """
    print()
    print("SimIndexCollection: build a collection, index some urls, and query it")

    # Shards are created before the collection, one per in-memory index.
    shards = [MemorySimIndex() for _ in range(2)]
    urls = (
        'http://www.stanford.edu/',
        'http://www.berkeley.edu',
        'http://www.ucla.edu',
        'http://www.mit.edu',
    )

    index_coll = SimIndexCollection()
    index_coll.add_shards(*shards)
    index_coll.set_query_scorer('tfidf')
    index_coll.index_urls(*urls)

    results = index_coll.query('stanford university')
    pprint(results)
示例#3
0
def sample_sim_index_collection():
    """Demo of SimIndexCollection with two in-memory shards.

    Creates two MemorySimIndex shards, adds them to a new collection,
    switches the query scorer to 'tfidf', indexes four URLs, then
    pretty-prints what a sample query returns.
    """
    print()
    print(
        "SimIndexCollection: build a collection, index some urls, and query it"
    )

    first_shard = MemorySimIndex()
    second_shard = MemorySimIndex()

    index_coll = SimIndexCollection()
    index_coll.add_shards(first_shard, second_shard)
    index_coll.set_query_scorer('tfidf')

    urls_to_index = [
        'http://www.stanford.edu/',
        'http://www.berkeley.edu',
        'http://www.ucla.edu',
        'http://www.mit.edu',
    ]
    index_coll.index_urls(*urls_to_index)

    pprint(index_coll.query('stanford university'))
示例#4
0
class SimIndexCollectionTest(SimIndexTest, unittest.TestCase):
    '''
    Runs the shared SimIndexTest suite against a SimIndexCollection that is
    backed by two MemorySimIndex shards.

    Tests that exercise the SimIndex interface live in the parent class,
    SimIndexTest, so the same test code can be reused for every SimIndex
    implementation.  Tests for APIs that are not part of the parent class
    belong here.
    '''

    def setUp(self):
        '''Build the sharded collection under test, then run the parent setUp.'''
        print("SimIndexCollectionTest")

        collection = SimIndexCollection()
        shard_count = 2
        for _ in range(shard_count):
            # One add_shards() call per shard.
            collection.add_shards(MemorySimIndex())
        self.sim_index = collection

        super(SimIndexCollectionTest, self).setUp()

    def tearDown(self):
        '''Nothing to do: setUp starts no processes that would need stopping.'''
        pass
示例#5
0
def sample_remote_indexes():
    """Demo: query a SimIndexCollection whose shards are remote index servers.

    Starts two server processes (ports 9000 and 9001) running
    ``sim_server.start_sim_index_server``, wraps each one in a
    ``RemoteSimIndex``, adds them as shards of a ``SimIndexCollection``,
    indexes four URLs and pretty-prints the result of one query.

    The server processes are terminated on the way out, including when
    indexing or querying raises.
    """
    print()
    print("SimIndexCollection with remote backend indexes")

    ports = [9000 + i for i in range(2)]

    # Only processes that were actually started are tracked, so the cleanup
    # below never calls terminate() on a process that was never launched.
    processes = []
    try:
        for port in ports:
            process = Process(target=sim_server.start_sim_index_server,
                              kwargs={
                                  'port': port,
                                  'logRequests': False
                              })
            process.daemon = True
            process.start()
            processes.append(process)

        # Fixed pause to give the servers time to come up before connecting.
        print("Waiting for servers to start")
        time.sleep(1)

        remote_index_coll = SimIndexCollection()
        for port in ports:
            remote_index_coll.add_shards(
                RemoteSimIndex("http://localhost:{}/RPC2".format(port)))

        remote_index_coll.set_query_scorer('tfidf')

        remote_index_coll.index_urls('http://www.stanford.edu/',
                                     'http://www.berkeley.edu',
                                     'http://www.ucla.edu', 'http://www.mit.edu')

        pprint(remote_index_coll.query('stanford university'))
    finally:
        # Always stop the servers; otherwise a failure above would leave them
        # running and holding their ports.
        for process in processes:
            process.terminate()
示例#6
0
class SimIndexRemoteCollectionTest(SimIndexTest, unittest.TestCase):
    '''
    Runs the shared SimIndexTest suite against a two-level tree of remote
    index servers, each running in its own process.

    All tests hitting the SimIndex interface are in the parent class, SimIndexTest

    Tests for api's not in parent class are tested separately here.  This is
    so we can reuse test code across all implementations of SimIndex.
    '''

    # Server processes spawned by setUp(); None until setUp() has run.
    processes = None

    def setUp(self):
        '''
        Spawn the leaf and interior servers, build the root collection, and
        run the parent setUp.

        If anything fails after servers have been spawned, they are stopped
        before the exception propagates.
        '''
        # setUpClass() may be more efficient for spinning up the servers,
        # but this way is more robust (since we'll start each test from a
        # clean slate). Otherwise we'd need clear() functionality added.

        print("SimIndexRemoteCollectionTest")

        # We will create a collection tree of the form:
        #
        #      Root
        #     /   \
        #    A     B
        #   /\     /\
        #  1  2   3  4
        self.processes = []

        try:
            # start leaves (ports 9100-9103)
            for i in range(4):
                port = 9100 + i
                process = Process(target=sim_server.start_sim_index_server,
                                  kwargs={'port': port, 'logRequests': False})
                process.daemon = True
                process.start()
                self.processes.append(process)

            print("Waiting for leaf servers to start")
            time.sleep(0.1)

            # Group the leaf clients in pairs: leaves 0-1 go to interior
            # node A, leaves 2-3 to interior node B.
            leaf_nodes = [[], []]
            for i in range(4):
                port = 9100 + i
                leaf_nodes[i // 2].append(RemoteSimIndex(
                    "http://localhost:{}/RPC2".format(port)))

            # start interior nodes (A, B) on ports 9200-9201
            for i in range(2):
                port = 9200 + i
                process = Process(
                    target=sim_server.start_sim_index_server,
                    kwargs={'port': port,
                            'backends': leaf_nodes[i],
                            'root': False,
                            'logRequests': False}
                )
                process.daemon = True
                process.start()
                self.processes.append(process)

            print("Waiting for intermediate servers to start")
            time.sleep(0.1)

            interior_nodes = []
            for i in range(2):
                port = 9200 + i
                interior_nodes.append(
                    RemoteSimIndex("http://localhost:{}/RPC2".format(port)))

            # root node
            self.sim_index = SimIndexCollection(root=True)
            self.sim_index.add_shards(*interior_nodes)

            super(SimIndexRemoteCollectionTest, self).setUp()
        except BaseException:
            # unittest does not call tearDown() when setUp() raises, so stop
            # whatever was already spawned; otherwise the servers would keep
            # running and holding their ports.
            self._stop_processes()
            raise

    def tearDown(self):
        '''Stop every server process started by setUp.'''
        self._stop_processes()

    def _stop_processes(self):
        '''Terminate all tracked server processes, then pause briefly.'''
        for process in self.processes or []:
            process.terminate()
        # Short pause after terminate(), kept from the original tearDown().
        time.sleep(0.1)