Пример #1
0
    def setUp(self):
        """Create a SimIndexCollection backed by two in-memory shards.

        The collection is stored on ``self.sim_index`` and populated with
        two ``MemorySimIndex`` shards before the parent ``setUp`` runs.
        """
        print("SimIndexCollectionTest")

        self.sim_index = collection = SimIndexCollection()
        shard_count = 2
        for _ in range(shard_count):
            # One fresh in-memory shard is registered per call.
            collection.add_shards(MemorySimIndex())

        super(SimIndexCollectionTest, self).setUp()
Пример #2
0
def sample_sim_index_collection():
    """Demo: build a two-shard SimIndexCollection, index URLs, and query it.

    Two ``MemorySimIndex`` shards are added to a new collection, the
    'tfidf' query scorer is selected, four university home pages are
    indexed, and the result of querying 'stanford university' is
    pretty-printed.
    """
    print()
    print(
        "SimIndexCollection: build a collection, index some urls, and query it"
    )

    # Shards are created first, then attached to the collection in one call.
    shards = tuple(MemorySimIndex() for _ in range(2))
    collection = SimIndexCollection()
    collection.add_shards(*shards)
    collection.set_query_scorer('tfidf')

    urls = (
        'http://www.stanford.edu/',
        'http://www.berkeley.edu',
        'http://www.ucla.edu',
        'http://www.mit.edu',
    )
    collection.index_urls(*urls)

    results = collection.query('stanford university')
    pprint(results)
Пример #3
0
def sample_remote_indexes():
    """Demo: query a SimIndexCollection whose shards are remote servers.

    Two index servers are launched as daemon child processes on ports
    9000 and 9001.  A ``SimIndexCollection`` is then built from
    ``RemoteSimIndex`` proxies pointing at those servers, four URLs are
    indexed through it, and the result of querying 'stanford university'
    is pretty-printed.

    The server processes are always terminated before returning, even if
    starting, indexing, or querying raises; the exception still
    propagates to the caller.
    """
    print()
    print("SimIndexCollection with remote backend indexes")

    ports = [9000 + i for i in range(2)]

    # Only processes that were actually started are tracked, so the
    # cleanup below never calls terminate() on an unstarted Process.
    started = []
    try:
        for port in ports:
            process = Process(target=sim_server.start_sim_index_server,
                              kwargs={
                                  'port': port,
                                  'logRequests': False
                              })
            process.daemon = True
            process.start()
            started.append(process)

        # Fixed delay to give the child servers time to come up.
        print("Waiting for servers to start")
        time.sleep(1)

        remote_index_coll = SimIndexCollection()
        for port in ports:
            remote_index_coll.add_shards(
                RemoteSimIndex("http://localhost:{}/RPC2".format(port)))

        remote_index_coll.set_query_scorer('tfidf')

        remote_index_coll.index_urls('http://www.stanford.edu/',
                                     'http://www.berkeley.edu',
                                     'http://www.ucla.edu',
                                     'http://www.mit.edu')

        pprint(remote_index_coll.query('stanford university'))
    finally:
        # Shut the servers down on every exit path so a failure above does
        # not leave child processes running.
        for process in started:
            process.terminate()
        for process in started:
            # Bounded wait: reap the child without risking a hang if it
            # is slow to exit.
            process.join(5)
Пример #4
0
    def setUp(self):
        """Start a two-level tree of index servers and build the root node.

        Four leaf servers (ports 9100-9103) and two interior servers
        (ports 9200-9201) are started as daemon processes and recorded in
        ``self.processes``.  Each interior server is given two leaf proxies
        as its backends.  ``self.sim_index`` is a root
        ``SimIndexCollection`` whose shards are proxies to the two interior
        servers.
        """
        # setUpClass() may be more efficient for spinning up the servers,
        # but doing it per test is more robust: every test starts from a
        # clean slate. Otherwise clear() functionality would be needed.

        print("SimIndexRemoteCollectionTest")

        # The collection tree has the form:
        #
        #      Root
        #     /   \
        #    A     B
        #   /\     /\
        #  1  2   3  4
        self.processes = []

        rpc_url = "http://localhost:{}/RPC2"
        leaf_ports = [9100 + offset for offset in range(4)]
        interior_ports = [9200 + offset for offset in range(2)]

        # Leaves (1-4).
        for leaf_port in leaf_ports:
            leaf_server = Process(
                target=sim_server.start_sim_index_server,
                kwargs={'port': leaf_port, 'logRequests': False},
            )
            leaf_server.daemon = True
            leaf_server.start()
            self.processes.append(leaf_server)

        print("Waiting for leaf servers to start")
        time.sleep(0.1)

        # Proxies to the leaves, split pairwise: the first two belong to
        # interior node A, the last two to interior node B.
        leaf_proxies = [
            RemoteSimIndex(rpc_url.format(leaf_port))
            for leaf_port in leaf_ports
        ]
        leaf_nodes = [leaf_proxies[:2], leaf_proxies[2:]]

        # Interior nodes (A, B), each backed by its pair of leaves.
        for interior_port, backends in zip(interior_ports, leaf_nodes):
            interior_server = Process(
                target=sim_server.start_sim_index_server,
                kwargs={
                    'port': interior_port,
                    'backends': backends,
                    'root': False,
                    'logRequests': False,
                },
            )
            interior_server.daemon = True
            interior_server.start()
            self.processes.append(interior_server)

        print("Waiting for intermediate servers to start")
        time.sleep(0.1)

        interior_nodes = [
            RemoteSimIndex(rpc_url.format(interior_port))
            for interior_port in interior_ports
        ]

        # Root node, sharded across the two interior servers.
        self.sim_index = SimIndexCollection(root=True)
        self.sim_index.add_shards(*interior_nodes)

        super(SimIndexRemoteCollectionTest, self).setUp()