def __init__(self, solr_hosts, zookeeper_hosts=None, timeout=15):
    """
    Set up the HTTP session and the host bookkeeping for SOLR requests.

    :param solr_hosts: the SOLR hosts; stored both as the master list and
        as the current list.
    :param zookeeper_hosts: optional zookeeper hosts. When given, a
        Zookeeper client is built from them; otherwise no client is kept.
    :param timeout: stored as the timeout for requests.
    """
    self.client = requests.Session()
    self.timeout = timeout

    # Both lists start out as the hosts handed in by the caller.
    self.master_hosts = self.current_hosts = solr_hosts
    self.servers = []

    self.zookeeper = (
        None if zookeeper_hosts is None else Zookeeper(zookeeper_hosts)
    )

    self.last_error = None
    # time to revert to old host list (in minutes) after an error
    self.check_hosts = 5
class SolrRequest(object):
    """
    Handle requests to SOLR and response from SOLR
    """

    def __init__(self, solr_hosts, zookeeper_hosts=None, timeout=15):
        """
        :param solr_hosts: the SOLR host URLs; each request path is joined
            onto one of them.
        :param zookeeper_hosts: optional zookeeper hosts. When given, a
            Zookeeper client is created and asked for the active nodes
            after a failed request.
        :param timeout: timeout passed to every HTTP request.
        """
        self.client = requests.Session()
        self.master_hosts = solr_hosts
        self.current_hosts = solr_hosts
        self.servers = []
        self.timeout = timeout
        if zookeeper_hosts is not None:
            self.zookeeper = Zookeeper(zookeeper_hosts)
        else:
            self.zookeeper = None
        self.last_error = None
        # time to revert to old host list (in minutes) after an error
        self.check_hosts = 5

    def _refresh_hosts(self, tried):
        """
        Replace the host lists with the nodes zookeeper reports as active.

        Does nothing when no zookeeper client is configured.

        :param tried: hosts already attempted for the current request. They
            are left out of the retry queue so that a node which keeps
            failing cannot be retried forever.
        :raises SolrError: if zookeeper reports no active node.
        """
        # if the sdk knows where zookeeper lives
        if self.zookeeper is None:
            return
        self.last_error = time.time()
        # get the current list of active nodes from zookeeper
        curr_hosts = self.zookeeper.get_active_hosts()
        if not curr_hosts:
            # all nodes are down - so raise an error
            raise SolrError("SOLR reporting all nodes as down")
        self.current_hosts = curr_hosts
        # Build a separate list: popping from the retry queue must not
        # shrink current_hosts.
        self.servers = [host for host in curr_hosts if host not in tried]

    def request(self, path, params, method, body=None):
        """
        Prepare data and send request to SOLR servers

        Hosts are tried one at a time, starting from a shuffled copy of the
        current host list. When a host cannot be reached, or answers with a
        status other than 200, and other hosts are still queued, the next
        host is tried and the response of that retry is the one returned.

        :param path: path joined onto the host URL.
        :param params: query parameters, or None. The dict is copied; the
            caller's object is not modified.
        :param method: HTTP method name.
        :param body: optional request body.
        :returns: the result of ``process_response`` for the final response.
        :raises SolrError: if no host is available, if zookeeper reports
            every node as down, or if the last host tried could not be
            reached.
        """
        headers = {'content-type': 'application/json'}
        params = dict(params) if params is not None else {}
        params.update({'wt': 'json',
                       'omitHeader': 'true',
                       'json.nl': 'map'})

        # if there hasn't been an error in `check_hosts` minutes, reset the
        # solr_hosts (elapsed seconds are converted to minutes by dividing)
        if (self.last_error is not None and
                (time.time() - self.last_error) / 60.0 > self.check_hosts):
            self.current_hosts = self.master_hosts
            self.last_error = None

        tried = set()
        self.servers = list(self.current_hosts)
        if not self.servers:
            self._refresh_hosts(tried)
        if not self.servers:
            raise SolrError("No SOLR hosts available")
        random.shuffle(self.servers)

        while True:
            host = self.servers.pop(0)
            tried.add(host)
            fullpath = urljoin(host, path)
            try:
                response = self.client.request(method, fullpath,
                                               params=params,
                                               headers=headers,
                                               data=body,
                                               timeout=self.timeout)
            except ConnectionError as e:
                # Didn't successfully connect to the node
                if self.servers:
                    self._refresh_hosts(tried)
                if not self.servers:
                    raise SolrError(str(e))
                # try with another node
                continue

            # Connected to the node, but didn't get a successful response
            if (self.servers and
                    hasattr(response, 'status_code') and
                    response.status_code != 200):
                self._refresh_hosts(tried)
                if self.servers:
                    # try with another node
                    continue
            return process_response(response)

    def post(self, path, params=None, body=None):
        """
        Send a POST request to the SOLR servers
        """
        return self.request(path, params, 'POST', body=body)

    def get(self, path, params=None):
        """
        Send a GET request to the SOLR servers
        """
        return self.request(path, params, 'GET')
def __init__(self, solr_hosts, solr_collection,
             zookeeper_hosts=None, timeout=15, zookeeper_timeout=5):
    """
    Do all the interactions with SOLR server
    (e.g. update, select, get and delete)

    :param solr_hosts: the hosts for SOLR, as a list or a comma-separated
        string. May be None when zookeeper_hosts is given; the hosts are
        then looked up in zookeeper for the collection.
    :type solr_hosts: str or list
    :param solr_collection: the name of the collection in SOLR.
    :type solr_collection: str
    :param zookeeper_hosts: the hosts for zookeeper, comma-separated, with
        an optional ``/chroot`` suffix.
    :type zookeeper_hosts: str
    :param timeout: the timeout for request to SOLR.
    :type timeout: int
    :param zookeeper_timeout: the timeout handed to the Zookeeper client.
    :type zookeeper_timeout: int
    :raises solr_errors.SolrError: if solr_hosts or solr_collection is
        still None after the optional zookeeper lookup.
    """
    if solr_hosts is None and zookeeper_hosts is not None:
        logger.info('Getting solr hosts from zookeeper for collection %s',
                    solr_collection)
        zk = Zookeeper(zookeeper_hosts, zookeeper_timeout)
        solr_hosts = zk.get_active_hosts(collection_name=solr_collection)

    if solr_hosts is None or solr_collection is None:
        logger.error('Neither solr_hosts nor solr_collection has been set')
        raise solr_errors.SolrError(
            "Either solr_hosts or solr_collection can not be None")

    if not isinstance(solr_hosts, list):
        solr_hosts = solr_hosts.split(",")

    if zookeeper_hosts is not None:
        hostnames, sep, chroot = zookeeper_hosts.rpartition('/')
        # If hostnames is empty then there is no chroot. Set it to empty.
        if not hostnames:
            hostnames = zookeeper_hosts
            chroot = ''
        else:
            chroot = '/%s' % chroot

        logger.debug('Using solr via zookeeper at chroot %s', chroot)

        # Split only the host part: the raw string still carries the chroot
        # on its last entry, which would otherwise get it appended twice.
        self.zookeeper_hosts = [
            "http://%s%s" % (host, chroot,)
            for host in hostnames.split(",")
        ]

        logger.info('Connected to zookeeper hosts at %s',
                    self.zookeeper_hosts)
    else:
        logger.debug('Not using zookeeper for SolrCloud')
        self.zookeeper_hosts = None

    # NOTE(review): kept for the lazy `zookeeper` accessor, which reads
    # these two attributes; confirm it belongs to this class.
    self.zookeeper_timeout = zookeeper_timeout
    self._zookeeper = None

    logger.info('Connected to solr hosts %s', solr_hosts)
    self.solr_hosts = [_format_solr_url(host) for host in solr_hosts]
    self.solr_collection = solr_collection
    self.client = SolrRequest(solr_hosts=self.solr_hosts,
                              zookeeper_hosts=zookeeper_hosts,
                              timeout=timeout)
def zookeeper(self):
    """
    Return the cached Zookeeper client, building it on first use.

    A client is only built when zookeeper hosts are set; otherwise the
    cached value (None until a client exists) is returned as is.
    """
    if self._zookeeper is not None:
        return self._zookeeper
    if self.zookeeper_hosts:
        self._zookeeper = Zookeeper(self.zookeeper_hosts,
                                    self.zookeeper_timeout)
    return self._zookeeper
class TestZookeeper(unittest.TestCase):
    """Tests for the Zookeeper host lookup, run against a patched KazooClient."""

    # NOTE(review): the address in this literal is masked in the reviewed
    # source, and the mask also swallowed the end of the statement. Restore
    # the real test address before merging.
    zook_client = Zookeeper("http://*****:*****")

    # Cluster state shared by the tests: one collection with one shard and
    # two active replicas.
    _CLUSTER_STATE = json.dumps({
        'test_collection_one': {
            'shards': {
                'shard1': {
                    'range': '80000000-7fffffff',
                    'state': 'active',
                    'replicas': {
                        'core_node1': {
                            'state': 'active',
                            'core': 'test_collection_one_shard1_replica2',
                            'node_name': '127.0.0.1:8080_solr',
                            'base_url': 'http://127.0.0.1:8080/solr',
                            'leader': 'true',
                        },
                        'core_node2': {
                            'state': 'active',
                            'core': 'test_collection_one_shard1_replica1',
                            'node_name': '127.0.0.1:9090_solr',
                            'base_url': 'http://127.0.0.1:9090/solr',
                        },
                    },
                },
            },
            'maxShardsPerNode': '1',
            'router': {'name': 'compositeId'},
            'replicationFactor': '2',
            'autoAddReplicas': 'false',
        },
    })

    @patch('kazoo.client.KazooClient')
    def test_valid_clusterstate_file(self, mock_kazoo):
        class MockKazoo(object):
            def __init__(self):
                pass

            def start(self, *args, **kwargs):
                return True

            def get_children(self, *args, **kwargs):
                return ['test_collection_one']

            def get(self, *args, **kwargs):
                if args and args[0] == '/aliases.json':
                    data = None
                else:
                    data = TestZookeeper._CLUSTER_STATE
                return (data,)  # Note that this is a tuple on purpose

            def stop(self):
                return True

        mock_kazoo.return_value = MockKazoo()
        result = self.zook_client.get_active_hosts()
        # list.sort() returns None, so comparing two .sort() calls can never
        # fail; compare sorted copies instead.
        self.assertEqual(
            sorted(result),
            sorted([u'http://127.0.0.1:8080', u'http://127.0.0.1:9090'])
        )

    @patch('kazoo.client.KazooClient')
    def test_no_clusterstate_file(self, mock_kazoo):
        class MockKazoo(object):
            def __init__(self):
                pass

            def start(self, *args, **kwargs):
                return True

            def get_children(self, *args, **kwargs):
                return []

            def get(self, *args, **kwargs):
                return None

            def stop(self):
                return True

        mock_kazoo.return_value = MockKazoo()
        result = self.zook_client.get_active_hosts()
        self.assertEqual(result, [])

    @patch('kazoo.client.KazooClient')
    def test_bad_connection(self, mock_kazoo):
        class MockKazoo(object):
            def __init__(self):
                pass

            def start(self, *args, **kwargs):
                raise requests.exceptions.ConnectionError

            def get_children(self, *args, **kwargs):
                return []

            def get(self, *args, **kwargs):
                return None

            def stop(self):
                return True

        mock_kazoo.return_value = MockKazoo()
        result = self.zook_client.get_active_hosts()
        self.assertEqual(result, [])

    @patch('kazoo.client.KazooClient')
    def test_get_aliases(self, mock_kazoo):
        class MockKazoo(object):
            def __init__(self):
                pass

            def start(self, *args, **kwargs):
                return True

            def get_children(self, *args, **kwargs):
                return ['test_collection_one']

            def get(self, *args, **kwargs):
                if args and args[0] == '/aliases.json':
                    data = json.dumps({
                        'collection': {
                            'my_alias': 'test_collection_one',
                        }
                    })
                else:
                    data = TestZookeeper._CLUSTER_STATE
                return (data,)  # Note that this is a tuple on purpose

            def stop(self):
                return True

        mock_kazoo.return_value = MockKazoo()
        result = self.zook_client._get_active_hosts()
        # The alias must resolve to the same hosts as its collection.
        self.assertEqual(
            result['test_collection_one'],
            result['my_alias']
        )