class LRUCache:
    '''
    A least-recently used cache
    * distributed, composed of server nodes (each server is a node)
    * resize cluster dynamically
    * each server node can have a custom MAX_SIZE and TIMEOUT
    * data entries can expire if they are not called within TIMEOUT
    * the LRU data entries are evicted first if MAX_SIZE is exceeded
    * roughly even distribution of keys between server nodes

    This implementation combines a hash ring with consistent hashing
    and a doubly-linked list.
    '''

    def __init__(self, load_function):
        '''
        params:
            load_function: on a cache miss, this function will be used
                to load a value into the cache, given a key
        '''
        self.load_function = load_function
        # Start with an empty ring; servers are registered via add_server().
        self.hr = HashRing(nodes=[])
        self.servers = {}

    def add_server(self, id, MAX_SIZE, TIMEOUT):
        '''
        Add a server to the ring.
        params:
            id: to identify the server
            MAX_SIZE: int, max number of entries in the server
            TIMEOUT: int or float, seconds after use before an entry
                times out and is removed from the cache
        '''
        self.servers[id] = ServerNode(id, self.load_function, MAX_SIZE, TIMEOUT)
        self.hr.add_node(id)

    def remove_server(self, id):
        '''Remove a server from the ring.'''
        del self.servers[id]
        self.hr.remove_node(id)

    def get(self, key):
        '''Return the value corresponding with a given key.'''
        target_server_id = self.hr.get_node(key)
        return self.servers[target_server_id].get(key)

    def get_state(self):
        '''Return a dict in the form {server_id: state of that server node}.'''
        # A dict comprehension yields {} for an empty cluster, so the old
        # explicit `len(self.servers) > 0` guard was redundant.
        return {sid: node.get_state() for sid, node in self.servers.items()}
def test_hash_fn():
    """A ring built with a custom hash_fn must use it in hashi()."""
    nodes = ["172.31.1.0", "172.31.1.125", "172.31.1.202"]
    ring = HashRing(nodes, hash_fn=lambda key: key + '_hash')
    assert ring.hashi('coconut') == 'coconut_hash'
def test_ring_growth(ring):
    """Adding nodes one at a time must reproduce the fixture ring exactly."""
    grown = HashRing()
    for name in ring.nodes:
        grown.add_node(name)
    assert ring.ring == grown.ring
    assert ring.distribution == grown.distribution
def node_to_contact(name):
    """Pick the live node responsible for *name* via a consistent hash ring."""
    status = status_check()
    live_nodes = [candidate for candidate in NODE_LIST
                  if status[candidate] == 'Live']
    ring = HashRing(nodes=live_nodes, hash_fn=mmh)
    node = ring.get_node(name)
    print(node)
    return node
def test_range(ring):
    """Exercise HashRing.range() sizing/uniqueness and iterate_nodes()."""
    r = list(ring.range("test"))
    r.sort()
    # BUG FIX: dict.values() is a view object in Python 3 and has no
    # .sort() method -- build a sorted list instead.
    c = sorted(ring.conf.values())
    assert r == c

    r = list(ring.range("test", size=2, unique=True))
    assert len(r) == 2

    r = list(ring.range("test", size=2, unique=False))
    assert len(r) == 2

    r = list(ring.range("1800"))  # 1800 is at position 0
    r = list(ring.range("849"))  # 849 is at position -2

    r = list(ring.range("test", size=None, unique=False))
    assert len(r) == ring.size

    n = []
    for node in ring.iterate_nodes("test"):
        n.append(node)
    assert len(n) == 3

    # An empty ring yields a single None.
    ring_empty = HashRing()
    for node in ring_empty.iterate_nodes("test"):
        assert node is None
def test_range(ring):
    """Exercise HashRing.range() sizing/uniqueness and iterate_nodes()."""
    r = list(ring.range('test'))
    r.sort()
    # BUG FIX: dict.values() is a view object in Python 3 and has no
    # .sort() method -- build a sorted list instead.
    c = sorted(ring.conf.values())
    assert r == c

    r = list(ring.range('test', size=2, unique=True))
    assert len(r) == 2

    r = list(ring.range('test', size=2, unique=False))
    assert len(r) == 2

    r = list(ring.range('1800'))  # 1800 is at position 0
    r = list(ring.range('849'))  # 849 is at position -2

    r = list(ring.range('test', size=None, unique=False))
    assert len(r) == ring.size

    # Renamed ambiguous local `l` (easily confused with 1/I).
    seen = []
    for node in ring.iterate_nodes('test'):
        seen.append(node)
    assert len(seen) == 3

    # An empty ring yields a single None.
    ring_empty = HashRing()
    for node in ring_empty.iterate_nodes('test'):
        assert node is None
class MetadataService(abstractMetadataService):
    """Tracks which node owns each key, using a consistent hash ring."""

    def __init__(self, nodes=None):
        # BUG FIX: the original default was a mutable list (["node_1"]),
        # which is shared across all instances; use a None sentinel.
        if nodes is None:
            nodes = ["node_1"]
        self._nodes = nodes
        self._hr = HashRing(nodes=nodes)
        self._key_to_node = {}
        log.debug(f"Init of Metadata Service is complete. Nodes are: {nodes}")

    def get_node(self, key):
        """Resolve *key* to its owning node and record the assignment."""
        hashed_key = self._hr.get_key(key)
        log.debug(f"Retrieved hashed key: {hashed_key} from key: {key} from the hashring")
        node = self._hr.get_node(hashed_key)
        log.debug(f"The node to associated with the hashed key: {hashed_key} is: {node}")
        self._key_to_node[key] = node
        return node

    def create_node(self, node_name):
        """Add a new node to the ring and the node list."""
        self._hr.add_node(node_name)
        # BUG FIX: the attribute is `_nodes`; `self.nodes` raised AttributeError.
        self._nodes.append(node_name)
        log.debug(f"Added the node: {node_name} to the hashring")

    def get_all_metadata(self):
        """Return all (key, node) assignments recorded so far."""
        all_items = self._key_to_node.items()
        log.debug(f"All the items in the metadata service are: {all_items}")
        return all_items

    def get_all_keys_for_node(self, node_name):
        """Return every key currently assigned to *node_name*."""
        # BUG FIX: the key->node mapping lives in _key_to_node (iterating
        # self._nodes would unpack plain node names), and the computed
        # list was never returned.
        key_list = [key for key, node in self._key_to_node.items()
                    if node == node_name]
        log.debug(f"The mapping of keys to nodes in the metadata service is: {key_list}")
        return key_list

    # TODO, allow reconstruction from a config file in case of power loss
    def reconstruct_from_config(self, config):
        pass
def main(args):
    """Distribute LSH-hashed families over *n* consistent-hash nodes and print them.

    args: [filename, l, k, n] -- file of comma-separated families, LSH
    parameters l and k, and the node count n.
    """
    filename = args[0]
    l = int(args[1])
    k = int(args[2])
    n = int(args[3])
    # create a consistent hash ring of n nodes
    # BUG FIX: xrange is Python 2 only; this snippet already uses print().
    node_names = [str(node) for node in range(n)]
    hr = HashRing(nodes=node_names)
    family_map = {str(node): [] for node in range(n)}
    # BUG FIX: the file handle was never closed; use a context manager.
    with open(filename, "r") as fh:
        for f in fh:
            family = f.replace(" ", "").replace("\n", "")
            fam = family.split(",")
            # get k hashes for this family
            hash_codes = lsh(fam, l, k)
            # assign k hashes to nodes
            for h in hash_codes:
                node_id = hr.get_node(h)
                family_map[node_id].append(family)
    for node_id in family_map:
        print("Node_" + str(int(node_id) + 1) + "-0_gspfams=" + ";".join(family_map[node_id]) + ";")
def test_ring_growth_meta(ring_fast):
    """Incremental add_node on a compat=False ring must match the fixture."""
    grown = HashRing(compat=False)
    for name in ring_fast.nodes:
        grown.add_node(name)
    assert ring_fast._nodes == grown._nodes
    assert ring_fast.ring == grown.ring
    assert ring_fast.distribution == grown.distribution
def test_ring_growth_ketama(ring):
    """Incremental add_node on a ketama ring must match the fixture."""
    grown = HashRing(hash_fn='ketama')
    for name in ring.nodes:
        grown.add_node(name)
    assert ring._nodes == grown._nodes
    assert ring.ring == grown.ring
    assert ring.distribution == grown.distribution
def test_hash_fn():
    """A user-supplied hash_fn must be applied verbatim by hashi()."""
    ring = HashRing(
        ["172.31.1.0", "172.31.1.125", "172.31.1.202"],
        hash_fn=lambda key: key + "_hash",
    )
    assert ring.hashi("coconut") == "coconut_hash"
def __init__(self, nodes):
    """Build a ketama-hashed consistent ring over *nodes*.

    :param nodes: iterable of node identifiers to place on the ring
    """
    self.nodes = nodes
    self.hr = HashRing(nodes=nodes, hash_fn='ketama')
def __init__(self, load_function):
    """Create an empty distributed cache.

    :param load_function: called with a key on a cache miss to produce
        the value that should be stored.
    """
    self.load_function = load_function
    # The ring starts empty; server nodes are registered later.
    self.hr = HashRing(nodes=[])
    self.servers = {}
def consistentHashing(trace, nodeNum):
    """Count how many keys in *trace* land on each node of a consistent hash ring.

    :param trace: iterable of keys to route
    :param nodeNum: number of nodes to set up
    """
    nodes = setUp("consistent", nodeNum)
    # create a new consistent hash ring with the nodes
    hr = HashRing(nodes, vnodes=200)
    servers = np.zeros(len(nodes))
    for each in trace:
        # Node names are assumed to be 1-based numeric strings.
        index = int(hr.get_node(each)) - 1
        servers[index] += 1
    # BUG FIX: `print x` is Python 2 statement syntax (SyntaxError on
    # Python 3); the parenthesized form works on both.
    print("consistentHashing")
    print(servers)
def get_nodes(self, bucket_name: str) -> list:
    """Return the hostnames of the 3 replica nodes for *bucket_name*.

    Each chosen node is removed from a throwaway ring copy so the next
    pick falls on a different node.

    BUG FIX: the return annotation said ``str`` but a list of hostnames
    is returned.
    """
    hash_ring = HashRing(self._nodes)
    member_nodes = []
    for _ in range(3):  # idiom: range(3), not range(0, 3)
        target_node = hash_ring.get(bucket_name)['hostname']
        member_nodes.append(target_node)
        hash_ring.remove_node(target_node)
    return member_nodes
def __init__(self, servers=('127.0.0.1:11211', ), username=None, password=None,
             compression=None, socket_timeout=SOCKET_TIMEOUT, pickle_protocol=0,
             pickler=pickle.Pickler, unpickler=pickle.Unpickler):
    """Connect to *servers* and build a consistent-hash ring over them.

    :param servers: iterable of "host:port" memcached addresses
    :param pickle_protocol: protocol passed to the pickler for stored values
    """
    super(DistributedClient, self).__init__(servers, username, password,
                                            compression, socket_timeout,
                                            pickle_protocol, pickler, unpickler)
    # Must run after super().__init__, which is assumed to populate
    # self._servers with the connected client objects.
    self._ring = HashRing(self._servers)
class PartitionHashing:
    """Routes keys to partition ids via consistent hashing."""

    # One ring shared by all callers, built once at class-definition time
    # over partition ids 0..PARTITIONS-1.
    consistent_hashing = HashRing(nodes=list(range(PARTITIONS)))

    @classmethod
    def get_partition(cls, key):
        """Return the partition id (ring node) responsible for *key*."""
        return cls.consistent_hashing.get_node(key)
def init_multiarg(*allargs):
    """Build nested list structures (and matching HashRing slots) whose shape
    follows the dimensions given in *allargs*, storing the results in the
    module-level globals ``multiarg``, ``multiarg_host`` and ``multiarg_cs``.
    """
    l = list(allargs)
    global multiarg
    global multiarg_host
    global multiarg_cs
    temp = [[]]
    temph = [[]]
    host = [[]]  # NOTE(review): assigned but never used -- confirm it is dead.
    cs = HashRing([], replicas=0, vnodes=20)
    cs = [cs]
    # Dimensions are consumed innermost-first (reversed argument order).
    for arg in l[::-1]:
        multiarg = []
        multiarg_host = []
        multiarg_cs = []
        # NOTE(review): list multiplication repeats REFERENCES -- every cell
        # of `temp * arg` is the same inner list, and every cell of
        # `cs * arg` is the same HashRing instance. Mutating one cell
        # mutates them all; confirm this aliasing is intended.
        multiarg.append(temp * arg)
        multiarg_host.append(temph * arg)
        multiarg_cs.append(cs * arg)
        temp = multiarg
        temph = multiarg_host
        cs = multiarg_cs
        #print(res)
    # Strip the outermost single-element wrapper added by the loop.
    multiarg = multiarg[0]
    multiarg_host = multiarg_host[0]
    multiarg_cs = multiarg_cs[0]
def test_distribution():
    """Random keys should spread roughly evenly over every node."""
    ring = HashRing()
    numnodes = 10
    numhits = 1000
    numvalues = 10000
    for i in range(1, numnodes + 1):
        ring["node{}".format(i)] = {"instance": "node_value{}".format(i)}

    distribution = Counter()
    for _ in range(numhits):
        picked = ring[str(randint(1, numvalues))]
        distribution[picked] += 1

    # count of hits matches what is observed
    assert sum(distribution.values()) == numhits
    # spread is tight: usually well under 20
    assert _pop_std_dev(distribution.values()) <= 20
    # all nodes should be used
    assert len(distribution) == numnodes
    # lookups return the stored values, not keys or indexes
    expected = set("node_value{}".format(i) for i in range(1, 1 + numnodes))
    assert set(distribution.keys()) == expected
def config_client(registries_input, testmode, gettype, wait, accelerater):
    """Initialise module-level test configuration and the redis cluster client.

    Stores the arguments in module globals, builds a consistent hash ring
    over the registries, and connects to the matching redis cluster.
    """
    global ring
    global rediscli_dbrecipe
    global rjpool_dbNoBFRecipe
    global registries
    global Testmode
    global Gettype
    global Wait
    global Accelerater
    registries = registries_input
    ring = HashRing(nodes=registries)
    Testmode = testmode
    Gettype = gettype
    Wait = wait
    Accelerater = accelerater
    print("The testmode is: ", Testmode)
    print("The Gettype is: ", Gettype)
    print("The Wait is: ", Wait)
    print("The Accelerater is: ", Accelerater)
    # BUG FIX: `print registries` is Python 2 statement syntax -- a
    # SyntaxError in Python 3, which the rest of this function targets.
    print(registries)
    # The registry address doubles as the cluster selector.
    if "192.168.0.170:5000" in registries:
        startup_nodes = startup_nodes_hulks
        print("==========> Testing HULKS <============: ", startup_nodes)
    else:
        startup_nodes = startup_nodes_thors
        print("==========> Testing THORS <============: ", startup_nodes)
    rediscli_dbrecipe = StrictRedisCluster(startup_nodes=startup_nodes,
                                           decode_responses=True)
def get_hash_ring(cls, event_type):
    """Return the hash ring for *event_type*, creating and caching it lazily."""
    try:
        return cls._hash_rings_dict[event_type]
    except KeyError:
        # First request for this event type: build a ring over its partitions.
        ring = HashRing(nodes=list(range(EventPartition[event_type].value)))
        cls._hash_rings_dict[event_type] = ring
        return ring
def __init__(self, args):
    """DIP load balancer: maps connections to server buckets via a consistent
    hash ring.

    :param args: parsed options; must provide ``vnodes`` plus whatever the
        LbDip base class consumes (e.g. ``number_of_servers``).
    """
    super(LbDip, self).__init__(args)
    # Server ids are plain ints 0..number_of_servers-1.
    self.nodes_names = [x for x in range(0, self.number_of_servers)]
    self.collector = DipCollector(args)
    self.hr = HashRing(nodes=self.nodes_names, vnodes=args.vnodes)  #TODO modify vnodes
    self.connection_2_bucket = {}
    self.past_connection = {
    }  # dictionary recording the bucket each connection connected to, used to find connections that are moved unnecessarily
    # Pre-create an empty connection list for every ring point (bucket),
    # grouped under the server that owns the bucket.
    self.server_2_buckets_connections = {
        x: {}
        for x in self.hr.get_nodes()
    }
    for bucket_id, node in self.hr.get_points():
        self.server_2_buckets_connections[node][bucket_id] = []
def ring():
    """Pytest fixture: an equally weighted 3-node ketama ring."""
    weights = {'node1': 1, 'node2': 1, 'node3': 1}
    return HashRing(nodes=weights, hash_fn='ketama')
def __init__(self, address):
    """Bind this event server's sockets and seed a one-node hash ring.

    :param address: "host:port" string this server listens on.
    """
    self.addr = address
    logger.info('EventServer addr: ' + address)
    # NOTE(review): repSocket/pubSocket are bound (not created) here -- they
    # are presumably created elsewhere in the enclosing class; confirm.
    self.repSocket.bind("tcp://" + address)
    self.pubSocket.bind("tcp://" + getPubFromAddress(address))
    # Start the ring with ourselves as the only member.
    self.mHashRing = HashRing(nodes=[address])
    self.mRingOrganizer = ringOrganizer(self.mHashRing,
                                        getRingOrgFromAddress(address))
    self.mRingOrganizer.nodes.add(address)
def test_ketama_compatibility(ketama_config_file):
    """The pure-python ring must match the ketama C binding point-for-point."""
    if not ketama:
        return
    nodes = {"127.0.0.1:11211": 600, "127.0.0.1:11212": 400}
    ring = HashRing(nodes=nodes, replicas=4, vnodes=40, compat=True)
    continuum = ketama.Continuum(ketama_config_file)

    # Identical continuum layout...
    assert ring.get_points() == continuum.get_points()

    # ...and identical server selection for a sample of random keys.
    numhits = 1000
    numvalues = 10000
    for _ in range(numhits):
        key = str(randint(1, numvalues))
        assert ring.get_server(key) == continuum.get_server(key)
def ring():
    """Pytest fixture: 3-node ketama-compatible ring (compat=True)."""
    weights = {'node1': 1, 'node2': 1, 'node3': 1}
    return HashRing(nodes=weights, replicas=4, vnodes=40, compat=True)
def ring_fast():
    """Pytest fixture: 3-node ring in the fast (compat=False) mode."""
    weights = {'node1': 1, 'node2': 1, 'node3': 1}
    return HashRing(nodes=weights, replicas=4, vnodes=40, compat=False)
class ConsistentHashingRouter(Router):
    """Applies mapfunc to each message and, based on the result,
    routes equal messages to always the same child."""

    def __init__(self, name=None, mapfunc=None, *args, **kwargs):
        self._hashring = HashRing()
        # Fall back to the identity mapping when no callable is supplied.
        if callable(mapfunc):
            self._map = mapfunc
        else:
            self._map = lambda msg: msg
        super().__init__(name=name, *args, **kwargs)

    def _route(self, msg):
        """Pick the child owning this message's mapped value on the ring."""
        return self._hashring.get_node(self._map(msg))

    def register_child(self, child):
        """Register the child and place it on the ring."""
        super().register_child(child)
        self._hashring.add_node(child)

    def unregister_child(self, child):
        """Unregister the child and take it off the ring."""
        super().unregister_child(child)
        self._hashring.remove_node(child)
def test_ketama_compatibility(ketama_config_file):
    """Pure-python ring and ketama C binding must agree on points and lookups."""
    if not ketama:
        return
    weighted_nodes = {'127.0.0.1:11211': 600, '127.0.0.1:11212': 400}
    ring = HashRing(nodes=weighted_nodes, replicas=4, vnodes=40, compat=True)
    continuum = ketama.Continuum(ketama_config_file)
    assert ring.get_points() == continuum.get_points()

    numhits = 1000
    numvalues = 10000
    for _ in range(numhits):
        key = str(randint(1, numvalues))
        assert ring.get_server(key) == continuum.get_server(key)
class ShardDb:
    """URL store sharded across three local Postgres instances.

    Shard selection hashes a short md5 digest of the URL onto a
    consistent-hash ring of the three shards.
    """

    @staticmethod
    def get_engine(port: int):
        # NOTE(review): credentials appear masked in source -- confirm real
        # values come from configuration rather than hard-coded literals.
        return psycopg2.connect(
            "dbname='postgres' user='******' host='localhost' password='******'", port=port
        )

    def __init__(self):
        # One connection + cursor per shard; the three shards are local
        # Postgres servers on consecutive ports.
        self._shard1_engine = self.get_engine(5432)
        self._shard2_engine = self.get_engine(5433)
        self._shard3_engine = self.get_engine(5434)
        self._shard1_cur: psycopg2.extensions.cursor = self._shard1_engine.cursor()
        self._shard2_cur: psycopg2.extensions.cursor = self._shard2_engine.cursor()
        self._shard3_cur: psycopg2.extensions.cursor = self._shard3_engine.cursor()
        shard1, shard2, shard3 = "shard1", "shard2", "shard3"
        self.ring = HashRing(nodes=[shard1, shard2, shard3])
        self.shard_connections = {
            shard1: self._shard1_engine,
            shard2: self._shard2_engine,
            shard3: self._shard3_engine,
        }
        self.shard_cursors = {
            shard1: self._shard1_cur,
            shard2: self._shard2_cur,
            shard3: self._shard3_cur,
        }

    def select_node(self, url: str):
        """Return the shard name owning *url* (expects the digest, see callers)."""
        return self.ring.get_node(url)

    @staticmethod
    def digest_url(url: str):
        """5-hex-char md5 prefix used as the shard key and stored url_id."""
        return md5(url.encode()).hexdigest()[:5]

    def insert_url(self, url: str):
        """Insert (url, url_id) into the owning shard and commit."""
        url_id = self.digest_url(url=url)
        node = self.select_node(url=url_id)
        print(f"inserting '{url}' to: {node}")
        selected_shard = self.shard_cursors[node]
        # Parameterized query -- values never interpolated into SQL text.
        selected_shard.execute("""
            insert into url_table(url, url_id)
            values (%s, %s)
        """, (url, url_id))
        selected_con = self.shard_connections[node]
        selected_con.commit()

    def get_url(self, url: str):
        """Fetch all rows matching *url*'s digest from its owning shard."""
        url_id = self.digest_url(url=url)
        node = self.select_node(url=url_id)
        selected_node = self.shard_cursors[node]
        selected_node.execute(
            "select * from url_table where url_id = %s", ((url_id,))
        )
        return selected_node.fetchall()
def test_weight_fn():
    """weight_fn must scale per-node distribution and reject non-callables."""
    ring = HashRing(nodes={
        'node1': 1,
        'node2': 1,
        'node3': 1
    }, replicas=4, vnodes=40, hash_fn='ketama', weight_fn=weight_fn)
    assert ring.distribution['node1'] == 80
    assert ring.distribution['node2'] == 160
    assert ring.distribution['node3'] == 240
    # NOTE(review): bare attribute access -- if `regenerate` is a method this
    # is a no-op (missing parentheses); confirm it is a property, otherwise
    # the "distribution is stable after regeneration" check below tests nothing new.
    ring.regenerate
    assert ring.distribution['node1'] == 80
    assert ring.distribution['node2'] == 160
    assert ring.distribution['node3'] == 240
    # Non-callable weight_fn values must be rejected.
    with pytest.raises(TypeError):
        ring = HashRing(nodes={
            'node1': 1,
            'node2': 1,
            'node3': 1
        }, replicas=4, vnodes=40, hash_fn='ketama', weight_fn=12)
    with pytest.raises(TypeError):
        ring = HashRing(nodes={
            'node1': 1,
            'node2': 1,
            'node3': 1
        }, replicas=4, vnodes=40, hash_fn='ketama', weight_fn='coconut')
def test_empty_ring():
    """get() on an empty ring returns None."""
    ring = HashRing()
    # Idiom fix: compare against None with `is`, not `==`.
    assert ring.get('test') is None
num = 1000000
print('running {} key generation comparison'.format(num))

# ketama C binding
if ketama:
    # BUG FIX: NamedTemporaryFile defaults to binary mode ('w+b'), which
    # rejects str writes on Python 3 -- open in text mode explicitly.
    with NamedTemporaryFile('w', prefix='benchmark_') as ketama_config_file:
        ketama_config_file.write("127.0.0.1:11211\t600\n")
        ketama_config_file.write("127.0.0.1:11212\t400\n")
        ketama_config_file.flush()
        kt = ketama.Continuum(ketama_config_file.name)
        pt = time()
        for i in range(num):
            key = 'myval-{}'.format(i)
            kt.get_server(key)
        # BUG FIX: time() differences are seconds, not milliseconds.
        print('ketama took {} s'.format(time() - pt))

# pure python implementation
ring = HashRing(
    nodes={'127.0.0.1:11211': 600, '127.0.0.1:11212': 400},
    replicas=4, vnodes=40, compat=True)
pt = time()
for i in range(num):
    key = 'myval-{}'.format(i)
    ring.get_server(key)
print('HashRing took {} s'.format(time() - pt))
class DistributedClient(ClientMixin):
    """This is intended to be a client class which implement standard
    cache interface that common libs do...

    It tries to distribute keys over the specified servers using `HashRing`
    consistent hash.
    """

    def __init__(self, servers=('127.0.0.1:11211',), username=None, password=None,
                 compression=None, socket_timeout=SOCKET_TIMEOUT, pickle_protocol=0,
                 pickler=pickle.Pickler, unpickler=pickle.Unpickler):
        super(DistributedClient, self).__init__(servers, username, password,
                                                compression, socket_timeout,
                                                pickle_protocol, pickler,
                                                unpickler)
        # Built after super().__init__, which is assumed to populate
        # self._servers with the connected per-server clients.
        self._ring = HashRing(self._servers)

    def _get_server(self, key):
        # Consistent-hash the key to the server client that owns it.
        return self._ring.get_node(key)

    def delete(self, key, cas=0):
        """
        Delete a key/value from server. If key does not exist, it returns True.

        :param key: Key's name to be deleted
        :param cas: CAS of the key
        :return: True in case o success and False in case of failure.
        """
        server = self._get_server(key)
        return server.delete(key, cas)

    def delete_multi(self, keys):
        """Delete several keys, grouping them by owning server first.

        :return: True only if every per-server delete succeeded.
        """
        servers = defaultdict(list)
        for key in keys:
            server_key = self._get_server(key)
            servers[server_key].append(key)
        return all([server.delete_multi(keys_)
                    for server, keys_ in servers.items()])

    def set(self, key, value, time=0, compress_level=-1):
        """
        Set a value for a key on server.

        :param key: Key's name
        :type key: str
        :param value: A value to be stored on server.
        :type value: object
        :param time: Time in seconds that your key will expire.
        :type time: int
        :param compress_level: How much to compress.
            0 = no compression, 1 = fastest, 9 = slowest but best,
            -1 = default compression level.
        :type compress_level: int
        :return: True in case of success and False in case of failure
        :rtype: bool
        """
        server = self._get_server(key)
        return server.set(key, value, time, compress_level)

    def set_multi(self, mappings, time=0, compress_level=-1):
        """
        Set multiple keys with it's values on server.

        :param mappings: A dict with keys/values
        :type mappings: dict
        :param time: Time in seconds that your key will expire.
        :type time: int
        :param compress_level: How much to compress.
            0 = no compression, 1 = fastest, 9 = slowest but best,
            -1 = default compression level.
        :type compress_level: int
        :return: True in case of success and False in case of failure
        :rtype: bool
        """
        returns = []
        if not mappings:
            # An empty mapping is reported as failure, matching callers'
            # expectation of a boolean result.
            return False
        # Partition the mapping by owning server, then issue one
        # set_multi per server.
        server_mappings = defaultdict(dict)
        for key, value in mappings.items():
            server_key = self._get_server(key)
            server_mappings[server_key].update([(key, value)])
        for server, m in server_mappings.items():
            returns.append(server.set_multi(m, time, compress_level))
        return all(returns)

    def add(self, key, value, time=0, compress_level=-1):
        """
        Add a key/value to server ony if it does not exist.

        :param key: Key's name
        :type key: six.string_types
        :param value: A value to be stored on server.
        :type value: object
        :param time: Time in seconds that your key will expire.
        :type time: int
        :param compress_level: How much to compress.
            0 = no compression, 1 = fastest, 9 = slowest but best,
            -1 = default compression level.
        :type compress_level: int
        :return: True if key is added False if key already exists
        :rtype: bool
        """
        server = self._get_server(key)
        return server.add(key, value, time, compress_level)

    def replace(self, key, value, time=0, compress_level=-1):
        """
        Replace a key/value to server ony if it does exist.

        :param key: Key's name
        :type key: six.string_types
        :param value: A value to be stored on server.
        :type value: object
        :param time: Time in seconds that your key will expire.
        :type time: int
        :param compress_level: How much to compress.
            0 = no compression, 1 = fastest, 9 = slowest but best,
            -1 = default compression level.
        :type compress_level: int
        :return: True if key is replace False if key does not exists
        :rtype: bool
        """
        server = self._get_server(key)
        return server.replace(key, value, time, compress_level)

    def get(self, key, get_cas=False):
        """
        Get a key from server.

        :param key: Key's name
        :type key: six.string_types
        :param get_cas: If true, return (value, cas), where cas is the
            new CAS value.
        :type get_cas: boolean
        :return: Returns a key data from server.
        :rtype: object
        """
        server = self._get_server(key)
        value, cas = server.get(key)
        if value is not None:
            if get_cas:
                return value, cas
            return value
        # Miss: (None, None) when CAS was requested; otherwise the method
        # falls through and implicitly returns None.
        if get_cas:
            return None, None

    def get_multi(self, keys, get_cas=False):
        """
        Get multiple keys from server.

        :param keys: A list of keys to from server.
        :type keys: list
        :param get_cas: If get_cas is true, each value is (data, cas), with
            each result's CAS value.
        :type get_cas: boolean
        :return: A dict with all requested keys.
        :rtype: dict
        """
        # Group requested keys by their owning server.
        servers = defaultdict(list)
        d = {}
        for key in keys:
            server_key = self._get_server(key)
            servers[server_key].append(key)
        # NOTE(review): the loop variable below shadows the `keys` parameter;
        # harmless here (the parameter is fully consumed above) but worth renaming.
        for server, keys in servers.items():
            results = server.get_multi(keys)
            if not get_cas:
                # Remove CAS data
                for key, (value, cas) in results.items():
                    results[key] = value
            d.update(results)
        return d

    def gets(self, key):
        # Returns whatever the underlying client's get() yields -- per the
        # usage in get(), that is a (value, cas) pair.
        server = self._get_server(key)
        return server.get(key)

    def cas(self, key, value, cas, time=0, compress_level=-1):
        """
        Set a value for a key on server if its CAS value matches cas.

        :param key: Key's name
        :type key: six.string_types
        :param value: A value to be stored on server.
        :type value: object
        :param cas: The CAS value previously obtained from a call to get*.
        :type cas: int
        :param time: Time in seconds that your key will expire.
        :type time: int
        :param compress_level: How much to compress.
            0 = no compression, 1 = fastest, 9 = slowest but best,
            -1 = default compression level.
        :type compress_level: int
        :return: True in case of success and False in case of failure
        :rtype: bool
        """
        server = self._get_server(key)
        return server.cas(key, value, cas, time, compress_level)

    def incr(self, key, value):
        """
        Increment a key, if it exists, returns it's actual value, if it don't,
        return 0.

        :param key: Key's name
        :type key: six.string_types
        :param value: Number to be incremented
        :type value: int
        :return: Actual value of the key on server
        :rtype: int
        """
        server = self._get_server(key)
        return server.incr(key, value)

    def decr(self, key, value):
        """
        Decrement a key, if it exists, returns it's actual value, if it don't,
        return 0. Minimum value of decrement return is 0.

        :param key: Key's name
        :type key: six.string_types
        :param value: Number to be decremented
        :type value: int
        :return: Actual value of the key on server
        :rtype: int
        """
        server = self._get_server(key)
        return server.decr(key, value)
def __init__(self, servers=('127.0.0.1:11211',), username=None, password=None,
             compression=None, socket_timeout=SOCKET_TIMEOUT, pickle_protocol=0,
             pickler=pickle.Pickler, unpickler=pickle.Unpickler):
    """Initialise the client pool, then build the consistent-hash ring
    over the connected servers."""
    super(DistributedClient, self).__init__(
        servers, username, password, compression, socket_timeout,
        pickle_protocol, pickler, unpickler)
    # The ring is built last: super().__init__ populates self._servers.
    self._ring = HashRing(self._servers)
def test_empty_ring():
    """get() on an empty ring returns None."""
    ring = HashRing()
    # Idiom fix: compare against None with `is`, not `==`.
    assert ring.get("test") is None
def test_print_without_error(ring):
    """print_continuum() must run (returning None) on both full and empty rings."""
    # Idiom fix: None checks use `is`, not `==`.
    assert ring.print_continuum() is None
    ring = HashRing()
    assert ring.print_continuum() is None
def test_ketama_hashi():
    """hashi() must produce the same digest as the ketama C binding."""
    if not ketama:
        return
    assert HashRing().hashi('test') == ketama.hashi('test')