from collections import Counter, OrderedDict


def check_cache_miss(keys, mhb, before=None):
    # Fall back to an empty distribution when no baseline is supplied.
    before = Counter() if before is None else before
    before_keys = before.keys()

    cnt = OrderedDict()
    log = logger.get_logger(check_cache_miss)
    was_not_cached_count = 0
    for key in keys:
        # Route the key to its node and record whether it was already cached there.
        node = mhb.find_node_for_key(key=key)
        value, was_cached = node.get_or_if_not_present_set(key=key)
        if was_cached is False:
            cnt[node.name] = cnt.get(node.name, 0) + 1
            was_not_cached_count += 1

    after = mhb.calculate_distribution()
    after_keys = after.keys()

    out = []

    for before_key in before_keys:
        if before_key not in after_keys:
            out.append("REMOVE {} -{}".format(before_key, before[before_key]))
    for after_key in after_keys:
        if after_key in before_keys and before[after_key] != after[after_key]:
            out.append("CHANGE {} {}".format(
                after_key, after[after_key] - before[after_key]))
        if after_key not in before_keys:
            out.append("ADD {} +{}".format(after_key, after[after_key]))

    log.warn("CACHE TRANSFER(%) -> {} => {}".format(
        was_not_cached_count * 100.0 / len(keys), out))
    return after


def main(number_of_nodes=15, dataset_size=10**5, number_of_virtual_nodes=1):
    log = logger.get_logger(main)
    log.info("NUMBER OF NODES: {}".format(number_of_nodes))
    log.info("DATASET SIZE: {}".format(dataset_size))
    log.info("NUMBER OF VIRTUAL NODES: {}".format(number_of_virtual_nodes))
    log.info("NUMBER OF TOTAL NODES: {} * {} = {}".format(
        number_of_nodes, number_of_virtual_nodes,
        number_of_nodes * number_of_virtual_nodes))

    mhb = ConsistentHashingBenchmark(
        number_of_nodes=number_of_nodes,
        cache_size=int(math.ceil(dataset_size * 1.2 / (number_of_nodes - 1))),
        number_of_virtual_nodes=number_of_virtual_nodes)
    keys = range(0, dataset_size)

    init_benchmark_data(keys, mhb)
    cnt = check_cache_miss(keys, mhb)
    # Alternately add and remove a node four times, comparing each new key-space
    # distribution against the previous one.
    for _ in range(4):
        mhb.add_node()
        cnt = check_cache_miss(keys, mhb, cnt)
        mhb.remove_node()
        cnt = check_cache_miss(keys, mhb, cnt)


def main(number_of_nodes=15, dataset_size=10**5):
    log = logger.get_logger(main)
    log.info("Number of nodes - {}".format(number_of_nodes))
    log.info("Dataset Size - {}".format(dataset_size))

    mhb = ModuloHashingBenchmark(
        number_of_nodes=number_of_nodes,
        cache_size=int(math.ceil(dataset_size / (number_of_nodes - 1))))
    keys = range(0, dataset_size)

    init_benchmark_data(keys, mhb)
    check_cache_miss(keys, mhb)
    # Alternately add and remove a node four times, measuring the cache miss
    # rate after each change.
    for _ in range(4):
        mhb.add_node()
        check_cache_miss(keys, mhb)
        mhb.remove_node()
        check_cache_miss(keys, mhb)


    def remove_node(self):
        # Drop a randomly chosen node from the cluster.
        index = random.randint(0, len(self.nodes) - 1)
        self.nodes.pop(index)
        log = logger.get_logger(self.remove_node)
        log.info(
            "Removed a random node from the cluster, Total Nodes - {} Index - {}"
            .format(len(self.nodes), index))

    def create_node(self, cache_size):
        # "fake" is a Faker instance used to generate the node's name and IP.
        node = Node(name=fake.user_name(),
                    ip=fake.ipv4(),
                    cache_size=cache_size)
        log = logger.get_logger(self.create_node)
        log.info("Created a new node, Name - {}, IP - {}".format(
            node.name, node.ip))
        return node
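
    # Not shown in this example: the key-to-node lookup. A minimal sketch, assuming
    # ModuloHashingBenchmark picks a node by "key modulo node count" (which is why
    # most keys move whenever a node is added or removed):
    def find_node_for_key(self, key):
        # Changing len(self.nodes) changes the target node for almost every key.
        return self.nodes[hash(key) % len(self.nodes)]
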
def check_cache_miss(keys, mhb):

    was_not_cached_count = 0
    for key in keys:
        node = mhb.find_node_for_key(key=key)
        value, was_cached = node.get_or_if_not_present_set(key=key)
        if was_cached is False:
            was_not_cached_count += 1

    log = logger.get_logger(check_cache_miss)
    log.info("Cache Miss % - {}".format(was_not_cached_count * 100.0 / len(keys)))
    def remove_node(self):
        index = random.randint(0, len(self.ordered_node_locations) - 1)
        location_hash = self.ordered_node_locations[index]
        node = self.location_to_node_map[location_hash]

        # Remove every virtual location of this node from the ordered list and
        # drop its entries from the location-to-node map.
        for virtual_location in node.virtual_node_locations:
            self.ordered_node_locations.remove(virtual_location)
            del self.location_to_node_map[virtual_location]

        log = logger.get_logger(self.remove_node)
        log.error("REMOVED NODE - {}".format(node.name))
        log.error("ORDER - {}".format([self.location_to_node_map[i].name for i in self.ordered_node_locations]))
        log.error("KEY-SPACE DISTRIBUTION - {}".format(self.calculate_distribution()))
        log.error("CACHE FILL(%) - {}".format([(n.name, len(n.cache)*100.0/n.cache_size) for n in set(self.location_to_node_map.values())]))
    def add_node(self):
        node = self.create_node(cache_size=self.cache_size)

        for i in range(self.number_of_virtual_nodes):
            node_location_hash = int(
                hashlib.sha256(
                    (node.ip + "virtual_node:" + str(i)).encode("utf-8")
                ).hexdigest(), 16) % KEY_SPACE
            self.location_to_node_map[node_location_hash] = node

            # Populate node's virtual_node_locations list
            node.virtual_node_locations.append(node_location_hash)

            bisect.insort_right(self.ordered_node_locations,
                                node_location_hash)
        log = logger.get_logger(self.add_node)
        log.debug("ADDED NODE - {}".format(node.name))
        log.debug("ORDER - {}".format([self.location_to_node_map[i].name for i in self.ordered_node_locations]))
        log.debug("KEY-SPACE DISTRIBUTION - {}".format(self.calculate_distribution()))
        log.debug("CACHE FILL(%) - {}".format([(n.name, len(n.cache)*100.0/n.cache_size) for n in set(self.location_to_node_map.values())]))
def init_benchmark_data(keys, mhb):
    for key in keys:
        node = mhb.find_node_for_key(key=key)
        node.get_or_if_not_present_set(key=key)
    log = logger.get_logger(init_benchmark_data)
    log.warn("CREATING INITIAL BENCHMARK DATA COMPLETE")
__author__ = 'aj'
# -*- coding: UTF-8 -*-

# Use MySQL Connector Module : pyMySQL (0.6.2)
import pymysql
import unittest
from base import baseutil
from base import logger

_log = logger.get_logger(baseutil.get_filename(__file__))



# Test class: tests the OpenServiceDataImporter.
class _DbConnectionTest(unittest.TestCase):

    def test_dbConnectionTest(self):
        conn = pymysql.connect(host='127.0.0.1', port=3306, user='******', passwd='subway', db='apidata_subway')
        cur = conn.cursor()
        cur.execute("SELECT vid,value FROM testTable")
        print(cur.description)
        print()
        for row in cur:
            print(row)

        cur.close()
        conn.close()
        # self.assertNotEqual(None, None)

    def add_node(self):
        self.nodes.append(self.create_node(cache_size=self.cache_size))
        log = logger.get_logger(self.add_node)
        log.info("Added a new node to the cluster, Total Nodes - {}".format(
            len(self.nodes)))
__author__ = 'aj'
# -*- coding: UTF-8 -*-

from externapi.seoul.imports.import_base import *
from externapi.seoul.SubwayOpenApi import *
from externapi.seoul.DbSubwaySeoul import DbSeoulSubway
import unittest
from base import logger

_log = logger.get_logger("IMPORTER-SEOUL")

# Combine every subway station code with WEEK and INOUT, issue REST requests, and store the results in the DB.
def importStart():

    dss = DbSeoulSubway()
    if dss.open_api_service_connection() is False:
        raise OpenServiceDataImporterException("Cannot open database connection")

    svc_nm = SB_SERVICE.SVC_SEARCHVIASTNARRIVALTIMEBYTRAINSERVICE
    dss.tables_truncate(svc_nm)

    trains      = dss.get_all_train_no()
    inouts      = SB_API_PARAM.INOUT_TAG.list()
    weeks       = SB_API_PARAM.WEEK_TAG.list()


    for train in trains:
        for inout in inouts:
            for week in weeks:
                sub_result = _importSpecific(dss, svc_nm, train, week, inout)
                if sub_result is False:
def init_benchmark_data(keys, mhb):
    for key in keys:
        node = mhb.find_node_for_key(key=key)
        node.get_or_if_not_present_set(key=key)
    log = logger.get_logger(init_benchmark_data)
    log.info("Creating Initial Benchmark Data Complete")