Example #1
def queryRunner():

    hosts = None
    clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG + "_status")

    if clusterStatus:
        hosts = clusterStatus.get_all_hosts()

    # retrieve all active query workloads
    queries = CacheHelper.active_queries()
    for query in queries:

        # async update query workload object
        updateQueryWorkload.apply_async(args=[query])

        count = int(query.qps)
        filters = list(set(query.include_filters) -\
                       set(query.exclude_filters))
        params = generateQueryParams(query.indexed_key, query.bucket, filters,
                                     query.limit, query.startkey, query.endkey,
                                     query.startkey_docid, query.endkey_docid)
        multi_query.delay(count,
                          query.ddoc,
                          query.view,
                          params,
                          query.bucket,
                          query.password,
                          hosts=hosts)
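
Note: generateQueryParams is called here and in several later examples but defined in none of them. A minimal sketch of what it plausibly builds, assuming the standard Couchbase view query parameter names and the "stale": "update_after" default seen in Example #19; how indexed_key, bucket and filters shape the query is not recoverable from these snippets and is omitted:

def generateQueryParams(indexed_key, bucket, filters, limit,
                        startkey, endkey, startkey_docid, endkey_docid):
    # hypothetical helper: assemble a view query parameter dict,
    # skipping any range bounds that were not set
    params = {"stale": "update_after"}
    if limit is not None:
        params["limit"] = limit
    if startkey is not None:
        params["startkey"] = startkey
    if endkey is not None:
        params["endkey"] = endkey
    if startkey_docid is not None:
        params["startkey_docid"] = startkey_docid
    if endkey_docid is not None:
        params["endkey_docid"] = endkey_docid
    return params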
Example #2
    def setitup(self):
        # if the user forgot to assign the number of initial nodes for a cluster,
        # use 1 node as the default
        if len(self._num_initial_nodes) < len(self._clusters_keys_olst):
            diff = len(self._clusters_keys_olst) - len(self._num_initial_nodes)
            for i in range(diff):
                self._num_initial_nodes.append('1')

        for key in self._clusters_keys_olst:
            clusterStatus = None
            if key == 0:
                clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG+"_status") or ClusterStatus()
            else:
                clusterStatus = CacheHelper.clusterstatus(cfg.CB_REMOTE_CLUSTER_TAG[key-1]+"_status") or\
                    ClusterStatus(cfg.CB_REMOTE_CLUSTER_TAG[key-1]+"_status")

            clusterStatus.all_available_hosts = ["%s:%s" % (node.ip, node.port) for node in self._clusters_dic[key]]

            self.set_the_cluster_up(self._clusters_dic[key][:int(self._num_initial_nodes[key])])

        time.sleep(20)

        if self._xdcr:
            self._link_create_replications(self._s_master, self._d_master, "cluster1")
            if self._rdirection == "bidirection":
                self._link_create_replications(self._d_master, self._s_master, "cluster0")
Example #3
    def setitup(self):
        # if the user forgot to assign the number of initial nodes for a cluster,
        # use 1 node as the default
        if len(self._num_initial_nodes) < len(self._clusters_keys_olst):
            diff = len(self._clusters_keys_olst) - len(self._num_initial_nodes)
            for i in range(diff):
                self._num_initial_nodes.append('1')

        for key in self._clusters_keys_olst:
            clusterStatus = None
            if key == 0:
                clusterStatus = CacheHelper.clusterstatus(
                    cfg.CB_CLUSTER_TAG + "_status") or ClusterStatus()
            else:
                clusterStatus = CacheHelper.clusterstatus(cfg.CB_REMOTE_CLUSTER_TAG[key-1]+"_status") or\
                    ClusterStatus(cfg.CB_REMOTE_CLUSTER_TAG[key-1]+"_status")

            clusterStatus.all_available_hosts = [
                "%s:%s" % (node.ip, node.port)
                for node in self._clusters_dic[key]
            ]

            self.set_the_cluster_up(
                self._clusters_dic[key][:int(self._num_initial_nodes[key])])

        time.sleep(20)

        if self._xdcr:
            self._link_create_replications(self._s_master, self._d_master,
                                           "cluster1")
            if self._rdirection == "bidirection":
                self._link_create_replications(self._d_master, self._s_master,
                                               "cluster0")
Example #4
def queryRunner():

    hosts = None
    clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG+"_status")

    if clusterStatus:
        hosts = clusterStatus.get_all_hosts()

    # retrieve all active query workloads
    queries = CacheHelper.active_queries()
    for query in queries:

        # async update query workload object
        updateQueryWorkload.apply_async(args=[query])

        count = int(query.qps)
        filters = list(set(query.include_filters) -\
                       set(query.exclude_filters))
        params = generateQueryParams(query.indexed_key,
                                     query.bucket,
                                     filters,
                                     query.limit,
                                     query.startkey,
                                     query.endkey,
                                     query.startkey_docid,
                                     query.endkey_docid)
        multi_query.delay(count,
                          query.ddoc,
                          query.view,
                          params,
                          query.bucket,
                          query.password,
                          hosts=hosts)
Example #5
def report_kv_latency(bucket="default"):

    if cfg.SERIESLY_IP == '':
        # seriesly not configured
        return

    rabbitHelper = report_kv_latency.rabbitHelper
    clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG+"_status") or\
        ClusterStatus()

    host = clusterStatus.get_random_host()
    if host is None: return

    ip, port = host.split(':')

    workloads = CacheHelper.workloads()
    for workload in workloads:
        if workload.active and workload.bucket == bucket:

            # read workload params
            bucket = str(workload.bucket)
            password = str(workload.password)

            # read template from active workload
            template = Template.from_cache(str(workload.template))
            template = template.__dict__
            client.decodeMajgicStrings(template)

            # setup key/val to use for timing
            key = _random_string(12)
            value = json.dumps(template['kv'])
            get_key = key

            # for get op, try to pull from consume_queue
            # so that we can calc impact of dgm
            consume_queue = workload.consume_queue
            if consume_queue is not None:
                keys = rabbitHelper.getJsonMsg(str(consume_queue), requeue=True)
                if len(keys) > 0:
                    get_key = str(keys[0])

            # collect op latency
            set_latency = client.mc_op_latency('set', key, value, ip, port, bucket, password)
            get_latency = client.mc_op_latency('get', get_key, value, ip, port, bucket, password)
            delete_latency = client.mc_op_latency('delete', key, value, ip, port, bucket, password)

            # report to seriesly
            seriesly = Seriesly(cfg.SERIESLY_IP, 3133)
            db = 'fast'
            seriesly[db].append({'set_latency' : set_latency,
                                 'get_latency' : get_latency,
                                 'delete_latency' : delete_latency})
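
Note: _random_string is used above but never defined in these examples. A plausible stand-in (name and exact behavior are assumptions):

import random
import string

def _random_string(length):
    # random lowercase key of the requested length
    return ''.join(random.choice(string.ascii_lowercase) for _ in range(length))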
Example #6
def perform_admin_tasks(adminMsg, cluster_id=cfg.CB_CLUSTER_TAG + "_status"):
    app.workload_manager.updateClusterStatus()
    clusterStatus = CacheHelper.clusterstatus(cluster_id)
    if clusterStatus is None:
        logger.error("Unable to fetch clusterStatus from cache")
        return

    rest = clusterStatus.node_rest()

    # Add nodes
    servers = adminMsg["rebalance_in"]
    add_nodes(rest, servers, cluster_id)

    # Get all nodes
    allNodes = []
    for node in rest.node_statuses():
        allNodes.append(node.id)

    # Remove nodes
    servers = adminMsg["rebalance_out"]
    toBeEjectedNodes = remove_nodes(rest, servers, adminMsg["involve_orchestrator"], cluster_id)

    # Failover Node
    servers = adminMsg["failover"]
    auto_failover_servers = adminMsg["auto_failover"]
    only_failover = adminMsg["only_failover"]
    add_back_servers = adminMsg["add_back"]
    failoverNodes = failover_nodes(rest, servers, only_failover, adminMsg["involve_orchestrator"], cluster_id)
    autoFailoverNodes = auto_failover_nodes(
        rest, auto_failover_servers, only_failover, adminMsg["involve_orchestrator"], cluster_id
    )

    app.workload_manager.updateClusterStatus()
    clusterStatus = CacheHelper.clusterstatus(cluster_id) or ClusterStatus(cluster_id)
    rest = clusterStatus.node_rest()
    addBackNodes = add_back_nodes(rest, add_back_servers, autoFailoverNodes + failoverNodes)
    toBeEjectedNodes.extend(failoverNodes)
    toBeEjectedNodes.extend(autoFailoverNodes)
    for node in addBackNodes:
        toBeEjectedNodes.remove(node)

    # SoftRestart a node
    servers = adminMsg["soft_restart"]
    restart(servers, cluster_id=cluster_id)

    # HardRestart a node
    servers = adminMsg["hard_restart"]
    restart(servers, type="hard", cluster_id=cluster_id)

    if not only_failover and (len(allNodes) > 0 or len(toBeEjectedNodes) > 0):
        logger.error("Rebalance")
        logger.error(allNodes)
        logger.error(toBeEjectedNodes)
        rest.rebalance(otpNodes=allNodes, ejectedNodes=toBeEjectedNodes)
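
Note: the keys read from adminMsg above are its full expected shape in this variant. An illustrative message (all values invented):

adminMsg = {
    "rebalance_in": "2",            # add two of the free nodes
    "rebalance_out": "",            # nothing to remove
    "failover": "",
    "auto_failover": "",
    "only_failover": False,
    "add_back": "",
    "involve_orchestrator": False,
    "soft_restart": "",
    "hard_restart": "",
}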
Example #7
def perform_admin_tasks(adminMsg, cluster_id=cfg.CB_CLUSTER_TAG+"_status"):
    app.workload_manager.updateClusterStatus()
    clusterStatus = CacheHelper.clusterstatus(cluster_id)
    if clusterStatus is None:
        logger.error("Unable to fetch clusterStatus from cache")
        return

    rest = clusterStatus.node_rest()

    # Add nodes
    servers = adminMsg["rebalance_in"]
    add_nodes(rest, servers, cluster_id)

    # Get all nodes
    allNodes = []
    for node in rest.node_statuses():
        allNodes.append(node.id)

    # Remove nodes
    servers = adminMsg["rebalance_out"]
    toBeEjectedNodes = remove_nodes(rest, servers, adminMsg["involve_orchestrator"], cluster_id)

    # Failover Node
    servers = adminMsg["failover"]
    auto_failover_servers = adminMsg["auto_failover"]
    only_failover = adminMsg["only_failover"]
    add_back_servers = adminMsg["add_back"]
    failoverNodes = failover_nodes(rest, servers, only_failover, adminMsg["involve_orchestrator"], cluster_id)
    autoFailoverNodes = auto_failover_nodes(rest, auto_failover_servers, only_failover, adminMsg["involve_orchestrator"], cluster_id)

    app.workload_manager.updateClusterStatus()
    clusterStatus = CacheHelper.clusterstatus(cluster_id) or ClusterStatus(cluster_id)
    rest = clusterStatus.node_rest()
    addBackNodes = add_back_nodes(rest, add_back_servers, autoFailoverNodes+failoverNodes)
    toBeEjectedNodes.extend(failoverNodes)
    toBeEjectedNodes.extend(autoFailoverNodes)
    for node in addBackNodes:
        toBeEjectedNodes.remove(node)

    # SoftRestart a node
    servers = adminMsg["soft_restart"]
    restart(servers, cluster_id=cluster_id)

    # HardRestart a node
    servers = adminMsg["hard_restart"]
    restart(servers, type='hard', cluster_id=cluster_id)

    if not only_failover and (len(allNodes) > 0 or len(toBeEjectedNodes) > 0):
        logger.error("Rebalance")
        logger.error(allNodes)
        logger.error(toBeEjectedNodes)
        rest.rebalance(otpNodes=allNodes, ejectedNodes=toBeEjectedNodes)
Example #8
def updateQueryWorkload(query):
    workloads = CacheHelper.workloads()

    for workload in workloads:
        if workload.active and workload.bucket == query.bucket:
            key = query.indexed_key
            workload.updateIndexKeys(key)
Example #10
def pick_nodesToRemove(servers='',
                       involve_orchestrator=False,
                       cluster_id=cfg.CB_CLUSTER_TAG + "_status"):
    if servers.find('.') != -1 or servers == '':
        servers = servers.split()
    else:
        clusterStatus = CacheHelper.clusterstatus(cluster_id)
        count = int(servers)
        temp_count = count
        servers = []
        if involve_orchestrator:
            servers.append("%s:%s" % (clusterStatus.orchestrator.ip,
                                      clusterStatus.orchestrator.port))
            temp_count = temp_count - 1

        if len(clusterStatus.nodes) > count:
            non_orchestrator_servers = list(
                set(clusterStatus.get_all_hosts()) - set([
                    "%s:%s" % (clusterStatus.orchestrator.ip,
                               clusterStatus.orchestrator.port)
                ]))
            servers.extend(non_orchestrator_servers[:temp_count])
        else:
            logger.error(
                "Remove nodes request invalid. # of nodes in cluster is not enough"
            )
            return []

    return servers
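
Note: the servers argument is overloaded: anything containing a '.' (or the empty string) is treated as a whitespace-separated host list, otherwise as a count resolved against the cached cluster status. Illustrative calls (addresses invented):

pick_nodesToRemove(servers="10.1.2.3:8091 10.1.2.4:8091")
pick_nodesToRemove(servers="2", involve_orchestrator=True)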
Example #11
def add_nodes(rest,
              servers='',
              cluster_id=cfg.CB_CLUSTER_TAG + "_status",
              zone_name='',
              services=None):
    # create zone if it does not exist
    if zone_name != '':
        if rest.is_zone_exist(zone_name) == False:
            rest.add_zone(zone_name)

    if servers.find('.') != -1 or servers == '':
        servers = servers.split()
    else:
        clusterStatus = CacheHelper.clusterstatus(cluster_id)
        count = int(servers)
        if (len(clusterStatus.all_available_hosts) -
                len(clusterStatus.nodes)) >= int(count):
            servers = list(
                set(clusterStatus.all_available_hosts) -
                set(clusterStatus.get_all_hosts()))
        else:
            logger.error(
                "Add nodes request invalid. # of nodes outside cluster is not enough"
            )
            return
        servers = servers[:count]
    for server in servers:
        logger.error("Adding node %s" % server)
        ip, port = parse_server_arg(server)
        if services:
            rest.add_node(cfg.COUCHBASE_USER, cfg.COUCHBASE_PWD, ip, port,
                          zone_name, services)
        else:
            rest.add_node(cfg.COUCHBASE_USER, cfg.COUCHBASE_PWD, ip, port,
                          zone_name)
Example #12
def queryConsumer(queryQueue="query_default"):

    rabbitHelper = queryConsumer.rabbitHelper
    queryQueueSize = rabbitHelper.qsize(queryQueue)

    # for the CLI, retrieve the currently active query workload,
    # since multi-query is not supported here
    active_query = None
    all_queries = CacheHelper.active_queries()
    if len(all_queries) > 0:
        active_query = all_queries[0]

    if queryQueueSize > 0:

        # setup new query workload from queued message
        queryMsg = rabbitHelper.getJsonMsg(queryQueue)
        logger.error(queryMsg)
        try:
            queryWorkload = QueryWorkload(queryMsg)

            # deactivate old query workload
            if active_query is not None:
                active_query.active = False

            # activate new query workload
            # to be detected in queryRunner task
            queryWorkload.active = True

            if 'rcq' in queryMsg:
                rabbitHelper.putMsg(queryMsg['rcq'], "Started Querying: %s/%s" % \
                    (queryWorkload.ddoc, queryWorkload.view))

        except KeyError:
            logger.info("Invalid query workload message: %s" % queryMsg)
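
Note: the queryMsg dequeued above feeds the QueryWorkload constructor. Judging from the attributes read off query workloads in the other examples, a plausible message (values invented):

queryMsg = {
    "bucket": "default",
    "password": "",
    "ddoc": "ddoc1",
    "view": "view1",
    "qps": 100,
    "rcq": "reply_queue"    # optional: where the "Started Querying" ack goes
}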
Example #13
def restart(servers='', type='soft', cluster_id=cfg.CB_CLUSTER_TAG+"_status"):
    if servers.find('.') != -1 or servers == '':
        servers = servers.split()
    else:
        clusterStatus = CacheHelper.clusterstatus(cluster_id) or ClusterStatus(cluster_id)
        count = int(servers)
        if len(clusterStatus.nodes) >= int(count):
            servers = clusterStatus.get_all_hosts()
        else:
            logger.error("Restart nodes request invalid. # of nodes in cluster is not enough")
            return
        servers = servers[:count]

    for server in servers:
        ip, port = parse_server_arg(server)
        node_ssh, node = create_ssh_conn(ip)
        if type != 'soft':
            logger.error('Hard Restart')
            cmd = "reboot"
        else:
            logger.error('Soft Restart')
            cmd = "/etc/init.d/couchbase-server restart"

        logger.error(cmd)
        result = node_ssh.execute_command(cmd, node)
        logger.error(result)
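
Note: parse_server_arg appears throughout but is never shown. A minimal sketch, assuming servers are "ip:port" strings and 8091 as the default REST port:

def parse_server_arg(server):
    # split "ip:port"; fall back to the default REST port when omitted
    if ':' in server:
        ip, port = server.split(':')
    else:
        ip, port = server, "8091"
    return ip, port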
Example #14
def postcondition_handler():

    workloads = CacheHelper.workloads()
    for workload in workloads:
        if workload.postcondition_handler and workload.active:
            bucket = workload.bucket
            bs = BucketStatus.from_cache(bucket)
            bs.block(bucket)
            status = True

            try:
                postcondition_handler = \
                    getattr(phandler,
                            workload.postcondition_handler)

                status = postcondition_handler(workload)

            except AttributeError:
                logger.error("Postcondition method %s doesn't exist" \
                             % workload.postcondition_handler)
                workload.postcondition = None
                workload.postcondition_handler = None

            if status == True:
                # unblock bucket and deactivate workload
                bs = BucketStatus.from_cache(bucket)
                bs.unblock(bucket)
                workload.active = False
Example #15
def queryConsumer(queryQueue="query_default"):

    rabbitHelper = queryConsumer.rabbitHelper
    queryQueueSize = rabbitHelper.qsize(queryQueue)

    # for the CLI, retrieve the currently active query workload,
    # since multi-query is not supported here
    active_query = None
    all_queries = CacheHelper.active_queries()
    if len(all_queries) > 0:
        active_query = all_queries[0]

    if queryQueueSize > 0:

        # setup new query workload from queued message
        queryMsg = rabbitHelper.getJsonMsg(queryQueue)
        logger.error(queryMsg)
        try:
            queryWorkload = QueryWorkload(queryMsg)

            # deactivate old query workload
            if active_query is not None:
                active_query.active = False

            # activate new query workload
            # to be detected in queryRunner task
            queryWorkload.active = True

            if 'rcq' in queryMsg:
                rabbitHelper.putMsg(queryMsg['rcq'], "Started Querying: %s/%s" % \
                    (queryWorkload.ddoc, queryWorkload.view))

        except KeyError:
            logger.info("Invalid query workload message: %s" % queryMsg)
Example #16
def postcondition_handler():

    workloads = CacheHelper.workloads()
    for workload in workloads:
        if workload.postcondition_handler and workload.active:
            bucket = workload.bucket
            bs = BucketStatus.from_cache(bucket)
            bs.block(bucket)
            status = True

            try:
                postcondition_handler = \
                    getattr(phandler,
                            workload.postcondition_handler)

                status = postcondition_handler(workload)

            except AttributeError:
                logger.error("Postcondition method %s doesn't exist" \
                             % workload.postcondition_handler)
                workload.postcondition = None
                workload.postcondition_handler = None

            if status == True:
                # unblock bucket and deactivate workload
                bs = BucketStatus.from_cache(bucket)
                bs.unblock(bucket)
                workload.active = False
Example #17
def restart(servers="", type="soft", cluster_id=cfg.CB_CLUSTER_TAG + "_status"):
    if servers.find(".") != -1 or servers == "":
        servers = servers.split()
    else:
        clusterStatus = CacheHelper.clusterstatus(cluster_id) or ClusterStatus(cluster_id)
        count = int(servers)
        if len(clusterStatus.nodes) >= int(count):
            servers = clusterStatus.get_all_hosts()
        else:
            logger.error("Restart nodes request invalid. # of nodes in cluster is not enough")
            return
        servers = servers[:count]

    for server in servers:
        ip, port = parse_server_arg(server)
        node_ssh, node = create_ssh_conn(ip)
        if type != "soft":
            logger.error("Hard Restart")
            cmd = "reboot"
        else:
            logger.error("Soft Restart")
            cmd = "/etc/init.d/couchbase-server restart"

        logger.error(cmd)
        result = node_ssh.execute_command(cmd, node)
        logger.error(result)
Example #18
def restart(servers='', type='soft', cluster_id=cfg.CB_CLUSTER_TAG+"_status"):
    if servers.find('.') != -1 or servers == '':
        servers = servers.split()
    else:
        clusterStatus = CacheHelper.clusterstatus(cluster_id)
        count = int(servers)
        if len(clusterStatus.nodes) >= int(count):
            servers = clusterStatus.get_all_hosts()
        else:
            logger.error("Restart nodes request invalid. # of nodes in cluster is not enough")
            return
        servers = servers[:count]

    for server in servers:
        ip, port = parse_server_arg(server)
        node_ssh, node = create_ssh_conn(ip)
        if type != 'soft':
            logger.error('Hard Restart')
            if cfg.COUCHBASE_OS == "windows":
                cmd = "shutdown -r -t 0"
            else:
                cmd = "reboot"
        else:
            logger.error('Soft Restart')
            if cfg.COUCHBASE_OS == "windows":
                cmd = "net stop couchbaseserver && net start couchbaseserver"
            else:
                cmd = "/etc/init.d/couchbase-server restart"

        logger.error(cmd)
        result = node_ssh.execute_command(cmd, node)
        logger.error(result)
Example #19
def queryRunner():

    # retrieve all active query workloads
    queries = CacheHelper.active_queries()
    for query in queries:

        count = int(query.qps)
        params = {"stale": "update_after"}
        multi_query.delay(count, query.ddoc, query.view, params, query.bucket,
                          query.password)
Example #20
def queryRunner(max_msgs=10):

    rabbitHelper = queryRunner.rabbitHelper

    # check queue with pending http requests
    pending_http_requests = "query_multi_" + cfg.CB_CLUSTER_TAG
    if rabbitHelper.qsize(pending_http_requests) > max_msgs:

        # purge waiting tasks
        rabbitHelper.purge(pending_http_requests)
        query_ops_manager(max_msgs, True)

    else:

        hosts = None
        clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG +
                                                  "_status")

        if clusterStatus:
            hosts = clusterStatus.get_all_hosts()

        # retrieve all active query workloads
        queries = CacheHelper.active_queries()
        for query in queries:

            # async update query workload object
            updateQueryWorkload.apply_async(args=[query])

            count = int(query.qps)
            filters = list(set(query.include_filters) -\
                           set(query.exclude_filters))
            params = generateQueryParams(query.indexed_key, query.bucket,
                                         filters, query.limit, query.startkey,
                                         query.endkey, query.startkey_docid,
                                         query.endkey_docid)
            multi_query.delay(count,
                              query.ddoc,
                              query.view,
                              params,
                              query.bucket,
                              query.password,
                              hosts=hosts)
Example #21
def queryRunner(max_msgs=10):

    rabbitHelper = queryRunner.rabbitHelper

    # check queue with pending http requests
    pending_http_requests = "query_multi_" + cfg.CB_CLUSTER_TAG
    if rabbitHelper.qsize(pending_http_requests) > max_msgs:

        # purge waiting tasks
        rabbitHelper.purge(pending_http_requests)
        query_ops_manager(max_msgs, True)

    else:

        hosts = None
        clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG + "_status")

        if clusterStatus:
            hosts = clusterStatus.get_all_hosts()

        # retrieve all active query workloads
        queries = CacheHelper.active_queries()
        for query in queries:

            # async update query workload object
            updateQueryWorkload.apply_async(args=[query])

            count = int(query.qps)
            filters = list(set(query.include_filters) - set(query.exclude_filters))
            params = generateQueryParams(
                query.indexed_key,
                query.bucket,
                filters,
                query.limit,
                query.startkey,
                query.endkey,
                query.startkey_docid,
                query.endkey_docid,
            )
            multi_query.delay(count, query.ddoc, query.view, params, query.bucket, query.password, hosts=hosts)
Example #22
def getClusterStat(bucket, stat):

    val = 0
    clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG+"_status") or\
        ClusterStatus()
    host = clusterStatus.get_random_host()
    stat_checker = phandler.BucketStatChecker(bucket, addr=host)
    stats = stat_checker.get_stats()
    if len(stats) > 0:
        if stat in stats:
            val = stats[stat]

    return val
Example #23
def getClusterStat(bucket, stat):

    val = 0
    clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG+"_status") or\
        ClusterStatus()
    host = clusterStatus.get_random_host()
    stat_checker = phandler.BucketStatChecker(bucket, addr=host)
    stats = stat_checker.get_stats()
    if len(stats) > 0:
        if stat in stats:
            val = stats[stat]

    return val
Example #24
def get_ep_hostip_from_params(params):
    app.workload_manager.updateClusterStatus()
    clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG + "_status")
    random_host = None
    try:
        random_host = clusterStatus.get_random_host().split(":")[0]
    except AttributeError:
        logger.error("Cannot fetch cluster status information")

    host = params.get('ip') or random_host or cfg.COUCHBASE_IP
    port = params.get('port') or 11210

    return host, int(port)
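
Note: illustrative behavior (address invented): get_ep_hostip_from_params({"ip": "10.1.2.3", "port": "11210"}) returns ("10.1.2.3", 11210), while get_ep_hostip_from_params({}) falls back to a random cluster host and then to cfg.COUCHBASE_IP, with 11210 as the default port.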
Example #25
def queryRunner():

    # retrieve all active query workloads
    queries = CacheHelper.active_queries()
    for query in queries:

        count = int(query.qps)
        params = {"stale": "update_after"}
        multi_query.delay(count,
                          query.ddoc,
                          query.view,
                          params,
                          query.bucket,
                          query.password)
Example #26
def get_ep_hostip_from_params(params):
    app.workload_manager.updateClusterStatus()
    clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG+"_status")
    random_host = None
    try:
        random_host = clusterStatus.get_random_host().split(":")[0]
    except AttributeError:
        logger.error("Cannot fetch cluster status information")

    host = params.get('ip') or random_host or cfg.COUCHBASE_IP
    port = params.get('port') or 11210

    return host, int(port)
Example #27
def throttle_kv_ops(isovercommited=True):

    rabbitHelper = throttle_kv_ops.rabbitHelper

    workloads = CacheHelper.workloads()
    for workload in workloads:
        if workload.active:
            if isovercommited:
                # clear pending task_queue
                rabbitHelper.purge(workload.task_queue)

                # reduce ops by 10%
                workload.ops_per_sec = workload.ops_per_sec * 0.90
                logger.error("Cluster Overcommitted: reduced ops to (%s)" % workload.ops_per_sec)
Example #28
def queue_op_cycles(workload):

    # read doc template
    template = Template.from_cache(str(workload.template))
    if template is None:
        logger.error("no doc template imported")
        return

    rabbitHelper = queue_op_cycles.rabbitHelper
    bucket = str(workload.bucket)
    task_queue = workload.task_queue

    active_hosts = None
    clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG+"_status")
    if clusterStatus is not None:
        active_hosts = clusterStatus.get_all_hosts()

    # create 20 op cycles
    for i in xrange(20):

        if workload.cc_queues is not None:
            # override template attribute with workload
            template.cc_queues = workload.cc_queues

        if len(workload.indexed_keys) > 0:
            template.indexed_keys = workload.indexed_keys

        # read workload settings
        bucketInfo = {"bucket": workload.bucket,
                      "password": workload.password}

        ops_sec = workload.ops_per_sec

        create_count = int(ops_sec * workload.create_perc / 100)
        update_count = int(ops_sec * workload.update_perc / 100)
        get_count = int(ops_sec * workload.get_perc / 100)
        del_count = int(ops_sec * workload.del_perc / 100)
        exp_count = int(ops_sec * workload.exp_perc / 100)
        consume_queue = workload.consume_queue

        ttl = workload.ttl
        miss_queue = workload.miss_queue
        miss_perc = workload.miss_perc

        generate_pending_tasks(task_queue, template, bucketInfo, create_count,
                               update_count, get_count, del_count, exp_count,
                               consume_queue, ttl, miss_perc, miss_queue, active_hosts)
Example #29
def queue_op_cycles(workload):

    # read doc template
    template = Template.from_cache(str(workload.template))
    if template is None:
        logger.error("no doc template imported")
        return

    rabbitHelper = queue_op_cycles.rabbitHelper
    bucket = str(workload.bucket)
    task_queue = workload.task_queue

    active_hosts = None
    clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG + "_status")
    if clusterStatus is not None:
        active_hosts = clusterStatus.get_all_hosts()

    # create 20 op cycles
    for i in xrange(20):

        if workload.cc_queues is not None:
            # override template attribute with workload
            template.cc_queues = workload.cc_queues

        if len(workload.indexed_keys) > 0:
            template.indexed_keys = workload.indexed_keys

        # read workload settings
        bucketInfo = {"bucket": workload.bucket, "password": workload.password}

        ops_sec = workload.ops_per_sec

        create_count = int(ops_sec * workload.create_perc / 100)
        update_count = int(ops_sec * workload.update_perc / 100)
        get_count = int(ops_sec * workload.get_perc / 100)
        del_count = int(ops_sec * workload.del_perc / 100)
        exp_count = int(ops_sec * workload.exp_perc / 100)
        consume_queue = workload.consume_queue

        ttl = workload.ttl
        miss_queue = workload.miss_queue
        miss_perc = workload.miss_perc

        generate_pending_tasks(task_queue, template, bucketInfo, create_count,
                               update_count, get_count, del_count, exp_count,
                               consume_queue, ttl, miss_perc, miss_queue,
                               active_hosts)
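
Note: as a concrete check of the per-op split above: with ops_per_sec = 1000, create_perc = 20 and get_perc = 70, each cycle queues int(1000 * 20 / 100) = 200 creates and int(1000 * 70 / 100) = 700 gets. The percentages are applied independently, so they need not sum to 100.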
Example #30
def taskScheduler():

    workloads = CacheHelper.workloads()

    rabbitHelper = taskScheduler.rabbitHelper
    tasks = []

    for workload in workloads:
        if workload.active:
            task_queue = workload.task_queue
            # dequeue subtasks
            if rabbitHelper.qsize(task_queue) > 0:
                tasks = rabbitHelper.getJsonMsg(task_queue)
                if tasks is not None and len(tasks) > 0:

                    # apply async
                    result = TaskSet(tasks=tasks).apply_async()
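
Note: TaskSet is presumably celery's old celery.task.sets.TaskSet (deprecated in favor of group and removed in Celery 4), which fires the dequeued subtask signatures as one batch.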
Example #31
def add_nodes(rest, servers='', cluster_id=cfg.CB_CLUSTER_TAG+"_status"):
    if servers.find('.') != -1 or servers == '':
        servers = servers.split()
    else:
        clusterStatus = CacheHelper.clusterstatus(cluster_id) or ClusterStatus(cluster_id)
        count = int(servers)
        if (len(clusterStatus.all_available_hosts) - len(clusterStatus.nodes)) >= int(count):
            servers = list(set(clusterStatus.all_available_hosts) - set(clusterStatus.get_all_hosts()))
        else:
            logger.error("Add nodes request invalid. # of nodes outside cluster is not enough")
            return
        servers = servers[:count]

    for server in servers:
        logger.error("Adding node %s" % server)
        ip, port = parse_server_arg(server)
        rest.add_node(cfg.COUCHBASE_USER, cfg.COUCHBASE_PWD, ip, port)
Example #32
def throttle_kv_ops(isovercommited=True):

    rabbitHelper = kv_ops_manager.rabbitHelper

    workloads = CacheHelper.workloads()
    for workload in workloads:
        if workload.active:
            if isovercommited:
                # clear pending task_queue
                rabbitHelper.purge(workload.task_queue)

                # reduce ops by 10%
                new_ops_per_sec = workload.ops_per_sec * 0.90
                if new_ops_per_sec > 5000:
                    workload.ops_per_sec = new_ops_per_sec
                    logger.error("Cluster Overcommitted: reduced ops to (%s)" %
                                 workload.ops_per_sec)
Example #33
def taskScheduler():

    workloads = CacheHelper.workloads()

    rabbitHelper = taskScheduler.rabbitHelper
    tasks = []

    for workload in workloads:
        if workload.active:
            task_queue = workload.task_queue
            # dequeue subtasks
            if rabbitHelper.qsize(task_queue) > 0:
                tasks = rabbitHelper.getJsonMsg(task_queue)
                if tasks is not None and len(tasks) > 0:

                    # apply async
                    result = TaskSet(tasks=tasks).apply_async()
Example #34
def add_nodes(rest, servers='', cluster_id=cfg.CB_CLUSTER_TAG+"_status"):
    if servers.find('.') != -1 or servers == '':
        servers = servers.split()
    else:
        clusterStatus = CacheHelper.clusterstatus(cluster_id) or ClusterStatus(cluster_id)
        count = int(servers)
        if (len(clusterStatus.all_available_hosts) - len(clusterStatus.nodes)) >= int(count):
            servers = list(set(clusterStatus.all_available_hosts) - set(clusterStatus.get_all_hosts()))
        else:
            logger.error("Rebalance in request invalid. # of nodes outside cluster is not enough")
            return
        servers = servers[:count]

    for server in servers:
        logger.error("Adding node %s" % server)
        ip, port = parse_server_arg(server)
        rest.add_node(cfg.COUCHBASE_USER, cfg.COUCHBASE_PWD, ip, port)
Example #35
def postcondition_handler():

    workloads = CacheHelper.workloads()
    for workload in workloads:
        if workload.postconditions and workload.active:
            bucket = workload.bucket
            bs = BucketStatus.from_cache(bucket)
            bs.block(bucket)

            stat_checker = StatChecker(cfg.COUCHBASE_IP + ":" + cfg.COUCHBASE_PORT,
                                       bucket=bucket,
                                       username=cfg.COUCHBASE_USER,
                                       password=cfg.COUCHBASE_PWD)
            status = stat_checker.check(workload.postconditions)
            if status == True:
                # unblock bucket and deactivate workload
                bs = BucketStatus.from_cache(bucket)
                bs.unblock(bucket)
                workload.active = False
Example #36
def updateClusterStatus(ignore_result=True):

    done = False

    clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG+"_status") or\
        ClusterStatus()

    # check cluster nodes
    cached_nodes = clusterStatus.nodes
    new_cached_nodes = []

    for node in cached_nodes:

        # get an active node
        if clusterStatus.http_ping_node(node) is not None:

            # get remaining nodes
            active_nodes = clusterStatus.get_cluster_nodes(node)

            # populate cache with healthy nodes
            for active_node in active_nodes:
                if active_node.status == 'healthy':
                    new_cached_nodes.append(active_node)

            break

    if len(new_cached_nodes) > 0:

        # check for update
        new_node_list = ["%s:%s" % (n.ip, n.port) for n in new_cached_nodes]

        if len(new_node_list) != len(cached_nodes) or\
            len(set(clusterStatus.get_all_hosts()).intersection(new_node_list)) !=\
                len(cached_nodes):
            clusterStatus.nodes = new_cached_nodes
            clusterStatus.update_orchestrator()
    else:
        clusterStatus.orchestrator = None
        ObjCacher().delete(CacheHelper.CLUSTERSTATUSKEY, clusterStatus)
Example #37
def query_ops_manager(max_msgs=10, isovercommited=False):

    rabbitHelper = query_ops_manager.rabbitHelper

    # retrieve all active query workloads
    queries = CacheHelper.active_queries()
    for query in queries:

        # check if query tasks are overloaded
        if rabbitHelper.qsize(query.task_queue) > max_msgs or isovercommited:

            # purge waiting tasks
            rabbitHelper.purge(query.task_queue)

            # throttle down ops by 10%
            new_queries_per_sec = query.qps * 0.90

            # cannot reduce below 10 qps
            if new_queries_per_sec > 10:
                query.qps = new_queries_per_sec
                logger.error("Cluster Overcommitted: reduced queries/sec to (%s)" % query.qps)
Example #38
def generate_node_stats_report():

    allnodestats = CacheHelper.allnodestats()

    if len(allnodestats) > 0:
        # print current time at top of each report
        # TODO: add active tasks at time of report generation
        ts = time.localtime()
        ts_string = "%s/%s/%s %s:%s:%s" %\
            (ts.tm_year, ts.tm_mon, ts.tm_mday, ts.tm_hour, ts.tm_min, ts.tm_sec)
        print_separator()
        logger.error("\tSTAT REPORT: (%s)" % ts_string)

        for node_stats in allnodestats:
            calculate_node_stat_results(node_stats)

            if len(node_stats.results) > 0:
                print_node_results(node_stats)
        logger.error("\tEND OF REPORT: (%s)" % ts_string)
        print_separator()
        new_line()
Example #39
def pick_nodesToRemove(servers='', involve_orchestrator=False, cluster_id=cfg.CB_CLUSTER_TAG+"_status"):
    if servers.find('.') != -1 or servers == '':
        servers = servers.split()
    else:
        clusterStatus = CacheHelper.clusterstatus(cluster_id)
        count = int(servers)
        temp_count = count
        servers = []
        if involve_orchestrator:
            servers.append("%s:%s" % (clusterStatus.orchestrator.ip, clusterStatus.orchestrator.port))
            temp_count = temp_count - 1

        if len(clusterStatus.nodes) > count:
            non_orchestrator_servers = list(set(clusterStatus.get_all_hosts()) -
                                            set(["%s:%s" % (clusterStatus.orchestrator.ip,
                                                            clusterStatus.orchestrator.port)]))
            servers.extend(non_orchestrator_servers[:temp_count])
        else:
            logger.error("Remove nodes request invalid. # of nodes in cluster is not enough")
            return []

    return servers
Example #40
def postcondition_handler():

    workloads = CacheHelper.workloads()

    for workload in workloads:
        if workload.postconditions and workload.active:
            bucket = workload.bucket
            bs = BucketStatus.from_cache(bucket)
            bs.block(bucket)

            stat_checker = StatChecker(cfg.COUCHBASE_IP + ":" +
                                       cfg.COUCHBASE_PORT,
                                       bucket=bucket,
                                       username=cfg.COUCHBASE_USER,
                                       password=cfg.COUCHBASE_PWD)
            status = stat_checker.check(workload.postconditions)
            if status == True:
                # unblock bucket and deactivate workload
                bs = BucketStatus.from_cache(bucket)
                bs.unblock(bucket)
                workload.active = False
Example #42
def query_ops_manager(max_msgs=10, isovercommited=False):

    rabbitHelper = query_ops_manager.rabbitHelper

    # retrieve all active query workloads
    queries = CacheHelper.active_queries()
    for query in queries:

        # check if query tasks are overloaded
        if rabbitHelper.qsize(query.task_queue) > max_msgs or isovercommited:

            # purge waiting tasks
            rabbitHelper.purge(query.task_queue)

            # throttle down ops by 10%
            new_queries_per_sec = query.qps * 0.90

            # cannot reduce below 10 qps
            if new_queries_per_sec > 10:
                query.qps = new_queries_per_sec
                logger.error("Cluster Overcommitted: reduced queries/sec to (%s)" %
                             query.qps)
Example #43
def updateClusterStatus(ignore_result=True):

    done = False

    clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG+"_status") or\
        ClusterStatus()

    # check cluster nodes
    cached_nodes = clusterStatus.nodes
    new_cached_nodes = []

    for node in cached_nodes:

        # get an active node
        if clusterStatus.http_ping_node(node) is not None:

            # get remaining nodes
            active_nodes = clusterStatus.get_cluster_nodes(node)

            # populate cache with healthy nodes
            for active_node in active_nodes:
                if active_node.status == 'healthy':
                    new_cached_nodes.append(active_node)

            break

    if len(new_cached_nodes) > 0:

        # check for update
        new_node_list = ["%s:%s" % (n.ip, n.port) for n in new_cached_nodes]

        if len(new_node_list) != len(cached_nodes) or\
            len(set(clusterStatus.get_all_hosts()).intersection(new_node_list)) !=\
                len(cached_nodes):
            clusterStatus.nodes = new_cached_nodes
            clusterStatus.update_orchestrator()
    else:
        clusterStatus.orchestrator = None
        ObjCacher().delete(CacheHelper.CLUSTERSTATUSKEY, clusterStatus)
Example #44
def perform_xdcr_tasks(xdcrMsg):
    logger.error(xdcrMsg)
    src_master = create_server_obj()
    remote_id = ''
    if len(cfg.CB_REMOTE_CLUSTER_TAG) > 0:
        remote_id = cfg.CB_REMOTE_CLUSTER_TAG[0]+"_status"
    else:
        logger.error("No remote cluster tag. Cannot create xdcr")
        return
    clusterStatus = CacheHelper.clusterstatus(remote_id) or ClusterStatus(remote_id)
    remote_ip = clusterStatus.get_random_host().split(":")[0]

    dest_master = create_server_obj(server_ip=remote_ip, username=xdcrMsg['dest_cluster_rest_username'],
                                    password=xdcrMsg['dest_cluster_rest_pwd'])
    dest_cluster_name = xdcrMsg['dest_cluster_name']
    xdcr_link_cluster(src_master, dest_master, dest_cluster_name)
    xdcr_start_replication(src_master, dest_cluster_name)

    if xdcrMsg['replication_type'] == "bidirection":
        src_cluster_name = dest_cluster_name + "_temp"
        xdcr_link_cluster(dest_master, src_master, src_cluster_name)
        xdcr_start_replication(dest_master, src_cluster_name)
Example #45
def epengine_stat_checker(workload):

    postcondition = workload.postconditions

    if isinstance(postcondition, dict):
        params = parse_condition_dict(postcondition)
    else:
        params = parse_condition(postcondition)

    random_host, port = get_ep_hostip_from_params(params)

    status = True
    all_hosts = [random_host]
    if params['cluster_check'] == True:
        clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG+"_status")
        all_hosts = clusterStatus.get_all_hosts()

    for host in all_hosts:
        statChecker = EPStatChecker(host.split(":")[0], port)
        status &= statChecker.check(postcondition)

    return status
Example #46
def add_nodes(rest, servers='', cluster_id=cfg.CB_CLUSTER_TAG+"_status", zone_name=''):

    # create zone if it does not exist
    if zone_name != '':
        if rest.is_zone_exist(zone_name) == False:
            rest.add_zone(zone_name)

    if servers.find('.') != -1 or servers == '':
        servers = servers.split()
    else:
        clusterStatus = CacheHelper.clusterstatus(cluster_id)
        count = int(servers)
        if (len(clusterStatus.all_available_hosts) - len(clusterStatus.nodes)) >= int(count):
            servers = list(set(clusterStatus.all_available_hosts) - set(clusterStatus.get_all_hosts()))
        else:
            logger.error("Add nodes request invalid. # of nodes outside cluster is not enough")
            return
        servers = servers[:count]

    for server in servers:
        logger.error("Adding node %s" % server)
        ip, port = parse_server_arg(server)
        rest.add_node(cfg.COUCHBASE_USER, cfg.COUCHBASE_PWD, ip, port, zone_name)
Example #47
def epengine_stat_checker(workload):

    postcondition = workload.postconditions

    if isinstance(postcondition, dict):
        params = parse_condition_dict(postcondition)
    else:
        params = parse_condition(postcondition)

    random_host, port = get_ep_hostip_from_params(params)

    status = True
    all_hosts = [random_host]
    if params['cluster_check'] == True:
        clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG +
                                                  "_status")
        all_hosts = clusterStatus.get_all_hosts()

    for host in all_hosts:
        statChecker = EPStatChecker(host.split(":")[0], port)
        status &= statChecker.check(postcondition)

    return status
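
Note on both epengine_stat_checker variants: status &= statChecker.check(postcondition) takes a bitwise AND of booleans, so the function returns True only if the postcondition holds on every host checked.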
Example #48
def taskScheduler():

    workloads = CacheHelper.workloads()

    rabbitHelper = taskScheduler.rabbitHelper
    tasks = []

    for workload in workloads:
        if workload.active:

            task_queue = workload.task_queue
            num_ready_tasks = rabbitHelper.qsize(task_queue)
            # dequeue subtasks
            if num_ready_tasks > 0:
                tasks = rabbitHelper.getJsonMsg(task_queue)
                if tasks is not None and len(tasks) > 0:

                    # apply async
                    result = TaskSet(tasks=tasks).apply_async()

            # check if more subtasks need to be queued
            if num_ready_tasks < 10:
                queue_op_cycles.delay(workload)
Example #49
def taskScheduler():

    workloads = CacheHelper.workloads()

    rabbitHelper = taskScheduler.rabbitHelper
    tasks = []

    for workload in workloads:
        if workload.active:

            task_queue = workload.task_queue
            num_ready_tasks = rabbitHelper.qsize(task_queue)
            # dequeue subtasks
            if num_ready_tasks > 0:
                tasks = rabbitHelper.getJsonMsg(task_queue)
                if tasks is not None and len(tasks) > 0:

                    # apply async
                    result = TaskSet(tasks=tasks).apply_async()

            # check if more subtasks need to be queued
            if num_ready_tasks < 10:
                queue_op_cycles.delay(workload)
Example #50
def setPhaseForStats(phase_name):

    allnodestats = CacheHelper.allnodestats()
    if len(allnodestats) > 0:
        for node_stats in allnodestats:
            node_stats.phase = phase_name
Example #51
def perform_admin_tasks(adminMsg, cluster_id=cfg.CB_CLUSTER_TAG+"_status"):
    app.workload_manager.updateClusterStatus()
    clusterStatus = CacheHelper.clusterstatus(cluster_id)
    if clusterStatus is None:
        logger.error("Unable to fetch clusterStatus from cache")
        return

    rest = clusterStatus.node_rest()

    # Add nodes
    servers = adminMsg["rebalance_in"]
    zone_name = adminMsg["group"]
    if adminMsg["services"]:
        add_nodes(rest, servers, cluster_id, zone_name, adminMsg["services"])
    else:
        add_nodes(rest, servers, cluster_id, zone_name)

    # Get all nodes
    allNodes = []
    for node in rest.node_statuses():
        allNodes.append(node.id)

    # Remove nodes
    servers = adminMsg["rebalance_out"]
    toBeEjectedNodes = remove_nodes(rest, servers, adminMsg["involve_orchestrator"], cluster_id)

    # Failover Node
    servers = adminMsg["failover"]
    auto_failover_servers = adminMsg["auto_failover"]
    only_failover = adminMsg["only_failover"]
    add_back_servers = adminMsg["add_back"]
    failoverNodes = failover_nodes(rest, servers, only_failover, adminMsg["involve_orchestrator"], cluster_id)
    autoFailoverNodes = auto_failover_nodes(rest, auto_failover_servers, only_failover, adminMsg["involve_orchestrator"], cluster_id)

    app.workload_manager.updateClusterStatus()
    clusterStatus = CacheHelper.clusterstatus(cluster_id)
    rest = clusterStatus.node_rest()
    addBackNodes = add_back_nodes(rest, add_back_servers, autoFailoverNodes+failoverNodes)
    toBeEjectedNodes.extend(failoverNodes)
    toBeEjectedNodes.extend(autoFailoverNodes)
    for node in addBackNodes:
        toBeEjectedNodes.remove(node)

    # SoftRestart a node
    servers = adminMsg["soft_restart"]
    restart(servers, cluster_id=cluster_id)

    # HardRestart a node
    servers = adminMsg["hard_restart"]
    restart(servers, type='hard', cluster_id=cluster_id)

    if adminMsg["soft_restart"] == '' and adminMsg["hard_restart"] == '':
        if not only_failover and (len(allNodes) > 0 or len(toBeEjectedNodes) > 0):
            logger.error("Rebalance")
            logger.error(allNodes)
            logger.error(toBeEjectedNodes)
            rest.rebalance(otpNodes=allNodes, ejectedNodes=toBeEjectedNodes)

    # do a soft restart on ejectedNodes that were failed over
    logger.error(toBeEjectedNodes)
    restartNodes = ""
    for node in toBeEjectedNodes:
        if node in (failoverNodes + autoFailoverNodes):
            if '@' in node:  # ns_X@hostname formatted
                node = node.split('@')[1]
            restartNodes = "%s %s" % (node, restartNodes)
    if len(restartNodes):
        restart(restartNodes)
Example #53
import os
import sys

from cache import CacheHelper
import testcfg as cfg

# make sure logdir exists
os.system("mkdir -p " + cfg.LOGDIR)

# make sure celerybeat-schedule.db file is deleted
os.system("rm -rf celerybeat-schedule.db")

# kill old background processes
kill_procs = ["consumer"]
for proc in kill_procs:
    os.system("ps aux | grep %s | awk '{print $2}' | xargs kill" % proc)

# delete queues (note using --purge will remove cc_queues)
queues = CacheHelper.task_queues() + CacheHelper.miss_queues()

# when --purge set delete cc_queue's as well
# as seriesly db
if "--purge" in sys.argv:

    queues = set(CacheHelper.queues())

    # cleaning up seriesly database (fast and slow created by cbtop)
    if cfg.SERIESLY_IP != '':
        from seriesly import Seriesly
        seriesly = Seriesly(cfg.SERIESLY_IP, 3133)
        dbs = seriesly.list_dbs()
        for db in dbs:
            seriesly.drop_db(db)
Example #54
def cacheVariable(cacheMsg):
    bucket = cacheMsg.get("bucket") or "default"
    ref = str(cacheMsg.get("reference") or "default_key")
    stat = cacheMsg.get("stat") or "curr_items"
    value = getClusterStat(bucket, stat)
    CacheHelper.cachePhaseVar(ref, value)
Example #55
def replace_magic_vars(str_):
    ref = re.match(r".*\$(?P<var>\w+)", str_).group('var')
    ref = str(ref.strip())
    value = CacheHelper.getPhaseVar(ref) or 0
    str_ = str_.replace("$" + ref, str(value))
    return str_
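
Note: an illustrative round trip through the phase-variable cache (names and values invented): after CacheHelper.cachePhaseVar("items", 1000), replace_magic_vars("curr_items > $items") returns "curr_items > 1000"; unknown variables fall back to 0.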
Example #56
def perform_admin_tasks(adminMsg, cluster_id=cfg.CB_CLUSTER_TAG+"_status"):
    app.workload_manager.updateClusterStatus()
    clusterStatus = CacheHelper.clusterstatus(cluster_id)
    if clusterStatus is None:
        logger.error("Unable to fetch clusterStatus from cache")
        return

    rest = clusterStatus.node_rest()

    # Add nodes
    servers = adminMsg["rebalance_in"]
    zone_name = adminMsg["group"]
    add_nodes(rest, servers, cluster_id, zone_name)

    # Get all nodes
    allNodes = []
    for node in rest.node_statuses():
        allNodes.append(node.id)

    # Remove nodes
    servers = adminMsg["rebalance_out"]
    toBeEjectedNodes = remove_nodes(rest, servers, adminMsg["involve_orchestrator"], cluster_id)

    # Failover Node
    servers = adminMsg["failover"]
    auto_failover_servers = adminMsg["auto_failover"]
    only_failover = adminMsg["only_failover"]
    add_back_servers = adminMsg["add_back"]
    failoverNodes = failover_nodes(rest, servers, only_failover, adminMsg["involve_orchestrator"], cluster_id)
    autoFailoverNodes = auto_failover_nodes(rest, auto_failover_servers, only_failover, adminMsg["involve_orchestrator"], cluster_id)

    app.workload_manager.updateClusterStatus()
    clusterStatus = CacheHelper.clusterstatus(cluster_id)
    rest = clusterStatus.node_rest()
    addBackNodes = add_back_nodes(rest, add_back_servers, autoFailoverNodes+failoverNodes)
    toBeEjectedNodes.extend(failoverNodes)
    toBeEjectedNodes.extend(autoFailoverNodes)
    for node in addBackNodes:
        toBeEjectedNodes.remove(node)

    # SoftRestart a node
    servers = adminMsg["soft_restart"]
    restart(servers, cluster_id=cluster_id)

    # HardRestart a node
    servers = adminMsg["hard_restart"]
    restart(servers, type='hard', cluster_id=cluster_id)

    if adminMsg["soft_restart"] == '' and adminMsg["hard_restart"] == '':
        if not only_failover and (len(allNodes) > 0 or len(toBeEjectedNodes) > 0):
            logger.error("Rebalance")
            logger.error(allNodes)
            logger.error(toBeEjectedNodes)
            rest.rebalance(otpNodes=allNodes, ejectedNodes=toBeEjectedNodes)

    # do a soft restart on ejectedNodes that were failed over
    logger.error(toBeEjectedNodes)
    restartNodes = ""
    for node in toBeEjectedNodes:
        if node in (failoverNodes + autoFailoverNodes):
            if '@' in node:  # ns_X@hostname formatted
                node = node.split('@')[1]
            restartNodes = "%s %s" % (node, restartNodes)
    if len(restartNodes):
        restart(restartNodes)
Example #58
def run(workload):

    workload.active = True
    rabbitHelper = RabbitHelper()
    sdk_queue_key = "sdk_consumer.*"

    # read doc template
    template = Template.from_cache(str(workload.template))
    if template is None:
        logger.error("no doc template imported")
        return

    consumer_template = copy.deepcopy(template)
    bucket = str(workload.bucket)
    password = str(workload.password)

    active_hosts = None
    clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG + "_status")
    if clusterStatus is not None:
        active_hosts = clusterStatus.get_all_hosts()

    if workload.cc_queues is not None:
        # override template attribute with workload
        consumer_template.cc_queues = workload.cc_queues

    if len(workload.indexed_keys) > 0:
        template.indexed_keys = workload.indexed_keys

    ops_sec = workload.ops_per_sec

    # modify ops by number of consumers
    num_consumers = rabbitHelper.numExchangeQueues(cfg.CB_CLUSTER_TAG,
                                                   EXCHANGE)

    if num_consumers == 0:
        logger.error("No sdkclients running")
        return

    ops_sec = int(ops_sec) / num_consumers
    create_count = int(ops_sec * workload.create_perc / 100)
    update_count = int(ops_sec * workload.update_perc / 100)
    get_count = int(ops_sec * workload.get_perc / 100)
    del_count = int(ops_sec * workload.del_perc / 100)
    exp_count = int(ops_sec * workload.exp_perc / 100)
    consume_queue = workload.consume_queue

    ttl = workload.ttl
    miss_queue = workload.miss_queue
    miss_perc = workload.miss_perc

    # broadcast to sdk_consumers
    msg = {
        'bucket': bucket,
        'id': workload.id,
        'password': password,
        'template': consumer_template.__dict__,
        'ops_sec': ops_sec,
        'create_count': create_count,
        'update_count': update_count,
        'get_count': get_count,
        'del_count': del_count,
        'exp_count': exp_count,
        'consume_queue': consume_queue,
        'ttl': ttl,
        'miss_perc': miss_perc,
        'active': True,
        'active_hosts': active_hosts
    }

    rabbitHelper.putMsg('', json.dumps(msg), EXCHANGE)
    logger.error("start task sent to %s consumers" % num_consumers)
Example #59
def replace_magic_vars(str_):
    ref = re.match(r".*\$(?P<var>\w+)", str_).group('var')
    ref = str(ref.strip())
    value = CacheHelper.getPhaseVar(ref) or 0
    str_ = str_.replace("$" + ref, str(value))
    return str_
Example #60
def report_kv_latency(bucket="default"):

    if cfg.SERIESLY_IP == '':
        # seriesly not configured
        return

    rabbitHelper = report_kv_latency.rabbitHelper
    clusterStatus = CacheHelper.clusterstatus(cfg.CB_CLUSTER_TAG+"_status") or\
        ClusterStatus()

    host = clusterStatus.get_random_host()
    if host is None: return

    ip, port = host.split(':')

    workloads = CacheHelper.workloads()
    for workload in workloads:
        if workload.active and workload.bucket == bucket:

            # read workload params
            bucket = str(workload.bucket)
            password = str(workload.password)

            # read template from active workload
            template = Template.from_cache(str(workload.template))
            template = template.__dict__
            client.decodeMajgicStrings(template)

            # setup key/val to use for timing
            key = _random_string(12)
            value = json.dumps(template['kv'])
            get_key = key

            # for get op, try to pull from consume_queue
            # so that we can calc impact of dgm
            consume_queue = workload.consume_queue
            if consume_queue is not None:
                keys = rabbitHelper.getJsonMsg(str(consume_queue),
                                               requeue=True)
                if len(keys) > 0:
                    get_key = str(keys['start'])

            # collect op latency
            set_latency = client.mc_op_latency('set', key, value, ip, port,
                                               bucket, password)
            get_latency = client.mc_op_latency('get', get_key, value, ip, port,
                                               bucket, password)
            delete_latency = client.mc_op_latency('delete', key, value, ip,
                                                  port, bucket, password)

            # report to seriesly
            seriesly = Seriesly(cfg.SERIESLY_IP, 3133)
            db = None
            if 'fast' in seriesly.list_dbs():
                db = 'fast'
            else:
                bucketStatus = BucketStatus.from_cache(bucket) or BucketStatus(
                    bucket)
                db = bucketStatus.latency_db
                if db not in seriesly.list_dbs():
                    seriesly.create_db(db)

            if db is not None:
                seriesly[db].append({
                    'set_latency': set_latency,
                    'get_latency': get_latency,
                    'delete_latency': delete_latency
                })