Example #1
def grow(self, by=1, timeout=30, with_test=True):
     logging.debug("grow(by={}, timeout={}, with_test={})".format(
         by, timeout, with_test))
     pre_partitions = self.get_partition_data() if with_test else None
     runners = []
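    # Allocate 3 new ports per joining worker (control, data, and external)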
    new_ports = get_port_values(num=3 * by,
                                host=self.host,
                                base_port=25000)
     addrs = [["{}:{}".format(self.host, p) for p in new_ports[i*3: i*3 + 3]]
              for i in range(by)]
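    # Start each new worker and register it with the cluster's runner lists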
     for x in range(by):
         runner = add_runner(
             worker_id=self._worker_id_counter,
             runners=self.workers,
             command=self.command,
             source_addrs=self.source_addrs,
             sink_addrs=self.sink_addrs,
             metrics_addr=self.metrics_addr,
             control_addr=self.workers[0].control,
             res_dir=self.res_dir,
             workers=by,
             my_control_addr=addrs[x][0],
             my_data_addr=addrs[x][1],
             my_external_addr=addrs[x][2])
         self._worker_id_counter += 1
         runners.append(runner)
         self.runners.append(runner)
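    # When testing, confirm that partitions migrated to the joining workers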
     if with_test:
         workers = {'joining': [w.name for w in runners],
                    'leaving': []}
         self.confirm_migration(pre_partitions, workers)
     return runners
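
A minimal, self-contained sketch of the address grouping used above, with hypothetical port values standing in for get_port_values(); the triple order mirrors the control/data/external assignment passed to add_runner:

host = "127.0.0.1"
by = 2
new_ports = [25000, 25001, 25002, 25003, 25004, 25005]  # pretend get_port_values() returned these
addrs = [["{}:{}".format(host, p) for p in new_ports[i * 3: i * 3 + 3]]
         for i in range(by)]
# addrs[0] == ['127.0.0.1:25000', '127.0.0.1:25001', '127.0.0.1:25002']
# addrs[1] == ['127.0.0.1:25003', '127.0.0.1:25004', '127.0.0.1:25005']
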
Example #2
    def __init__(self, command, host='127.0.0.1', sources=1, workers=1,
            sinks=1, sink_mode='framed', worker_join_timeout=30,
            is_ready_timeout=30, res_dir=None, runner_data=None):
        # Create attributes
        self._finalized = False
        self.command = command
        self.host = host
        self.workers = TypedList(types=(Runner,))
        self.dead_workers = TypedList(types=(Runner,))
        self.runners = TypedList(types=(Runner,))
        self.source_addrs = []
        self.sink_addrs = []
        self.sinks = []
        self.senders = []
        self.worker_join_timeout = worker_join_timeout
        self.is_ready_timeout = is_ready_timeout
        self.metrics = Metrics(host, mode='framed')
        self.errors = []
        self._worker_id_counter = 0
        if res_dir is None:
            self.res_dir = tempfile.mkdtemp(dir='/tmp/', prefix='res-data.')
        else:
            self.res_dir = res_dir
        self.runner_data = runner_data if runner_data is not None else []

        # Try to start everything... clean up on exception
        try:
            setup_resilience_path(self.res_dir)

            self.metrics.start()
            self.metrics_addr = ":".join(
                map(str, self.metrics.get_connection_info()))

            for s in range(sinks):
                self.sinks.append(Sink(host, mode=sink_mode))
                self.sinks[-1].start()
                if self.sinks[-1].err is not None:
                    raise self.sinks[-1].err

            self.sink_addrs = ["{}:{}"
                               .format(*map(str, s.get_connection_info()))
                               for s in self.sinks]

            num_ports = sources + 3 * workers
            ports = get_port_values(num=num_ports, host=host)
            addresses = ['{}:{}'.format(host, p) for p in ports]
            self.source_addrs = addresses[:sources]
            worker_addrs = [addresses[sources:][i:i + 3]
                            for i in range(0, len(addresses[sources:]), 3)]
            start_runners(self.workers, self.command, self.source_addrs,
                          self.sink_addrs,
                          self.metrics_addr, self.res_dir, workers,
                          worker_addrs)
            self.runners.extend(self.workers)
            self._worker_id_counter = len(self.workers)

            # Wait for all runners to report ready to process
            self.wait_to_resume_processing(self.is_ready_timeout)
            # make sure `workers` runners are active and listed in the
            # cluster status query
            logging.debug("Testing cluster size via obs query")
            self.query_observability(cluster_status_query,
                                     self.runners[0].external,
                                     tests=[(worker_count_matches, [workers])])
        except Exception as err:
            logging.error("Encountered and error when starting up the cluster")
            logging.exception(err)
            self.errors.append(err)
            self.__finally__()
            raise err
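
A similar self-contained sketch of the constructor's port split (hypothetical values): the first addresses feed the sources, and the rest are grouped into three-address blocks, one per worker:

sources, workers = 1, 2
host = "127.0.0.1"
ports = list(range(20000, 20000 + sources + 3 * workers))  # pretend get_port_values() returned these
addresses = ['{}:{}'.format(host, p) for p in ports]
source_addrs = addresses[:sources]
worker_addrs = [addresses[sources:][i:i + 3]
                for i in range(0, len(addresses[sources:]), 3)]
# source_addrs == ['127.0.0.1:20000']
# worker_addrs == [['127.0.0.1:20001', '127.0.0.1:20002', '127.0.0.1:20003'],
#                  ['127.0.0.1:20004', '127.0.0.1:20005', '127.0.0.1:20006']]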