Exemplo n.º 1
0
    def update_stateless_job(self, job_id, new_job_config):
        """
        Submit an update that applies a new configuration to a job.

        The request is sent through update_svc.CreateUpdate with a default
        (empty) update config.  Any exception raised while sending it is
        reported with print_fail and then re-raised.

        param job_id: id of the job
        param new_job_config: new config of the job
        type job_id: str
        type new_job_config: job.JobConfig

        return: the response returned by update_svc.CreateUpdate
        """
        target_job = peloton.JobID(value=job_id)
        create_request = update_svc.CreateUpdateRequest(
            jobId=target_job,
            jobConfig=new_job_config,
            updateConfig=update_pb2.UpdateConfig(),
        )
        try:
            print_okblue("Updating Job %s" % job_id)
            return self.client.update_svc.CreateUpdate(
                create_request,
                metadata=self.client.jobmgr_metadata,
                timeout=default_timeout,
            )
        except Exception as err:
            print_fail(
                "Exception calling Update Stateless Job: %s" % str(err)
            )
            raise
Exemplo n.º 2
0
    def stop_task(self, job_id, instance_id):
        """
        Stop a single task of a job.

        The stop request carries one instance range with from=instance_id
        and to=instance_id + 1.  Any exception raised by the call is
        reported with print_fail and then re-raised.

        param job_id: id of the job
        param instance_id: instance id of the task to stop

        type job_id: str
        type instance_id: int

        rtype: task.StopResponse
        """
        # "from" is a Python keyword, so it cannot be given as a keyword
        # argument to the constructor; it is assigned with setattr instead.
        single_instance = task.InstanceRange(to=instance_id + 1)
        setattr(single_instance, "from", instance_id)
        stop_request = task.StopRequest(
            jobId=peloton.JobID(value=job_id),
            ranges=[single_instance],
        )
        try:
            print_okblue("Stopping task %d of Job %s" % (instance_id, job_id))
            return self.client.task_svc.Stop(
                stop_request,
                metadata=self.client.jobmgr_metadata,
                timeout=default_timeout,
            )
        except Exception as err:
            print_fail("Exception calling Stop Tasks :%s" % str(err))
            raise
Exemplo n.º 3
0
    def setup(self, dynamic_env, instance_number,
              job_name=None, version=None, image_path=None):
        """
        Create the job for this module and wait until it is running.

        param dynamic_env: dict of dynamic environment variables
        param instance_number: number of tasks in the job
        param job_name: name of the job; when empty, label + '_' + name
            of this module is used
        param version: passed to the task config; also stored on
            self.version when truthy
        param image_path: passed to the task config

        type dynamic_env: dict
        type instance_number: int

        return: job-id
        raises ModuleLaunchFailedException: if monitering() reports that
            the job did not reach RUNNING_TARGET_STATUS
        """
        effective_name = job_name or (self.label + '_' + self.name)

        mesos_task = create_mesos_task_config(
            self.config, self.name, dynamic_env, version, image_path)
        if version:
            self.version = version

        created = self.peloton_helper.create_job(
            label=self.label,
            name=effective_name,
            default_task_config=mesos_task,
            num_instance=instance_number,
        )
        self.job_id = created.jobId.value

        print_okblue('Waiting for job %s creating...' % effective_name)
        is_running = self.peloton_helper.monitering(
            self.job_id, RUNNING_TARGET_STATUS)
        if is_running:
            return self.job_id
        raise ModuleLaunchFailedException("%s can not launch" % self.name)
Exemplo n.º 4
0
    def teardown_peloton(self, remove=False):
        """
        Stop all peloton applications, then cassandra, then delete the
        config file.

        Applications are torn down in the reverse of APP_ORDER.

        param remove: forwarded as-is to each teardown() call
        """
        print_okgreen("Step: stopping all peloton applications")
        for app_name in reversed(self.APP_ORDER):
            print_okblue("Stopping peloton application: %s" % app_name)
            peloton_job_name = self.label_name + "_" + "peloton-" + app_name
            self.peloton.teardown(peloton_job_name, remove=remove)

        print_okgreen("Step: stopping cassandra")
        self.cassandra.teardown(remove=remove)

        # Best-effort cleanup: an OSError while deleting the config file
        # (for example because it does not exist) is ignored on purpose.
        try:
            os.remove(self.config_name)
        except OSError:
            pass
Exemplo n.º 5
0
    def create_job(
        self,
        label,
        name,
        num_instance,
        default_task_config,
        instance_config=None,
        **extra
    ):
        """
        Build a job config from the arguments and create the job.

        The config is produced by get_job_config_spec().  Any exception
        raised while creating the job or printing the response is
        reported with print_fail and then re-raised.

        :param label: the label value of the job
        :param name: the name of the job
        :param num_instance: the number of instance of the job
        :param default_task_config: the default task config of the job
        :param instance_config: instance specific task config
        :param extra: extra information of the job

        :type label: str
        :type name: str
        :type num_instance: int
        :type default_task_config: task.TaskConfig
        :type instance_config: dict<int, task.TaskConfig>
        :type extra: dict

        :rtype: job.CreateResponse
        """
        job_config = self.get_job_config_spec(
            label,
            name,
            num_instance,
            default_task_config,
            instance_config=instance_config,
            **extra
        )
        create_request = job.CreateRequest(config=job_config)

        try:
            response = self.client.job_svc.Create(
                create_request,
                metadata=self.client.jobmgr_metadata,
                timeout=default_timeout,
            )
            print_okblue("Create job response : %s" % response)
            return response
        except Exception as err:
            print_fail("Exception calling Create job :%s" % str(err))
            raise
Exemplo n.º 6
0
    def setup(
        self,
        dynamic_env,
        instance_number,
        job_name=None,
        version=None,
        image_path=None,
    ):
        """
        Overrides setup() from base-class to create hostmgr in a phased manner.

        Jobs whose name does not contain "hostmgr" (including a missing
        job_name) are delegated unchanged to the base-class setup().  A
        hostmgr job is first created with a single instance; after a fixed
        wait it is updated to instance_number instances.

        param dynamic_env: dict of dynamic environment variables
        param instance_number: number of tasks in the job
        param job_name: name of the job
        param version: forwarded to the base-class setup()
        param image_path: forwarded to the base-class setup()

        type dynamic_env: dict
        type instance_number: int

        return: job-id
        raises ModuleLaunchFailedException: if monitering() reports that
            the job did not reach RUNNING_TARGET_STATUS after the update
        """
        # job_name defaults to None, and a membership test against None
        # raises TypeError, so a missing name is treated as "not hostmgr".
        if not job_name or "hostmgr" not in job_name:
            return super(Peloton, self).setup(
                dynamic_env,
                instance_number,
                job_name=job_name,
                version=version,
                image_path=image_path,
            )

        # create a single instance of hostmgr to avoid running DB migrations
        # concurrently.
        super(Peloton, self).setup(
            dynamic_env,
            1,
            job_name=job_name,
            version=version,
            image_path=image_path,
        )
        # Wait a little so that DB migration can complete.
        # TODO(amitbose) Find a better way to wait
        time.sleep(30)
        jobInfo = self.peloton_helper.get_job(self.job_id).jobInfo
        runtime = jobInfo.runtime
        config = jobInfo.config

        # update the job to change the instances; the change log of the
        # submitted config carries the job's current configuration version.
        config.instanceCount = instance_number
        cl = peloton.ChangeLog(version=runtime.configurationVersion)
        config.changeLog.MergeFrom(cl)
        self.peloton_helper.update_stateless_job(self.job_id, config)

        print_okblue("Waiting for job %s update..." % job_name)
        if not self.peloton_helper.monitering(
            self.job_id, RUNNING_TARGET_STATUS
        ):
            raise ModuleLaunchFailedException(
                "%s can not launch: update failed" % self.name
            )
        return self.job_id
Exemplo n.º 7
0
    def setup(self, dynamic_env, instance_number, job_name=None, version=None):
        """
        Create a job in which every instance has its own task config, and
        wait until it is running.

        For each instance a fresh MESOS_HOSTNAME (label, name, instance
        index and a random uuid joined by "-") is written into dynamic_env
        before the task config is built.  The config of instance 0 is also
        used as the default task config of the job.

        param dynamic_env: dict of dynamic environment variables; it is
            modified in place (MESOS_HOSTNAME is set)
        param instance_number: number of tasks in the job
        param job_name: name of the job; when empty, label + "_" + name
            of this module is used
        param version: passed to the task config; also stored on
            self.version when truthy

        type dynamic_env: dict
        type instance_number: int

        return: job-id
        raises ModuleLaunchFailedException: if monitering() reports that
            the job did not reach RUNNING_TARGET_STATUS
        """
        effective_name = job_name or (self.label + "_" + self.name)

        if version:
            self.version = version

        per_instance = {}
        for index in range(instance_number):
            hostname_parts = [
                self.label,
                self.name,
                str(index),
                str(uuid.uuid4()),
            ]
            dynamic_env["MESOS_HOSTNAME"] = "-".join(hostname_parts)
            per_instance[index] = create_mesos_task_config(
                self.config, self.name, dynamic_env, version
            )

        created = self.peloton_helper.create_job(
            label=self.label,
            name=effective_name,
            default_task_config=per_instance[0],
            instance_config=per_instance,
            num_instance=instance_number,
        )
        self.job_id = created.jobId.value

        print_okblue("Waiting for job %s setup..." % effective_name)
        is_running = self.peloton_helper.monitering(
            self.job_id, RUNNING_TARGET_STATUS
        )
        if is_running:
            return self.job_id
        raise ModuleLaunchFailedException("%s can not launch" % self.name)
Exemplo n.º 8
0
    def delete_job(self, job_id):
        """
        Delete a job.

        Any exception raised by the call is reported with print_fail and
        then re-raised.

        param job_id: id of the job
        type job_id: str

        rtype: job.DeleteResponse
        """
        target_job = peloton.JobID(value=job_id)
        delete_request = job.DeleteRequest(id=target_job)
        try:
            print_okblue("Deleting job %s" % job_id)
            return self.client.job_svc.Delete(
                delete_request,
                metadata=self.client.jobmgr_metadata,
                timeout=default_timeout,
            )
        except Exception as err:
            print_fail("Exception calling delete job :%s" % str(err))
            raise
Exemplo n.º 9
0
    def stop_job(self, job_id):
        """
        Stop all tasks of a job.

        The stop request carries only the job id and no instance ranges.
        Any exception raised by the call is reported with print_fail and
        then re-raised.

        param job_id: id of the job
        type job_id: str

        return: the response returned by task_svc.Stop
        """
        request = task.StopRequest(jobId=peloton.JobID(value=job_id))
        try:
            print_okblue("Killing all tasks of Job %s" % job_id)
            resp = self.client.task_svc.Stop(
                request,
                metadata=self.client.jobmgr_metadata,
                timeout=default_timeout,
            )
            return resp
        except Exception as e:
            # The failing call is task_svc.Stop, so name it correctly in
            # the failure message (it previously said "List Tasks").
            print_fail("Exception calling Stop Tasks :%s" % str(e))
            raise
Exemplo n.º 10
0
def create_respool_for_new_peloton(config,
                                   zk_server,
                                   agent_num,
                                   respool_name=RESPOOL_PATH):
    """
    Create a resource pool sized according to the cluster size.

    Each of cpu, memory and disk is set to
    agent_num * <per-slave limit> * 0.9, where the per-slave limits
    (cpuLimit, memLimitMb, diskLimitMb) are read from
    config['mesos-slave']['resource'].

    type config: dict
    type zk_server: string
    type agent_num: int
    type respool_name: string
    rtype: string

    return: resp.result.value of the create response
    raises Exception: if the response has its 'error' field set
    """
    client = PelotonClient(name='peloton-client', zk_servers=zk_server)

    # Announced resource values of every Mesos slave.
    per_slave = config.get('mesos-slave').get('resource')

    def pool_share(limit_key):
        # Respool size should be 90% of the cluster size.
        return agent_num * per_slave.get(limit_key) * 0.9

    pool_config = create_pool_config(
        name=respool_name,
        cpu=pool_share('cpuLimit'),
        memory=pool_share('memLimitMb'),
        disk=pool_share('diskLimitMb'),
    )

    create_request = respool.CreateRequest(config=pool_config)
    resp = client.respool_svc.CreateResourcePool(
        create_request,
        metadata=client.resmgr_metadata,
        timeout=default_timeout,
    )
    if not resp.HasField('error'):
        print_okblue('Created resource pool %s' % respool_name)
        return resp.result.value

    print_fail('Failed to create resource pool %s: %s' %
               (respool_name, resp))
    raise Exception("Resource pool creation failed")
Exemplo n.º 11
0
    def update_job(self, job_id, new_job_config):
        """
        Update a job to a new configuration.

        Any exception raised by the call is reported with print_fail and
        then re-raised.

        param job_id: id of the job
        param new_job_config: new config of the job
        type job_id: str
        type new_job_config: job.JobConfig

        rtype: job.UpdateResponse
        """
        request = job.UpdateRequest(
            id=peloton.JobID(value=job_id),
            config=new_job_config,
        )
        try:
            print_okblue("Updating Job %s" % job_id)
            resp = self.client.job_svc.Update(
                request,
                metadata=self.client.jobmgr_metadata,
                timeout=default_timeout,
            )
            return resp
        # "except Exception, e" is Python 2-only syntax and a SyntaxError on
        # Python 3; the "as" form is valid on Python 2.6+ and Python 3.
        except Exception as e:
            print_fail('Exception calling Update Job: %s' % str(e))
            raise
Exemplo n.º 12
0
    def start_peloton(
        self,
        virtual_zookeeper,
        agent_num,
        version=None,
        skip_respool=False,
        peloton_image=None,
        peloton_apps_config=None,
    ):
        """
        Start cassandra, then create one peloton job per entry of APP_ORDER,
        and finally (unless skipped) create the default resource pool.

        The job id returned for every application is recorded under
        vcluster_config["job_info"], and the version in use under
        vcluster_config["Peloton Version"].

        param virtual_zookeeper   : The zk url and port
        param agent_num           : The number of mesos agents; passed on
                                    when creating the default respool
        param version             : The peloton version; replaced by the
                                    text after the last ":" of
                                    peloton_image when there is one
        param skip_respool        : To skip creating the default respool or not
        param peloton_image       : The docker image of peloton
        param peloton_apps_config : The path to the peloton apps configs

        type virtual_zookeeper   : str
        type agent_num           : int
        type version             : str
        type skip_respool        : bool
        type peloton_image       : str
        type peloton_apps_config : str
        """
        # Setup Cassandra
        cassandra_host, cassandra_port, cassandra_store = (
            self.start_cassandra()
        )
        # Wait a little for cassandra to start-up and create keyspace.
        # TODO(amitbose) find a better way to wait
        time.sleep(20)

        # Whatever follows the last ":" of the image replaces the version.
        if peloton_image:
            _, separator, image_tag = peloton_image.rpartition(":")
            if separator:
                version = image_tag

        # Setup Peloton
        print_okgreen(
            "Step: Create Peloton, version: %s, image: %s"
            % (version, peloton_image)
        )
        log_file_count = self.config.get("peloton").get(
            "num_log_files", DEFAULT_PELOTON_NUM_LOG_FILES
        )

        for app in self.APP_ORDER:
            print_okblue("Creating peloton application: %s" % app)

            app_env = {
                "PRODUCTION_CONFIG": self._get_base64_prod_config(
                    app, peloton_apps_config
                ),
                "APP": app,
                "ENVIRONMENT": "production",
                "ELECTION_ZK_SERVERS": virtual_zookeeper,
                "MESOS_ZK_PATH": "zk://%s/mesos" % virtual_zookeeper,
                "CASSANDRA_STORE": cassandra_store,
                "CASSANDRA_HOSTS": cassandra_host,
                "CASSANDRA_PORT": str(cassandra_port),
                "CONTAINER_LOGGER_LOGROTATE_STDERR_OPTIONS": (
                    "rotate %s" % log_file_count
                ),
            }

            # Hand the MESOS_WORK_DIR of the mesos slaves (if configured) to
            # the application as MESOS_AGENT_WORK_DIR.
            slave_static_env = self.config.get("mesos-slave", {}).get(
                "static_env", []
            )
            work_dirs = [
                entry["value"]
                for entry in slave_static_env
                if entry.get("name") == "MESOS_WORK_DIR"
            ]
            if work_dirs:
                app_env["MESOS_AGENT_WORK_DIR"] = work_dirs[0]

            app_settings = self.config.get("peloton").get(app)
            if app == "hostmgr":
                app_env["SCARCE_RESOURCE_TYPES"] = ",".join(
                    app_settings.get("scarce_resource_types")
                )
                app_env["SLACK_RESOURCE_TYPES"] = ",".join(
                    app_settings.get("slack_resource_types")
                )
                app_env["ENABLE_REVOCABLE_RESOURCES"] = str(
                    app_settings.get("enable_revocable_resources")
                )
            elif app == "placement_stateless":
                app_env["APP"] = "placement"
                app_env["TASK_TYPE"] = "STATELESS"

            instance_count = int(app_settings.get("instance_count"))
            created_job_id = self.peloton.setup(
                app_env,
                instance_count,
                self.label_name + "_" + "peloton-" + app,
                version,
                peloton_image,
            )
            self.vcluster_config["job_info"][app] = created_job_id

        self.vcluster_config.update({"Peloton Version": version})

        # create a default resource pool
        if skip_respool:
            return
        create_respool_for_new_peloton(
            self.config,
            zk_server=virtual_zookeeper,
            agent_num=agent_num,
        )
Exemplo n.º 13
0
    def start_peloton(self,
                      virtual_zookeeper,
                      agent_num,
                      version=None,
                      skip_respool=False,
                      peloton_image=None,
                      peloton_apps_config=None):
        """
        Start cassandra, then create one peloton job per entry of APP_ORDER,
        and finally (unless skipped) create the default resource pool.

        The job id returned for every application is recorded under
        vcluster_config['job_info'], and the version in use under
        vcluster_config['Peloton Version'].

        param virtual_zookeeper   : The zk url and port
        param agent_num           : The number of mesos agents; passed on
                                    when creating the default respool
        param version             : The peloton version; replaced by the
                                    text after the last ':' of
                                    peloton_image when there is one
        param skip_respool        : To skip creating the default respool or not
        param peloton_image       : The docker image of peloton
        param peloton_apps_config : The path to the peloton apps configs

        type virtual_zookeeper   : str
        type agent_num           : int
        type version             : str
        type skip_respool        : bool
        type peloton_image       : str
        type peloton_apps_config : str
        """
        # Setup Cassandra
        chost, cport, keyspace = self.start_cassandra()
        # Wait a little for cassandra to start-up and create keyspace.
        # TODO(amitbose) find a better way to wait
        time.sleep(20)

        if peloton_image:
            parts = peloton_image.split(':')
            if len(parts) > 1:
                version = parts[-1]

        # Setup Peloton
        print_okgreen('Step: Create Peloton, version: %s, image: %s' %
                      (version, peloton_image))
        peloton_config = self.config.get('peloton')
        num_logs = peloton_config.get(
            'num_log_files', DEFAULT_PELOTON_NUM_LOG_FILES)

        # The mesos work dir does not depend on the application, so it is
        # looked up once instead of on every loop iteration.
        mesos_slave_config = self.config.get('mesos-slave', {})
        mesos_work_dir = [
            kv['value'] for kv in mesos_slave_config.get('static_env', [])
            if kv.get('name') == 'MESOS_WORK_DIR'
        ]

        for app in self.APP_ORDER:
            print_okblue('Creating peloton application: %s' % app)

            # placement_[stateless|stateful] is the placement app with a
            # different name: it uses placement's production config and APP
            # value.  `app` itself is left untouched so that the job name,
            # the job_info key and the TASK_TYPE branch below still see the
            # variant name (rebinding `app` made that branch unreachable).
            if app.startswith('placement_'):
                base_app = 'placement'
            else:
                base_app = app

            prod_config_path = self._get_app_path(
                peloton_apps_config).format(base_app)
            with open(prod_config_path, "rb") as config_file:
                prod_config_base64 = base64.b64encode(config_file.read())

            dynamic_env_master = {
                "PRODUCTION_CONFIG":
                prod_config_base64,
                'APP':
                base_app,
                'ENVIRONMENT':
                'production',
                'ELECTION_ZK_SERVERS':
                virtual_zookeeper,
                'MESOS_ZK_PATH':
                'zk://%s/mesos' % virtual_zookeeper,
                'CASSANDRA_STORE':
                keyspace,
                'CASSANDRA_HOSTS':
                chost,
                'CASSANDRA_PORT':
                str(cport),
                'CONTAINER_LOGGER_LOGROTATE_STDERR_OPTIONS':
                'rotate %s' % num_logs,
            }
            if mesos_work_dir:
                dynamic_env_master['MESOS_AGENT_WORK_DIR'] = mesos_work_dir[0]

            # Prefer the settings of the exact application; fall back to the
            # base application's section (the one this code always used
            # before) when the variant has no section of its own.
            app_config = peloton_config.get(app) or \
                peloton_config.get(base_app)

            if app == 'hostmgr':
                dynamic_env_master['SCARCE_RESOURCE_TYPES'] = ','.join(
                    app_config.get('scarce_resource_types'))
                dynamic_env_master['SLACK_RESOURCE_TYPES'] = ','.join(
                    app_config.get('slack_resource_types'))
                dynamic_env_master['ENABLE_REVOCABLE_RESOURCES'] = \
                    str(app_config.get('enable_revocable_resources'))
            if app == "placement_stateless":
                # APP is already 'placement' through base_app.
                dynamic_env_master['TASK_TYPE'] = 'STATELESS'

            peloton_app_count = int(app_config.get('instance_count'))
            self.vcluster_config['job_info'][app] = (self.peloton.setup(
                dynamic_env_master,
                peloton_app_count,
                self.label_name + '_' + 'peloton-' + app,
                version,
                peloton_image,
            ))

        self.vcluster_config.update({
            'Peloton Version': version,
        })

        # create a default resource pool
        if not skip_respool:
            create_respool_for_new_peloton(
                self.config,
                zk_server=virtual_zookeeper,
                agent_num=agent_num,
            )