Exemplo n.º 1
0
    def get_expr_list_by_hackathon_id(self, hackathon, context):
        """Return one page of detailed experiments that belong to a hackathon.

        Optional ``user_name`` and ``status`` entries of ``context`` narrow the
        query. ``page`` and ``per_page`` fall back to 1 and 10 when absent.

        :param hackathon: hackathon whose experiments are listed
        :param context: request parameters (user_name, status, page, per_page)
        :return: result of ``self.util.paginate`` over the matching experiments
        """
        user_name = context.user_name if "user_name" in context else None
        status = context.status if "status" in context else None
        page = int(context.page) if "page" in context else 1
        per_page = int(context.per_page) if "per_page" in context else 10
        users = User.objects(name=user_name).all() if user_name else []

        # The hackathon filter always applies; the other two only when given.
        filters = {"hackathon": hackathon}
        if status:
            filters["status"] = status
        if user_name:
            filters["user__in"] = users

        experiments_pagi = Experiment.objects(**filters).paginate(
            page, per_page)
        return self.util.paginate(experiments_pagi,
                                  self.__get_expr_with_detail)
Exemplo n.º 2
0
    def __check_expr_status(self, user, hackathon, template):
        """
        Look up an experiment the user can use right away.

        An experiment of this user that is already running or starting is
        returned first; for hackathon admins that lookup is additionally
        limited to the given template. Failing that, a running experiment of
        the template that has no user yet is assigned to the user and returned.

        :param user: user asking for an experiment
        :param hackathon: hackathon the experiment belongs to
        :param template: template the experiment is built from
        :return: the matching experiment, or None when there is none
        """
        own_expr_query = Q(status__in=[EStatus.RUNNING, EStatus.STARTING],
                           hackathon=hackathon,
                           user=user)
        if self.admin_manager.is_hackathon_admin(hackathon.id, user.id):
            # admins are matched per template
            own_expr_query &= Q(template=template)

        existing = Experiment.objects(own_expr_query).first()
        if existing:
            return existing

        # fall back to a running experiment nobody owns yet
        unassigned = Experiment.objects(status=EStatus.RUNNING,
                                        hackathon=hackathon,
                                        template=template,
                                        user=None).first()
        if not unassigned:
            return None

        unassigned.user = user
        unassigned.save()
        return unassigned
Exemplo n.º 3
0
    def pre_allocate_expr(self, context):
        """Top up the pool of pre-allocated (user-less) experiments of a hackathon.

        For every template of the hackathon the unassigned experiments that
        are starting or running are counted. The first template that is below
        the configured ``PRE_ALLOCATE_NUMBER`` ends the run: either enough
        experiments are already starting (nothing is done), or a batch is
        started through ``start_pre_alloc_exprs``. Errors raised while handling
        a template are logged and the next template is tried.

        :param context: must carry ``hackathon_id``
        """
        # TODO: too complex, not check
        hackathon_id = context.hackathon_id
        self.log.debug("executing pre_allocate_expr for hackathon %s " % hackathon_id)
        hackathon = Hackathon.objects(id=hackathon_id).first()
        if hackathon is None:
            # first() gives None for an unknown id; there is nothing to allocate
            self.log.error("pre_allocate_expr: hackathon %s not found" % hackathon_id)
            return

        for template in hackathon.templates:
            try:
                pre_num = int(hackathon.config.get(HACKATHON_CONFIG.PRE_ALLOCATE_NUMBER, 1))
                query = Q(status=EStatus.STARTING) | Q(status=EStatus.RUNNING)
                curr_num = Experiment.objects(user=None, hackathon=hackathon, template=template).filter(query).count()
                self.log.debug("pre_alloc_exprs: pre_num is %d, curr_num is %d, remain_num is %d " %
                               (pre_num, curr_num, pre_num - curr_num))

                # TODO Should support VE_PROVIDER.K8S only in future after k8s Template is supported
                # if template.provider == VE_PROVIDER.K8S:
                if curr_num >= pre_num:
                    # this template already has enough pre-allocated experiments
                    continue

                start_num = Experiment.objects(user=None, template=template, status=EStatus.STARTING).count()
                allowed_currency = int(hackathon.config.get(HACKATHON_CONFIG.PRE_ALLOCATE_CONCURRENT, 1))
                if start_num >= allowed_currency:
                    self.log.debug(
                        "there are already %d Experiments starting, will check later ... " % allowed_currency)
                    return

                # start only as many as the concurrency limit still allows
                remain_num = min(allowed_currency, pre_num) - start_num
                self.log.debug(
                    "no starting template: %s , remain num is %d ... " % (template.name, remain_num))
                self.start_pre_alloc_exprs(None, template.name, hackathon.name, remain_num)
                break
            except Exception as e:
                self.log.error(e)
                self.log.error("check default experiment failed")
    def schedule_create_k8s_service(self, context):
        """Create the K8s environment of an experiment and wait for it to become available.

        The first template unit is deployed through a ``K8SServiceAdapter``,
        the public port returned by the adapter is stored on the first port
        entry of the experiment's first virtual environment, and the
        deployment status is then awaited.

        :param context: carries template_content, template_name, hackathon_id
            and experiment_id
        :return: True when the deployment became available, False when it did
            not or when any step raised (a "k8s_service_create_failed" message
            is sent in both failure cases)
        """
        template_unit = context.template_content.units[0]
        experiment = Experiment.objects.get(id=context.experiment_id)
        virtual_env = experiment.virtual_environments[0]
        k8s_dict = virtual_env.k8s_resource
        adapter = self.__get_adapter_from_ctx(K8SServiceAdapter, context)

        template_unit.set_ports(k8s_dict['ports'])
        labels = {
            "template_name": context.template_name,
            "hackathon_id": context.hackathon_id,
            "experiment_id": context.experiment_id,
        }
        try:
            deploy_name, port = adapter.create_k8s_environment(virtual_env.name, template_unit, labels=labels)

            # re-read the experiment before recording the public port
            expr = Experiment.objects(id=context.experiment_id).first()
            virtual_env = expr.virtual_environments[0]
            k8s_dict = virtual_env.k8s_resource
            vnc_port = k8s_dict['ports']
            vnc_port[0][K8S_UNIT.PORTS_PUBLIC_PORT] = port
            expr.save()

            # check deployment's status
            if self.__wait_for_k8s_status(adapter, virtual_env.name, K8S_DEPLOYMENT_STATUS.AVAILABLE):
                self.log.debug("k8s deployment succeeds: %s" % str(context))
                self.__on_create_success(context)
                return True
            else:
                self.log.error("k8s deployment fails: %s" % str(context))
                self.__on_message("k8s_service_create_failed", context)
        except Exception as e:
            # keep the cause visible instead of dropping it silently
            self.log.error("k8s service creation raised an error: %r" % e)
            self.__on_message("k8s_service_create_failed", context)

        return False
Exemplo n.º 5
0
    def __on_create_success(self, context):
        """Mark an experiment and its first virtual environment as running.

        When the K8s resource of that environment lists at least one port,
        guacamole remote parameters built from the first port are stored on
        the environment before the experiment is saved.

        :param context: must carry ``experiment_id``
        """
        self.log.debug(
            "experiment started %s successfully. Setting remote parameters." %
            context.experiment_id)
        experiment = Experiment.objects(id=context.experiment_id).first()
        first_env = experiment.virtual_environments[0]

        # TODO need to choose right port/protocol based on template
        ports = first_env.k8s_resource['ports']
        if len(ports) > 0:
            # guacamole parameters
            remote_paras = {}
            remote_paras[K8S_UNIT.REMOTE_PARAMETER_NAME] = first_env.name
            remote_paras[K8S_UNIT.REMOTE_PARAMETER_DISPLAY_NAME] = \
                ports[0][K8S_UNIT.PORTS_NAME]
            # TODO need to query K8S list all supported IPs and pick one randomly either here or connecting phase
            # K8S_UNIT.REMOTE_PARAMETER_HOST_NAME: "49.4.90.39",
            remote_paras[K8S_UNIT.REMOTE_PARAMETER_PROTOCOL] = "vnc"
            remote_paras[K8S_UNIT.REMOTE_PARAMETER_PORT] = \
                ports[0][K8S_UNIT.PORTS_PUBLIC_PORT]
            # K8S_UNIT.REMOTE_PARAMETER_USER_NAME: "",
            # K8S_UNIT.REMOTE_PARAMETER_PASSWORD: "",
            self.log.debug("expriment %s remote parameters: %s" %
                           (experiment.id, str(remote_paras)))
            first_env.remote_paras = remote_paras

        # update the status of the virtual environment and of the experiment
        first_env.status = VEStatus.RUNNING
        experiment.status = EStatus.RUNNING
        experiment.save()
    def __create_useful_k8s_dict(hackathon, experiment, template_unit):
        """Build the K8s resource dict (unique name plus ports) for a template unit.

        Candidate names have the form ``<unit name>-<experiment id>-<n>`` in
        lower case, with n running from 1 to 100; the first one not used by any
        virtual environment of the hackathon's experiments is taken.

        :param hackathon: hackathon whose experiments define the taken names
        :param experiment: experiment the environment is created for
        :param template_unit: unit providing ``name`` and ``get_ports()``
        :return: dict with the keys "name" and "ports"
        :raises RuntimeError: when all 100 candidate names are already taken
        """
        # FIXME K8s dict need a db model, not a dict
        taken_names = set()
        for expr in Experiment.objects(hackathon=hackathon).all():
            taken_names.update(ve.name for ve in expr.virtual_environments)

        # TODO Need to check the rules about K8s resource name
        for count in range(1, 101):
            # compare the lower-cased form, because that is the name handed out
            name = "{}-{}-{}".format(template_unit.name, experiment.id, count).lower()
            if name not in taken_names:
                break
        else:
            # only reached when none of the 100 candidates was free
            raise RuntimeError("Can't get useful env name.")

        # Ensure that the external ports do not conflict
        ports = copy.deepcopy(template_unit.get_ports())
        return {
            "name": name,
            "ports": ports,
        }
Exemplo n.º 7
0
    def _enable_guacd_file_transfer(self, context):
        """
        Copy the ``guacctl`` script into a virtual environment over scp.

        This function should be invoked after container is started in alauda_docker.py and hosted_docker.py

        :param context: must carry ``experiment_id`` and
            ``virtual_environment_name``; port, username, hostname and
            password are read from that environment's ``remote_paras``
        """
        expr = Experiment.objects(
            id=context.experiment_id).no_dereference().first()
        virtual_env = expr.virtual_environments.get(
            name=context.virtual_environment_name)
        remote = virtual_env.remote_paras

        p = pexpect.spawn(
            "scp -P %s %s %s@%s:/usr/local/sbin/guacctl" %
            (remote["port"],
             abspath("%s/../expr/guacctl" % dirname(realpath(__file__))),
             remote["username"], remote["hostname"]))
        try:
            # NOTE(review): the original expect() line was corrupted by
            # credential masking; the host-key confirmation step below is a
            # reconstruction of the usual scp prompt sequence -- confirm.
            i = p.expect([pexpect.TIMEOUT, 'yes/no', 'password:'])
            if i == 1:
                # unknown host key: accept it, then wait for the password prompt
                p.sendline("yes")
                i = p.expect([pexpect.TIMEOUT, 'password:'])

            # index 0 is always the timeout; anything else is a password prompt
            if i != 0:
                p.sendline(remote["password"])
                p.expect(pexpect.EOF)
        finally:
            # release the child process even when expect() raises
            p.close()
    def get_registration_detail(self, user, hackathon, registration=None):
        """Collect hackathon, user, registration and experiment info for a user.

        :param user: user the detail is built for
        :param hackathon: hackathon the detail is built for
        :param registration: registration to use; looked up by user and
            hackathon when not given
        :return: dict with "hackathon" and "user", plus "registration" when
            one exists and "experiment" when the user has a starting or
            running experiment in the hackathon
        """
        detail = {}
        detail["hackathon"] = hackathon.dic()
        detail["user"] = self.user_manager.user_display_info(user)

        if not registration:
            registration = self.get_registration_by_user_and_hackathon(user.id, hackathon.id)
            if not registration:
                return detail

        # "asset" is already in registration
        detail["registration"] = registration.dic()

        # experiment if any; a failing lookup is logged and otherwise ignored
        try:
            active_statuses = [EStatus.STARTING, EStatus.RUNNING]
            active_expr = Experiment.objects(
                user=user.id,
                hackathon=hackathon.id,
                status__in=active_statuses).first()
            if active_expr:
                detail["experiment"] = active_expr.dic()
        except Exception as e:
            self.log.error(e)

        return detail
Exemplo n.º 9
0
 def _on_virtual_environment_unexpected_error(self, context):
     """Flag a virtual environment of an experiment as being in unexpected error.

     :param context: must carry ``experiment_id``; when it also carries
         ``virtual_environment_name`` that environment's status is set to
         ``VEStatus.UNEXPECTED_ERROR`` before the experiment is saved
     """
     # %-formatting also works when the id is not a plain string
     self.log.warn("experiment unexpected error: %s" % context.experiment_id)
     expr = Experiment.objects(id=context.experiment_id).no_dereference() \
         .only("status", "virtual_environments").first()
     if expr is None:
         # first() gives None for an unknown id; nothing to update
         self.log.warn("experiment %s not found, skip error update" % context.experiment_id)
         return
     if "virtual_environment_name" in context:
         expr.virtual_environments.get(name=context.virtual_environment_name).status = VEStatus.UNEXPECTED_ERROR
     expr.save()
Exemplo n.º 10
0
    def __create_useful_k8s_dict(hackathon, experiment, template_unit):
        """Return the K8s resource dict for a unit: a unique name and its ports.

        Names are tried in the form ``<unit name>-<experiment id>-<n>``
        (lower-cased) for n = 1..100 and checked against the names of all
        virtual environments of the hackathon's experiments.

        :param hackathon: hackathon whose experiments define the taken names
        :param experiment: experiment the environment is created for
        :param template_unit: unit providing ``name`` and ``get_ports()``
        :return: ``{"name": <unique lower-case name>, "ports": <copied ports>}``
        :raises RuntimeError: when every one of the 100 names is in use
        """
        # FIXME K8s dict need a db model, not a dict
        _experiments = Experiment.objects(hackathon=hackathon).all()
        used_names = {v.name for e in _experiments for v in e.virtual_environments}

        # TODO Need to check the rules about K8s resource name
        name = None
        for attempt in range(1, 101):
            # the lower-cased form is what gets returned, so test that form
            candidate = "{}-{}-{}".format(template_unit.name, experiment.id, attempt).lower()
            if candidate not in used_names:
                name = candidate
                break
        if name is None:
            # all 100 attempts collided; a hit on the last attempt is still valid
            raise RuntimeError("Can't get useful env name.")

        # Ensure that the external ports do not conflict
        ports = copy.deepcopy(template_unit.get_ports())
        return {
            "name": name,
            "ports": ports,
        }
Exemplo n.º 11
0
    def getConnectInfo(self):
        """Return the guacamole remote parameters of the requested connection.

        The connection name comes from the ``name`` request argument and is
        matched against virtual environment names. When the stored parameters
        carry no host name, one of the configured public IPs is picked at
        random and added to the returned parameters.

        :return: the remote parameters, ``not_found`` when no experiment owns
            such an environment, or ``forbidden`` when the experiment does not
            belong to the current user
        """
        connection_name = request.args.get("name")
        self.log.debug("Guacamole connecion request, connection name: %s" %
                       connection_name)
        expr = Experiment.objects(virtual_environments__name=connection_name
                                  ).no_dereference().first()
        if not expr:
            return not_found("not_found")

        # an experiment without a user belongs to nobody, so deny access
        # instead of failing on ``None.id``
        if expr.user is None or expr.user.id != g.user.id:
            return forbidden("forbidden")

        remote_paras = expr.virtual_environments.get(
            name=connection_name).remote_paras
        # TODO Support DYNAMIC host/port in case of they cannot be determined on provision phase
        if K8S_UNIT.REMOTE_PARAMETER_HOST_NAME not in remote_paras:
            # TTT
            available_public_ips = self.util.safe_get_config(
                "ukylin.k8s.ips", ["119.3.202.71", "49.4.90.39"])
            remote_paras[K8S_UNIT.REMOTE_PARAMETER_HOST_NAME] = random.choice(
                available_public_ips)

        self.log.debug("get guacamole config by id: %s, paras: %r" %
                       (connection_name, remote_paras))
        return remote_paras
Exemplo n.º 12
0
    def __on_create_success(self, context):
        """Set the experiment and its first virtual environment to running.

        If the environment's K8s resource has at least one port, guacamole
        remote parameters derived from the first port are attached to the
        environment. The experiment is saved at the end.

        :param context: must carry ``experiment_id``
        """
        self.log.debug("experiment started %s successfully. Setting remote parameters." % context.experiment_id)
        expr = Experiment.objects(id=context.experiment_id).first()
        env = expr.virtual_environments[0]

        # TODO need to choose right port/protocol based on template
        port_list = env.k8s_resource['ports']
        has_ports = len(port_list) > 0
        if has_ports:
            # guacamole parameters
            # TODO need to query K8S list all supported IPs and pick one randomly either here or connecting phase
            guaca_config = dict()
            guaca_config[K8S_UNIT.REMOTE_PARAMETER_NAME] = env.name
            guaca_config[K8S_UNIT.REMOTE_PARAMETER_DISPLAY_NAME] = port_list[0][K8S_UNIT.PORTS_NAME]
            guaca_config[K8S_UNIT.REMOTE_PARAMETER_PROTOCOL] = "vnc"
            guaca_config[K8S_UNIT.REMOTE_PARAMETER_PORT] = port_list[0][K8S_UNIT.PORTS_PUBLIC_PORT]
            self.log.debug("expriment %s remote parameters: %s" % (expr.id, str(guaca_config)))
            env.remote_paras = guaca_config

        # both statuses change together and are persisted with one save
        env.status = VEStatus.RUNNING
        expr.status = EStatus.RUNNING
        expr.save()
Exemplo n.º 13
0
    def __start_virtual_environment(self, context, docker_template_unit):
        """Register a docker virtual environment on the experiment and start it.

        The unit is renamed to ``<first 9 chars of experiment id>-<unit
        name>-<8 random lower-case chars>``, a ``VirtualEnvironment`` document
        with that name is appended to the experiment, and the actual start is
        delegated to ``_internal_start_virtual_environment`` with a copy of
        the context.

        :param context: must carry ``experiment_id``
        :param docker_template_unit: unit describing the container to start
        """
        origin_name = docker_template_unit.get_name()
        id_prefix = str(context.experiment_id)[0:9]
        alphabet = string.ascii_letters + string.digits
        random_suffix = "".join(random.sample(alphabet, 8))
        container_name = '%s-%s-%s' % (id_prefix, origin_name,
                                       random_suffix.lower())
        docker_template_unit.set_name(container_name)
        self.log.debug("starting to start container: %s" % container_name)

        # db document for VirtualEnvironment
        ve = VirtualEnvironment(
            provider=VE_PROVIDER.DOCKER,
            name=container_name,
            image=docker_template_unit.get_image_with_tag(),
            status=VEStatus.INIT,
            remote_provider=VERemoteProvider.Guacamole)

        # each ve gets its own context so the caller's one stays untouched
        ve_context = context.copy()
        query = Experiment.objects(id=ve_context.experiment_id)
        experiment = query.no_dereference().only(
            "virtual_environments").first()
        experiment.virtual_environments.append(ve)
        experiment.save()

        # start container remotely , use hosted docker or alauda docker
        ve_context.virtual_environment_name = ve.name
        ve_context.unit = docker_template_unit
        self._internal_start_virtual_environment(ve_context)
Exemplo n.º 14
0
 def get_expr_status_and_confirm_starting(self, expr_id):
     """Report the status of an experiment, asking to confirm that it is starting.

     :param expr_id: id of the experiment
     :return: result of ``__report_expr_status`` called with
         ``isToConfirmExprStarting=True``, or ``not_found`` when no
         experiment has that id
     """
     expr = Experiment.objects(id=expr_id).first()
     if not expr:
         return not_found('Experiment Not found')
     return self.__report_expr_status(expr,
                                      isToConfirmExprStarting=True)
Exemplo n.º 15
0
    def get_registration_detail(self, user, hackathon, registration=None):
        """Build the registration detail dict of a user for a hackathon.

        :param user: user the detail is built for
        :param hackathon: hackathon the detail is built for
        :param registration: optional registration; fetched through
            ``get_registration_by_user_and_hackathon`` when falsy
        :return: dict holding "hackathon" and "user", and additionally
            "registration" (when there is one) and "experiment" (when a
            starting or running experiment of the user exists)
        """
        hackathon_info = hackathon.dic()
        user_info = self.user_manager.user_display_info(user)
        detail = {"hackathon": hackathon_info, "user": user_info}

        reg = registration
        if not reg:
            reg = self.get_registration_by_user_and_hackathon(user.id, hackathon.id)
        if not reg:
            return detail

        # "asset" is already in registration
        detail["registration"] = reg.dic()

        # experiment if any; errors here are logged and do not fail the call
        try:
            current = Experiment.objects(
                user=user.id,
                hackathon=hackathon.id,
                status__in=[EStatus.STARTING, EStatus.RUNNING]).first()
            if current:
                detail["experiment"] = current.dic()
        except Exception as e:
            self.log.error(e)

        return detail
    def __setup_virtual_machine_done(self, sctx):
        """Handle the completed VM setup of the current azure job, then schedule the next one.

        The matching virtual environment is marked running and the experiment
        is saved, the generic success callback is fired, and an
        ``AzureVirtualMachine`` resource is built from the job context and the
        endpoints reported by the adapter. The endpoint named
        ``ctx.remote_endpoint_name`` yields the guacamole remote parameters;
        every other endpoint is stored on the azure resource. Any error is
        logged and reported through ``_on_virtual_environment_failed``.

        :param sctx: setup context carrying ``experiment_id``, ``job_ctxs``
            and ``current_job_index``
        """
        try:
            self.log.debug("azure virtual environment %d vm setup done" % sctx.current_job_index)
            ctx = sctx.job_ctxs[sctx.current_job_index]

            # update the status of virtual environment
            expr = Experiment.objects(id=sctx.experiment_id).first()
            ve = expr.virtual_environments[sctx.current_job_index]
            adapter = self.__get_adapter_from_sctx(sctx, VirtualMachineAdapter)

            ve.status = VEStatus.RUNNING
            expr.save()

            self._on_virtual_environment_success(Context(
                experiment_id=expr.id))

            azure_resource = AzureVirtualMachine(name=ctx.virtual_machine_name,
                                                 label=ctx.virtual_machine_label,
                                                 dns="%s.chinacloudapp.cn" % ctx.cloud_service_name,
                                                 end_points=[])
            # todo record AzureDeployment, AzureCloudService and so on in db for roll back

            vm_role = adapter.get_virtual_machine_role(ctx.cloud_service_name,
                                                       ctx.deployment_name,
                                                       ctx.virtual_machine_name)

            if (not vm_role) or (not vm_role.instance_endpoints):
                self.log.warn(
                    "unable to find vm %s, cannot update virtual env config like guacamole" % ctx.virtual_machine_name)
            else:
                for endpoint in vm_role.instance_endpoints:
                    azure_resource.public_ip = endpoint.vip
                    if endpoint.name == ctx.remote_endpoint_name:  # endpoint for remote desktop
                        ve.remote_provider = VERemoteProvider.Guacamole
                        ve.remote_paras = get_remote_parameters(
                            ctx.raw_system_config,
                            ctx.remote,
                            ctx.virtual_machine_name,
                            endpoint.vip,
                            endpoint.public_port)
                    else:
                        # a single bad endpoint must not abort the whole callback
                        try:
                            aep = self.__get_persistable_endpoint(endpoint, ctx.raw_network_config)
                            azure_resource.end_points.append(aep)
                        except Exception as e:
                            self.log.error(e)

            ve.azure_resource = azure_resource
            azure_resource.save()
            expr.save()

            self.log.debug(
                "azure virtual environment %d vm success callback done, step to next" % sctx.current_job_index)
            # step to config next unit
            sctx.current_job_index += 1
            self.__schedule_setup(sctx)
        except Exception as e:
            # log the exception itself: ``e.message`` is missing on Python 3
            # exceptions and would make this handler fail before the failure
            # callback runs
            self.log.error("azure virtual environment %d failed on vm_done: %r" % (sctx.current_job_index, e))
            self._on_virtual_environment_failed(sctx)
Exemplo n.º 17
0
    def heart_beat(self, expr_id):
        """Record a heart beat for a running experiment.

        :param expr_id: id of the experiment
        :return: ``ok()`` after the heart beat time was saved, ``not_found``
            when no running experiment has that id
        """
        running_expr = Experiment.objects(id=expr_id,
                                          status=EStatus.RUNNING).first()
        if running_expr is not None:
            running_expr.last_heart_beat_time = self.util.get_now()
            running_expr.save()
            return ok()

        return not_found('Experiment is not running')
Exemplo n.º 18
0
    def heart_beat(self, expr_id):
        """Refresh ``last_heart_beat_time`` of an experiment that is running.

        :param expr_id: id of the experiment
        :return: ``not_found`` when the id does not match a running
            experiment, otherwise ``ok()``
        """
        query = Experiment.objects(id=expr_id, status=EStatus.RUNNING)
        experiment = query.first()
        if experiment is None:
            # unknown id, or the experiment is in another status
            return not_found('Experiment is not running')

        now = self.util.get_now()
        experiment.last_heart_beat_time = now
        experiment.save()
        return ok()
Exemplo n.º 19
0
 def _on_virtual_environment_unexpected_error(self, context):
     """Record an unexpected error on a virtual environment of an experiment.

     :param context: must carry ``experiment_id``; the environment named by
         ``virtual_environment_name`` (when present) gets the status
         ``VEStatus.UNEXPECTED_ERROR`` and the experiment is saved
     """
     # format the id instead of concatenating: it may not be a plain string
     self.log.warn("experiment unexpected error: %s" % context.experiment_id)
     expr = Experiment.objects(id=context.experiment_id).no_dereference() \
         .only("status", "virtual_environments").first()
     if expr is None:
         # unknown experiment id: there is nothing to flag or save
         self.log.warn("experiment %s not found, skip error update" % context.experiment_id)
         return
     if "virtual_environment_name" in context:
         expr.virtual_environments.get(name=context.virtual_environment_name
                                       ).status = VEStatus.UNEXPECTED_ERROR
     expr.save()
Exemplo n.º 20
0
    def _on_virtual_environment_stopped(self, context):
        """Mark a virtual environment as stopped, and the experiment too once all are.

        :param context: must carry ``experiment_id`` and
            ``virtual_environment_name``
        """
        expr = Experiment.objects(id=context.experiment_id).no_dereference() \
            .only("status", "virtual_environments").first()
        stopped_ve = expr.virtual_environments.get(name=context.virtual_environment_name)
        stopped_ve.status = VEStatus.STOPPED

        if all(env.status == VEStatus.STOPPED for env in expr.virtual_environments):
            # the experiment carries an experiment status, not a VE status
            expr.status = EStatus.STOPPED

        # always persist: otherwise the stop of this environment is lost
        # whenever other environments of the experiment are still up
        expr.save()
Exemplo n.º 21
0
    def get_expr_list_by_hackathon_id(self, hackathon, context):
        """List the experiments of a hackathon with detail, one page at a time.

        ``context`` may carry ``user_name`` and ``status`` to filter the
        experiments, and ``page`` / ``per_page`` (defaults 1 and 10) to select
        the page.

        :param hackathon: hackathon whose experiments are listed
        :param context: request parameters
        :return: result of ``self.util.paginate`` over the selected page
        """
        user_name = context.user_name if "user_name" in context else None
        status = context.status if "status" in context else None
        page = int(context.page) if "page" in context else 1
        per_page = int(context.per_page) if "per_page" in context else 10
        users = User.objects(name=user_name).all() if user_name else []

        # collect only the criteria that were actually supplied
        criteria = dict(hackathon=hackathon)
        if status:
            criteria.update(status=status)
        if user_name:
            criteria.update(user__in=users)

        matching = Experiment.objects(**criteria)
        experiments_pagi = matching.paginate(page, per_page)
        return self.util.paginate(experiments_pagi, self.__get_expr_with_detail)
Exemplo n.º 22
0
    def _on_virtual_environment_success(self, context):
        """React to a virtual environment that started successfully.

        When every virtual environment of the experiment is running, the
        experiment is set to running, saved, and ``_on_expr_started`` is
        called. The success hooks run in either case.

        :param context: must carry ``experiment_id``
        """
        query = Experiment.objects(id=context.experiment_id).no_dereference()
        expr = query.only("status", "virtual_environments").first()

        everything_up = all(env.status == VEStatus.RUNNING for env in expr.virtual_environments)
        if everything_up:
            expr.status = EStatus.RUNNING
            expr.save()
            self._on_expr_started(context)

        self._hooks_on_virtual_environment_success(context)
Exemplo n.º 23
0
    def __update_virtual_environment_cfg(self, context):
        """Store port bindings and guacamole settings of a virtual environment, then start its container.

        A ``PortBinding`` is appended to the environment's docker container
        for every entry of ``context.port_config``. When one of the entries
        uses the port named by the unit's remote settings, guacamole
        parameters pointing at the host server's public IP are saved on the
        environment. Finally the experiment is saved and the container is
        started.

        :param context: carries experiment_id, virtual_environment_name,
            host_server_id, azure_key_id, port_config and unit
        """
        experiment = Experiment.objects(
            id=context.experiment_id).no_dereference().first()
        virtual_environment = experiment.virtual_environments.get(
            name=context.virtual_environment_name)
        host_server = DockerHostServer.objects(
            id=context.host_server_id).first()

        # azure_key
        if not self.util.is_local():
            experiment.azure_key = AzureKey.objects(
                id=context.azure_key_id).first()

        # update port binding
        for cfg in context.port_config:
            if DOCKER_UNIT.PORTS_PUBLIC_PORT in cfg:
                public_port = cfg[DOCKER_UNIT.PORTS_PUBLIC_PORT]
            else:
                public_port = None
            port_binding = PortBinding(
                name=cfg[DOCKER_UNIT.PORTS_NAME],
                is_public=bool(cfg[DOCKER_UNIT.PORTS_PUBLIC]),
                public_port=public_port,
                host_port=cfg[DOCKER_UNIT.PORTS_HOST_PORT],
                container_port=cfg[DOCKER_UNIT.PORTS_PORT])
            if DOCKER_UNIT.PORTS_URL in cfg:
                port_binding.url = cfg[DOCKER_UNIT.PORTS_URL]
            virtual_environment.docker_container.port_bindings.append(
                port_binding)

        # guacamole config
        guacamole = context.unit.get_remote()
        # a real list (not a lazy filter object), so it can be tested and
        # indexed on Python 2 and Python 3 alike
        port_cfg = [
            p for p in context.port_config
            if p[DOCKER_UNIT.PORTS_PORT] == guacamole[DOCKER_UNIT.REMOTE_PORT]
        ]
        if port_cfg:
            virtual_environment.remote_provider = VERemoteProvider.Guacamole
            gc = {
                "displayname": context.virtual_environment_name,
                "name": context.virtual_environment_name,
                "protocol": guacamole[DOCKER_UNIT.REMOTE_PROTOCOL],
                "hostname": host_server.public_ip,
                "port": port_cfg[0][DOCKER_UNIT.PORTS_PUBLIC_PORT],
                "enable-sftp": True
            }
            if DOCKER_UNIT.REMOTE_USERNAME in guacamole:
                gc["username"] = guacamole[DOCKER_UNIT.REMOTE_USERNAME]
            if DOCKER_UNIT.REMOTE_PASSWORD in guacamole:
                gc["password"] = guacamole[DOCKER_UNIT.REMOTE_PASSWORD]

            # save guacamole config into DB
            virtual_environment.remote_paras = gc

        experiment.save()

        # start container
        self.__start_docker_container(context, experiment, host_server)
Exemplo n.º 24
0
    def pre_allocate_expr(self, context):
        """Start one pre-allocated (user-less) experiment for a hackathon when needed.

        For each template the unassigned experiments that are starting or
        running are counted against the ``pre_allocate_number`` setting
        (default 1). Azure and docker templates are handled; docker templates
        are skipped when the hackathon uses the alauda cloud provider. If an
        unassigned experiment of a template that needs more is still
        starting, the whole run stops; otherwise one experiment is started
        and the loop ends. Errors per template are logged and the next
        template is tried.

        :param context: must carry ``hackathon_id``
        """
        # TODO: too complex, not check
        hackathon_id = context.hackathon_id
        self.log.debug("executing pre_allocate_expr for hackathon %s " % hackathon_id)
        hackathon = Hackathon.objects(id=hackathon_id).first()
        if hackathon is None:
            # first() gives None for an unknown id; there is nothing to allocate
            self.log.error("pre_allocate_expr: hackathon %s not found" % hackathon_id)
            return

        hackathon_templates = hackathon.templates
        for template in hackathon_templates:
            try:
                pre_num = int(hackathon.config.get("pre_allocate_number", 1))
                query = Q(status=EStatus.STARTING) | Q(status=EStatus.RUNNING)
                curr_num = Experiment.objects(user=None, hackathon=hackathon, template=template).filter(query).count()
                if template.provider == VE_PROVIDER.AZURE:
                    if curr_num < pre_num:
                        remain_num = pre_num - curr_num
                        start_num = Experiment.objects(user=None, template=template, status=EStatus.STARTING).count()
                        if start_num > 0:
                            self.log.debug("there is an azure env starting, will check later ... ")
                            return
                        else:
                            self.log.debug(
                                "no starting template: %s , remain num is %d ... " % (template.name, remain_num))
                            # one experiment per run; the rest follows in later runs
                            self.start_expr(None, template.name, hackathon.name)
                            break
                elif template.provider == VE_PROVIDER.DOCKER:
                    if hackathon.config.get('cloud_provider') == CLOUD_PROVIDER.ALAUDA:
                        # don't create pre-env if alauda used
                        continue

                    self.log.debug(
                        "template name is %s, hackathon name is %s" % (template.name, hackathon.name))
                    if curr_num < pre_num:
                        remain_num = pre_num - curr_num
                        start_num = Experiment.objects(user=None, template=template, status=EStatus.STARTING).count()
                        if start_num > 0:
                            self.log.debug("there is an docker container starting, will check later ... ")
                            return
                        self.log.debug("no idle template: %s, remain num is %d ... " % (template.name, remain_num))
                        # one experiment per run; the rest follows in later runs
                        self.start_expr(None, template.name, hackathon.name)
                        break
            except Exception as e:
                self.log.error(e)
                self.log.error("check default experiment failed")
Exemplo n.º 25
0
    def _on_virtual_environment_success(self, context):
        """Promote the experiment to running once all its environments run.

        The experiment is loaded with only ``status`` and
        ``virtual_environments``. If each environment has the status
        ``VEStatus.RUNNING`` the experiment is saved as running and
        ``_on_expr_started`` is invoked. The success hooks are always called
        afterwards.

        :param context: must carry ``experiment_id``
        """
        expr = (Experiment.objects(id=context.experiment_id)
                .no_dereference()
                .only("status", "virtual_environments")
                .first())

        statuses = (ve.status == VEStatus.RUNNING
                    for ve in expr.virtual_environments)
        if all(statuses):
            expr.status = EStatus.RUNNING
            expr.save()
            self._on_expr_started(context)

        self._hooks_on_virtual_environment_success(context)
    def get_certificates_by_expr(self, expr_id):
        """Get certificates by experiment id

        Reads ``hackathon_id`` from the experiment lookup, takes the first
        entry of that hackathon's ``azure_keys`` and maps it through
        ``self.db.get_object(AzureKey, ...)``.

        :param expr_id: id of the experiment
        :return: result of ``map`` over the first ``azure_keys`` entry
        :raises Exception: "no azure key configured" when that entry is falsy
        """
        # legacy lookup: expr = self.db.get_object(Experiment, expr_id)
        # NOTE(review): there is no .first() here, so `expr` is whatever
        # Experiment.objects(...) returns (presumably a queryset, not a
        # document) -- confirm that `expr.hackathon_id` below can work.
        expr = Experiment.objects(id=expr_id)
        # legacy lookup: hak = self.db.find_all_objects_by(HackathonAzureKey, hackathon_id=expr.hackathon_id)
        # NOTE(review): `azure_keys[0]` is indexed before the emptiness check
        # below, and first() may be None for an unknown hackathon -- verify.
        hak = Hackathon.objects(id=expr.hackathon_id).first().azure_keys[0]
        if not hak:
            raise Exception("no azure key configured")

        # NOTE(review): `hak` is a single azure_keys entry, yet it is iterated
        # here and each item is expected to have `azure_key_id` -- confirm.
        return map(lambda key: self.db.get_object(AzureKey, key.azure_key_id), hak)
Exemplo n.º 27
0
    def get_certificates_by_expr(self, expr_id):
        """Get certificates by experiment id

        The hackathon is found through ``expr.hackathon_id``; the first entry
        of its ``azure_keys`` is then mapped to objects fetched with
        ``self.db.get_object(AzureKey, key.azure_key_id)``.

        :param expr_id: id of the experiment
        :return: result of ``map`` over the first ``azure_keys`` entry
        :raises Exception: "no azure key configured" when that entry is falsy
        """
        # previous implementation: expr = self.db.get_object(Experiment, expr_id)
        # NOTE(review): the query result is used without .first(); it is
        # presumably a queryset rather than an Experiment -- confirm that
        # `.hackathon_id` is available on it.
        expr = Experiment.objects(id=expr_id)
        # previous implementation: hak = self.db.find_all_objects_by(HackathonAzureKey, hackathon_id=expr.hackathon_id)
        # NOTE(review): an empty `azure_keys` fails on `[0]` before the
        # `if not hak` check can report it -- verify the intended order.
        hak = Hackathon.objects(id=expr.hackathon_id).first().azure_keys[0]
        if not hak:
            raise Exception("no azure key configured")

        # NOTE(review): iterates over one azure_keys entry and mixes the
        # `self.db` API with the `objects` queries above -- confirm intent.
        return map(lambda key: self.db.get_object(AzureKey, key.azure_key_id), hak)
Exemplo n.º 28
0
    def _on_virtual_environment_stopped(self, context):
        """Record that a virtual environment stopped; stop the experiment when all did.

        :param context: must carry ``experiment_id`` and
            ``virtual_environment_name``
        """
        expr = Experiment.objects(id=context.experiment_id).no_dereference() \
            .only("status", "virtual_environments").first()
        stopped_ve = expr.virtual_environments.get(
            name=context.virtual_environment_name)
        stopped_ve.status = VEStatus.STOPPED

        if all(env.status == VEStatus.STOPPED
               for env in expr.virtual_environments):
            expr.status = EStatus.STOPPED

        # save unconditionally: the experiment is re-read on every call, so an
        # unsaved environment status would be forgotten while other
        # environments are still up
        expr.save()
Exemplo n.º 29
0
    def getConnectInfo(self):
        """Return the stored guacamole parameters of the requested connection.

        The connection name is taken from the ``name`` request argument and
        matched against virtual environment names.

        :return: the environment's ``remote_paras``, ``not_found`` when no
            experiment owns such an environment, or ``forbidden`` when the
            experiment does not belong to the current user
        """
        connection_name = request.args.get("name")
        expr = Experiment.objects(virtual_environments__name=connection_name).no_dereference().first()
        if not expr:
            return not_found("not_found")

        # an experiment without a user belongs to nobody, so deny access
        # instead of failing on ``None.id``
        if expr.user is None or expr.user.id != g.user.id:
            return forbidden("forbidden")

        ve = expr.virtual_environments.get(name=connection_name)
        self.log.debug("get guacamole config by id: %s, paras: %r" % (connection_name, ve.remote_paras))
        return ve.remote_paras
Exemplo n.º 30
0
    def restart_stopped_expr(self, experiment_id):
        """Start the docker containers of an experiment that are not running.

        :param experiment_id: id of the experiment
        :return: ``experiment.dic()`` after its real status was re-checked
        :raises NotImplementedError: when an environment uses the azure provider
        """
        experiment = Experiment.objects(id=experiment_id).first()
        docker_proxy = self.hosted_docker_proxy
        for env in experiment.virtual_environments:
            provider = env.provider
            if provider == VE_PROVIDER.DOCKER:
                container = env.docker_container
                if docker_proxy.is_container_running(container):
                    continue
                docker_proxy.start_container(container.host_server,
                                             container.container_id)
            elif provider == VE_PROVIDER.AZURE:
                raise NotImplementedError()

        self.__check_expr_real_status(experiment)
        return experiment.dic()
Exemplo n.º 31
0
 def restart_stopped_expr(self, experiment_id):
     """
     Start the docker containers of an experiment that are not running.

     :param experiment_id: id of the experiment to restart
     :return: ``experiment.dic()`` after the experiment's real status has been re-checked
     """
     # todo: now just support hosted_docker, not support for alauda and windows
     experiment = Experiment.objects(id=experiment_id).first()
     for env in experiment.virtual_environments:
         if env.provider != VE_PROVIDER.DOCKER:
             # ALAUDA and AZURE environments are left untouched
             continue
         container = env.docker_container
         if self.hosted_docker_proxy.is_container_running(container):
             continue
         self.hosted_docker_proxy.start_container(container.host_server,
                                                  container.container_id)
     self.__check_expr_real_status(experiment)
     return experiment.dic()
 def __clear_ports_cache(self):
     """
     Reset the cached host ports unless an experiment is currently starting.

     While any experiment is in STARTING status the cache is kept and the
     reset is postponed to the next loop, because releasing the ports now
     would lose ports that are being applied for.
     :return:
     """
     starting = Experiment.objects(status=EStatus.STARTING).count()
     if starting <= 0:
         self.log.debug("-----release ports cache successfully------")
         self.host_ports = []
     else:
         self.log.debug("there are %d experiment is starting, host ports will updated in next loop" % starting)
 def __clear_ports_cache(self):
     """
     Reset the cached host ports unless an experiment is currently starting.

     While any experiment is in STARTING status the cache is kept and the
     reset is postponed to the next loop, because releasing the ports now
     would lose ports that are being applied for.
     :return:
     """
     starting = Experiment.objects(status=EStatus.STARTING).count()
     if starting <= 0:
         self.log.debug("-----release ports cache successfully------")
         self.host_ports = []
     else:
         self.log.debug("there are %d experiment is starting, host ports will updated in next loop" % starting)
    def __assign_ports(self, context, host_server):
        """
        Attach a new docker container record to the virtual environment and
        continue with host port assignment.

        :param context: exposes ``unit``, ``experiment_id`` and
                        ``virtual_environment_name``; ``container_name`` is set on it
        :param host_server: docker host server stored on the container record
        """
        self.log.debug("try to assign port on server %r" % host_server)
        template_unit = context.unit
        expr = Experiment.objects(id=context.experiment_id).no_dereference().first()
        target_env = expr.virtual_environments.get(name=context.virtual_environment_name)

        # the container record starts without any port binding
        docker_container = DockerContainer(
            name=target_env.name,
            image=template_unit.get_image_with_tag(),
            host_server=host_server,
            port_bindings=[])
        target_env.docker_container = docker_container
        expr.save()

        context.container_name = docker_container.name
        self.__assign_host_ports(context, host_server)
    def __assign_ports(self, context, host_server):
        """
        Attach a new docker container record to the virtual environment and
        continue with host port assignment.

        :param context: exposes ``unit``, ``experiment_id`` and
                        ``virtual_environment_name``; ``container_name`` is set on it
        :param host_server: docker host server stored on the container record
        """
        self.log.debug("try to assign port on server %r" % host_server)
        template_unit = context.unit
        expr = Experiment.objects(id=context.experiment_id).no_dereference().first()
        target_env = expr.virtual_environments.get(name=context.virtual_environment_name)

        # the container record starts without any port binding
        docker_container = DockerContainer(
            name=target_env.name,
            image=template_unit.get_image_with_tag(),
            host_server=host_server,
            port_bindings=[])
        target_env.docker_container = docker_container
        expr.save()

        context.container_name = docker_container.name
        self.__assign_host_ports(context, host_server)
Exemplo n.º 36
0
 def restart_stopped_expr(self, experiment_id):
     """
     Start the docker containers of an experiment that are not running.

     :param experiment_id: id of the experiment to restart
     :return: ``experiment.dic()`` after the experiment's real status has been re-checked
     """
     # todo: now just support hosted_docker, not support for alauda and windows
     experiment = Experiment.objects(id=experiment_id).first()
     for env in experiment.virtual_environments:
         if env.provider != VE_PROVIDER.DOCKER:
             # ALAUDA and AZURE environments are left untouched
             continue
         container = env.docker_container
         if self.hosted_docker_proxy.is_container_running(container):
             continue
         self.hosted_docker_proxy.start_container(container.host_server,
                                                  container.container_id)
     self.__check_expr_real_status(experiment)
     return experiment.dic()
    def _on_virtual_environment_failed(self, sctx):
        """
        Failure callback for a virtual environment setup job.

        Marks the virtual environment at ``sctx.current_job_index`` and its
        experiment as FAILED, then triggers the rollback. The rollback runs
        even if updating the experiment raised.

        :param sctx: exposes ``current_job_index`` and ``experiment_id``
        """
        try:
            job_index = sctx.current_job_index
            self.log.debug("azure virtual environment %d vm setup failed" % job_index)
            experiment = Experiment.objects(id=sctx.experiment_id).first()
            failed_env = experiment.virtual_environments[job_index]

            failed_env.status = VEStatus.FAILED
            experiment.status = EStatus.FAILED
            experiment.save()
        finally:
            rollback_notice = "azure virtual environment %d vm fail callback done, roll back start"
            self.log.debug(rollback_notice % sctx.current_job_index)
            # rollback reverse
            self._internal_rollback(sctx)
Exemplo n.º 38
0
    def _internal_stop_expr(self, context):
        """
        Stop every virtual environment of the experiment named in ``context``.

        Nothing happens when the experiment cannot be found. An experiment
        without virtual environments is saved with ``EStatus.ROLL_BACKED``.

        :param context: exposes ``experiment_id`` and supports ``copy()``
        """
        expr = Experiment.objects(id=context.experiment_id).first()
        if expr:
            environments = expr.virtual_environments
            if len(environments) < 1:
                # nothing to tear down
                expr.status = EStatus.ROLL_BACKED
                expr.save()
            else:
                # delete containers and change expr status
                ve_context = context
                for env in environments:
                    # every environment gets its own context, copied from the previous one
                    ve_context = ve_context.copy()
                    ve_context.virtual_environment_name = env.name
                    self._stop_virtual_environment(env, expr, ve_context)
Exemplo n.º 39
0
    def __check_expr_status(self, user, hackathon, template):
        """
        Find an experiment the user can reuse instead of starting a new one.

        First looks for a RUNNING/STARTING experiment of this user in the
        hackathon (for hackathon admins additionally restricted to
        ``template``). Failing that, claims a pre-allocated RUNNING experiment
        of the same hackathon and template that has no user yet, by assigning
        it to the user.

        :param user: user asking for an experiment
        :param hackathon: hackathon the experiment belongs to
        :param template: template the experiment is based on
        :return: the reusable experiment, or None when there is none
        """
        # Restrict to the requesting user; without this filter a running
        # experiment of any other participant would be handed back.
        criterion = Q(status__in=[EStatus.RUNNING, EStatus.STARTING],
                      hackathon=hackathon,
                      user=user)
        # NOTE(review): attribute is spelled ``admin_Manager`` here; confirm
        # against the class definition.
        is_admin = self.admin_Manager.is_hackathon_admin(hackathon.id, user.id)
        if is_admin:
            criterion &= Q(template=template)

        expr = Experiment.objects(criterion).first()
        if expr:
            # user has a running/starting experiment
            return expr

        # try to assign pre-configured expr to user
        expr = Experiment.objects(status=EStatus.RUNNING, hackathon=hackathon, template=template, user=None).first()
        if expr:
            expr.user = user
            expr.save()
            return expr

        return None
Exemplo n.º 40
0
 def stop_expr(self, expr_id):
     """
     Stop a running experiment through its starter.

     :param expr_id: experiment id
     :return: ``ok('OK')`` when a RUNNING experiment with this id exists, ``ok()`` otherwise
     """
     self.log.debug("begin to stop %s" % str(expr_id))
     running = Experiment.objects(id=expr_id, status=EStatus.RUNNING).first()
     if running is None:
         return ok()

     starter = self.get_starter(running.hackathon, running.template)
     if starter:
         stop_context = Context(experiment_id=running.id, experiment=running)
         starter.stop_expr(stop_context)
     self.log.debug("experiment %s ended success" % expr_id)
     return ok('OK')
Exemplo n.º 41
0
 def stop_expr(self, expr_id):
     """
     Stop an experiment through its starter.

     :param expr_id: experiment id
     :return: ``ok('OK')`` when an experiment with this id exists, ``ok()`` otherwise
     """
     self.log.debug("begin to stop %s" % str(expr_id))
     found = Experiment.objects(id=expr_id).first()
     if found is None:
         return ok()

     starter = self.get_starter(found.hackathon, found.template)
     if starter:
         stop_context = Context(experiment_id=found.id, experiment=found)
         starter.stop_expr(stop_context)
     self.log.debug("experiment %s ended success" % expr_id)
     return ok('OK')
    def __update_virtual_environment_cfg(self, context):
        """
        Store port bindings and remote-access settings on the virtual
        environment, then start its docker container.

        Steps: record the azure key on the experiment (skipped when
        ``self.util.is_local()``), append one ``PortBinding`` per entry of
        ``context.port_config``, build the guacamole parameters if one of the
        ports matches the unit's remote port, save the experiment and start
        the container.

        :param context: exposes ``experiment_id``, ``virtual_environment_name``,
                        ``host_server_id``, ``azure_key_id``, ``port_config``
                        and ``unit``
        """
        experiment = Experiment.objects(id=context.experiment_id).no_dereference().first()
        virtual_environment = experiment.virtual_environments.get(name=context.virtual_environment_name)
        host_server = DockerHostServer.objects(id=context.host_server_id).first()

        # azure_key
        if not self.util.is_local():
            experiment.azure_key = AzureKey.objects(id=context.azure_key_id).first()

        # update port binding
        for cfg in context.port_config:
            public_port = cfg[DOCKER_UNIT.PORTS_PUBLIC_PORT] if DOCKER_UNIT.PORTS_PUBLIC_PORT in cfg else None
            port_binding = PortBinding(name=cfg[DOCKER_UNIT.PORTS_NAME],
                                       is_public=bool(cfg[DOCKER_UNIT.PORTS_PUBLIC]),
                                       public_port=public_port,
                                       host_port=cfg[DOCKER_UNIT.PORTS_HOST_PORT],
                                       container_port=cfg[DOCKER_UNIT.PORTS_PORT])
            if DOCKER_UNIT.PORTS_URL in cfg:
                port_binding.url = cfg[DOCKER_UNIT.PORTS_URL]
            virtual_environment.docker_container.port_bindings.append(port_binding)

        # guacamole config
        guacamole = context.unit.get_remote()
        # Build a real list: ``filter`` returns a lazy iterator on Python 3,
        # which supports neither ``len`` nor indexing.
        remote_port = guacamole[DOCKER_UNIT.REMOTE_PORT]
        port_cfg = [p for p in context.port_config
                    if p[DOCKER_UNIT.PORTS_PORT] == remote_port]
        if port_cfg:
            virtual_environment.remote_provider = VERemoteProvider.Guacamole
            gc = {
                "displayname": context.virtual_environment_name,
                "name": context.virtual_environment_name,
                "protocol": guacamole[DOCKER_UNIT.REMOTE_PROTOCOL],
                "hostname": host_server.public_ip,
                "port": port_cfg[0][DOCKER_UNIT.PORTS_PUBLIC_PORT],
                "enable-sftp": True
            }
            # credentials are optional in the unit's remote settings
            if DOCKER_UNIT.REMOTE_USERNAME in guacamole:
                gc["username"] = guacamole[DOCKER_UNIT.REMOTE_USERNAME]
            if DOCKER_UNIT.REMOTE_PASSWORD in guacamole:
                gc["password"] = guacamole[DOCKER_UNIT.REMOTE_PASSWORD]

            # save guacamole config into DB
            virtual_environment.remote_paras = gc

        experiment.save()

        # start container
        self.__start_docker_container(context, experiment, host_server)
Exemplo n.º 43
0
    def _on_virtual_environment_failed(self, sctx):
        """
        Failure callback for a virtual environment setup job.

        Marks the virtual environment at ``sctx.current_job_index`` and its
        experiment as FAILED, then triggers the rollback. The rollback runs
        even if updating the experiment raised.

        :param sctx: exposes ``current_job_index`` and ``experiment_id``
        """
        try:
            job_index = sctx.current_job_index
            self.log.debug("azure virtual environment %d vm setup failed" % job_index)
            experiment = Experiment.objects(id=sctx.experiment_id).first()
            failed_env = experiment.virtual_environments[job_index]

            failed_env.status = VEStatus.FAILED
            experiment.status = EStatus.FAILED
            experiment.save()
        finally:
            rollback_notice = "azure virtual environment %d vm fail callback done, roll back start"
            self.log.debug(rollback_notice % sctx.current_job_index)
            # rollback reverse
            self._internal_rollback(sctx)
Exemplo n.º 44
0
 def stop_expr(self, expr_id):
     """
     Stop a running experiment through its starter.

     :param expr_id: experiment id
     :return: ``ok('OK')`` when a RUNNING experiment with this id exists, ``ok()`` otherwise
     """
     # ids are not guaranteed to be integers, so format them with %s
     self.log.debug("begin to stop %s" % str(expr_id))
     # ``objects(...)`` yields a query set, which is never None and has no
     # ``hackathon``/``template``; ``first()`` returns the document or None.
     expr = Experiment.objects(id=expr_id, status=EStatus.RUNNING).first()
     if expr is not None:
         starter = self.get_starter(expr.hackathon, expr.template)
         if starter:
             starter.stop_expr(Context(experiment=expr))
         self.log.debug("experiment %s ended success" % expr_id)
         return ok('OK')
     else:
         return ok()
Exemplo n.º 45
0
    def roll_back(self, expr_id):
        """
        roll back when exception occurred

        :param expr_id: experiment id
        :return: result of the starter's ``rollback``; None when the
                 experiment or its starter cannot be found
        """
        self.log.debug("Starting rollback experiment %s..." % expr_id)
        # ``first()`` turns the query set into a single document (or None);
        # the query set itself has no ``hackathon``/``template`` attributes.
        expr = Experiment.objects(id=expr_id).first()
        if not expr:
            self.log.warn("rollback failed due to experiment not found")
            return

        starter = self.get_starter(expr.hackathon, expr.template)
        if not starter:
            self.log.warn("rollback failed due to no starter found")
            return

        return starter.rollback(Context(experiment=expr))
Exemplo n.º 46
0
    def roll_back(self, expr_id):
        """
        roll back when exception occurred

        :param expr_id: experiment id
        :return: result of the starter's ``rollback``; None when the
                 experiment or its starter cannot be found
        """
        # ids are not guaranteed to be integers, so format them with %s
        self.log.debug("Starting rollback experiment %s..." % expr_id)
        # ``first()`` turns the query set into a single document (or None);
        # the query set itself has no ``hackathon``/``template`` attributes.
        expr = Experiment.objects(id=expr_id).first()
        if not expr:
            self.log.warn("rollback failed due to experiment not found")
            return

        starter = self.get_starter(expr.hackathon, expr.template)
        if not starter:
            self.log.warn("rollback failed due to no starter found")
            return

        return starter.rollback(Context(experiment=expr))
Exemplo n.º 47
0
    def __create_useful_k8s_dict(hackathon, experiment, template_unit):
        """
        Build the K8s resource description for a new virtual environment.

        Picks a name of the form ``<unit name>-<user id>-<n>`` (n from 1 to
        100) that no virtual environment of the hackathon uses yet, and gives
        every public port of the template unit a number above the highest
        public port already used in the hackathon (never below 31001).

        :param hackathon: hackathon whose experiments are scanned
        :param experiment: experiment the environment is created for; its
                           user id becomes part of the name
        :param template_unit: exposes ``name`` and ``get_ports()``
        :return: dict with the lower-cased ``name`` and the ``ports`` list
        :raises RuntimeError: when all 100 candidate names are taken
        """
        # FIXME K8s dict need a db model, not a dict
        user = experiment.user or None
        user_id = user.id if user else "None"

        _virtual_envs = []
        for e in Experiment.objects(hackathon=hackathon).all():
            _virtual_envs += list(e.virtual_environments)

        # TODO Need to check the rules about K8s resource name
        # NOTE(review): candidates are compared as built, but the returned
        # name is lower-cased; confirm how stored names are cased.
        _names = set(v.name for v in _virtual_envs)
        name = None
        for count in range(1, 101):
            name = "{}-{}-{}".format(template_unit.name, user_id, count)
            if name not in _names:
                break
        else:
            # only reached when every candidate, including the 100th, is taken
            raise RuntimeError("Can't get useful env name.")

        # TODO do not leave magic number
        _max_port = 31000
        for v in _virtual_envs:
            k8s_resource = v.k8s_resource
            _ports = k8s_resource['ports']
            for p in _ports:
                if not p[K8S_UNIT.PORTS_PUBLIC]:
                    continue
                if _max_port < p[K8S_UNIT.PORTS_PUBLIC_PORT]:
                    _max_port = p[K8S_UNIT.PORTS_PUBLIC_PORT]

        # Ensure that the external ports do not conflict
        # deep copy so the template unit's own port list stays unmodified
        ports = copy.deepcopy(template_unit.get_ports())
        for p in ports:
            if not p[K8S_UNIT.PORTS_PUBLIC]:
                continue
            _max_port += 1
            p[K8S_UNIT.PORTS_PUBLIC_PORT] = _max_port

        return {
            "name": "{}".format(name).lower(),
            "ports": ports,
        }
Exemplo n.º 48
0
    def scheduler_recycle_expr(self):
        """recycle experiment according to hackathon basic info on recycle configuration

        According to the hackathon's basic info on 'recycle_enabled', find out time out experiments
        Then call function to recycle them

        A failure while handling one hackathon is logged and does not stop
        the remaining hackathons from being processed.

        :return:
        """
        self.log.debug("start checking recyclable experiment ... ")
        for hackathon in self.hackathon_manager.get_recyclable_hackathon_list():
            try:
                # check recycle enabled
                mins = self.hackathon_manager.get_recycle_minutes(hackathon)
                # filter out the experiments that need to be recycled
                exprs = Experiment.objects(create_time__lt=self.util.get_now() - timedelta(minutes=mins),
                                           status=EStatus.RUNNING,
                                           hackathon=hackathon)
                for expr in exprs:
                    self.__recycle_expr(expr)
            except Exception as e:
                # scheduler boundary: log and carry on with the next hackathon
                self.log.error(e)
    def __load_azure_key_id(self, context):
        """
        Resolve the azure key to use for the given context.

        Lookup order: the key named by ``context.azure_key_id`` (if present),
        the key stored on the experiment, then the first key of the
        hackathon. Only in the last case ``context.azure_key_id`` is updated.

        :param context: exposes ``experiment_id`` and ``hackathon_id``,
                        optionally ``azure_key_id``
        :return: the azure key
        :raises Exception: when the hackathon is missing or has no azure key
        """
        # todo which key to use? how to support multi subscription?
        key = None
        if "azure_key_id" in context:
            key = AzureKey.objects(id=context.azure_key_id).first()

        if not key:
            key = Experiment.objects(id=context.experiment_id).only("azure_key").first().azure_key

        if key:
            return key

        # last resort: first key configured on the hackathon
        hackathon = Hackathon.objects(id=context.hackathon_id).first()
        if not hackathon or (len(hackathon.azure_keys) == 0):
            raise Exception("no azure key configured")
        key = hackathon.azure_keys[0]
        context.azure_key_id = key.id
        return key
    def __load_azure_key_id(self, context):
        """
        Resolve the azure key to use for the given context.

        Lookup order: the key named by ``context.azure_key_id`` (if present),
        the key stored on the experiment, then the first key of the
        hackathon. Only in the last case ``context.azure_key_id`` is updated.

        :param context: exposes ``experiment_id`` and ``hackathon_id``,
                        optionally ``azure_key_id``
        :return: the azure key
        :raises Exception: when the hackathon is missing or has no azure key
        """
        # todo which key to use? how to support multi subscription?
        key = None
        if "azure_key_id" in context:
            key = AzureKey.objects(id=context.azure_key_id).first()

        if not key:
            key = Experiment.objects(id=context.experiment_id).only("azure_key").first().azure_key

        if key:
            return key

        # last resort: first key configured on the hackathon
        hackathon = Hackathon.objects(id=context.hackathon_id).first()
        if not hackathon or (len(hackathon.azure_keys) == 0):
            raise Exception("no azure key configured")
        key = hackathon.azure_keys[0]
        context.azure_key_id = key.id
        return key
    def __stop_virtual_machine_done(self, sctx):
        """
        Success callback after a virtual machine has been stopped.

        Marks the virtual environment at ``sctx.current_job_index`` as
        stopped, advances ``sctx.current_job_index`` and schedules the next
        job. Any exception is logged and handed to the stop-failure callback.

        :param sctx: exposes ``experiment_id`` and ``current_job_index``
        """
        self.log.debug("azure virtual environment %d stop vm done" % sctx.current_job_index)

        try:
            # update the status of virtual environment
            expr = Experiment.objects(id=sctx.experiment_id).first()
            ve = expr.virtual_environments[sctx.current_job_index]

            self._on_virtual_environment_stopped(Context(
                experiment_id=expr.id,
                virtual_environment_name=ve.name))

            self.log.debug("azure virtual environment %d vm success callback done, step to next" % sctx.current_job_index)
            # step to config next unit
            sctx.current_job_index += 1
            self.__schedule_setup(sctx)
        except Exception as e:
            # ``str(e)`` instead of ``e.message``: the attribute does not exist
            # on Python 3, and reading it here would raise inside the handler
            # before the failure callback gets a chance to run.
            self.log.error(
                "azure virtual environment %d error while stopping vm: %r" %
                (sctx.current_job_index, str(e)))
            self.__on_stop_virtual_machine_failed(sctx)
Exemplo n.º 52
0
    def delete_template(self, template_id):
        """
        Delete a template that is not used by any experiment.

        :param template_id: id of the template to delete
        :return: ``ok`` when the template is deleted or already gone,
                 ``forbidden`` when the current user is neither its creator
                 nor a super admin or when experiments still use it,
                 ``internal_server_error`` when anything raises
        """
        self.log.debug("delete template [%s]" % template_id)
        try:
            target = self.get_template_info_by_id(template_id)
            if target is None:
                return ok("already removed")

            # user can only delete the template which created by himself except super admin
            current_user = g.user
            allowed = current_user.id == target.creator.id or current_user.is_super
            if not allowed:
                return forbidden()

            in_use = Experiment.objects(template=target).count()
            if in_use > 0:
                return forbidden("template already in use")

            # remove record in DB
            # the Hackathon used this template will imply the mongoengine's PULL reverse_delete_rule
            self.log.debug("delete template {}".format(target.name))
            target.delete()
            return ok("delete template success")
        except Exception as ex:
            self.log.error(ex)
            return internal_server_error("delete template failed")
Exemplo n.º 53
0
    def scheduler_recycle_expr(self):
        """Recycle timed-out experiments of every recyclable hackathon.

        For each hackathon returned by the hackathon manager, RUNNING
        experiments created more than the hackathon's recycle minutes ago are
        passed to the recycle routine. Errors are logged per hackathon.

        :return:
        """
        self.log.debug("start checking recyclable experiment ... ")
        for hackathon in self.hackathon_manager.get_recyclable_hackathon_list():
            try:
                # check recycle enabled
                recycle_minutes = self.hackathon_manager.get_recycle_minutes(hackathon)
                # filter out the experiments that need to be recycled
                deadline = self.util.get_now() - timedelta(minutes=recycle_minutes)
                expired = Experiment.objects(create_time__lt=deadline,
                                             status=EStatus.RUNNING,
                                             hackathon=hackathon)
                for timed_out in expired:
                    self.__recycle_expr(timed_out)
            except Exception as e:
                self.log.error(e)
Exemplo n.º 54
0
    def __on_setup_failed(self, sctx):
        """
        Failure callback for a virtual environment setup job.

        Marks the virtual environment at ``sctx.current_job_index`` and its
        experiment as FAILED, then rolls back ``sctx.remote_created`` in
        reverse order. Errors raised by the rollback are logged.

        :param sctx: exposes ``current_job_index``, ``job_ctxs`` and
                     ``remote_created``
        """
        try:
            job_index = sctx.current_job_index
            self.log.debug("azure virtual environment %d vm setup failed" % job_index)
            job_ctx = sctx.job_ctxs[job_index]
            experiment = Experiment.objects(id=job_ctx.experiment_id).first()
            failed_env = experiment.virtual_environments[job_index]

            failed_env.status = VEStatus.FAILED
            experiment.status = EStatus.FAILED
            experiment.save()
        finally:
            rollback_notice = "azure virtual environment %d vm fail callback done, roll back start"
            self.log.debug(rollback_notice % sctx.current_job_index)
            try:
                # rollback reverse
                created_reversed = sctx.remote_created[::-1]
                self.__setup_rollback(created_reversed, sctx)
                self.log.debug("azure virtual environment %d rollback done" % sctx.current_job_index)
            except Exception as e:
                failure = (sctx.current_job_index, str(e))
                self.log.error(
                    "azure virtual environment %d error while rollback: %r" % failure)
Exemplo n.º 55
0
    def getConnectInfo(self):
        """
        Return the remote connection parameters of a virtual environment.

        The virtual environment name is read from the ``name`` query
        argument. When the parameters carry no host name, one of the
        configured public IPs is picked at random and filled in.

        :return: ``not_found`` when no experiment owns such an environment,
                 ``forbidden`` when the experiment belongs to another user,
                 otherwise the environment's ``remote_paras``
        """
        name = request.args.get("name")
        self.log.debug("Guacamole connecion request, connection name: %s" % name)
        owner_expr = Experiment.objects(virtual_environments__name=name).no_dereference().first()
        if not owner_expr:
            return not_found("not_found")

        # only the experiment's own user may read its connection settings
        if owner_expr.user.id != g.user.id:
            return forbidden("forbidden")

        paras = owner_expr.virtual_environments.get(name=name).remote_paras
        # TODO Support DYNAMIC host/port in case of they cannot be determined on provision phase
        if K8S_UNIT.REMOTE_PARAMETER_HOST_NAME not in paras:
            fallback_ips = ["119.3.202.71",
                            "49.4.90.39"
                            ]
            public_ips = self.util.safe_get_config("ukylin.k8s.ips", fallback_ips)
            last_index = len(public_ips) - 1
            paras[K8S_UNIT.REMOTE_PARAMETER_HOST_NAME] = public_ips[random.randint(0, last_index)]

        self.log.debug("get guacamole config by id: %s, paras: %r" % (name, paras))
        return paras
    def _enable_guacd_file_transfer(self, context):
        """
        Copy the bundled ``guacctl`` script to ``/usr/local/sbin/guacctl`` on
        the remote host of a virtual environment, using ``scp``.

        This function should be invoked after container is started in alauda_docker.py and hosted_docker.py

        :param context: exposes ``experiment_id`` and
                        ``virtual_environment_name``; the environment's
                        ``remote_paras`` supply port, username, hostname and
                        password
        """
        expr = Experiment.objects(id=context.experiment_id).no_dereference().first()
        virtual_env = expr.virtual_environments.get(name=context.virtual_environment_name)
        remote = virtual_env.remote_paras

        # the script is taken from ../expr/guacctl relative to this file
        p = pexpect.spawn("scp -P %s %s %s@%s:/usr/local/sbin/guacctl" %
                          (remote["port"],
                           abspath("%s/../expr/guacctl" % dirname(realpath(__file__))),
                           remote["username"],
                           remote["hostname"]))
        # NOTE(review): the next line is not valid Python as written; it looks
        # like part of it was replaced by "******" (possibly a credential
        # scrubber). Restore it from the original source before use.
        i = p.expect([pexpect.TIMEOUT, 'yes/no', 'password: '******'password:'])

        # index 0 is pexpect.TIMEOUT; any other match means a prompt was seen
        if i != 0:
            p.sendline(remote["password"])
            p.expect(pexpect.EOF)
        p.close()
Exemplo n.º 57
0
    def schedule_create_k8s_service(self, context):
        """
        Create the K8s environment for the first virtual environment of an
        experiment and wait for its deployment to become available.

        The port returned by the adapter is stored as the public port of the
        first entry in the environment's ``k8s_resource['ports']``.

        :param context: exposes ``template_content``, ``experiment_id``,
                        ``template_name`` and ``hackathon_id``
        :return: True when the deployment reaches the AVAILABLE status,
                 False when it does not or when creation raised
        """
        template_unit = context.template_content.units[0]
        experiment = Experiment.objects.get(id=context.experiment_id)
        virtual_env = experiment.virtual_environments[0]
        k8s_dict = virtual_env.k8s_resource
        adapter = self.__get_adapter_from_ctx(K8SServiceAdapter, context)

        template_unit.set_ports(k8s_dict['ports'])
        labels = {
            "template_name": context.template_name,
            "hackathon_id": context.hackathon_id,
            "experiment_id": context.experiment_id,
        }
        try:
            # the deployment name returned by the adapter is not needed here
            _deploy_name, port = adapter.create_k8s_environment(
                virtual_env.name, template_unit, labels=labels)

            # reload the experiment before recording the assigned port
            expr = Experiment.objects(id=context.experiment_id).first()
            virtual_env = expr.virtual_environments[0]
            k8s_dict = virtual_env.k8s_resource
            vnc_port = k8s_dict['ports']
            vnc_port[0][K8S_UNIT.PORTS_PUBLIC_PORT] = port
            expr.save()

            # check deployment's status
            if self.__wait_for_k8s_status(adapter, virtual_env.name,
                                          K8S_DEPLOYMENT_STATUS.AVAILABLE):
                self.log.debug("k8s deployment succeeds: %s" % str(context))
                self.__on_create_success(context)
                return True
            else:
                self.log.error("k8s deployment fails: %s" % str(context))
                self.__on_message("k8s_service_create_failed", context)
        except Exception as e:
            # keep the cause in the log instead of discarding it
            self.log.error(e)
            self.__on_message("k8s_service_create_failed", context)

        return False