# Example #1
# 0
class XprDbSetup:
    """
    Tools to install and configure MongoDB on the local node.

    Shell commands are run through ``self.executor``; progress is reported
    through ``self.logger``. ``self.service_path`` is the systemd unit file
    that ``enable_replication`` rewrites.
    """

    def __init__(self, executor=None):
        """
        Args:
            executor: object exposing ``execute(command)``; when omitted (or
                falsy) a ``LocalShellExecutor`` is created.
        """
        # Always bind the executor. Previously a caller-supplied executor was
        # silently dropped, leaving the instance without ``self.executor``.
        self.executor = executor or LocalShellExecutor()
        self.logger = XprLogger()
        self.service_path = '/lib/systemd/system/mongod.service'

    def install_mongo(self):
        """
        installs mongodb on the system

        Imports the MongoDB apt key, registers the 4.0 repository, installs
        ``mongodb-org`` and puts the installed packages on hold.
        """
        self.logger.info('entering install_mongo method')
        if not linux_utils.check_root():
            # NOTE(review): execution continues after this log call; confirm
            # whether XprLogger.fatal is expected to abort the process.
            self.logger.fatal("Please run this as root")
        import_key = 'sudo apt-key adv --keyserver ' \
                     'hkp://keyserver.ubuntu.com:80 --recv ' \
                     '9DA31620334BD75D9DCB49F368818C72E52529D4'
        self.executor.execute(import_key)
        create_list = 'echo "deb [ arch=amd64 ] https://repo.mongodb.org/' \
                      'apt/ubuntu bionic/mongodb-org/4.0 multiverse" | ' \
                      'sudo tee /etc/apt/sources.list.d/mongodb-org-4.0.list'
        self.executor.execute(create_list)
        reload_packages = 'sudo apt-get update'
        self.executor.execute(reload_packages)
        self.logger.debug('installing mongo')
        install_mongo = 'sudo apt-get install -y mongodb-org'
        self.executor.execute(install_mongo)
        # Pin the installed packages so a later upgrade run leaves them alone.
        hold = """echo "mongodb-org hold" | sudo dpkg --set-selections
                  echo "mongodb-org-server hold" | sudo dpkg --set-selections
                  echo "mongodb-org-shell hold" | sudo dpkg --set-selections
                  echo "mongodb-org-mongos hold" | sudo dpkg --set-selections
                  echo "mongodb-org-tools hold" | sudo dpkg --set-selections"""
        self.executor.execute(hold)
        self.logger.info('exiting install_mongo')

    def _create_unique_index(self, collection, seed_document, field, label):
        """
        Creates a unique ascending index on ``field`` of ``collection``.

        A placeholder document is inserted first and removed (by its
        ``name``) once the index exists.

        Args:
            collection: collection object to index
            seed_document: placeholder document; must contain ``name``
            field: name of the field that has to be unique
            label: singular entity name used in the log messages
        """
        self.logger.debug(f'setting up initial {label}')
        collection.insert_one(seed_document)
        collection.create_index([(field, ASCENDING)], unique=True)
        self.logger.debug(f'created index for {label}s collection')
        collection.delete_one({"name": seed_document["name"]})

    def initial_setup(self, db):
        """
        sets up the initial users and collections in the db
        :param db: database against which the setup is to be done
        :return: nothing
        """
        self.logger.info('entering initial_setup method')
        # initiate users collection
        self.insert_default_users(db.users)
        db.users.create_index([('uid', ASCENDING)], unique=True)
        self.logger.debug('created index for users collection')

        # initiate nodes collection
        self._create_unique_index(
            db.nodes,
            {"name": "initial_node", "address": ""},
            'address', 'node')

        # initiate clusters collection
        self._create_unique_index(
            db.clusters,
            {
                "name": "initial_cluster",
                "activationStatus": True,
                "master_nodes": [],
                "worker_nodes": []
            },
            'name', 'cluster')

        # initiate projects collection
        self._create_unique_index(
            db.projects,
            {
                "name": "initial_project",
                "projectDescription": "Initiates the collection",
                "owner": {},
                "developers": [],
                "components": []
            },
            'name', 'project')

        # create xprdb_admin user in mongo
        # NOTE(review): credentials are hard-coded in source; consider
        # loading them from configuration or a secret store.
        self.logger.debug('creating xprdb user in mongo')
        db.command("createUser", "xprdb_admin", pwd="xprdb@Abz00ba",
                   roles=[{"role": "root", "db": "admin"}])
        self.logger.info('exiting initial_setup method')

    def insert_default_users(self, users):
        """
        inserts the default application users into the users collection
        :param users: collection that receives the user documents
        :return: nothing
        """
        self.logger.debug('setting up default users')
        # NOTE(review): default passwords are hard-coded in source.
        # (uid, firstName, lastName, plain-text password, primaryRole)
        default_users = (
            ("xprdb_admin", "Xpresso", "Admin", 'xprdb@Abz00ba', "Admin"),
            ("superuser1", "superuser1", "superuser1", 'superuser1', "Su"),
            ("admin1", "admin1", "admin1", 'admin1', "Admin"),
        )
        for uid, first_name, last_name, password, role in default_users:
            users.insert_one({
                "uid": uid,
                "firstName": first_name,
                "lastName": last_name,
                "pwd": sha512_crypt.hash(password),
                "email": "*****@*****.**",
                "primaryRole": role,
                "activationStatus": True,
                "loginStatus": False
            })

    def _set_service_exec_start(self, command):
        """
        Rewrites ``ExecStart`` in the ``[Service]`` section of the unit file
        at ``self.service_path``.

        Args:
            command: command line to store as the new ``ExecStart`` value

        Raises:
            KeyError: if the unit file has no ``[Service]`` section (this is
                also what happens when the file cannot be read).
        """
        # interpolation=None keeps any '%' in the unit file literal.
        config = configparser.ConfigParser(interpolation=None)
        # ConfigParser lower-cases option names by default; systemd
        # directives are case-sensitive, so keep them exactly as written.
        config.optionxform = str
        config.read(self.service_path)
        config['Service']['ExecStart'] = command
        with open(self.service_path, 'w') as f:
            config.write(f, space_around_delimiters=False)

    def enable_replication(self):
        """
        installs replica set for the database

        Starts mongod without auth, initiates replica set ``rs0``, runs
        ``initial_setup``, then stops mongod and restarts it through systemd
        with ``--auth`` enabled.
        :return: nothing
        """
        self.logger.info('entering enable_replication method')
        path = '/srv/mongodb/rs0-0'
        linux_utils.create_directory(path, 0o777)
        self.logger.debug('created directory for replica set')
        ip = linux_utils.get_ip_address()
        start = 'mongod --replSet rs0 --port 27017 --bind_ip localhost,' \
                '{} --dbpath /srv/mongodb/rs0-0 --fork ' \
                '--logpath /var/log/mongodb/mongod.log'.format(ip)
        self.executor.execute(start)
        self.logger.debug('mongo daemon started')
        client = MongoClient('localhost', replicaset='rs0')
        db = client.xprdb
        client.admin.command("replSetInitiate")
        self.logger.debug('Replica set initiated')
        # fixed pause after initiating the replica set, before the setup
        time.sleep(5)
        self.initial_setup(db)
        # stop mongo to restart with auth
        stop_mongod = 'pgrep mongod | xargs kill'
        self.executor.execute(stop_mongod)
        self.logger.debug('stopping mongo daemon to restart with auth')
        time.sleep(10)
        restart = 'mongod --replSet rs0 --port 27017 --bind_ip localhost,{} ' \
                  '--dbpath /srv/mongodb/rs0-0 --auth --fork --logpath ' \
                  '/var/log/mongodb/mongod.log'.format(ip)
        self._set_service_exec_start(restart)
        restart_mongod = 'systemctl restart mongod'
        self.executor.execute(restart_mongod)
        self.logger.debug('db setup complete, exiting enable_replication')
class DeclarativePipelineBuilder:
    """
    Builds a Kubeflow DSL python file from a declarative pipeline JSON.

    ``self.content`` starts as the text of the kubeflow template and its
    ``%placeholder%`` markers are replaced in place by the ``populate_*``
    methods, so one instance is meant to render one pipeline.
    """

    # all the pipeline reference variables will be stored in this array as
    # they are discovered by the code so that we can check for any faulty
    # reference made which is not present in the reference array
    # (kept at class level for backward compatibility; __init__ gives every
    # instance its own list)
    reference_array = []

    def __init__(self, persistence_manager):
        """
        Args:
            persistence_manager: handed over to ``KubeflowUtils``
        """
        # Per-instance list: the class-level list would otherwise be shared,
        # letting references collected for one pipeline validate another.
        self.reference_array = []
        self.kubeflow_utils = KubeflowUtils(persistence_manager)
        self.logger = XprLogger()
        self.executor = LocalShellExecutor()
        config_path = XprConfigParser.DEFAULT_CONFIG_PATH
        self.config = XprConfigParser(config_path)
        self.declarative_pipeline_folder = self.config[PROJECTS_SECTION][
            DECLARATIVE_PIPELINE_FOLDER]
        self.content = self.declarative_pipeline_folder_check()

    def declarative_pipeline_folder_check(self):
        """
        checks whether declarative pipeline folder is present (creating it
        when missing) and loads the kubeflow template
        Returns: contents of template, or None if the template file is
            missing

        """
        if not os.path.isdir(self.declarative_pipeline_folder):
            os.makedirs(self.declarative_pipeline_folder, permission_755)
        kubeflow_template = self.config[PROJECTS_SECTION][KUBEFLOW_TEMPLATE]
        return self._read_template(kubeflow_template)

    def _read_template(self, template_path):
        """
        Reads the template file at ``template_path``.
        Args:
            template_path: path of the kubeflow template file

        Returns: file contents, or None (after logging an error) when the
            file does not exist

        """
        try:
            with open(template_path, 'r') as f:
                return f.read()
        except FileNotFoundError:
            # open() raises the builtin FileNotFoundError for a missing file
            self.logger.error('kubeflow template file not found')
            return None

    def prevalidate_declarative_json(self, pipeline_info):
        """
        Validates (with dummy data) if the pipeline yaml file is being created
        properly before adding pipeline as a part of project.
        Args:
            pipeline_info: declarative JSON file

        Returns: nothing

        """
        self.logger.info('entering prevalidate_declarative_json')
        # Check the mandatory fields first so that an empty or incomplete
        # JSON is reported through the validation exception instead of a
        # bare KeyError/TypeError on 'components'.
        self.validate_declarative_json(pipeline_info)
        temp_component_images = {}
        self.logger.info('creating dict with temporary component images')
        for component in pipeline_info['components']:
            self.validate_component_keys(component.keys())
            temp_component_images[
                component['xpresso_reference']] = "temp_image"
        self.generate_pipeline_file(pipeline_info, temp_component_images, 0)
        self.logger.info('Pipeline validated.')

    def check_for_reference(self, value):
        """
        Checks if the provided value has any faulty reference.

        Only values containing '.output' are checked: the text before the
        first '.' (minus any typecast prefix up to the open parenthesis) has
        to be present in the reference array.
        Args:
            value: value to be checked

        Returns: raises exception if reference is not found

        """
        self.logger.info(f'entering check_for_reference to '
                         f'validate {value}')
        if '.output' in value:
            reference = value.split('.')[0]
            if open_parenthesis in reference:
                # in case of typecasting
                reference = reference.split(open_parenthesis)[1]
            if reference not in self.reference_array:
                self.logger.error(f'Reference "{reference}" not found.')
                raise ReferenceNotFoundException(
                    f'Reference "{reference}" not '
                    f'found in declarative JSON')
        self.logger.info('Reference validated. Exiting.')

    def _render_value(self, value):
        """
        Converts a declarative JSON value into python source text.
        Args:
            value: JSON value; non-string values are converted with str()

        Returns: the value with the variable indicator stripped when it
            carries one, otherwise the value as a double-quoted literal
            with embedded double quotes escaped

        """
        text = str(value)
        if variable_indicator in text:
            return text.replace(variable_indicator, "")
        return f'"{text.replace(double_quote, escape_quote)}"'

    def modify_for_function_parameters(self, func_params):
        """
        modifies a string (json key-value pair) to be used as a function's
        parameters
        Args:
            func_params: json key-value pair string (in xpresso defined format)

        Returns: modified string, fit for using as a function's parameters

        """
        self.logger.info('entering modify_for_function_parameters')
        param_list = []
        for key, value in func_params.items():
            modified_key = key.replace(variable_indicator, "")
            modified_value = self._render_value(value)
            # check for any faulty reference
            self.check_for_reference(modified_value)
            param_list.append(f'{modified_key}={modified_value}')
            self.reference_array.append(modified_key)
        result = ', '.join(param_list)
        self.logger.info(f'exiting modify_for_function_parameters with '
                         f'output {result}')
        return result

    def modify_for_function_variables(self, func_vars):
        """
        modifies a string (json key-value pair) to be used as a function's
        variables
        Args:
            func_vars: json key-value pair string (in xpresso defined format)

        Returns: modified string, fit for use as a function's variables

        """
        self.logger.info('entering modify_for_function_variables')
        assignments = []
        for key, value in func_vars.items():
            modified_key = key.replace(variable_indicator, "")
            modified_value = self._render_value(value)
            # check for any faulty reference
            self.check_for_reference(modified_value)
            self.reference_array.append(modified_key)
            assignments.append(f'{modified_key} = {modified_value}\n\t')
        result = ''.join(assignments)
        self.logger.info(f'exiting modify_for_function_variables with '
                         f'output {result}')
        return result

    def validate_declarative_json(self, pipeline_info):
        """
        validates the mandatory fields in the provided declarative json
        Args:
            pipeline_info: contents of the json file

        Returns: Raises exception in case of inconsistency

        """
        self.logger.info('entering validate_declarative_json method')
        if not pipeline_info:
            self.logger.error('Declarative JSON empty.')
            raise IncorrectDeclarativeJSONDefinitionException(
                'Declarative JSON empty.')
        pipeline_fields = [
            'name', 'description', 'pvc_name', 'components', 'main_func_params'
        ]
        for field in pipeline_fields:
            if field not in pipeline_info:
                self.logger.error(f'Field "{field}" not present in '
                                  f'declarative JSON')
                raise IncorrectDeclarativeJSONDefinitionException(
                    f'Field "{field}" not present in declarative JSON')

    def validate_component_keys(self, component_keys):
        """
        Validates if the component has all default keys present
        Args:
            component_keys: keys present in the component

        Returns: nothing; raises exception on the first missing key

        """
        default_keys = [
            'name', 'xpresso_reference', 'description', 'inputs',
            'input_values', 'implementation'
        ]
        for key in default_keys:
            if key not in component_keys:
                self.logger.error(f'Key "{key}" is missing from one or more '
                                  f'components in pipeline JSON')
                raise ComponentsSpecifiedIncorrectlyException(
                    f'Key "{key}" is missing from one or more components '
                    f'in pipeline JSON')

    def _pipeline_python_path(self, pipeline_info, pipeline_deploy_id):
        """
        Builds the path of the generated DSL python file.
        Args:
            pipeline_info: pipeline info from declarative json
            pipeline_deploy_id: deploy version id of pipeline

        Returns: path inside the declarative pipeline folder

        """
        return f'{self.declarative_pipeline_folder}' \
            f'/{pipeline_info["name"]}--declarative_pipeline' \
            f'_{pipeline_deploy_id}.py'

    def generate_pipeline_file(self, pipeline_info, component_images,
                               pipeline_deploy_id):
        """
        generates a python dsl pipeline file using the provided declarative
        json and executes it.
        Args:
            component_images: dict of pipeline components and their
            corresponding docker images
            pipeline_info: declarative json file containing info
            about pipeline
            pipeline_deploy_id : deploy version id of pipeline fetched from
                                database
        Returns: path of the generated python file with '.zip' appended

        """
        self.logger.info('entering generate_python_file method')
        self.logger.debug('reading declarative json')

        # check for mandatory fields
        self.validate_declarative_json(pipeline_info)

        # generate code to load pipeline component objects
        components_info = self.generate_pipeline_component_objects(
            pipeline_info)

        # populate the pipeline name and description
        self.populate_name_and_description(pipeline_info)

        # populate main function's parameters
        self.populate_main_func_parameters(pipeline_info)

        # populate main function's variables, if any
        self.populate_main_func_variables(pipeline_info)

        # populate container op, if present
        self.populate_container_op(pipeline_info)

        # generate and populate component definitions with inputs
        self.populate_component_definitions(pipeline_info, components_info)

        # update pipeline yaml location
        pipeline_yaml_location = self.update_pipeline_yaml_location(
            pipeline_deploy_id, pipeline_info)

        # finally, populate and generate the python file
        self.generate_pipeline_python_file(pipeline_deploy_id, pipeline_info)

        # create yaml file for the generated python file to read components from
        self.create_pipeline_yaml(component_images, pipeline_info,
                                  pipeline_yaml_location)

        # run the generated python file to generate the zip file
        self.logger.debug('running generated python file')
        pipeline_file = self._pipeline_python_path(pipeline_info,
                                                   pipeline_deploy_id)
        run_pipeline_python = f'python {pipeline_file}'
        status = self.executor.execute(run_pipeline_python)
        # a truthy status from the executor is treated as failure
        if status:
            raise IncorrectDeclarativeJSONDefinitionException(
                "Failed to run pipeline dsl file. "
                "Please re-check the declarative JSON file.")
        pipeline_zip = f'{pipeline_file}.zip'
        return pipeline_zip

    def create_pipeline_yaml(self, component_images, pipeline_info,
                             pipeline_yaml_location):
        """
        creates yaml file for dsl code to read components from
        Args:
            component_images: dict of pipeline components and their
            corresponding docker images
            pipeline_info: pipeline info from declarative json
            pipeline_yaml_location: location where the file is to be generated

        Returns: nothing

        """
        self.logger.debug('creating yaml for generated python file')
        # work on a copy so the caller's pipeline_info is left untouched
        temp_pipeline = deepcopy(pipeline_info)
        modified_components = temp_pipeline['components']
        for component in modified_components:
            component['implementation']['container']['image'] \
                = component_images[component['xpresso_reference']]
            # these keys are dropped from the written yaml
            del component['xpresso_reference']
            del component['input_values']
        data_to_insert = {"components": modified_components}
        with open(pipeline_yaml_location, 'w+') as f:
            f.write(yaml.dump(data_to_insert))

    def generate_pipeline_python_file(self, pipeline_deploy_id, pipeline_info):
        """
        generates pipeline python file from the current template content
        Args:
            pipeline_deploy_id: deploy version id of pipeline fetched from
                                database
            pipeline_info: pipeline info from declarative json

        Returns: nothing

        """
        self.logger.debug('generating python file')
        python_file = self._pipeline_python_path(pipeline_info,
                                                 pipeline_deploy_id)
        with open(python_file, 'w+') as f:
            f.write(self.content)

    def update_pipeline_yaml_location(self, pipeline_deploy_id, pipeline_info):
        """
        updates location where pipeline yaml will be generated
        Args:
            pipeline_deploy_id: deploy version id of pipeline fetched from
                                database
            pipeline_info: pipeline info from declarative json

        Returns: yaml location

        """
        pipeline_yaml_location = f"{self.declarative_pipeline_folder}" \
            f"/{pipeline_info['name']}--pipeline_components_file_" \
            f"{pipeline_deploy_id}.yaml"
        self.content = self.content.replace('%pipeline_yaml_location%',
                                            f"'{pipeline_yaml_location}'")
        return pipeline_yaml_location

    def populate_container_op(self, pipeline_info):
        """
        populates container op
        Args:
            pipeline_info: pipeline info from declarative json

        Returns: nothing

        """
        if 'container_op' not in pipeline_info:
            self.logger.debug('container op not present')
            self.content = self.content.replace('%container_op%', '')
            return

        self.logger.debug('populating container op')
        container_op = pipeline_info['container_op']
        op_name = str(container_op['$$name$$'])
        op_params = self.modify_for_function_parameters(container_op)
        checkout = f"\t{op_name} = dsl.ContainerOp({op_params})"
        # 'after_dependencies' is optional; a missing key used to raise
        # KeyError here.
        after_dependencies = pipeline_info.get('after_dependencies') or {}
        # NOTE(review): the dependency lookup and the generated '.after'
        # call use the literal name 'checkout', not op_name.
        if 'checkout' in after_dependencies:
            checkout = checkout + \
                f"\n\n\tcheckout.after({after_dependencies['checkout']})"
        self.reference_array.append('checkout')
        self.content = self.content.replace('%container_op%', checkout)

    def populate_main_func_variables(self, pipeline_info):
        """
        populates main function variables
        Args:
            pipeline_info: pipeline info from declarative json

        Returns: nothing

        """
        if 'main_func_variables' in pipeline_info:
            self.logger.debug("populating main function's variables")
            main_variables = "\t" + self.modify_for_function_variables(
                pipeline_info['main_func_variables'])
        else:
            self.logger.debug('No variables found for main function')
            main_variables = ''
        self.content = self.content.replace('%main_function_variables%',
                                            main_variables)

    def generate_pipeline_component_objects(self, pipeline_info):
        """
        generates code to load pipeline component objects
        Args:
            pipeline_info: pipeline info from declarative json

        Returns: components info

        """
        self.logger.info('generating code to load pipeline component objects')
        components_info = pipeline_info['components']
        # validate before reading 'name' so that a malformed component is
        # reported through the validation exception
        for component in components_info:
            self.validate_component_keys(component.keys())
        self.reference_array.extend(
            component['name'] for component in components_info)
        pipeline_comps = ''.join(
            f"{component['name']}_ = "
            f"components.load_component_from_text(str("
            f"component_info[{index}]))\n"
            for index, component in enumerate(components_info))
        self.content = self.content.replace('%load_components%',
                                            pipeline_comps)
        return components_info

    def populate_name_and_description(self, pipeline_info):
        """
        populates the pipeline name and description
        Args:
            pipeline_info: pipeline info from declarative json

        Returns: nothing

        """
        self.logger.debug('populating the pipeline name and description')
        self.content = self.content.replace("%pipeline_name%",
                                            f"'{pipeline_info['name']}'")
        self.content = self.content.replace(
            '%pipeline_description%', f"'{pipeline_info['description']}'")

    def populate_main_func_parameters(self, pipeline_info):
        """
        populates main function parameters
        Args:
            pipeline_info: pipeline info from declarative json

        Returns: nothing

        """
        self.logger.debug("populate main function's parameters")
        main_params = self.modify_for_function_parameters(
            pipeline_info['main_func_params'])
        self.content = self.content.replace('%main_function_params%',
                                            main_params)

    def populate_component_definitions(self, pipeline_info, components_info):
        """
        populates component definitions
        Args:
            pipeline_info: pipeline info from declarative json
            components_info: components info in declarative json

        Returns: nothing

        """
        self.logger.debug('populating component definitions with inputs')
        after_dependencies = pipeline_info.get('after_dependencies') or {}
        definitions = []
        for index, component in enumerate(components_info):
            if index == 0:
                # the first component also declares the volume that the
                # remaining components only mount
                add_pvc = \
                    f"add_volume(k8s_client.V1Volume(name='pipeline-nfs', " \
                    f"persistent_volume_claim=k8s_client." \
                    f"V1PersistentVolumeClaimVolumeSource(claim_name=" \
                    f"'{pipeline_info['pvc_name']}'))).add_volume_mount(" \
                    f"k8s_client.V1VolumeMount(" \
                    f"mount_path='/data', name='pipeline-nfs'))"
            else:
                add_pvc = "add_volume_mount(k8s_client.V1VolumeMount(" \
                          "mount_path='/data', name='pipeline-nfs'))"
            name = component['name']
            inputs = self.modify_for_function_parameters(
                component['input_values'])
            definitions.append(f"\t{name} = {name}_({inputs}).{add_pvc}\n\n")

            if name in after_dependencies:
                definitions.append(
                    f"\t{name}.after({after_dependencies[name]})\n\n")
        self.content = self.content.replace('%component_definitions%',
                                            ''.join(definitions))