Пример #1
0
def tempdir():
    """Yield a fresh temporary directory used as cwd and project root.

    Generator that creates a ``TempDirectory``, changes the current working
    directory into it, resets ``Singleton._instances`` and points
    ``paths.set_rootdir`` at it before yielding the directory object.
    The previous working directory is always restored afterwards, even when
    setup fails or an exception is thrown into the generator at the
    ``yield`` point.

    NOTE(review): presumably wrapped as a pytest fixture or a context
    manager -- the decorator is not visible here.

    :return: generator yielding the ``TempDirectory`` instance
    """
    with TempDirectory() as d:
        original_dir = os.getcwd()
        os.chdir(d.path)
        try:
            # clear all Singletons at the beginning of the test
            Singleton._instances = {}

            # set rootdir
            paths.set_rootdir(d.path)

            yield d
        finally:
            # Leave the temp dir before TempDirectory cleans it up, and do so
            # unconditionally so a failure never strands the process in a
            # directory that is about to be deleted.
            os.chdir(original_dir)
Пример #2
0
    def load(self, profile='default', rootpath=None):
        """
        Performs the following steps:
            - set rootdir for the given project
            - import variables from  <rootdir>/.env (if present),
            - load the `profile` from the metadata files
            - setup and start the data engine

        :param profile: load the given metadata profile (default: 'default')

        :param rootpath: root directory for loaded project
               default behaviour: search parent dirs to detect rootdir by
               looking for a '__main__.py' or 'main.ipynb' file.
               When such a file is found, the corresponding directory is the
               root path for the project. If nothing is found, the current
               working directory will be the rootpath

        :return: this object (``self``), both when the project is (re)loaded
                 and when loading is skipped because it was already loaded

        :raises ValueError: if no valid metadata profile is available after
                attempting to load the metadata files

        Notes about metadata configuration:

        1)  Metadata files are merged up, so you can split the information in
            multiple files as long as they end with `metadata.yml`.

            For example: `metadata.yml`, `abc.metadata.yaml`, `abc_metadata.yml`
            are all valid metadata file names.

        2)  All metadata files in all subdirectories from the project root directory
            are loaded, unless the directory contains a file `metadata.ignore.yml`

        3)  Metadata files can provide multiple profile configurations,
            by separating each profile configuration with a Document Marker
            ( a line with `---`) (see https://yaml.org/spec/1.2/spec.html#YAML)

        4)  Each metadata profile can be broken down in multiple yaml files.
            When loading the files, all configuration belonging to the same
            profile will be merged.

        5)  All metadata profiles inherit the settings from profile 'default'

        Metadata files are composed of 6 sections:
            - profile
            - variables
            - providers
            - resources
            - engine
            - loggers

        For more information about metadata configuration,
        type `help(datafaucet.project.metadata)`
        """

        # already loaded and reload disabled: nothing to do
        if self.loaded and self._no_reload:
            logging.notice(f"Profile {self._profile} already loaded. "
                           "Skipping project.load()")
            return self

        # set rootpath
        paths.set_rootdir(rootpath)

        # set loaded to false: stays False if anything below raises
        self.loaded = False

        # set username
        self._username = getpass.getuser()

        # get repo data
        self._repo = repo_data()

        # set session name from the non-empty parts
        # NOTE(review): this uses self._profile rather than the `profile`
        # argument -- confirm that is intended.
        name_parts = [self._profile, self._repo.get('name')]
        self._session_name = '-'.join([x for x in name_parts if x])

        # set session id (upper 64 bits of a uuid1, as a hex string)
        self._session_id = hex(uuid.uuid1().int >> 64)

        # get currently running script path
        self._script_path = files.get_script_path(paths.rootdir())

        # set dotenv default file, check the file exists
        self._dotenv_path = files.get_dotenv_path(paths.rootdir())

        # get files
        self._metadata_files = files.get_metadata_files(paths.rootdir())
        self._notebook_files = files.get_jupyter_notebook_files(
            paths.rootdir())
        self._python_files = files.get_python_files(paths.rootdir())

        # metadata defaults: the packaged default schema is loaded first,
        # followed by the project's own metadata files
        dir_path = os.path.dirname(os.path.realpath(__file__))
        default_md_files = [os.path.join(dir_path, 'schemas/default.yml')]
        project_md_files = abspath(self._metadata_files, paths.rootdir())

        # load metadata
        try:
            md_paths = default_md_files + project_md_files
            dotenv_path = abspath(self._dotenv_path, paths.rootdir())

            metadata.load(profile, md_paths, dotenv_path)
        except ValueError as e:
            # report the problem; the check below decides whether to bail
            print(e)

        # bail if no metadata
        # (metadata.profile is a callable: it must be called, otherwise the
        # comparison with None can never be true)
        if metadata.profile() is None:
            raise ValueError('No valid metadata to load.')

        # set profile from metadata
        self._profile_name = metadata.info()['active']

        # add rootpath to the list of python sys paths
        if paths.rootdir() not in sys.path:
            sys.path.append(paths.rootdir())

        # stop existing engine
        if self._engine:
            self._engine.stop()

        # services, keyed by service name so each service appears only once
        # (the last provider alias declaring a given service wins)
        services = dict()

        all_aliases = list(metadata.profile()['providers'].keys())

        # get services from aliases
        for alias in all_aliases:
            r = Resource(alias)
            services[r['service']] = r

        # get one service from each type to
        # load drivers, jars etc via the engine init
        services = list(services.values())

        # initialize the engine
        md = metadata.profile()['engine']
        submit = md['submit']
        engines.Engine(md['type'],
                       session_name=self._session_name,
                       session_id=self._session_id,
                       master=md['master'],
                       timezone=md['timezone'],
                       jars=submit['jars'],
                       packages=submit['packages'],
                       pyfiles=submit['py-files'],
                       files=submit['files'],
                       repositories=submit['repositories'],
                       conf=submit['conf'],
                       services=services)

        # initialize logging
        logging.init(metadata.profile()['loggers'], self._session_id,
                     self._username, self._script_path, self._repo['name'],
                     self._repo['hash'])

        # set loaded to True
        self.loaded = True

        # return object
        return self