Exemplo n.º 1
0
    def __init__(self, *args, **kwargs):
        """
        Initialize the singleton GlobalConfig.

        Only one instance may ever exist; a second construction attempt
        raises immediately.

        Args:
            *args:
            **kwargs:
        """
        # Guard clause: refuse to build a second instance.
        if GlobalConfig.__instance__ is not None:
            raise Exception("You cannot create another GlobalConfig class!")

        self.path = os.path.join(
            GlobalConfig.get_config_dir(), 'info.json')
        # Make sure the config directory exists before touching files in it.
        path_utils.create_dir_recursive_if_not_exists(
            GlobalConfig.get_config_dir())

        if path_utils.file_exists(self.path):
            # A config already exists on disk: load it.
            self.load()
        else:
            # No config yet: start from defaults.
            # True by default but user is always asked
            self['analytics_opt_in'] = True

        # Create user ID will save the whole thing
        self.user_id = self.create_user_id()

        super(GlobalConfig, self).__init__(*args, **kwargs)
        GlobalConfig.__instance__ = self
Exemplo n.º 2
0
    def to_config(path: Text,
                  artifact_store_path: Optional[Text] = None,
                  metadata_store: Optional[Type[ZenMLMetadataStore]] = None,
                  pipelines_dir: Optional[Text] = None):
        """
        Creates a default .zenml config at path/zenml/.zenml_config.

        Args:
            path (str): path to a directory.
            artifact_store_path (str): path where to store artifacts;
                defaults to a directory inside the config dir.
            metadata_store: metadata store definition; defaults to a SQLite
                store located inside the artifact store.
            pipelines_dir (str): path where to store pipeline configs;
                defaults to a directory under `path`.

        Raises:
            AssertionError: if a .zenml config already exists at `path`.
        """
        config_dir_path = os.path.join(path, ZENML_DIR_NAME)
        config_path = os.path.join(config_dir_path, ZENML_CONFIG_NAME)

        # Refuse to clobber an existing config; the user must delete the
        # config directory first.
        if path_utils.file_exists(config_path):
            raise AssertionError(f'.zenml file already exists at '
                                 f'{config_path}. '
                                 f'Cannot replace. Please delete the '
                                 f'{config_dir_path} directory first.')

        # Create config dir
        path_utils.create_dir_if_not_exists(config_dir_path)

        if artifact_store_path is None:
            # Default artifact store lives inside the config directory.
            artifact_store_path = \
                os.path.join(config_dir_path, ARTIFACT_STORE_DEFAULT_DIR)
        else:
            # if provided, then resolve it absolutely
            artifact_store_path = path_utils.resolve_relative_path(
                artifact_store_path)

        # create artifact_store path
        path_utils.create_dir_if_not_exists(artifact_store_path)

        if metadata_store is None:
            # Default metadata store: a SQLite DB inside the artifact store.
            uri = os.path.join(artifact_store_path,
                               ML_METADATA_SQLITE_DEFAULT_NAME)
            # Imported locally (presumably to avoid an import cycle —
            # TODO confirm).
            from zenml.metadata import \
                SQLiteMetadataStore
            metadata_dict = SQLiteMetadataStore(uri).to_config()
        else:
            metadata_dict = metadata_store.to_config()

        if pipelines_dir is None:
            pipelines_dir = os.path.join(path, PIPELINES_DEFAULT_DIR_NAME)
        else:
            # if provided, still resolve
            pipelines_dir = path_utils.resolve_relative_path(pipelines_dir)

        path_utils.create_dir_if_not_exists(pipelines_dir)
        config_dict = {
            ARTIFACT_STORE_KEY: artifact_store_path,
            METADATA_KEY: metadata_dict,
            PIPELINES_DIR_KEY: pipelines_dir,
        }
        # Write initial config
        yaml_utils.write_yaml(config_path, config_dict)
Exemplo n.º 3
0
    def is_zenml_dir(path: Text):
        """
        Check if dir is a zenml dir or not.

        A directory counts as a zenml dir when the zenml config file
        exists at <path>/<ZENML_DIR_NAME>/<ZENML_CONFIG_NAME>.

        Args:
            path (str): path to the root.
        """
        config_path = os.path.join(path, ZENML_DIR_NAME, ZENML_CONFIG_NAME)
        # Existence of the config file is the whole check.
        return bool(path_utils.file_exists(config_path))
Exemplo n.º 4
0
def read_files_from_disk(pipeline: beam.Pipeline,
                         base_path: Text) -> beam.pvalue.PCollection:
    """
    The Beam PTransform used to read data from a collection of CSV files
    on a local file system.

    Args:
        pipeline: Input beam.Pipeline object coming from a TFX Executor.
        base_path: Base path pointing either to the directory containing the
         CSV files, or to a (single) CSV file.

    Returns:
        A beam.PCollection of data points. Each row in the collection of
         CSV files represents a single data point.

    Raises:
        RuntimeError: if `base_path` does not exist, or if no file with an
         allowed extension (.csv/.txt) is found under it.
    """
    wildcard_qualifier = "*"
    file_pattern = os.path.join(base_path, wildcard_qualifier)

    if path_utils.is_dir(base_path):
        csv_files = path_utils.list_dir(base_path)
        if not csv_files:
            raise RuntimeError(
                'Split pattern {} does not match any files.'.format(
                    file_pattern))
        # BUGFIX: read via the wildcard pattern; the original passed the
        # bare directory path to ReadFromText, leaving `file_pattern`
        # unused and the glob never applied.
        read_pattern = file_pattern
    else:
        if path_utils.file_exists(base_path):
            csv_files = [base_path]
        else:
            raise RuntimeError(f'{base_path} does not exist.')
        # A single file is read directly.
        read_pattern = base_path

    # weed out bad file exts with this logic
    allowed_file_exts = [".csv", ".txt"]  # ".dat"
    csv_files = [
        uri for uri in csv_files
        if os.path.splitext(uri)[1] in allowed_file_exts
    ]
    # BUGFIX: the extension filter may have removed every candidate; fail
    # with a clear error instead of an IndexError on csv_files[0] below.
    if not csv_files:
        raise RuntimeError(
            'Split pattern {} does not match any files.'.format(
                file_pattern))

    logger.info(f'Matched {len(csv_files)}: {csv_files}')

    # Always use header from file
    logger.info(f'Using header from file: {csv_files[0]}.')
    column_names = path_utils.load_csv_header(csv_files[0])
    logger.info(f'Header: {column_names}.')

    parsed_csv_lines = (
        pipeline
        | 'ReadFromText' >> beam.io.ReadFromText(file_pattern=read_pattern,
                                                 skip_header_lines=1)
        | 'ParseCSVLine' >> beam.ParDo(csv_decoder.ParseCSVLine(delimiter=','))
        | 'ExtractParsedCSVLines' >>
        beam.Map(lambda x: dict(zip(column_names, x[0]))))

    return parsed_csv_lines
Exemplo n.º 5
0
def read_json(file_path: Text):
    """
    Read JSON on file path and returns contents as dict.

    Args:
        file_path (str): Path to JSON file.

    Raises:
        Exception: if no file exists at `file_path`.
    """
    # Guard clause: bail out early when the file is missing.
    if not path_utils.file_exists(file_path):
        raise Exception(f'{file_path} does not exist.')
    with open(file_path, 'r') as json_file:
        return json.loads(json_file.read())
Exemplo n.º 6
0
def read_yaml(file_path: Text):
    """
    Read YAML on file path and returns contents as dict.

    Args:
        file_path (str): Path to YAML file.

    Raises:
        Exception: if no file exists at `file_path`.
    """
    # Guard clause: bail out early when the file is missing.
    if not path_utils.file_exists(file_path):
        raise Exception(f'{file_path} does not exist.')
    with open(file_path, 'r') as yaml_file:
        return yaml.load(yaml_file.read(), Loader=yaml.FullLoader)
Exemplo n.º 7
0
    def add_gitignore(self, items: List[Text]):
        """
        Adds `items` to .gitignore, if .gitignore exists. Otherwise creates
        and adds.

        Args:
            items (list[str]): Items to add.
        """
        # One entry per line, grouped under a "# ZenML" section header.
        str_items = '\n\n# ZenML\n' + '\n'.join(items)

        gitignore_path = os.path.join(self.repo_path, '.gitignore')
        if path_utils.file_exists(gitignore_path):
            # .gitignore present: append the new section.
            path_utils.append_file(gitignore_path, str_items)
        else:
            # No .gitignore yet: create it with the section as content.
            path_utils.create_file_if_not_exists(gitignore_path, str_items)