示例#1
0
def setup_keras(seed=None,
                profile=None,
                backend='theano',
                device=None,
                BLAS_thread_count=None,
                BLAS_MKL_CNR=True,
                nvcc_fastmath=None,
                theano_floatX=None,
                theano_optimizer=None,
                theano_OpenMP=None,
                theano_deterministic=None,
                verbose=True):
    """Set up Keras and the environment variables affecting it.

    The given parameters are written to environment variables
    (``KERAS_BACKEND``, ``THEANO_FLAGS``, BLAS threading variables, ...)
    before Keras is imported at the end of the function.

    Parameters
    ----------
    seed : int, optional
        Randomization seed for ``numpy.random`` and ``random``.
        If None, no seed is set. Zero is a valid seed.

    profile : str, optional
        Profile name ['deterministic', 'cuda0_fast'], will override other parameters with profile parameters.
        The 'deterministic' profile requires a seed.

    backend : str
        Keras backend ['theano', 'tensorflow']
        Default value 'theano'

    device : str, optional
        Device for computations ['cpu', 'cuda', 'cuda0', 'cuda1', 'opencl0:0', 'opencl0:1']

    BLAS_thread_count : int
        Number of thread used for BLAS libraries

    BLAS_MKL_CNR : bool
        Conditional numerical reproducibility for MKL BLAS library. Use this to reproduce results with MKL.
        Default value True

    nvcc_fastmath : bool, optional
        Control the usage of fast math library in NVCC. Only applied with the Theano backend
        when the device is not 'cpu'. If None, the flag is not set.

    theano_floatX : str, optional
        Default dtype for Theano matrix and tensor ['float64', 'float32', 'float16']

    theano_optimizer : str, optional
        Optimizer ['fast_run', 'merge', 'fast_compile', 'None']

    theano_OpenMP : bool, optional
        Enable or disable parallel computation on the CPU with OpenMP.

    theano_deterministic : bool, optional
        If True, Theano flag ``deterministic=more`` is set, if False ``deterministic=default``.
        If None, the flag is not set.

    verbose : bool
        Print information
        Default value True

    Raises
    ------
    AttributeError
        Unknown profile, 'deterministic' profile without a seed, or an invalid
        backend, device, floatX or optimizer value.

    """
    def logger():
        logger_instance = logging.getLogger(__name__)
        if not logger_instance.handlers:
            setup_logging()
        return logger_instance

    def fail(message):
        # There is no active exception at this point, so error() is used
        # instead of exception() to avoid logging an empty traceback.
        logger().error(message)
        raise AttributeError(message)

    if profile:
        if profile == 'deterministic':
            if seed is None:
                fail('You should set randomization seed to get deterministic behaviour.')

            # Parameters to help to get deterministic results
            device = 'cpu'
            BLAS_thread_count = 1
            BLAS_MKL_CNR = True
            nvcc_fastmath = False
            theano_optimizer = 'None'
            theano_OpenMP = False
            theano_deterministic = True

        elif profile == 'cuda0_fast':
            device = 'cuda0'
            BLAS_thread_count = 8
            BLAS_MKL_CNR = True
            nvcc_fastmath = True
            theano_optimizer = 'fast_run'
            theano_OpenMP = True
            theano_deterministic = True

        else:
            fail('Invalid Keras setup profile [{profile}].'.format(profile=profile))

    # Set seed first. Compare against None so that seed=0 is honoured.
    if seed is not None:
        numpy.random.seed(seed)
        random.seed(seed)

    # Check parameter validity
    if backend and backend not in ['theano', 'tensorflow']:
        fail('Invalid Keras backend type [{backend}].'.format(backend=backend))

    # Accepted devices match the list given in the docstring.
    valid_devices = ['cpu', 'cuda', 'cuda0', 'cuda1', 'opencl0:0', 'opencl0:1']
    if device and device not in valid_devices:
        fail('Invalid Keras device type [{device}].'.format(device=device))

    if theano_floatX and theano_floatX not in ['float64', 'float32', 'float16']:
        fail('Invalid Keras floatX type [{floatX}].'.format(floatX=theano_floatX))

    if theano_optimizer and theano_optimizer not in ['fast_run', 'merge', 'fast_compile', 'None']:
        fail('Invalid Keras optimizer type [{optimizer}].'.format(optimizer=theano_optimizer))

    ui = FancyLogger()
    if verbose:
        ui.sub_header('Keras setup')

    # Get BLAS library associated to numpy. The attribute is looked up
    # defensively because not every numpy build exposes blas_opt_info.
    blas_opt_info = getattr(numpy.__config__, 'blas_opt_info', None)
    blas_known = bool(blas_opt_info) and 'libraries' in blas_opt_info
    blas_libraries = blas_opt_info['libraries'] if blas_known else ['']
    blas_library = blas_libraries[0] if blas_libraries else ''

    blas_extra_info = []

    # Select Keras backend
    os.environ["KERAS_BACKEND"] = backend

    # Threading
    if BLAS_thread_count:
        thread_count = str(BLAS_thread_count)
        for variable in ('GOTO_NUM_THREADS', 'OMP_NUM_THREADS', 'MKL_NUM_THREADS'):
            os.environ[variable] = thread_count

        blas_extra_info.append(
            'Threads[{threads}]'.format(threads=BLAS_thread_count))

        # Dynamic thread adjustment is switched off when an explicit
        # multi-thread count is requested.
        dynamic = 'False' if BLAS_thread_count > 1 else 'True'
        os.environ['OMP_DYNAMIC'] = dynamic
        os.environ['MKL_DYNAMIC'] = dynamic

    # Conditional Numerical Reproducibility (CNR) for MKL BLAS library
    if BLAS_MKL_CNR and blas_library.startswith('mkl'):
        os.environ['MKL_CBWR'] = 'COMPATIBLE'
        blas_extra_info.append('MKL_CBWR[{mode}]'.format(mode='COMPATIBLE'))

    # Show BLAS info
    if verbose and blas_known:
        blas_labels = [
            ('openblas', 'OpenBLAS'),
            ('blas', 'BLAS/Atlas'),
            ('mkl', 'MKL'),
        ]
        for prefix, label in blas_labels:
            if blas_library.startswith(prefix):
                ui.data(field='BLAS library',
                        value='{label} ({info})'.format(
                            label=label,
                            info=', '.join(blas_extra_info)))
                break

    # Set backend and parameters before importing keras
    if verbose:
        ui.data(field='Backend', value=backend)

    if backend == 'theano':
        # Theano setup

        # Default flags
        flags = [
            'warn.round=False',
        ]

        # Set device
        if device:
            flags.append('device=' + device)

        # Set floatX
        if theano_floatX:
            flags.append('floatX=' + theano_floatX)

            if verbose:
                ui.data(field='floatX', value=theano_floatX)

        # Set optimizer
        if theano_optimizer is not None:
            flags.append('optimizer=' + theano_optimizer)

        # Set fastmath for GPU mode only. Test against None so that an
        # explicit False is written out as well.
        if nvcc_fastmath is not None and device != 'cpu':
            if nvcc_fastmath:
                flags.append('nvcc.fastmath=True')
            else:
                flags.append('nvcc.fastmath=False')

        # Set OpenMP
        if theano_OpenMP is not None:
            if theano_OpenMP:
                flags.append('openmp=True')
            else:
                flags.append('openmp=False')

        if theano_deterministic is not None:
            if theano_deterministic:
                flags.append('deterministic=more')
            else:
                flags.append('deterministic=default')

        if verbose:
            ui.line('Theano', indent=2)

            for item in flags:
                field, _, value = item.partition('=')
                ui.data(field=field, value=value, indent=4)

        # Set environmental variable for Theano
        os.environ["THEANO_FLAGS"] = ','.join(flags)

    elif backend == 'tensorflow':
        # Tensorflow setup
        flags = []

        # Set device
        if device:
            flags.append('device=' + device)

            # In case of CPU disable visible GPU.
            if device == 'cpu':
                os.environ["CUDA_VISIBLE_DEVICES"] = ''

        if verbose:
            # Header is printed once, followed by the collected flags.
            ui.line('Tensorflow', indent=2)

            for item in flags:
                field, _, value = item.partition('=')
                ui.data(field=field, value=value, indent=4)

    with SuppressStdoutAndStderr():
        # Import keras and suppress backend announcement printed to stderr.
        # The import is done only for its side effects; the environment
        # variables above have to be in place before it happens.
        import keras

    if verbose:
        ui.foot()
示例#2
0
    def pack(self,
             dataset_name='dcase-dataset',
             content=None,
             output_path=None,
             base_path=None,
             overwrite=False,
             verbose=True):
        """Pack dataset.

        For each content group a package is (re)created when any source file
        is newer than the newest existing package of that group, or when
        `overwrite` is set.

        Note that the given `content` is modified in place: a missing
        'target' field is filled in for each file item, and HTML files
        converted from markdown are appended to the group's 'file_list'.

        Parameters
        ----------
        dataset_name : str
            Dataset name
            Default value 'dcase-dataset'

        content : list of dict
            List of packages to be packed. Package item dict should have format {'data_name': 'doc', 'file_list': [{'source': 'file1.txt'}]}.
            If None, nothing is packed.
            Default value None

        output_path : str
            Path to which packages are saved.
            Default value None

        base_path : str
            Base path of the data. If per item package paths are not given ('target' field), this parameter is used
            to create one from source path. If None, the source path is used as the target as such.
            Default value None

        overwrite : bool
            Overwrite existing packages.
            Default value False

        verbose : bool
            Show information during the packing.
            Default value True

        Raises
        ------
        IOError
            A source file listed in the content does not exist.

        Returns
        -------
        nothing

        """

        if verbose:
            log = FancyLogger()
            log.section_header('Packing dataset [{dataset_name}]'.format(
                dataset_name=dataset_name))

        if content is None:
            # Default value means there is nothing to pack.
            content = []

        if base_path is not None and not base_path.endswith(os.path.sep):
            base_path += os.path.sep

        for group in content:
            if verbose:
                log.line('[{data_name}]'.format(data_name=group['data_name']))

            package_filename = os.path.join(
                output_path,
                self.filename_template.format(
                    dataset_name=dataset_name,
                    data_name=group['data_name'],
                    extension=self.package_extension))

            newest_source = 0
            for item in group['file_list']:
                if not os.path.exists(item['source']):
                    message = '{name}: File not found [{source_file}].'.format(
                        name=self.__class__.__name__,
                        source_file=item['source'])

                    # No exception is active here, so error() is used
                    # instead of exception().
                    self.logger.error(message)
                    raise IOError(message)

                if 'target' not in item:
                    # Strip the base path only when one was given.
                    if base_path is not None and item['source'].startswith(base_path):
                        item['target'] = item['source'][len(base_path):]
                    else:
                        item['target'] = item['source']

                newest_source = max(newest_source,
                                    os.path.getmtime(item['source']))

            # Get newest package, take care of split packages
            all_packages = Path().file_list(
                path=os.path.split(os.path.abspath(package_filename))[0],
                extensions=os.path.splitext(package_filename)[1][1:])

            expected_base_name = os.path.splitext(
                os.path.split(package_filename)[-1])[0]

            newest_package = 0
            for package in all_packages:
                base_name = os.path.splitext(os.path.split(package)[-1])[0]

                # A trailing digit is treated as a split package index and
                # the numeric suffix is dropped before comparing names.
                # Slicing keeps an empty name from raising IndexError.
                if base_name[-1:].isdigit():
                    base_name = os.path.splitext(base_name)[0]

                if base_name == expected_base_name:
                    newest_package = max(newest_package,
                                         os.path.getmtime(package))

            if newest_package < newest_source or overwrite:
                if self.convert_md_to_html:
                    # Check for markdown content
                    new_files = []
                    for item in group['file_list']:
                        source_base, source_extension = os.path.splitext(
                            item['source'])
                        if source_extension != '.md':
                            continue

                        html_source = source_base + '.html'

                        # Convert when forced, when the HTML file is missing,
                        # or when the markdown file is newer than the HTML.
                        needs_conversion = (
                            overwrite
                            or not os.path.exists(html_source)
                            or os.path.getmtime(item['source']) > os.path.getmtime(html_source)
                        )

                        # NOTE(review): an already up-to-date HTML file is not
                        # added to the package here; confirm this is intended.
                        if needs_conversion:
                            self.convert_markdown(
                                source_filename=item['source'],
                                target_filename=html_source)

                            new_files.append({
                                'source': html_source,
                                'target': os.path.splitext(item['target'])[0] + '.html'
                            })

                    # Add new html files to the file_list
                    group['file_list'] += new_files

                # Create packages
                package = Package(filename=package_filename)
                package_filenames = package.compress(
                    file_list=group['file_list'],
                    size_limit=self.package_size_limit)

                if verbose:
                    log.line('Saved', indent=2)

                    for i in package_filenames:
                        # Show the name without the base path when one was given.
                        shown_name = i if base_path is None else i.replace(base_path, '')
                        log.line('[{file}] [{size}]'.format(
                            file=shown_name,
                            size=get_byte_string(os.path.getsize(i),
                                                 show_bytes=False)),
                                 indent=4)

        if verbose:
            log.foot()