def setup_keras(seed=None, profile=None, backend='theano', device=None,
                BLAS_thread_count=None, BLAS_MKL_CNR=True, nvcc_fastmath=None,
                theano_floatX=None, theano_optimizer=None, theano_OpenMP=None,
                theano_deterministic=None, verbose=True):
    """Setup Keras and environmental variables effecting on it.

    Given parameters are used to override ones specified in keras.json file.

    Parameters
    ----------
    seed : int, optional
        Randomization seed. If none given, no seed is set.

    profile : str, optional
        Profile name ['deterministic', 'cuda0_fast'], will override other
        parameters with profile parameters.

    backend : str
        Keras backend ['theano', 'tensorflow']
        Default value 'theano'

    device : str, optional
        Device for computations
        ['cpu', 'cuda', 'cuda0', 'cuda1', 'opencl0:0', 'opencl0:1']

    BLAS_thread_count : int
        Number of thread used for BLAS libraries

    BLAS_MKL_CNR : bool
        Conditional numerical reproducibility for MKL BLAS library. Use this
        to reproduce results with MKL.
        Default value True

    nvcc_fastmath : str, optional
        Control the usage of fast math library in NVCC

    theano_floatX : str, optional
        Default dtype for Theano matrix and tensor
        ['float64', 'float32', 'float16']

    theano_optimizer : str, optional
        Optimizer ['fast_run', 'merge', 'fast_compile', 'None']

    theano_OpenMP : bool, optional
        Enable or disable parallel computation on the CPU with OpenMP.

    theano_deterministic : bool, optional
        Request more deterministic Theano behaviour (slower).

    verbose : bool
        Print information
        Default value True

    Raises
    ------
    AttributeError
        If an invalid profile, backend, device, floatX, or optimizer value
        is given, or the 'deterministic' profile is requested without a seed.

    """

    def logger():
        # Lazily fetch a module logger, initializing logging on first use.
        logger_instance = logging.getLogger(__name__)
        if not logger_instance.handlers:
            setup_logging()
        return logger_instance

    if profile:
        if profile == 'deterministic':
            if seed is None:
                message = 'You should set randomization seed to get deterministic behaviour.'
                logger().exception(message)
                raise AttributeError(message)

            # Parameters to help to get deterministic results
            device = 'cpu'
            BLAS_thread_count = 1
            BLAS_MKL_CNR = True
            nvcc_fastmath = False
            theano_optimizer = 'None'
            theano_OpenMP = False
            theano_deterministic = True

        elif profile == 'cuda0_fast':
            device = 'cuda0'
            BLAS_thread_count = 8
            BLAS_MKL_CNR = True
            nvcc_fastmath = True
            theano_optimizer = 'fast_run'
            theano_OpenMP = True
            theano_deterministic = True

        else:
            message = 'Invalid Keras setup profile [{profile}].'.format(
                profile=profile)
            logger().exception(message)
            raise AttributeError(message)

    # Set seed first
    if seed:
        numpy.random.seed(seed)
        random.seed(seed)

    # Check parameter validity
    if backend and backend not in ['theano', 'tensorflow']:
        message = 'Invalid Keras backend type [{backend}].'.format(
            backend=backend)
        logger().exception(message)
        raise AttributeError(message)

    # NOTE: device list kept in sync with the docstring ('cuda1' and
    # 'opencl0:1' were documented but previously rejected here).
    if device and device not in ['cpu', 'cuda', 'cuda0', 'cuda1',
                                 'opencl0:0', 'opencl0:1']:
        message = 'Invalid Keras device type [{device}].'.format(device=device)
        logger().exception(message)
        raise AttributeError(message)

    if theano_floatX and theano_floatX not in [
        'float64', 'float32', 'float16'
    ]:
        message = 'Invalid Keras floatX type [{floatX}].'.format(
            floatX=theano_floatX)
        logger().exception(message)
        raise AttributeError(message)

    if theano_optimizer and theano_optimizer not in [
        'fast_run', 'merge', 'fast_compile', 'None'
    ]:
        message = 'Invalid Keras optimizer type [{optimizer}].'.format(
            optimizer=theano_optimizer)
        logger().exception(message)
        raise AttributeError(message)

    ui = FancyLogger()
    if verbose:
        ui.sub_header('Keras setup')

    # Get BLAS library associated to numpy
    if numpy.__config__.blas_opt_info and 'libraries' in numpy.__config__.blas_opt_info:
        blas_libraries = numpy.__config__.blas_opt_info['libraries']
    else:
        blas_libraries = ['']

    blas_extra_info = []

    # Select Keras backend
    os.environ["KERAS_BACKEND"] = backend

    # Threading
    if BLAS_thread_count:
        os.environ['GOTO_NUM_THREADS'] = str(BLAS_thread_count)
        os.environ['OMP_NUM_THREADS'] = str(BLAS_thread_count)
        os.environ['MKL_NUM_THREADS'] = str(BLAS_thread_count)
        blas_extra_info.append(
            'Threads[{threads}]'.format(threads=BLAS_thread_count))

        # Dynamic thread adjustment would let the library override the
        # requested thread count; disable it when a fixed count is wanted.
        if BLAS_thread_count > 1:
            os.environ['OMP_DYNAMIC'] = 'False'
            os.environ['MKL_DYNAMIC'] = 'False'
        else:
            os.environ['OMP_DYNAMIC'] = 'True'
            os.environ['MKL_DYNAMIC'] = 'True'

    # Conditional Numerical Reproducibility (CNR) for MKL BLAS library
    if BLAS_MKL_CNR and blas_libraries[0].startswith('mkl'):
        os.environ['MKL_CBWR'] = 'COMPATIBLE'
        blas_extra_info.append('MKL_CBWR[{mode}]'.format(mode='COMPATIBLE'))

    # Show BLAS info
    if verbose:
        if numpy.__config__.blas_opt_info and 'libraries' in numpy.__config__.blas_opt_info:
            blas_libraries = numpy.__config__.blas_opt_info['libraries']
            if blas_libraries[0].startswith('openblas'):
                ui.data(field='BLAS library',
                        value='OpenBLAS ({info})'.format(
                            info=', '.join(blas_extra_info)))
            elif blas_libraries[0].startswith('blas'):
                ui.data(field='BLAS library',
                        value='BLAS/Atlas ({info})'.format(
                            info=', '.join(blas_extra_info)))
            elif blas_libraries[0].startswith('mkl'):
                ui.data(field='BLAS library',
                        value='MKL ({info})'.format(
                            info=', '.join(blas_extra_info)))

    # Set backend and parameters before importing keras
    if verbose:
        ui.data(field='Backend', value=backend)

    if backend == 'theano':
        # Theano setup

        # Default flags
        flags = [
            # 'ldflags=',
            'warn.round=False',
        ]

        # Set device
        if device:
            flags.append('device=' + device)

        # Set floatX
        if theano_floatX:
            flags.append('floatX=' + theano_floatX)
            if verbose:
                ui.data(field='floatX', value=theano_floatX)

        # Set optimizer
        if theano_optimizer is not None:
            flags.append('optimizer=' + theano_optimizer)

        # Set fastmath for GPU mode only.
        # BUGFIX: previously the condition tested truthiness of
        # nvcc_fastmath, which made the False branch unreachable and
        # silently ignored an explicit nvcc_fastmath=False.
        if nvcc_fastmath is not None and device != 'cpu':
            if nvcc_fastmath:
                flags.append('nvcc.fastmath=True')
            else:
                flags.append('nvcc.fastmath=False')

        # Set OpenMP
        if theano_OpenMP is not None:
            if theano_OpenMP:
                flags.append('openmp=True')
            else:
                flags.append('openmp=False')

        # Set deterministic mode
        if theano_deterministic is not None:
            if theano_deterministic:
                flags.append('deterministic=more')
            else:
                flags.append('deterministic=default')

        if verbose:
            ui.line('Theano', indent=2)
            for item in flags:
                ui.data(field=item.split('=')[0],
                        value=item.split('=')[1],
                        indent=4)

        # Set environmental variable for Theano
        os.environ["THEANO_FLAGS"] = ','.join(flags)

    elif backend == 'tensorflow':
        # Tensorflow setup
        flags = []

        # Set device
        if device:
            flags.append('device=' + device)

            # In case of CPU disable visible GPU.
            if device == 'cpu':
                os.environ["CUDA_VISIBLE_DEVICES"] = ''

        # BUGFIX: the 'Tensorflow' header was previously printed twice
        # (once before and once after collecting the flags).
        if verbose:
            ui.line('Tensorflow', indent=2)
            for item in flags:
                ui.data(field=item.split('=')[0],
                        value=item.split('=')[1],
                        indent=4)

    with SuppressStdoutAndStderr():
        # Import keras and suppress backend announcement printed to stderr
        import keras

    if verbose:
        ui.foot()
def pack(self, dataset_name='dcase-dataset', content=None, output_path=None,
         base_path=None, overwrite=False, verbose=True):
    """Pack dataset.

    Parameters
    ----------
    dataset_name : str
        Dataset name
        Default value 'dcase-dataset'

    content : list of dict
        List of packages to be packed. Package item dict should have format
        {'data_name': 'doc', 'file_list': [{'source': 'file1.txt'}]}.
        Default value None

    output_path : str
        Path to which packages are saved.
        Default value None

    base_path : str
        Base path of the data. If per item package paths are not given
        ('target' field), this parameter is used to create one from source
        path.
        Default value None

    overwrite : bool
        Overwrite existing packages.
        Default value False

    verbose : bool
        Show information during the packing.
        Default value True

    Raises
    ------
    IOError
        If a source file listed in the content does not exist.

    Returns
    -------
    nothing

    """
    if verbose:
        log = FancyLogger()
        log.section_header('Packing dataset [{dataset_name}]'.format(
            dataset_name=dataset_name))

    # ROBUSTNESS: treat missing content as an empty job instead of
    # raising TypeError on iteration.
    if content is None:
        content = []

    # Normalize base path to end with a path separator so prefix
    # stripping produces clean relative targets.
    if base_path is not None and not base_path.endswith(os.path.sep):
        base_path += os.path.sep

    for group in content:
        if verbose:
            log.line('[{data_name}]'.format(data_name=group['data_name']))

        package_filename = os.path.join(
            output_path,
            self.filename_template.format(
                dataset_name=dataset_name,
                data_name=group['data_name'],
                extension=self.package_extension))

        # Validate sources, fill in missing targets, and find the newest
        # source modification time.
        newest_source = 0
        for item in group['file_list']:
            if not os.path.exists(item['source']):
                message = '{name}: File not found [{source_file}].'.format(
                    name=self.__class__.__name__,
                    source_file=item['source'])
                self.logger.exception(message)
                raise IOError(message)

            if 'target' not in item:
                # BUGFIX: guard against base_path=None, which previously
                # raised TypeError in str.startswith().
                if base_path and item['source'].startswith(base_path):
                    item['target'] = item['source'][len(base_path):]
                else:
                    item['target'] = item['source']

            timestamp = os.path.getmtime(item['source'])
            if newest_source < timestamp:
                newest_source = timestamp

        # Get newest package, take care of split packages
        all_packages = Path().file_list(
            path=os.path.split(os.path.abspath(package_filename))[0],
            extensions=os.path.splitext(package_filename)[1][1:])

        newest_package = 0
        for existing_package in all_packages:
            base_name = os.path.splitext(os.path.split(existing_package)[-1])[0]
            if base_name[-1].isdigit():
                # Split packages carry a numeric suffix extension;
                # strip it to compare against the package base name.
                base_name = os.path.splitext(base_name)[0]

            if base_name == os.path.splitext(
                    os.path.split(package_filename)[-1])[0]:
                timestamp = os.path.getmtime(existing_package)
                if newest_package < timestamp:
                    newest_package = timestamp

        # Repack only when sources are newer than the existing package,
        # or when overwrite is requested.
        if newest_package < newest_source or overwrite:
            if self.convert_md_to_html:
                # Check for markdown content
                new_files = []
                for item in group['file_list']:
                    if os.path.splitext(item['source'])[-1] == '.md':
                        html_source = os.path.splitext(item['source'])[0] + '.html'

                        # Convert when the html is missing, stale, or an
                        # overwrite is forced.
                        if (not os.path.exists(html_source)
                                or os.path.getmtime(item['source']) > os.path.getmtime(html_source)
                                or overwrite):
                            self.convert_markdown(
                                source_filename=item['source'],
                                target_filename=html_source)

                        new_files.append({
                            'source': html_source,
                            'target': os.path.splitext(item['target'])[0] + '.html'
                        })

                # Add new html files to the file_list
                group['file_list'] += new_files

            # Create packages
            package = Package(filename=package_filename)
            package_filenames = package.compress(
                file_list=group['file_list'],
                size_limit=self.package_size_limit)

            if verbose:
                log.line('Saved', indent=2)
                for i in package_filenames:
                    # BUGFIX: guard against base_path=None, which
                    # previously raised TypeError in str.replace().
                    display_name = i.replace(base_path, '') if base_path else i
                    log.line('[{file}] [{size}]'.format(
                        file=display_name,
                        size=get_byte_string(os.path.getsize(i),
                                             show_bytes=False)),
                        indent=4)

    if verbose:
        log.foot()