Example #1
    def __init__(self,
                 data_source,
                 cache_dir=None,
                 cache_file_name_prefix='cache',
                 shuffle=False,
                 rng=None):
        self._tempdir_created = False
        logger.info('Using DataSourceWithFileCache')
        super(DataSourceWithFileCache, self).__init__(shuffle=shuffle, rng=rng)
        self._cache_file_name_prefix = cache_file_name_prefix
        self._cache_dir = cache_dir
        logger.info('Cache Directory is {}'.format(self._cache_dir))

        self._cache_size = int(nnabla_config.get(
            'DATA_ITERATOR', 'data_source_file_cache_size'))
        logger.info('Cache size is {}'.format(self._cache_size))

        self._num_of_threads = int(nnabla_config.get(
            'DATA_ITERATOR', 'data_source_file_cache_num_of_threads'))
        logger.info('Num of thread is {}'.format(self._num_of_threads))

        self._cache_file_format = nnabla_config.get(
            'DATA_ITERATOR', 'cache_file_format')
        logger.info('Cache file format is {}'.format(self._cache_file_format))

        self._thread_lock = threading.Lock()

        self._size = data_source._size
        self._variables = data_source.variables
        self._data_source = data_source
        self._generation = -1
        self._cache_positions = []
        self._total_cached_size = 0
        self._cache_file_names = []
        self._cache_file_order = []
        self._cache_file_start_positions = []
        self._cache_file_data_orders = []

        self._current_cache_file_index = -1
        self._current_cache_data = None

        self.shuffle = shuffle
        self._original_order = list(range(self._size))
        self._order = list(range(self._size))

        # __enter__
        if self._cache_dir is None:
            self._tempdir_created = True
            if nnabla_config.get('DATA_ITERATOR', 'data_source_file_cache_location') != '':
                self._cache_dir = tempfile.mkdtemp(dir=nnabla_config.get(
                    'DATA_ITERATOR', 'data_source_file_cache_location'))
            else:
                self._cache_dir = tempfile.mkdtemp()
            logger.info(
                'Tempdir for cache {} created.'.format(self._cache_dir))
        self._closed = False
        atexit.register(self.close)

        self._create_cache()
        self._create_cache_file_position_table()
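
Every tunable in the constructor above comes from nnabla_config. As a sketch of what the corresponding INI section might contain, loaded here through the standard configparser for illustration (the key names are taken from the calls above; the values are made up, not nnabla defaults):

import configparser

# Illustrative [DATA_ITERATOR] section covering the keys read above.
conf = configparser.ConfigParser()
conf.read_string('''
[DATA_ITERATOR]
data_source_file_cache_size = 100
data_source_file_cache_num_of_threads = 10
data_source_file_cache_location =
cache_file_format = .npy
''')

print(int(conf.get('DATA_ITERATOR', 'data_source_file_cache_size')))  # 100
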
Example #2
    def __init__(self,
                 input_csv_filename,
                 rng=None,
                 shuffle=False,
                 num_of_threads=None):
        self._cache_size = int(
            nnabla_config.get('DATA_ITERATOR', 'data_source_file_cache_size'))
        logger.info('Cache size is {}'.format(self._cache_size))

        self._filereader = FileReader(input_csv_filename)
        self._original_source_uri = input_csv_filename
        if rng is None:
            self._rng = numpy.random.RandomState(313)
        else:
            self._rng = rng
        self._shuffle = shuffle

        # Read the input CSV file.
        self._file = open(input_csv_filename, 'r', encoding='utf-8')
        csvreader = csv.reader(self._file)

        header = next(csvreader)

        # Read and store all data rows.
        self._csv_data = list(csvreader)
        self._size = len(self._csv_data)

        self._file.close()

        self._remove_comment_cols(header, self._csv_data)
        self._process_header(header)
        self._variables = tuple(self._variables_dict.keys())

        self._original_order = list(range(self._size))

        # Shuffle; this order determines the CSV row processing order.
        if self._shuffle:
            self._order = list(self._rng.permutation(list(range(self._size))))
        else:
            self._order = list(range(self._size))

        if num_of_threads:
            self._num_of_threads = num_of_threads
        else:
            self._num_of_threads = int(
                nnabla_config.get('DATA_ITERATOR',
                                  'data_source_file_cache_num_of_threads'))
        logger.info('Num of thread is {}'.format(self._num_of_threads))
Example #3
    def __init__(self, data_source, shuffle=False, rng=None):
        logger.info('Using DataSourceWithMemoryCache')
        super(DataSourceWithMemoryCache, self).__init__(
            shuffle=shuffle, rng=rng)
        self._buffer_max_size = int(nnabla_config.get(
            'DATA_ITERATOR', 'data_source_buffer_max_size'))
        self._size = data_source._size
        self._variables = data_source.variables
        self._data_source = data_source
        self._order = list(range(self._size))

        self._on_memory = False
        self._cache = []

        data = self._get_data_func(0)
        self._data_size = 0
        for d in data:
            self._data_size += d.size * d.itemsize
        total_size = self._data_size * self._size
        if total_size < self._buffer_max_size:
            logger.info('On-memory')
            self._on_memory = True
        self._generation = -1
        self._closed = False
        atexit.register(self.close)
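
The on-memory decision above estimates the byte size of one item by summing d.size * d.itemsize over the arrays returned for index 0, then scales by the dataset length; for a NumPy array that product equals nbytes. A minimal sketch of the same sizing, with made-up shapes:

import numpy

# Stand-in for what _get_data_func(0) might return (shapes are assumptions).
data = (numpy.zeros((3, 32, 32), dtype=numpy.float32),
        numpy.zeros((1,), dtype=numpy.float32))

data_size = sum(d.size * d.itemsize for d in data)
assert data_size == sum(d.nbytes for d in data)  # same quantity

total_size = data_size * 50000  # item size times dataset length
print(data_size, total_size)    # 12292 614600000
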
Example #4
    def __init__(self, input_csv_filename, rng=None, shuffle=False):
        self._cache_size = int(
            nnabla_config.get('DATA_ITERATOR', 'data_source_file_cache_size'))
        logger.info('Cache size is {}'.format(self._cache_size))

        self._filereader = FileReader(input_csv_filename)
        self._original_source_uri = input_csv_filename
        if rng is None:
            self._rng = numpy.random.RandomState(313)
        else:
            self._rng = rng
        self._shuffle = shuffle

        # Binary mode is required to use the seek and tell functions.
        self._file = open(input_csv_filename, 'rb')

        self._line_positions = []
        line = self._file.readline().decode('utf-8')
        csvreader = csv.reader([line])
        self._process_header(next(csvreader))

        # Store the file position of each data row.
        self._size = 0
        while True:
            self._line_positions.append(self._file.tell())
            line = self._file.readline()
            if line is None or len(line) == 0:
                break
            self._size += 1

        # rewind
        self._file.seek(0)

        self._cache_file_order = []
        self._cache_file_data_orders = []
        self._cache_file_names = []

        # Adjust the data size to the reset position. In most cases this
        # means a multiple of the bunch (mini-batch) size.
        num_of_cache_files = int(
            numpy.ceil(float(self._size) / self._cache_size))
        self._cache_file_order = self._cache_file_order[0:num_of_cache_files]
        self._cache_file_data_orders = self._cache_file_data_orders[
            0:num_of_cache_files]
        if self._size % self._cache_size != 0:
            self._cache_file_data_orders[num_of_cache_files -
                                         1] = self._cache_file_data_orders[
                                             num_of_cache_files -
                                             1][0:self._size %
                                                self._cache_size]

        self._original_order = list(range(self._size))
        self._order = list(range(self._size))
        self._variables = tuple(self._variables_dict.keys())

        # Shuffle
        if self._shuffle:
            self._order = list(self._rng.permutation(list(range(self._size))))
        else:
            self._order = list(range(self._size))
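
Example #4 gains random access to the CSV by recording file.tell() before each line and seeking back later, which is why the file must be opened in binary mode. The same technique on a throwaway in-memory file:

import io

f = io.BytesIO(b'a,1\nb,2\nc,3\n')  # stands in for the CSV opened in 'rb' mode

line_positions = []
while True:
    pos = f.tell()          # offset of the line about to be read
    line = f.readline()
    if len(line) == 0:      # EOF: readline() returns b''
        break
    line_positions.append(pos)

f.seek(line_positions[2])            # jump straight to the third row
print(f.readline().decode('utf-8'))  # c,3
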
Example #5
    def create(self,
               output_cache_dirname,
               normalize=True,
               cache_file_name_prefix='cache'):

        self._normalize = normalize
        self._cache_file_name_prefix = cache_file_name_prefix

        self._cache_file_format = nnabla_config.get('DATA_ITERATOR',
                                                    'cache_file_format')
        logger.info('Cache file format is {}'.format(self._cache_file_format))

        self._cache_dir = output_cache_dirname

        progress(None)

        self._cache_data = []
        for self._position in range(self._size):
            progress('Create cache', self._position * 1.0 / self._size)
            self._file.seek(self._line_positions[self._order[self._position]])
            line = self._file.readline().decode('utf-8')
            csvreader = csv.reader([line])
            row = next(csvreader)
            self._cache_data.append(tuple(self._process_row(row)))

            if len(self._cache_data) >= self._cache_size:
                self._save_cache()
                self._cache_data = []

        self._save_cache()

        # Create Index
        index_filename = os.path.join(self._cache_dir, "cache_index.csv")
        with open(index_filename, 'w') as f:
            writer = csv.writer(f, lineterminator='\n')
            for fn, orders in zip(self._cache_file_names,
                                  self._cache_file_data_orders):
                writer.writerow((os.path.basename(fn), len(orders)))
        # Create Info
        if self._cache_file_format == ".npy":
            info_filename = os.path.join(self._cache_dir, "cache_info.csv")
            with open(info_filename, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                for variable in self._variables:
                    writer.writerow((variable, ))

        # Create original.csv
        if self._original_source_uri is not None:
            shutil.copy(self._original_source_uri,
                        os.path.join(self._cache_dir, "original.csv"))

        # Create order.csv
        if self._order is not None and \
                self._original_order is not None:
            with open(os.path.join(self._cache_dir, "order.csv"), 'w') as o:
                writer = csv.writer(o, lineterminator='\n')
                for orders in zip(self._original_order, self._order):
                    writer.writerow(list(orders))
Example #6
    def __init__(self, data_source, cache_dir=None, shuffle=False, rng=None):
        logger.info('Using DataSourceWithFileCache')
        super(DataSourceWithFileCache, self).__init__(shuffle=shuffle, rng=rng)
        self._cache_dir = cache_dir
        self._cache_size = int(
            nnabla_config.get('DATA_ITERATOR', 'data_source_file_cache_size'))
        logger.info('Cache size is {}'.format(self._cache_size))
        self._size = data_source._size
        self._variables = data_source.variables
        self._data_source = data_source
        self._generation = -1
        self._cache_data = []
        self._total_cached_size = 0
        self._cache_file_names = []
        self._cache_file_order = []
        self._cache_file_start_positions = []
        self._cache_file_data_orders = []

        self._current_cache_file_index = -1
        self._current_cache_data = None

        self.shuffle = shuffle
        self._order = list(range(self._size))

        # __enter__
        self._tempdir_created = False
        if self._cache_dir is None:
            self._tempdir_created = True
            if nnabla_config.get('DATA_ITERATOR',
                                 'data_source_file_cache_location') != '':
                self._cache_dir = tempfile.mkdtemp(dir=nnabla_config.get(
                    'DATA_ITERATOR', 'data_source_file_cache_location'))
            else:
                self._cache_dir = tempfile.mkdtemp()
            logger.info('Tempdir {} created.'.format(self._cache_dir))
        self._closed = False
        atexit.register(self.close)
Example #7
def _convert(args, source):
    _, ext = os.path.splitext(args.destination)
    if ext.lower() == '.cache':
        with DataSourceWithFileCache(source, cache_dir=args.destination, shuffle=args.shuffle) as ds:
            print('Number of Data: {}'.format(ds.size))
            print('Shuffle:        {}'.format(args.shuffle))
            print('Normalize:      {}'.format(args.normalize))
            pbar = None
            if nnabla_config.get('MISC', 'misc_show_progress') == 'True':
                pbar = tqdm(total=ds.size)
            for i in range(ds.size):
                ds._get_data(i)
                if pbar is not None:
                    pbar.update(1)
    else:
        print('Command `conv_dataset` only supports CACHE as destination.')
Example #8
    def __init__(self,
                 input_csv_filename,
                 rng=None,
                 shuffle=False,
                 process_num=None):
        self._cache_size = int(
            nnabla_config.get('DATA_ITERATOR', 'data_source_file_cache_size'))
        logger.info('Cache size is {}'.format(self._cache_size))

        self._filereader = FileReader(input_csv_filename)
        self._original_source_uri = input_csv_filename
        if rng is None:
            self._rng = numpy.random.RandomState(313)
        else:
            self._rng = rng
        self._shuffle = shuffle

        # Read the input CSV file.
        self._file = open(input_csv_filename, 'r')
        csvreader = csv.reader(self._file)

        self._process_header(next(csvreader))
        self._variables = tuple(self._variables_dict.keys())

        # Read and store all data rows.
        self._csv_data = list(csvreader)
        self._size = len(self._csv_data)

        self._file.close()

        self._original_order = list(range(self._size))

        # Shuffle; this order determines the CSV row processing order.
        if self._shuffle:
            self._order = list(self._rng.permutation(list(range(self._size))))
        else:
            self._order = list(range(self._size))

        # multiprocess num
        if process_num:
            self._process_num = process_num
        else:
            self._process_num = multiprocessing.cpu_count()
        logger.info('Num of process is {}'.format(self._process_num))
Example #9
    def __init__(self, cachedir, shuffle=False, rng=None, normalize=False):
        super(CacheDataSource, self).__init__(shuffle=shuffle, rng=rng)

        self._current_data = {}
        self._current_filename = None

        self._cachedir = cachedir
        self._normalize = normalize
        self._filereader = FileReader(self._cachedir)
        self._num_of_threads = int(
            nnabla_config.get('DATA_ITERATOR',
                              'cache_file_cache_num_of_threads'))
        self._variables = None

        self._generation = -1
        self._cache_files = []
        self._max_length = 1

        info_filename = os.path.join(self._cachedir, "cache_info.csv")
        if os.path.exists(info_filename):
            self.initialize_cache_info(info_filename)
            self._cache_type = '.npy'
        else:
            self._cache_type = '.h5'
        index_filename = os.path.join(self._cachedir, "cache_index.csv")
        if os.path.exists(index_filename):
            self.initialize_cache_files_with_index(index_filename)
        else:
            self._filenames = [
                f for f in self._filereader.listdir()
                if os.path.splitext(f)[1].lower() == ".h5"
            ]
            for filename in self._filenames:
                self.initialize_cache_files(filename)

        logger.info('{}'.format(len(self._cache_files)))

        self._cache_reader_with_prefetch = CacheReaderWithPrefetch(
            self._cachedir, self._num_of_threads, self._variables)
        self._thread_lock = threading.Lock()

        self.reset()
Example #10
    def __init__(self, cachedir, shuffle=False, rng=None, normalize=False):
        super(CacheDataSource, self).__init__(shuffle=shuffle, rng=rng)

        self._current_data = {}
        self._current_filename = None

        self._cachedir = cachedir
        self._normalize = normalize
        self._filereader = FileReader(self._cachedir)
        self._num_of_threads = int(
            nnabla_config.get('DATA_ITERATOR',
                              'cache_file_cache_num_of_threads'))
        self._variables = None

        self._generation = -1
        self._cache_files = []
        self._max_length = 1

        info_filename = os.path.join(self._cachedir, "cache_info.csv")
        self.initialize_cache_info(info_filename)

        index_filename = os.path.join(self._cachedir, "cache_index.csv")
        self.initialize_cache_files_with_index(index_filename)

        logger.info('{}'.format(len(self._cache_files)))

        self._cache_reader_with_prefetch = CacheReaderWithPrefetch(
            self._cachedir, self._num_of_threads, self._variables)
        self._thread_lock = threading.Lock()

        self._original_order = []
        for i in range(len(self._cache_files)):
            filename, length = self._cache_files[i]
            for j in range(length):
                self._original_order.append((filename, j))

        self.reset()
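
initialize_cache_files_with_index consumes the cache_index.csv written by the create() methods elsewhere on this page: one row per cache file, holding the file's basename and its number of data items. A sketch of parsing it into the (filename, length) pairs that the loop above expands into _original_order (the directory path is a placeholder):

import csv
import os

cachedir = '/path/to/cache'  # placeholder
cache_files = []
with open(os.path.join(cachedir, 'cache_index.csv')) as f:
    for file_name, data_nums in csv.reader(f):
        cache_files.append((file_name, int(data_nums)))

# One (filename, index-within-file) entry per data item, as above.
original_order = [(fn, j) for fn, length in cache_files for j in range(length)]
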
Example #11
    def create(self,
               output_cache_dirname,
               normalize=True,
               cache_file_name_prefix='cache'):

        self._normalize = normalize
        self._cache_file_name_prefix = cache_file_name_prefix
        self._cache_dir = output_cache_dirname

        self._cache_file_format = nnabla_config.get('DATA_ITERATOR',
                                                    'cache_file_format')
        logger.info('Cache file format is {}'.format(self._cache_file_format))

        progress(None)

        csv_position_and_data = []
        csv_row = []
        for _position in range(self._size):
            csv_row.append(self._csv_data[self._order[_position]])
            if len(csv_row) == self._cache_size:
                csv_position_and_data.append((_position, csv_row))
                csv_row = []
        if len(csv_row):
            csv_position_and_data.append((self._size - 1, csv_row))

        progress('Create cache', 0)
        with closing(ThreadPool(processes=self._num_of_threads)) as pool:
            cache_index_rows = pool.map(self._save_cache,
                                        csv_position_and_data)
        progress('Create cache', 1.0)

        # Create Index
        index_filename = os.path.join(output_cache_dirname, "cache_index.csv")
        with open(index_filename, 'w') as f:
            writer = csv.writer(f, lineterminator='\n')
            for row in cache_index_rows:
                if row:
                    # row: (file_path, data_nums)
                    writer.writerow((os.path.basename(row[0]), row[1]))

        # Create Info
        if self._cache_file_format == ".npy":
            info_filename = os.path.join(output_cache_dirname,
                                         "cache_info.csv")
            with open(info_filename, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                for variable in self._variables:
                    writer.writerow((variable, ))

        # Create original.csv
        if self._original_source_uri is not None:
            shutil.copy(self._original_source_uri,
                        os.path.join(output_cache_dirname, "original.csv"))

        # Create order.csv
        if self._order is not None and \
                self._original_order is not None:
            with open(os.path.join(output_cache_dirname, "order.csv"),
                      'w') as o:
                writer = csv.writer(o, lineterminator='\n')
                for orders in zip(self._original_order, self._order):
                    writer.writerow(list(orders))
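
The loop above splits the shuffled CSV rows into cache-size batches and pairs each batch with the position of its last row, which _save_cache receives alongside the rows; the trailing partial batch is tagged with self._size - 1. Isolated, with made-up values:

cache_size = 4
rows = list(range(10))  # stands in for the shuffled CSV rows

position_and_data = []
batch = []
for position, row in enumerate(rows):
    batch.append(row)
    if len(batch) == cache_size:
        position_and_data.append((position, batch))
        batch = []
if batch:  # trailing partial batch
    position_and_data.append((len(rows) - 1, batch))

print(position_and_data)
# [(3, [0, 1, 2, 3]), (7, [4, 5, 6, 7]), (9, [8, 9])]
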
Example #12
                                 "NVIDIA Corporation/NVSMI/nvml.dll")
        if not os.path.exists(nvml_path):
            nvml_path = os.path.join("C:/windows", "system32", "nvml.dll")
        if os.path.exists(nvml_path):
            nvml.nvml_lib = nvml.CDLL(nvml_path)
        nvml.lib_load_lock.release()

    _load_nvml_for_win()
    pynvml.nvmlInit()
except Exception:
    # Measure GPU load only if NVML is installed.
    gpu_load_backend_ok = False

try:
    _ANALYSE_GPU_STATUS_INTERVAL = int(
        nnabla_config.get('MULTINODE', 'analyse_gpu_status_interval'))
    _GPU_SLOWING_WARNING_THRESHOLD = float(
        nnabla_config.get('MULTINODE', 'gpu_slowing_warning_threshold'))
    _GPU_SLOWING_ERROR_THRESHOLD = float(
        nnabla_config.get('MULTINODE', 'gpu_slowing_error_threshold'))
except Exception:
    _ANALYSE_GPU_STATUS_INTERVAL = 20
    _GPU_SLOWING_WARNING_THRESHOLD = 1.4
    _GPU_SLOWING_ERROR_THRESHOLD = 2

# Load-tracking variables
# =======================
gpu_m_count = 0
gpu_a_load = {}
if cpu_load_backend_ok:
    p_handler = psutil.Process()
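
Example #12 shows the defensive pattern used throughout this page: optional backends and tunables are probed inside try/except, and hard-coded defaults take over when a config section or library is missing. The same pattern in isolation, using only the standard library (file, section, and option names are illustrative):

import configparser

config = configparser.ConfigParser()
config.read('app.conf')  # yields an empty config if the file is absent

try:
    interval = int(config.get('MULTINODE', 'analyse_gpu_status_interval'))
except Exception:
    interval = 20  # fallback default, as in the snippet above

print(interval)
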
Example #13
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from nnabla.config import nnabla_config
from nnabla.logger import logger

################################################################################
# Import callback module
callback = None

import importlib
try:
    callback = importlib.import_module(
        nnabla_config.get('CALLBACK', 'util_callback_module'))
except:
    callback = None


def get_callback_version():
    if callback is not None:
        return callback.get_callback_version()
    else:
        return None


def get_best_from_status(args):
    if callback is not None:
        return callback.get_best_from_status(args)
    else:
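
The wrappers above delegate to whatever module the [CALLBACK] util_callback_module config entry names, so any importable module exposing the expected functions will do. A minimal sketch of a compatible module; the file name my_callbacks.py is hypothetical, not part of nnabla:

# my_callbacks.py -- hypothetical module, named in the config as:
#   [CALLBACK]
#   util_callback_module = my_callbacks

def get_callback_version():
    # Returned through the get_callback_version() wrapper above.
    return '1.0.0'

def get_best_from_status(args):
    # Receives the same args object as the wrapper; the return value
    # is passed straight through to the caller.
    return None
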
Example #14
    def create(self,
               output_cache_dirname,
               normalize=True,
               cache_file_name_prefix='cache'):

        cache_file_format = nnabla_config.get('DATA_ITERATOR',
                                              'cache_file_format')
        logger.info('Cache file format is {}'.format(cache_file_format))

        progress(None)

        cache_file_name_and_data_nums_list = multiprocessing.Manager().list()

        csv_position_and_data = []
        csv_row = []
        for _position in range(self._size):
            csv_row.append(self._csv_data[self._order[_position]])
            if len(csv_row) == self._cache_size:
                csv_position_and_data.append((_position, csv_row))
                csv_row = []
        if len(csv_row):
            csv_position_and_data.append((self._size - 1, csv_row))

        self_args = {
            '_cache_file_name_prefix': cache_file_name_prefix,
            '_cache_file_format': cache_file_format,
            '_cache_file_name_and_data_nums_list':
            cache_file_name_and_data_nums_list,
            '_output_cache_dirname': output_cache_dirname,
            '_variables': self._variables,
            '_filereader': self._filereader,
            '_normalize': normalize,
            '_columns': self._columns,
            '_cache_file_count': len(csv_position_and_data)
        }

        # Notice:
        #   A gc.collect() is required here. Python may run a garbage
        #   collection pass inside a child process and release objects
        #   created by the parent process, which can end up touching
        #   CUDA APIs that were never initialized in the child.
        #   Collecting in the parent beforehand avoids such cases.
        gc.collect()

        progress('Create cache', 0)
        with closing(multiprocessing.Pool(self._process_num)) as pool:
            pool.map(multiprocess_save_cache,
                     ((i, self_args) for i in csv_position_and_data))
        progress('Create cache', 1.0)

        logger.info('The total of cache files is {}'.format(
            len(cache_file_name_and_data_nums_list)))

        # Create Index
        index_filename = os.path.join(output_cache_dirname, "cache_index.csv")
        cache_index_rows = sorted(cache_file_name_and_data_nums_list,
                                  key=lambda x: x[0])
        with open(index_filename, 'w') as f:
            writer = csv.writer(f, lineterminator='\n')
            for file_name, data_nums in cache_index_rows:
                writer.writerow((os.path.basename(file_name), data_nums))

        # Create Info
        if cache_file_format == ".npy":
            info_filename = os.path.join(output_cache_dirname,
                                         "cache_info.csv")
            with open(info_filename, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                for variable in self._variables:
                    writer.writerow((variable, ))

        # Create original.csv
        if self._original_source_uri is not None:
            shutil.copy(self._original_source_uri,
                        os.path.join(output_cache_dirname, "original.csv"))

        # Create order.csv
        if self._order is not None and \
                self._original_order is not None:
            with open(os.path.join(output_cache_dirname, "order.csv"),
                      'w') as o:
                writer = csv.writer(o, lineterminator='\n')
                for orders in zip(self._original_order, self._order):
                    writer.writerow(list(orders))
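
multiprocess_save_cache runs in separate worker processes, so the (file_name, data_nums) results travel back through a multiprocessing.Manager().list() proxy rather than ordinary process memory. The mechanism in isolation, with a stand-in worker:

import multiprocessing

def worker(args):
    i, shared = args
    shared.append(('cache_%04d' % i, 100))  # append is forwarded to the manager

if __name__ == '__main__':
    manager = multiprocessing.Manager()
    results = manager.list()
    with multiprocessing.Pool(2) as pool:
        pool.map(worker, ((i, results) for i in range(4)))
    print(sorted(results))  # worker order is not guaranteed, hence the sort
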
Example #15
    def create(self,
               output_cache_dirname,
               normalize=True,
               cache_file_name_prefix='cache'):

        self._normalize = normalize
        self._cache_file_name_prefix = cache_file_name_prefix

        self._cache_file_format = nnabla_config.get('DATA_ITERATOR',
                                                    'cache_file_format')
        logger.info('Cache file format is {}'.format(self._cache_file_format))

        self._cache_dir = output_cache_dirname

        progress(None)

        self._cache_file_name_and_data_nums_q = multiprocessing.Manager(
        ).Queue()

        self._csv_position_and_data = []
        csv_row = []
        for _position in range(self._size):
            csv_row.append(self._csv_data[self._order[_position]])
            if len(csv_row) == self._cache_size:
                self._csv_position_and_data.append((_position, csv_row))
                csv_row = []
        if len(csv_row):
            self._csv_position_and_data.append((self._size - 1, csv_row))

        self_args = {
            '_cache_file_name_prefix': self._cache_file_name_prefix,
            '_cache_file_format': self._cache_file_format,
            '_cache_file_name_and_data_nums_q':
            self._cache_file_name_and_data_nums_q,
            '_cache_dir': self._cache_dir,
            '_variables': self._variables,
            '_filereader': self._filereader,
            '_normalize': self._normalize,
            '_columns': self._columns,
            '_cache_file_count': len(self._csv_position_and_data)
        }

        progress('Create cache', 0)
        with closing(multiprocessing.Pool(self._process_num)) as pool:
            pool.map(multiprocess_save_cache,
                     ((i, self_args) for i in self._csv_position_and_data))
        progress('Create cache', 1.0)

        logger.info('The total of cache files is {}'.format(
            self._cache_file_name_and_data_nums_q.qsize()))

        # Create Index
        index_filename = os.path.join(self._cache_dir, "cache_index.csv")
        cache_index_rows = []
        while True:
            try:
                cache_index_rows.append(
                    self._cache_file_name_and_data_nums_q.get(block=False))
            except Exception:
                break
        cache_index_rows = sorted(cache_index_rows, key=lambda x: x[0])
        with open(index_filename, 'w') as f:
            writer = csv.writer(f, lineterminator='\n')
            for file_name, data_nums in cache_index_rows:
                writer.writerow((os.path.basename(file_name), data_nums))

        # Create Info
        if self._cache_file_format == ".npy":
            info_filename = os.path.join(self._cache_dir, "cache_info.csv")
            with open(info_filename, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                for variable in self._variables:
                    writer.writerow((variable, ))

        # Create original.csv
        if self._original_source_uri is not None:
            shutil.copy(self._original_source_uri,
                        os.path.join(self._cache_dir, "original.csv"))

        # Create order.csv
        if self._order is not None and \
                self._original_order is not None:
            with open(os.path.join(self._cache_dir, "order.csv"), 'w') as o:
                writer = csv.writer(o, lineterminator='\n')
                for orders in zip(self._original_order, self._order):
                    writer.writerow(list(orders))
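
This variant returns results through a Manager().Queue() and then drains it with non-blocking get() calls, relying on the exception raised by an empty queue to end the loop. The drain pattern on its own:

import multiprocessing
import queue

if __name__ == '__main__':
    manager = multiprocessing.Manager()
    q = manager.Queue()
    for i in range(3):
        q.put(('cache_%04d' % i, 100))

    rows = []
    while True:
        try:
            rows.append(q.get(block=False))  # raises queue.Empty once drained
        except queue.Empty:
            break

    print(sorted(rows, key=lambda x: x[0]))
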
Example #16
File: callback.py Project: sony/nnabla
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from nnabla.config import nnabla_config

################################################################################
# Import callback module
callback_list = []

import importlib
try:
    module_list_str = nnabla_config.get('CALLBACK', 'util_callback_module')
    module_list = module_list_str.strip('[]').replace(' ', '').split(',')
    for module in module_list:
        callback_list.append(importlib.import_module(module))
except:
    callback_list = []


def _get_callback(func_name):
    for callback in callback_list:
        if func_name in dir(callback):
            return callback
    return None


def alternative_cli(args):
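
Unlike Example #13, this variant accepts a bracketed, comma-separated list of modules in a single config value and imports each one; _get_callback then searches the list front to back for the first module defining the requested function. The parsing step in isolation (module names illustrative):

module_list_str = '[my_callbacks_a, my_callbacks_b]'  # illustrative config value
module_list = module_list_str.strip('[]').replace(' ', '').split(',')
print(module_list)  # ['my_callbacks_a', 'my_callbacks_b']
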
Example #17
from nnabla import available_contexts
from nnabla.parameter import save_parameters

from nnabla.utils.progress import configure_progress, progress
import nnabla.utils.callback as callback

from nnabla.utils.cli.utility import let_data_to_variable, measure_cpu_gpu_instant_load, get_cpu_gpu_average_load, save_optimizer_states, NodeTimeInfoCollector
from nnabla.utils.nnp_format import nnp_version
from nnabla.utils.communicator_util import current_communicator, single_or_rankzero

import nnabla.utils.load as load
from nnabla.config import nnabla_config

try:
    _OPTIMIZER_CHECKPOINT_INTERVAL = int(
        nnabla_config.get('CHECKPOINT', 'optimizer_interval'))
except Exception:
    _OPTIMIZER_CHECKPOINT_INTERVAL = 5

_save_parameter_info = {}

nodeTimeCollector = NodeTimeInfoCollector()


def _all_reduce(comm, var, division, inplace):
    comm.all_reduce(var, division=division, inplace=inplace)


def _save_parameters(args, suffix, epoch, train_config, force=False):
    global _save_parameter_info
Example #18
    def create(self,
               output_cache_dirname,
               normalize=True,
               cache_file_name_prefix='cache'):

        self._normalize = normalize
        self._cache_file_name_prefix = cache_file_name_prefix

        self._cache_file_format = nnabla_config.get('DATA_ITERATOR',
                                                    'cache_file_format')
        logger.info('Cache file format is {}'.format(self._cache_file_format))

        self._cache_dir = output_cache_dirname

        progress(None)

        self._cache_file_order = []
        self._cache_file_data_orders = []
        self._cache_file_names = []

        self._cache_data = []
        progress('Create cache', 0)
        last_time = time.time()
        for self._position in range(self._size):
            if time.time() >= last_time + 1.0:
                progress('Create cache', self._position / self._size)
                last_time = time.time()
            self._file.seek(self._line_positions[self._order[self._position]])
            line = self._file.readline().decode('utf-8')
            csvreader = csv.reader([line])
            row = next(csvreader)
            self._cache_data.append(tuple(self._process_row(row)))

            if len(self._cache_data) >= self._cache_size:
                self._save_cache()
                self._cache_data = []

        self._save_cache()
        progress('Create cache', 1.0)

        # Adjust the data size to the reset position. In most cases this
        # means a multiple of the bunch (mini-batch) size.
        num_of_cache_files = int(
            numpy.ceil(float(self._size) / self._cache_size))
        self._cache_file_order = self._cache_file_order[0:num_of_cache_files]
        self._cache_file_data_orders = self._cache_file_data_orders[
            0:num_of_cache_files]
        if self._size % self._cache_size != 0:
            self._cache_file_data_orders[num_of_cache_files -
                                         1] = self._cache_file_data_orders[
                                             num_of_cache_files -
                                             1][0:self._size %
                                                self._cache_size]

        # Create Index
        index_filename = os.path.join(self._cache_dir, "cache_index.csv")
        with open(index_filename, 'w') as f:
            writer = csv.writer(f, lineterminator='\n')
            for fn, orders in zip(self._cache_file_names,
                                  self._cache_file_data_orders):
                writer.writerow((os.path.basename(fn), len(orders)))
        # Create Info
        if self._cache_file_format == ".npy":
            info_filename = os.path.join(self._cache_dir, "cache_info.csv")
            with open(info_filename, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                for variable in self._variables:
                    writer.writerow((variable, ))

        # Create original.csv
        if self._original_source_uri is not None:
            shutil.copy(self._original_source_uri,
                        os.path.join(self._cache_dir, "original.csv"))

        # Create order.csv
        if self._order is not None and \
                self._original_order is not None:
            with open(os.path.join(self._cache_dir, "order.csv"), 'w') as o:
                writer = csv.writer(o, lineterminator='\n')
                for orders in zip(self._original_order, self._order):
                    writer.writerow(list(orders))
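
The trimming step above computes the number of cache files as ceil(size / cache_size) and shortens the last file's data order to the remainder size % cache_size when the division is not exact. Worked through with small made-up numbers:

import numpy

size, cache_size = 10, 4
num_of_cache_files = int(numpy.ceil(float(size) / cache_size))  # 3 files
data_orders = [list(range(cache_size)) for _ in range(num_of_cache_files)]

if size % cache_size != 0:
    # The last file holds only the remainder: 10 % 4 == 2 items.
    data_orders[-1] = data_orders[-1][0:size % cache_size]

print([len(o) for o in data_orders])  # [4, 4, 2]
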