Example #1
def _create_dataset(uri, batch_size, shuffle, no_image_normalization, cache_dir, overwrite_cache, create_cache_explicitly, prepare_data_iterator):
    class Dataset:
        pass
    dataset = Dataset()
    dataset.uri = uri
    dataset.normalize = not no_image_normalization

    if prepare_data_iterator:
        if cache_dir == '':
            cache_dir = None
        if cache_dir and create_cache_explicitly:
            if not os.path.exists(cache_dir) or len(os.listdir(cache_dir)) == 0 or overwrite_cache:
                if not os.path.exists(cache_dir):
                    os.mkdir(cache_dir)
                logger.log(99, 'Creating cache data for "' + uri + '"')
                with data_iterator_csv_dataset(uri, batch_size, shuffle, normalize=False, cache_dir=cache_dir) as di:
                    index = 0
                    while index < di.size:
                        progress('', (1.0 * di.position) / di.size)
                        di.next()
                        index += batch_size
            dataset.data_iterator = (lambda: data_iterator_cache(
                cache_dir, batch_size, shuffle, normalize=dataset.normalize))
        elif not cache_dir or overwrite_cache or not os.path.exists(cache_dir) or len(os.listdir(cache_dir)) == 0:
            if cache_dir and not os.path.exists(cache_dir):
                os.mkdir(cache_dir)
            dataset.data_iterator = (lambda: data_iterator_csv_dataset(
                uri, batch_size, shuffle, normalize=dataset.normalize, cache_dir=cache_dir))
        else:
            dataset.data_iterator = (lambda: data_iterator_cache(
                cache_dir, batch_size, shuffle, normalize=dataset.normalize))
    else:
        dataset.data_iterator = None
    return dataset
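A minimal usage sketch for the factory above, assuming the module-level imports it relies on (os, logger, progress, data_iterator_csv_dataset, data_iterator_cache) are in scope; the CSV path, cache directory, and batch size are placeholders, not taken from the original code.

# Hypothetical call; 'train.csv' and './cache' are placeholder paths.
dataset = _create_dataset(
    uri='train.csv', batch_size=64, shuffle=True,
    no_image_normalization=False, cache_dir='./cache',
    overwrite_cache=False, create_cache_explicitly=True,
    prepare_data_iterator=True)

# dataset.data_iterator is a zero-argument factory; the iterator it returns
# is used as a context manager, as the surrounding examples also assume.
with dataset.data_iterator() as di:
    batch = di.next()  # tuple of arrays, one entry per variable in the CSV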
Example #2
    def _create_cache(self):
        # Save all data into cache file(s).
        self._position = 0
        logger.info('Creating cache start')

        percent = 0
        while self._position < self._data_source._size:
            current_percent = self._position * 10 // self._data_source._size
            progress('', self._position * 1.0 / self._data_source._size)
            if current_percent != percent:
                percent = current_percent
                logger.info('Creating cache {}% finished.'.format(percent * 10))

            self._store_data_to_cache_buffer(self._position)
            self._position += 1
        if len(self._cache_data) > 0:
            self._save_cache_to_file()
        logger.info('Creating cache end')
        # Adjust the data size to the reset position. In most cases this is a
        # multiple of the bunch (mini-batch) size.
        num_of_cache_files = int(numpy.ceil(
            float(self._data_source._size) / self._cache_size))
        self._cache_file_order = self._cache_file_order[
            0:num_of_cache_files]
        self._cache_file_data_orders = self._cache_file_data_orders[
            0:num_of_cache_files]
        if self._data_source._size % self._cache_size != 0:
            self._cache_file_data_orders[num_of_cache_files - 1] = self._cache_file_data_orders[
                num_of_cache_files - 1][0:self._data_source._size % self._cache_size]
Example #3
def _create_dataset(uri, batch_size, shuffle, no_image_normalization, cache_dir, overwrite_cache, create_cache_explicitly, prepare_data_iterator):
    class Dataset:
        pass
    dataset = Dataset()
    dataset.uri = uri
    dataset.normalize = not no_image_normalization

    if prepare_data_iterator:
        if cache_dir == '':
            cache_dir = None
        if cache_dir and create_cache_explicitly:
            if not os.path.exists(cache_dir) or overwrite_cache:
                if not os.path.exists(cache_dir):
                    os.mkdir(cache_dir)
                logger.info('Creating cache data for "' + uri + '"')
                with data_iterator_csv_dataset(uri, batch_size, shuffle, normalize=False, cache_dir=cache_dir) as di:
                    index = 0
                    while index < di.size:
                        progress('', (1.0 * di.position) / di.size)
                        di.next()
                        index += batch_size
            dataset.data_iterator = (lambda: data_iterator_cache(
                cache_dir, batch_size, shuffle, normalize=dataset.normalize))
        elif not cache_dir or overwrite_cache or not os.path.exists(cache_dir):
            if cache_dir and not os.path.exists(cache_dir):
                os.mkdir(cache_dir)
            dataset.data_iterator = (lambda: data_iterator_csv_dataset(
                uri, batch_size, shuffle, normalize=dataset.normalize, cache_dir=cache_dir))
        else:
            dataset.data_iterator = (lambda: data_iterator_cache(
                cache_dir, batch_size, shuffle, normalize=dataset.normalize))
    else:
        dataset.data_iterator = None
    return dataset
Example #4
def train_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)

    class TrainConfig:
        pass

    config = TrainConfig()
    info = load.load(files)

    logger.log(99, 'Train with contexts {}'.format(available_contexts))

    config.global_config = info.global_config
    config.training_config = info.training_config

    class OptConfig:
        pass

    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    class MonConfig:
        pass

    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    # Training
    max_iter = config.training_config.max_epoch * \
        config.training_config.iter_per_epoch
    if max_iter > 0:

        data_iterators = {'optimizer': {}, 'monitor': {}}
        with ExitStack() as stack:
            for name, o in config.optimizers.items():
                o.data_iterator = stack.enter_context(
                    o.optimizer.data_iterator())
            for name, m in config.monitors.items():
                m.data_iterator = stack.enter_context(
                    m.monitor.data_iterator())
            train(args, config)

    else:
        # save parameters without training (0 epoch learning)
        save_parameters(os.path.join(args.outdir, 'parameters.h5'))

    logger.log(99, 'Training Completed.')
    progress(None)
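For reference, a hedged sketch of invoking this entry point programmatically, assuming an argparse-style namespace that provides the attributes the function reads (config, param, outdir); the file names are placeholders.

from argparse import Namespace

# Hypothetical arguments; 'net.nntxt' and 'params.h5' are placeholder files.
args = Namespace(config='net.nntxt', param='params.h5', outdir='result')
train_command(args)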
Example #5
def profile_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)

    class TrainConfig:
        pass

    config = TrainConfig()
    info = load.load(files)

    config.global_config = info.global_config
    config.training_config = info.training_config

    class OptConfig:
        pass

    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    class MonConfig:
        pass

    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    ext_module = import_extension_module(
        config.global_config.default_context.backend[0].split(':')[0])

    def synchronize():
        return ext_module.synchronize(
            device_id=config.global_config.default_context.device_id)

    result_array = [['time in ms']]

    # Profile Optimizer
    with ExitStack() as stack:
        for name, o in config.optimizers.items():
            o.data_iterator = stack.enter_context(o.optimizer.data_iterator())
        result_array = profile_optimizer(config, result_array, synchronize)

    # Write profiling result
    import csv
    with open(args.outdir + os.sep + 'profile.csv', 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerows(result_array)

    logger.log(99, 'Profile Completed.')
    progress(None)
    return True
Example #6
    def create(self,
               output_cache_dirname,
               normalize=True,
               cache_file_name_prefix='cache'):

        self._normalize = normalize
        self._cache_file_name_prefix = cache_file_name_prefix

        self._cache_file_format = nnabla_config.get('DATA_ITERATOR',
                                                    'cache_file_format')
        logger.info('Cache file format is {}'.format(self._cache_file_format))

        self._cache_dir = output_cache_dirname

        progress(None)

        self._cache_data = []
        for self._position in range(self._size):
            progress('Create cache', self._position * 1.0 / self._size)
            self._file.seek(self._line_positions[self._order[self._position]])
            line = self._file.readline().decode('utf-8')
            csvreader = csv.reader([line])
            row = next(csvreader)
            self._cache_data.append(tuple(self._process_row(row)))

            if len(self._cache_data) >= self._cache_size:
                self._save_cache()
                self._cache_data = []

        self._save_cache()

        # Create Index
        index_filename = os.path.join(self._cache_dir, "cache_index.csv")
        with open(index_filename, 'w') as f:
            writer = csv.writer(f, lineterminator='\n')
            for fn, orders in zip(self._cache_file_names,
                                  self._cache_file_data_orders):
                writer.writerow((os.path.basename(fn), len(orders)))
        # Create Info
        if self._cache_file_format == ".npy":
            info_filename = os.path.join(self._cache_dir, "cache_info.csv")
            with open(info_filename, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                for variable in self._variables:
                    writer.writerow((variable, ))

        # Create original.csv
        if self._original_source_uri is not None:
            shutil.copy(self._original_source_uri,
                        os.path.join(self._cache_dir, "original.csv"))

        # Create order.csv
        if self._order is not None and \
                self._original_order is not None:
            with open(os.path.join(self._cache_dir, "order.csv"), 'w') as o:
                writer = csv.writer(o, lineterminator='\n')
                for orders in zip(self._original_order, self._order):
                    writer.writerow(list(orders))
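A small sketch of reading back the cache_index.csv written above; the directory name is a placeholder, and the two-column layout (cache file name, number of rows) mirrors the writer in this example.

import csv
import os

cache_dir = './cache'  # placeholder for the directory passed as output_cache_dirname
with open(os.path.join(cache_dir, 'cache_index.csv')) as f:
    for cache_file, num_rows in csv.reader(f):
        print(cache_file, int(num_rows))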
Example #7
def train_command(args):
    logger.log(99, 'Train with contexts {}'.format(available_contexts))

    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)

    class TrainConfig:
        pass
    config = TrainConfig()
    info = load.load(files)

    config.global_config = info.global_config
    config.training_config = info.training_config

    class OptConfig:
        pass
    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    class MonConfig:
        pass
    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    # Training
    max_iter = config.training_config.max_epoch * \
        config.training_config.iter_per_epoch
    if max_iter > 0:

        data_iterators = {'optimizer': {}, 'monitor': {}}
        with ExitStack() as stack:
            for name, o in config.optimizers.items():
                o.data_iterator = stack.enter_context(
                    o.optimizer.data_iterator())
            for name, m in config.monitors.items():
                m.data_iterator = stack.enter_context(
                    m.monitor.data_iterator())
            train(args, config)

    else:
        # save parameters without training (0 epoch learning)
        save_parameters(os.path.join(
            args.outdir, 'parameters.h5'))

    logger.log(99, 'Training Completed.')
    progress(None)
Example #8
def profile_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)

    class TrainConfig:
        pass
    config = TrainConfig()
    info = load.load(files)

    config.global_config = info.global_config
    config.training_config = info.training_config

    class OptConfig:
        pass
    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    class MonConfig:
        pass
    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    result_array = [['time in ms']]

    # Profile Optimizer
    with ExitStack() as stack:
        for name, o in config.optimizers.items():
            o.data_iterator = stack.enter_context(
                o.optimizer.data_iterator())
        result_array = profile_optimizer(config, result_array)

    # Write profiling result
    import csv
    with open(args.outdir + os.sep + 'profile.csv', 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerows(result_array)

    logger.log(99, 'Profile Completed.')
    progress(None)
Example #9
    def _create_cache(self):
        # Save all data into cache file(s).
        self._cache_positions = []
        self._position = 0

        percent = 0
        while self._position < self._data_source._size:
            if single_or_rankzero():
                progress('Create cache',
                         self._position * 1.0 / self._data_source._size)

            self._store_data_to_cache_buffer(self._position)
            self._position += 1
        if len(self._cache_positions) > 0:
            self._save_cache_to_file()
        # Adjust the data size to the reset position. In most cases this is a
        # multiple of the bunch (mini-batch) size.
        num_of_cache_files = int(
            numpy.ceil(float(self._data_source._size) / self._cache_size))
        self._cache_file_order = self._cache_file_order[0:num_of_cache_files]
        self._cache_file_data_orders = self._cache_file_data_orders[
            0:num_of_cache_files]
        if self._data_source._size % self._cache_size != 0:
            self._cache_file_data_orders[num_of_cache_files -
                                         1] = self._cache_file_data_orders[
                                             num_of_cache_files -
                                             1][0:self._data_source._size %
                                                self._cache_size]
        index_filename = os.path.join(self._cache_dir, "cache_index.csv")
        with open(index_filename, 'w') as f:
            writer = csv.writer(f, lineterminator='\n')
            for fn, orders in zip(self._cache_file_names,
                                  self._cache_file_data_orders):
                writer.writerow((os.path.basename(fn), len(orders)))

        if self._cache_file_format == ".npy":
            info_filename = os.path.join(self._cache_dir, "cache_info.csv")
            with open(info_filename, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                for variable in self._variables:
                    writer.writerow((variable, ))
Example #10
    def create(self,
               output_cache_dirname,
               normalize=True,
               cache_file_name_prefix='cache'):

        self._normalize = normalize
        self._cache_file_name_prefix = cache_file_name_prefix
        self._cache_dir = output_cache_dirname

        self._cache_file_format = nnabla_config.get('DATA_ITERATOR',
                                                    'cache_file_format')
        logger.info('Cache file format is {}'.format(self._cache_file_format))

        progress(None)

        csv_position_and_data = []
        csv_row = []
        for _position in range(self._size):
            csv_row.append(self._csv_data[self._order[_position]])
            if len(csv_row) == self._cache_size:
                csv_position_and_data.append((_position, csv_row))
                csv_row = []
        if len(csv_row):
            csv_position_and_data.append((self._size - 1, csv_row))

        progress('Create cache', 0)
        with closing(ThreadPool(processes=self._num_of_threads)) as pool:
            cache_index_rows = pool.map(self._save_cache,
                                        csv_position_and_data)
        progress('Create cache', 1.0)

        # Create Index
        index_filename = os.path.join(output_cache_dirname, "cache_index.csv")
        with open(index_filename, 'w') as f:
            writer = csv.writer(f, lineterminator='\n')
            for row in cache_index_rows:
                if row:
                    # row: (file_path, data_nums)
                    writer.writerow((os.path.basename(row[0]), row[1]))

        # Create Info
        if self._cache_file_format == ".npy":
            info_filename = os.path.join(output_cache_dirname,
                                         "cache_info.csv")
            with open(info_filename, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                for variable in self._variables:
                    writer.writerow((variable, ))

        # Create original.csv
        if self._original_source_uri is not None:
            shutil.copy(self._original_source_uri,
                        os.path.join(output_cache_dirname, "original.csv"))

        # Create order.csv
        if self._order is not None and \
                self._original_order is not None:
            with open(os.path.join(output_cache_dirname, "order.csv"),
                      'w') as o:
                writer = csv.writer(o, lineterminator='\n')
                for orders in zip(self._original_order, self._order):
                    writer.writerow(list(orders))
Example #11
def train_command(args):

    if single_or_rankzero():
        configure_progress(os.path.join(args.outdir, 'progress.txt'))

    info = load.load([args.config], exclude_parameter=True)

    # Check whether any dataset URI is empty.
    dataset_error = False
    for dataset in info.datasets.values():
        if dataset.uri.strip() == '':
            dataset_error = True
    if dataset_error:
        logger.log(99, 'Fatal error. Dataset URI is empty.')
        return False

    class TrainConfig:
        pass

    config = TrainConfig()
    config.timelimit = -1
    if args.param:
        load.load([args.param], parameter_only=True)

    config.global_config = info.global_config
    config.training_config = info.training_config

    if single_or_rankzero():
        logger.log(99, 'Train with contexts {}'.format(available_contexts))

    class OptConfig:
        pass

    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    class MonConfig:
        pass

    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    # Training
    comm = current_communicator()
    config.training_config.iter_per_epoch //= comm.size if comm else 1
    max_iteration = config.training_config.max_epoch * \
        config.training_config.iter_per_epoch

    global _save_parameter_info
    _save_parameter_info = {}
    _, config_ext = os.path.splitext(args.config)
    if config_ext == '.prototxt' or config_ext == '.nntxt':
        _save_parameter_info['config'] = args.config
    elif config_ext == '.nnp':
        with zipfile.ZipFile(args.config, 'r') as nnp:
            for name in nnp.namelist():
                _, ext = os.path.splitext(name)
                if ext == '.nntxt' or ext == '.prototxt':
                    nnp.extract(name, args.outdir)
                    _save_parameter_info['config'] = os.path.join(
                        args.outdir, name)

    result = False
    if max_iteration > 0:
        data_iterators = {'optimizer': {}, 'monitor': {}}
        rng = np.random.RandomState(comm.rank if comm else 0)
        with ExitStack() as stack:
            for name, o in config.optimizers.items():
                o.data_iterator = stack.enter_context(
                    o.optimizer.data_iterator())
                if comm and comm.size > 1:
                    o.data_iterator = o.data_iterator.slice(
                        rng, comm.size, comm.rank)
            for name, m in config.monitors.items():
                m.data_iterator = stack.enter_context(
                    m.monitor.data_iterator())
                if comm and comm.size > 1:
                    m.data_iterator = m.data_iterator.slice(
                        rng, comm.size, comm.rank)
            result = _train(args, config)
    else:
        # save parameters without training (0 epoch learning)
        logger.log(99, '0 epoch learning. (Just save parameter.)')
        if single_or_rankzero():
            _save_parameters(args, 'current', 0, True)
        result = True

    if single_or_rankzero():
        if result:
            logger.log(99, 'Training Completed.')
        else:
            logger.log(99, 'Training Incomplete.')
        progress(None)

    return True
Example #12
def _update(iter, config, cost):
    loaded_datas = {}
    is_first_optimizer = True
    for opt in config.optimizers.values():
        o = opt.optimizer
        # Load dataset
        di = opt.data_iterator
        if o.data_iterator not in loaded_datas:
            loaded_datas[o.data_iterator] = di.next()
        datas = loaded_datas[o.data_iterator]
        for v, d in o.dataset_assign.items():
            dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                0].inputs else None
            let_data_to_variable(v.variable_instance, datas[
                                 di.variables.index(d)], ctx=dest_context)

        # Generate data
        for v, generator in o.generator_assign.items():
            dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                0].inputs else None
            let_data_to_variable(v.variable_instance,
                                 data=generator(v.shape), ctx=dest_context)

        # Monitor the loss before the forward pass so that the next input data
        # can be prepared while the GPU is busy
        if cost.variables:
            for l in cost.variables:
                cost.sum_iter += np.mean(l.variable_instance.d)
            if is_first_optimizer:
                is_first_optimizer = False
                progress("Training : cost={0:0.6f}".format(cost.sum_iter),
                         (iter % config.training_config.iter_per_epoch) * 1.0 / config.training_config.iter_per_epoch)
                cost.sum_epoch += cost.sum_iter
                cost.sum_iter = 0.0

        # Forward
        o.network.forward(o.forward_sequence)

        # Backward
        o.network.backward(o.backward_sequence, iter % o.update_interval == 0)

        # Update
        if iter % o.update_interval == o.update_interval - 1:
            if o.weight_decay > 0:
                o.solver.weight_decay(o.weight_decay)
            o.solver.update()

        if o.lr_decay != 1.0 and iter % o.lr_decay_interval == o.lr_decay_interval - 1:
            o.solver.set_learning_rate(o.solver.learning_rate() * o.lr_decay)

        # Reserve monitor loss
        cost.variables = o.loss_variables

    # Monitor loss at the end of iteration
    if iter % config.training_config.iter_per_epoch == config.training_config.iter_per_epoch - 1 and cost.variables:
        for l in cost.variables:
            cost.sum_iter += np.mean(l.variable_instance.d)
        cost.sum_epoch += cost.sum_iter
        cost.variables = None
        cost.sum_iter = 0.0

    return cost
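A minimal sketch of the loop that would drive this _update variant, assuming a config assembled as in Examples #4 and #7 and a bare Cost holder with the fields this version reads (sum_iter, sum_epoch, variables); an illustration only, not the library's actual training loop.

class Cost:
    pass

cost = Cost()
cost.sum_iter = 0.0
cost.sum_epoch = 0.0
cost.variables = None

max_iter = config.training_config.max_epoch * \
    config.training_config.iter_per_epoch
for it in range(max_iter):
    cost = _update(it, config, cost)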
Example #13
    def create(self,
               output_cache_dirname,
               normalize=True,
               cache_file_name_prefix='cache'):

        cache_file_format = nnabla_config.get('DATA_ITERATOR',
                                              'cache_file_format')
        logger.info('Cache file format is {}'.format(cache_file_format))

        progress(None)

        cache_file_name_and_data_nums_list = multiprocessing.Manager().list()

        csv_position_and_data = []
        csv_row = []
        for _position in range(self._size):
            csv_row.append(self._csv_data[self._order[_position]])
            if len(csv_row) == self._cache_size:
                csv_position_and_data.append((_position, csv_row))
                csv_row = []
        if len(csv_row):
            csv_position_and_data.append((self._size - 1, csv_row))

        self_args = {
            '_cache_file_name_prefix': cache_file_name_prefix,
            '_cache_file_format': cache_file_format,
            '_cache_file_name_and_data_nums_list':
            cache_file_name_and_data_nums_list,
            '_output_cache_dirname': output_cache_dirname,
            '_variables': self._variables,
            '_filereader': self._filereader,
            '_normalize': normalize,
            '_columns': self._columns,
            '_cache_file_count': len(csv_position_and_data)
        }

        # Notice:
        #   We have to call gc.collect() here because Python may run a
        #   garbage collection pass inside a child process and release
        #   objects created by the parent process, which can then touch
        #   CUDA APIs that were never initialized in the child process.
        #   Calling gc.collect() here avoids such cases.
        gc.collect()

        progress('Create cache', 0)
        with closing(multiprocessing.Pool(self._process_num)) as pool:
            pool.map(multiprocess_save_cache,
                     ((i, self_args) for i in csv_position_and_data))
        progress('Create cache', 1.0)

        logger.info('The total of cache files is {}'.format(
            len(cache_file_name_and_data_nums_list)))

        # Create Index
        index_filename = os.path.join(output_cache_dirname, "cache_index.csv")
        cache_index_rows = sorted(cache_file_name_and_data_nums_list,
                                  key=lambda x: x[0])
        with open(index_filename, 'w') as f:
            writer = csv.writer(f, lineterminator='\n')
            for file_name, data_nums in cache_index_rows:
                writer.writerow((os.path.basename(file_name), data_nums))

        # Create Info
        if cache_file_format == ".npy":
            info_filename = os.path.join(output_cache_dirname,
                                         "cache_info.csv")
            with open(info_filename, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                for variable in self._variables:
                    writer.writerow((variable, ))

        # Create original.csv
        if self._original_source_uri is not None:
            shutil.copy(self._original_source_uri,
                        os.path.join(output_cache_dirname, "original.csv"))

        # Create order.csv
        if self._order is not None and \
                self._original_order is not None:
            with open(os.path.join(output_cache_dirname, "order.csv"),
                      'w') as o:
                writer = csv.writer(o, lineterminator='\n')
                for orders in zip(self._original_order, self._order):
                    writer.writerow(list(orders))
Example #14
def _update(iter, config, cost):
    comm = current_communicator()
    loaded_data = {}
    is_first_optimizer = True

    def _sum_cost():
        if comm:
            # logger.log(99, "Calc cost with communicator")
            var = [nn.NdArray()]
            var[0].data = cost.sum_iteration
            _all_reduce(comm, var, division=False, inplace=True)
            cost.sum_epoch += var[0].data
            cost.num_iteration += comm.size
        else:
            cost.sum_epoch += cost.sum_iteration
            cost.num_iteration += 1

    for opt in config.optimizers.values():
        o = opt.optimizer
        # Load dataset
        di = opt.data_iterator
        if o.data_iterator not in loaded_data:
            loaded_data[o.data_iterator] = di.next()
        data = loaded_data[o.data_iterator]
        for v, d in o.dataset_assign.items():
            dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                0].inputs else None
            let_data_to_variable(v.variable_instance,
                                 data[di.variables.index(d)],
                                 ctx=dest_context,
                                 data_name=d,
                                 variable_name=v.name)

        # Generate data
        for v, generator in o.generator_assign.items():
            dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                0].inputs else None
            let_data_to_variable(v.variable_instance,
                                 data=generator(v.shape),
                                 ctx=dest_context,
                                 variable_name=v.name)

        # Monitor the loss before the forward pass so that the next input data
        # can be prepared while the GPU is busy
        if cost.variables:
            for l in cost.variables:
                cost.sum_iteration += np.mean(l.variable_instance.d)
                l.variable_instance.data.zero()
            if is_first_optimizer:
                is_first_optimizer = False
                _sum_cost()
                if single_or_rankzero():
                    progress(
                        "Training : cost={0:0.6f}".format(cost.sum_iteration),
                        (iter % config.training_config.iter_per_epoch) * 1.0 /
                        config.training_config.iter_per_epoch)
                cost.sum_iteration = 0.0

        # Forward
        o.network.forward(o.forward_sequence)

        # Backward
        o.network.backward(o.backward_sequence, iter % o.update_interval == 0)

        # Update
        if iter % o.update_interval == o.update_interval - 1:
            if o.weight_decay > 0:
                o.solver.weight_decay(o.weight_decay)

            if o.comm:  # Updated param with communicator
                params = [x.grad for x in o.parameters.values()]
                _all_reduce(o.comm, params, division=True, inplace=True)

            if o.scheduler is not None:
                o.solver.set_learning_rate(o.scheduler.get_learning_rate(iter))
            o.solver.update()
        # Sync weights occasionally
        if iter % 10 == 9:  # TODO: change the interval
            if o.comm:
                params = [x.data for x in o.parameters.values()]
                _all_reduce(o.comm, params, division=True, inplace=True)

        # Reserve monitor loss
        cost.variables = o.loss_variables

    # Monitor loss at the end of iteration
    if iter % config.training_config.iter_per_epoch == config.training_config.iter_per_epoch - 1 and cost.variables:
        for l in cost.variables:
            cost.sum_iteration += np.mean(l.variable_instance.d)
            l.variable_instance.data.zero()
        _sum_cost()
        cost.variables = None
        cost.sum_iteration = 0.0

    return cost
Example #15
def _update(iter, config, cost):
    loaded_datas = {}
    is_first_optimizer = True
    for opt in config.optimizers.values():
        o = opt.optimizer
        # Load dataset
        di = opt.data_iterator
        if o.data_iterator not in loaded_datas:
            loaded_datas[o.data_iterator] = di.next()
        datas = loaded_datas[o.data_iterator]
        for v, d in o.dataset_assign.items():
            dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                0].inputs else None
            let_data_to_variable(v.variable_instance,
                                 datas[di.variables.index(d)],
                                 ctx=dest_context)

        # Generate data
        for v, generator in o.generator_assign.items():
            dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                0].inputs else None
            let_data_to_variable(v.variable_instance,
                                 data=generator(v.shape),
                                 ctx=dest_context)

        # Monitor the loss before the forward pass so that the next input data
        # can be prepared while the GPU is busy
        if cost.variables:
            for l in cost.variables:
                cost.sum_iter += np.mean(l.variable_instance.d)
            if is_first_optimizer:
                is_first_optimizer = False
                progress("Training : cost={0:0.6f}".format(cost.sum_iter),
                         (iter % config.training_config.iter_per_epoch) * 1.0 /
                         config.training_config.iter_per_epoch)
                cost.sum_epoch += cost.sum_iter
                cost.sum_iter = 0.0

        # Forward
        o.network.forward(o.forward_sequence)

        # Backward
        o.network.backward(o.backward_sequence, iter % o.update_interval == 0)

        # Update
        if iter % o.update_interval == o.update_interval - 1:
            if o.weight_decay > 0:
                o.solver.weight_decay(o.weight_decay)
            o.solver.update()

        if o.lr_decay != 1.0 and iter % o.lr_decay_interval == o.lr_decay_interval - 1:
            o.solver.set_learning_rate(o.solver.learning_rate() * o.lr_decay)

        # Reserve monitor loss
        cost.variables = o.loss_variables

    # Monitor loss at the end of iteration
    if iter % config.training_config.iter_per_epoch == config.training_config.iter_per_epoch - 1 and cost.variables:
        for l in cost.variables:
            cost.sum_iter += np.mean(l.variable_instance.d)
        cost.sum_epoch += cost.sum_iter
        cost.variables = None
        cost.sum_iter = 0.0

    return cost
Example #16
def _update(iter, config, cost):
    comm = current_communicator()

    loaded_data = {}
    is_first_optimizer = True

    def _sum_cost():
        if comm:
            # logger.log(99, "Calc cost with communicator")
            var = [nn.NdArray()]
            var[0].data = cost.sum_iteration
            _all_reduce(comm, var, division=False, inplace=True)
            cost.sum_epoch += var[0].data
            cost.num_iteration += comm.size
        else:
            cost.sum_epoch += cost.sum_iteration
            cost.num_iteration += 1

    def _get_reserved_variable(shape, reserved_variable_name, iter,
                               iter_per_epoch, max_epoch):
        if reserved_variable_name == "%iter":
            value = iter
        elif reserved_variable_name == "%max_iter":
            value = max_epoch * iter_per_epoch
        elif reserved_variable_name == "%epoch":
            value = iter // iter_per_epoch
        elif reserved_variable_name == "%epochf":
            value = iter * 1.0 / iter_per_epoch
        elif reserved_variable_name == "%max_epoch":
            value = max_epoch
        elif reserved_variable_name == "%progress":
            value = (iter * 1.0 / iter_per_epoch) / max_epoch
        else:
            raise ValueError(
                "Unknown reserved variable {}".format(reserved_variable_name))
        return value

    for opt in config.optimizers.values():
        o = opt.optimizer
        if (o.start_iter == 0
                or iter + 1 >= o.start_iter) and (o.end_iter == 0
                                                  or iter + 1 <= o.end_iter):
            # Load dataset
            data = OrderedDict()
            for di in opt.data_iterators:
                if di not in loaded_data:
                    loaded_data[di] = di.next()
                data.update(zip(di.variables, loaded_data[di]))
            for v, d in o.dataset_assign.items():
                dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                    0].inputs else None
                if d not in data and d[0] == "%":
                    value = _get_reserved_variable(
                        v.variable_instance.shape, d, iter,
                        config.training_config.iter_per_epoch,
                        config.training_config.max_epoch)
                    v.variable_instance.data.fill(value)
                elif d in data:
                    let_data_to_variable(v.variable_instance,
                                         data[d],
                                         ctx=dest_context,
                                         data_name=d,
                                         variable_name=v.name)
                else:
                    raise ValueError(
                        'Variable "{}" is not found in dataset "{}", optimizer "{}"'
                        .format(d, ', '.join(o.data_iterators.keys()), o.name))

            # Generate data
            for v, generator in o.generator_assign.items():
                dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance,
                                     data=generator(v.shape),
                                     ctx=dest_context,
                                     variable_name=v.name)

            # Monitor the loss before the forward pass so that the next input
            # data can be prepared while the GPU is busy
            if cost.variables:
                for l in cost.variables:
                    cost.sum_iteration += np.mean(l.variable_instance.d)
                    # l.variable_instance.data.zero()
                if is_first_optimizer:
                    is_first_optimizer = False
                    _sum_cost()
                    if single_or_rankzero():
                        progress(
                            "Training : cost={0:0.6f}".format(
                                cost.sum_iteration),
                            (iter % config.training_config.iter_per_epoch) *
                            1.0 / config.training_config.iter_per_epoch)
                    cost.sum_iteration = 0.0

            with nodeTimeCollector.collect_cost_time(comm, iter):
                # Forward
                o.network.forward(o.forward_sequence)

                # Backward
                o.network.backward(o.backward_sequence,
                                   iter % o.update_interval == 0)

            # Update
            if iter % o.update_interval == o.update_interval - 1:
                if o.weight_decay > 0:
                    o.solver.weight_decay(o.weight_decay)

                if o.comm:  # Updated param with communicator
                    params = [x.grad for x in o.parameters.values()]
                    _all_reduce(o.comm, params, division=True, inplace=True)

                if o.scheduler is not None:
                    o.solver.set_learning_rate(
                        o.scheduler.get_learning_rate(iter))
                o.solver.update()
            # Sync weights occasionally
            if iter % 10 == 9:  # TODO: change the interval
                if o.comm:
                    params = [x.data for x in o.parameters.values()]
                    _all_reduce(o.comm, params, division=True, inplace=True)

            # Reserve monitor loss
            cost.variables = o.loss_variables

    # Monitor loss at the end of epoch
    if iter % config.training_config.iter_per_epoch == config.training_config.iter_per_epoch - 1 and cost.variables:
        for l in cost.variables:
            cost.sum_iteration += np.mean(l.variable_instance.d)
            # l.variable_instance.data.zero()
        _sum_cost()
        cost.variables = None
        cost.sum_iteration = 0.0

    return cost
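The reserved-variable branch above maps %-prefixed names onto iteration counters; a standalone check of that mapping with sample numbers (iter=150, iter_per_epoch=100, max_epoch=10), derived from the branches of _get_reserved_variable rather than taken from the original source.

# Sample numbers only; each assertion restates one branch of the helper above.
iter_, iter_per_epoch, max_epoch = 150, 100, 10
assert iter_ // iter_per_epoch == 1                        # "%epoch"
assert max_epoch * iter_per_epoch == 1000                  # "%max_iter"
assert iter_ * 1.0 / iter_per_epoch == 1.5                 # "%epochf"
assert (iter_ * 1.0 / iter_per_epoch) / max_epoch == 0.15  # "%progress"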
Example #17
    def create(self,
               output_cache_dirname,
               normalize=True,
               cache_file_name_prefix='cache'):

        self._normalize = normalize
        self._cache_file_name_prefix = cache_file_name_prefix

        self._cache_file_format = nnabla_config.get('DATA_ITERATOR',
                                                    'cache_file_format')
        logger.info('Cache file format is {}'.format(self._cache_file_format))

        self._cache_dir = output_cache_dirname

        progress(None)

        self._cache_file_name_and_data_nums_q = multiprocessing.Manager(
        ).Queue()

        self._csv_position_and_data = []
        csv_row = []
        for _position in range(self._size):
            csv_row.append(self._csv_data[self._order[_position]])
            if len(csv_row) == self._cache_size:
                self._csv_position_and_data.append((_position, csv_row))
                csv_row = []
        if len(csv_row):
            self._csv_position_and_data.append((self._size - 1, csv_row))

        self_args = {
            '_cache_file_name_prefix': self._cache_file_name_prefix,
            '_cache_file_format': self._cache_file_format,
            '_cache_file_name_and_data_nums_q':
            self._cache_file_name_and_data_nums_q,
            '_cache_dir': self._cache_dir,
            '_variables': self._variables,
            '_filereader': self._filereader,
            '_normalize': self._normalize,
            '_columns': self._columns,
            '_cache_file_count': len(self._csv_position_and_data)
        }

        progress('Create cache', 0)
        with closing(multiprocessing.Pool(self._process_num)) as pool:
            pool.map(multiprocess_save_cache,
                     ((i, self_args) for i in self._csv_position_and_data))
        progress('Create cache', 1.0)

        logger.info('The total of cache files is {}'.format(
            self._cache_file_name_and_data_nums_q.qsize()))

        # Create Index
        index_filename = os.path.join(self._cache_dir, "cache_index.csv")
        cache_index_rows = []
        while True:
            try:
                cache_index_rows.append(
                    self._cache_file_name_and_data_nums_q.get(block=False))
            except Exception:
                break
        cache_index_rows = sorted(cache_index_rows, key=lambda x: x[0])
        with open(index_filename, 'w') as f:
            writer = csv.writer(f, lineterminator='\n')
            for file_name, data_nums in cache_index_rows:
                writer.writerow((os.path.basename(file_name), data_nums))

        # Create Info
        if self._cache_file_format == ".npy":
            info_filename = os.path.join(self._cache_dir, "cache_info.csv")
            with open(info_filename, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                for variable in self._variables:
                    writer.writerow((variable, ))

        # Create original.csv
        if self._original_source_uri is not None:
            shutil.copy(self._original_source_uri,
                        os.path.join(self._cache_dir, "original.csv"))

        # Create order.csv
        if self._order is not None and \
                self._original_order is not None:
            with open(os.path.join(self._cache_dir, "order.csv"), 'w') as o:
                writer = csv.writer(o, lineterminator='\n')
                for orders in zip(self._original_order, self._order):
                    writer.writerow(list(orders))
Example #18
    def _create_cache(self):
        # Save all data into cache file(s).
        self._cache_positions = []
        self._position = 0

        percent = 0

        if single_or_rankzero():
            progress(None)

        while self._position < self._data_source._size:

            if single_or_rankzero():
                progress('Create cache',
                         self._position * 1.0 / self._data_source._size)

            self._store_data_to_cache_buffer(self._position)
            self._position += 1
        if len(self._cache_positions) > 0:
            self._save_cache_to_file()

        if single_or_rankzero():
            progress(None)

        # Adjust the data size to the reset position. In most cases this is a
        # multiple of the bunch (mini-batch) size.
        num_of_cache_files = int(
            numpy.ceil(float(self._data_source._size) / self._cache_size))
        self._cache_file_order = self._cache_file_order[0:num_of_cache_files]
        self._cache_file_data_orders = self._cache_file_data_orders[
            0:num_of_cache_files]
        if self._data_source._size % self._cache_size != 0:
            self._cache_file_data_orders[num_of_cache_files -
                                         1] = self._cache_file_data_orders[
                                             num_of_cache_files -
                                             1][0:self._data_source._size %
                                                self._cache_size]

        # Create Index
        index_filename = os.path.join(self._cache_dir, "cache_index.csv")
        with open(index_filename, 'w') as f:
            writer = csv.writer(f, lineterminator='\n')
            for fn, orders in zip(self._cache_file_names,
                                  self._cache_file_data_orders):
                writer.writerow((os.path.basename(fn), len(orders)))
        # Create Info
        if self._cache_file_format == ".npy":
            info_filename = os.path.join(self._cache_dir, "cache_info.csv")
            with open(info_filename, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                for variable in self._variables:
                    writer.writerow((variable, ))

        # Create original.csv
        if self._data_source._original_source_uri is not None:
            fr = FileReader(self._data_source._original_source_uri)
            with fr.open() as f:
                csv_lines = [x.decode('utf-8') for x in f.readlines()]
                with open(os.path.join(self._cache_dir, "original.csv"),
                          'w') as o:
                    for l in csv_lines:
                        o.write(l)

        # Create order.csv
        if self._data_source._order is not None and \
                self._data_source._original_order is not None:
            with open(os.path.join(self._cache_dir, "order.csv"), 'w') as o:
                writer = csv.writer(o, lineterminator='\n')
                for orders in zip(self._data_source._original_order,
                                  self._data_source._order):
                    writer.writerow(list(orders))
Example #19
def compare_with_cpu_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))

    class TrainConfig:
        pass

    class OptConfig:
        pass

    class MonConfig:
        pass

    # Load config with current context
    files = []
    files.append(args.config)

    with nn.parameter_scope('current'):
        info = load.load(files)
        parameters = get_parameters(grad_only=False)

    config = TrainConfig()
    config.global_config = info.global_config
    config.training_config = info.training_config

    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    # Load config with cpu context
    files = []
    files.append(args.config2)

    with nn.parameter_scope('cpu'):
        info_cpu = load.load(files)
        cpu_parameters = get_parameters(grad_only=False)

    config_cpu = TrainConfig()
    config_cpu.global_config = info_cpu.global_config
    config_cpu.training_config = info_cpu.training_config

    config_cpu.optimizers = OrderedDict()
    for name, opt in info_cpu.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config_cpu.optimizers[name] = o

    config_cpu.monitors = OrderedDict()
    for name, mon in info_cpu.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config_cpu.monitors[name] = m

    result_array = [['1-Correl']]

    # Profile Optimizer
    with ExitStack() as stack:
        for name, o in config.optimizers.items():
            o.data_iterator = stack.enter_context(o.optimizer.data_iterator())
        for name, o in config_cpu.optimizers.items():
            o.data_iterator = stack.enter_context(o.optimizer.data_iterator())
        result_array = compare_optimizer(config, parameters, config_cpu,
                                         cpu_parameters, result_array)

    # Write profiling result
    import csv
    with open(args.outdir + os.sep + 'compare_with_cpu.csv', 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerows(result_array)

    logger.log(99, 'Compare with CPU Completed.')
    progress(None)
    return True
Example #20
def _train(args, config):
    global _save_parameter_info
    comm = current_communicator()
    _CGLOAD_LOG_INTERVAL = 20

    best_epoch = None
    best_error = None
    last_epoch = 0
    if args.resume:
        last_epoch, best_epoch, best_error = _get_current_parameter(args)
        if best_epoch is not None:
            logger.log(
                99, "Best error {} recorded at epoch {} in previous training.".
                format(best_error, best_epoch))
            if best_epoch > last_epoch:
                logger.log(
                    99,
                    "Resumed epoch is {} but this training keep this result.".
                    format(last_epoch))
        logger.log(99, "Resume from epoch {}".format(last_epoch + 1))

    callback.update_status(('epoch.max', config.training_config.max_epoch))
    callback.update_status(
        ('epoch.current',
         last_epoch + 1 if last_epoch < config.training_config.max_epoch else
         config.training_config.max_epoch))

    max_iteration = config.training_config.max_epoch * \
        config.training_config.iter_per_epoch
    if single_or_rankzero():
        logger.log(
            99, 'Training epoch {} of {} begin'.format(
                last_epoch + 1, config.training_config.max_epoch))

    class Cost:
        pass

    cost = Cost()
    cost.sum_epoch = 0.0
    cost.num_iteration = 0
    cost.sum_iteration = 0.0
    cost.variables = None

    class TimeInfo:
        pass

    timeinfo = TimeInfo()
    timeinfo.past_time = 0
    timeinfo.estimate_time = 0
    timeinfo.last_past_time = None

    if max_iteration > 0:
        last_iteration = last_epoch * config.training_config.iter_per_epoch
        if last_iteration < max_iteration:

            timeinfo.start_time = time.time()
            timeinfo.last_epoch_start_time = timeinfo.start_time

            callback.update_status('processing', True, timeinfo.start_time)

            for iteration in range(last_iteration, max_iteration):

                # instant load measurement
                measure_cpu_gpu_instant_load()

                cost = _update(iteration, config, cost)

                if np.isnan(cost.sum_epoch) or np.isinf(cost.sum_epoch):
                    logger.log(99, 'Cost is NaN or Inf')
                    return False, False

                timeinfo = _calc_estimate_time(timeinfo, max_iteration,
                                               last_iteration, iteration + 1)
                callback.update_time_train(prediction=timeinfo.estimate_time)

                if 0 < config.timelimit < timeinfo.estimate_time:
                    logger.log(
                        99,
                        'Expected training time ({:.3f}s) will exceed time limit ({}s).'
                        .format(timeinfo.estimate_time, config.timelimit))
                    return False, False

                if (iteration +
                        1) % config.training_config.iter_per_epoch == 0:
                    last_past_time = -1
                    # End of epoch
                    epoch = iteration // config.training_config.iter_per_epoch + 1
                    cost_avg_epoch = cost.sum_epoch / cost.num_iteration if cost.num_iteration else 0
                    cost.sum_epoch = 0.0
                    cost.num_iteration = 0
                    monitoring_report = []

                    # Evaluation
                    error_str = ''
                    if epoch % config.training_config.monitor_interval == 0 or epoch <= 5:
                        best_error, error_str = _evaluate(
                            args, config, monitoring_report, best_error, epoch)

                    # Cpu/Gpu average load
                    cg_load_str = ''
                    cgload_log = ''
                    cg_load = get_cpu_gpu_average_load()
                    if cg_load:
                        cg_load_str = 'epoch {} average_load_matrix: {}'.format(
                            epoch, cg_load)
                        span = _calc_epoch_span(timeinfo)
                        if span > _CGLOAD_LOG_INTERVAL:
                            cgload_log = _format_cgload_log(cg_load)

                    if single_or_rankzero():
                        # Write to monitoring_report.yml
                        with open(os.path.join(args.outdir,
                                               'monitoring_report.yml'),
                                  'a') as f:
                            f.write('{}:\n'.format(epoch - 1))
                            f.write('  cost: {}\n'.format(cost_avg_epoch))
                            for s in monitoring_report:
                                f.write(s)

                        callback.update_status(
                            (['monitoring_report', epoch,
                              'cost'], cost_avg_epoch))

                        _save_parameters(args, 'current', epoch, config)

                        callback.update_status(('epoch.current', epoch))
                        callback.update_status()

                        logger.log(
                            99,
                            'epoch {} of {} cost={:.6f} {} time=({:.1f}s /{:.1f}s) {}'
                            .format(epoch, config.training_config.max_epoch,
                                    cost_avg_epoch, error_str,
                                    timeinfo.past_time, timeinfo.estimate_time,
                                    cgload_log))

                        if cg_load_str:
                            # cpu_gpu_average_load record at epoch level
                            callback.update_status(
                                (['cpu_gpu_epoch_load', epoch], cg_load))
                            progress(cg_load_str, 1)

                        if not callback.check_training_time(
                                args, config, timeinfo, epoch, last_epoch):
                            _save_parameters(args, 'current', epoch, config,
                                             True)
                            return False, True

            if single_or_rankzero():
                _save_parameters(args, 'current', epoch, config, True)
    return True, False
Example #21
    def create(self,
               output_cache_dirname,
               normalize=True,
               cache_file_name_prefix='cache'):

        self._normalize = normalize
        self._cache_file_name_prefix = cache_file_name_prefix

        self._cache_file_format = nnabla_config.get('DATA_ITERATOR',
                                                    'cache_file_format')
        logger.info('Cache file format is {}'.format(self._cache_file_format))

        self._cache_dir = output_cache_dirname

        progress(None)

        self._cache_file_order = []
        self._cache_file_data_orders = []
        self._cache_file_names = []

        self._cache_data = []
        progress('Create cache', 0)
        last_time = time.time()
        for self._position in range(self._size):
            if time.time() >= last_time + 1.0:
                progress('Create cache', self._position / self._size)
                last_time = time.time()
            self._file.seek(self._line_positions[self._order[self._position]])
            line = self._file.readline().decode('utf-8')
            csvreader = csv.reader([line])
            row = next(csvreader)
            self._cache_data.append(tuple(self._process_row(row)))

            if len(self._cache_data) >= self._cache_size:
                self._save_cache()
                self._cache_data = []

        self._save_cache()
        progress('Create cache', 1.0)

        # Adjust the data size to the reset position. In most cases this means
        # a multiple of the bunch (mini-batch) size.
        num_of_cache_files = int(
            numpy.ceil(float(self._size) / self._cache_size))
        self._cache_file_order = self._cache_file_order[0:num_of_cache_files]
        self._cache_file_data_orders = self._cache_file_data_orders[
            0:num_of_cache_files]
        if self._size % self._cache_size != 0:
            self._cache_file_data_orders[num_of_cache_files -
                                         1] = self._cache_file_data_orders[
                                             num_of_cache_files -
                                             1][0:self._size %
                                                self._cache_size]

        # Create Index
        index_filename = os.path.join(self._cache_dir, "cache_index.csv")
        with open(index_filename, 'w') as f:
            writer = csv.writer(f, lineterminator='\n')
            for fn, orders in zip(self._cache_file_names,
                                  self._cache_file_data_orders):
                writer.writerow((os.path.basename(fn), len(orders)))
        # Create Info
        if self._cache_file_format == ".npy":
            info_filename = os.path.join(self._cache_dir, "cache_info.csv")
            with open(info_filename, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                for variable in self._variables:
                    writer.writerow((variable, ))

        # Create original.csv
        if self._original_source_uri is not None:
            shutil.copy(self._original_source_uri,
                        os.path.join(self._cache_dir, "original.csv"))

        # Create order.csv
        if self._order is not None and \
                self._original_order is not None:
            with open(os.path.join(self._cache_dir, "order.csv"), 'w') as o:
                writer = csv.writer(o, lineterminator='\n')
                for orders in zip(self._original_order, self._order):
                    writer.writerow(list(orders))
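
A minimal read-back sketch for the cache layout written above, assuming the ".npy" cache_file_format and that each cache file holds one numpy.save() array per variable (as the other cache writers in this collection do); the directory path is a placeholder.

import csv
import os

import numpy

cache_dir = '/path/to/output_cache_dirname'  # placeholder

# cache_info.csv: one variable name per row, in the order they were saved.
with open(os.path.join(cache_dir, 'cache_info.csv')) as f:
    variables = [row[0] for row in csv.reader(f)]

# cache_index.csv: (cache file name, number of samples in that file).
with open(os.path.join(cache_dir, 'cache_index.csv')) as f:
    index = [(name, int(num)) for name, num in csv.reader(f)]

for filename, num_data in index:
    with open(os.path.join(cache_dir, filename), 'rb') as f:
        # One numpy.save() per variable, so load them back in the same order.
        arrays = {v: numpy.load(f) for v in variables}
    print(filename, num_data, {v: a.shape for v, a in arrays.items()})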
Example #22
0
def forward_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)

    class ForwardConfig:
        pass
    config = ForwardConfig
    info = load.load(files, prepare_data_iterator=False)
    config.global_config = info.global_config

    config.executors = info.executors.values()

    config.networks = []
    for e in config.executors:
        if e.network.name in info.networks.keys():
            config.networks.append(info.networks[e.network.name])
        else:
            logger.critical('Network {} is not found.'.format(
                e.network.name))
            return

    normalize = True
    for d in info.datasets.values():
        if d.uri == args.dataset:
            normalize = d.normalize
    data_iterator = (lambda: data_iterator_csv_dataset(
        args.dataset, config.networks[0].batch_size, False, normalize=normalize))

    # load dataset as csv
    with open(args.dataset, 'rt') as f:
        rows = [row for row in csv.reader(f)]
    row0 = rows.pop(0)
    root_path = os.path.dirname(args.dataset)
    root_path = os.path.abspath(root_path.replace('/|\\', os.path.sep))
    rows = list(map(lambda row: list(map(lambda x: x if is_float(
        x) else compute_full_path(root_path, x), row)), rows))

    with data_iterator() as di:
        index = 0
        while index < di.size:
            data = di.next()
            result, outputs = forward(args, index, config, data, di.variables)
            if index == 0:
                for name, dim in zip(result.names, result.dims):
                    if dim == 1:
                        row0.append(name)
                    else:
                        for d in range(dim):
                            row0.append(name + '__' + str(d))
            for i, output in enumerate(outputs):
                if index + i < len(rows):
                    rows[index + i].extend(output)
            index += len(outputs)
            logger.log(
                99, 'data {} / {}'.format(min([index, len(rows)]), len(rows)))

    with open(os.path.join(args.outdir, 'output_result.csv'), 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(row0)
        writer.writerows(rows)

    logger.log(99, 'Forward Completed.')
    progress(None)
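
A minimal invocation sketch for forward_command above, assuming it is called programmatically with an argparse-style namespace; the import path and file names are placeholders rather than values confirmed by this snippet.

from types import SimpleNamespace

from nnabla.utils.cli.forward import forward_command  # assumed import path

args = SimpleNamespace(
    config='net.nnp',    # network/executor definition read by load.load()
    param=None,          # optional parameter file appended to `files`
    dataset='test.csv',  # CSV dataset fed to data_iterator_csv_dataset
    outdir='result',     # progress.txt and output_result.csv go here
)
forward_command(args)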
Example #23
0
def multiprocess_save_cache(create_cache_args):
    def _process_row(row, args):
        def _get_value(value, is_vector=False):
            try:
                if is_vector:
                    value = [float(value)]
                else:
                    value = float(value)
                return value
            except ValueError:
                pass
            ext = (os.path.splitext(value)[1]).lower()
            with args._filereader.open(value) as f:
                value = load(ext)(f, normalize=args._normalize)
            return value

        values = collections.OrderedDict()
        if len(row) == len(args._columns):
            for column, column_value in enumerate(row):
                variable, index, label = args._columns[column]
                if index is None:
                    values[variable] = _get_value(column_value, is_vector=True)
                else:
                    if variable not in values:
                        values[variable] = []
                    values[variable].append(_get_value(column_value))
        return values.values()

    (position, cache_csv), cc_args = create_cache_args
    cc_args = SimpleNamespace(**cc_args)
    cache_data = []
    for row in cache_csv:
        cache_data.append(tuple(_process_row(row, cc_args)))

    if len(cache_data) > 0:
        start_position = position + 1 - len(cache_data)
        end_position = position
        cache_filename = os.path.join(
            cc_args._output_cache_dirname,
            '{}_{:08d}_{:08d}{}'.format(cc_args._cache_file_name_prefix,
                                        start_position, end_position,
                                        cc_args._cache_file_format))

        logger.info('Creating cache file {}'.format(cache_filename))

        data = collections.OrderedDict([(n, []) for n in cc_args._variables])
        for _, cd in enumerate(cache_data):
            for i, n in enumerate(cc_args._variables):
                if isinstance(cd[i], numpy.ndarray):
                    d = cd[i]
                else:
                    d = numpy.array(cd[i]).astype(numpy.float32)
                data[n].append(d)
        try:
            if cc_args._cache_file_format == ".h5":
                h5 = h5py.File(cache_filename, 'w')
                for k, v in data.items():
                    h5.create_dataset(k, data=v)
                h5.close()
            else:
                retry_count = 1
                is_create_cache_incomplete = True
                while is_create_cache_incomplete:
                    try:
                        with open(cache_filename, 'wb') as f:
                            for v in data.values():
                                numpy.save(f, v)
                        is_create_cache_incomplete = False
                    except OSError:
                        retry_count += 1
                        if retry_count > 10:
                            raise
                        logger.info(
                            'Creating cache retry {}/10'.format(retry_count))
        except:
            logger.critical(
                'An error occurred while creating cache file from dataset.')
            for k, v in data.items():
                size = v[0].shape
                for d in v:
                    if size != d.shape:
                        logger.critical(
                            'The sizes of data "{}" are not the same. ({} != {})'
                            .format(k, size, d.shape))
            raise

        cc_args._cache_file_name_and_data_nums_list.append(
            (cache_filename, len(cache_data)))
        progress(
            'Create cache',
            len(cc_args._cache_file_name_and_data_nums_list) /
            cc_args._cache_file_count)
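
The worker above expects ((position, cache_csv), cc_args) tuples, where cc_args is a plain dict of the attributes it reads. Below is a minimal driving sketch under stated assumptions: the rows are purely numeric (so args._filereader is never touched), the _columns entries and the import path are hypothetical, and the shared result list comes from a multiprocessing.Manager so appends made in worker processes reach the parent.

import multiprocessing
import tempfile

from nnabla.utils.create_cache import multiprocess_save_cache  # assumed import path


def chunk_rows(rows, cache_size):
    # Yield (last_position, chunk) pairs in the shape the worker expects.
    for start in range(0, len(rows), cache_size):
        chunk = rows[start:start + cache_size]
        yield (start + len(chunk) - 1, chunk)


if __name__ == '__main__':
    rows = [[str(i), str(i * 0.5)] for i in range(100)]  # toy numeric "CSV" rows
    cache_size = 25
    manager = multiprocessing.Manager()
    cc_args = {
        '_columns': [('x', 0, None), ('y', 0, None)],  # (variable, index, label); hypothetical
        '_filereader': None,                           # never used for numeric values
        '_normalize': False,
        '_output_cache_dirname': tempfile.mkdtemp(),
        '_cache_file_name_prefix': 'cache',
        '_cache_file_format': '.npy',
        '_variables': ['x', 'y'],
        '_cache_file_name_and_data_nums_list': manager.list(),
        '_cache_file_count': -(-len(rows) // cache_size),  # ceil division
    }
    tasks = [(pc, cc_args) for pc in chunk_rows(rows, cache_size)]
    with multiprocessing.Pool(2) as pool:
        pool.map(multiprocess_save_cache, tasks)
    print(list(cc_args['_cache_file_name_and_data_nums_list']))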
Example #24
0
def forward_command(args):
    callback.update_status(args)

    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)
    batch_size = args.batch_size
    if batch_size < 1:
        batch_size = None

    class ForwardConfig:
        pass

    config = ForwardConfig
    info = load.load(files, prepare_data_iterator=False, batch_size=batch_size)
    config.global_config = info.global_config

    config.executors = info.executors.values()

    config.networks = []
    for e in config.executors:
        if e.network.name in info.networks.keys():
            config.networks.append(info.networks[e.network.name])
        else:
            logger.critical('Network {} is not found.'.format(
                e.network.name))
            return False

    normalize = True
    for d in info.datasets.values():
        if d.uri == args.dataset or d.cache_dir == args.dataset:
            normalize = d.normalize
    for e in config.executors:
        normalize = normalize and not e.no_image_normalization

    orders = {}
    # With CSV
    if os.path.splitext(args.dataset)[1] == '.csv':
        data_iterator = (lambda: data_iterator_csv_dataset(
            uri=args.dataset,
            batch_size=config.networks[0].batch_size,
            shuffle=False,
            normalize=normalize,
            with_memory_cache=False,
            with_file_cache=False))

        # load dataset as csv
        filereader = FileReader(args.dataset)
        with filereader.open(textmode=True, encoding='utf-8-sig') as f:
            rows = [row for row in csv.reader(f)]
        row0 = rows.pop(0)
        if args.replace_path:
            root_path = os.path.dirname(args.dataset)
            root_path = os.path.abspath(root_path.replace('/|\\', os.path.sep))
        else:
            root_path = '.'
        rows = [row for row in rows if len(row)]
        rows = list(
            map(
                lambda row: list(
                    map(
                        lambda i, x: x if row0[i][0] == '#' or is_float(
                            x) else compute_full_path(root_path, x),
                        range(len(row)), row)), rows))
        for i in range(len(rows)):
            orders[i] = i
    # With Cache
    elif os.path.splitext(args.dataset)[1] == '.cache':
        data_iterator = (lambda: data_iterator_cache(uri=args.dataset,
                                                     batch_size=config.
                                                     networks[0].batch_size,
                                                     shuffle=False,
                                                     normalize=normalize))

        # Get original CSV
        original_csv = os.path.join(args.dataset, 'original.csv')
        try:
            # load dataset as csv
            filereader = FileReader(original_csv)
            with filereader.open(textmode=True, encoding='utf-8-sig') as f:
                rows = [row for row in csv.reader(f)]
            row0 = rows.pop(0)
            root_path = '.'
            rows = list(
                map(
                    lambda row: list(
                        map(
                            lambda x: x if is_float(x) else compute_full_path(
                                root_path, x), row)), rows))
        except Exception:
            print('Cannot open', original_csv)

        # Get original Data order.
        order_csv = os.path.join(args.dataset, 'order.csv')
        try:
            filereader = FileReader(order_csv)
            with filereader.open(textmode=True) as f:
                for original, shuffled in [[int(x) for x in row]
                                           for row in csv.reader(f)]:
                    orders[original] = shuffled
        except Exception:
            print('Cannot open', order_csv)
            for i in range(len(rows)):
                orders[i] = i
    else:
        print('Unsupported extension "{}" in "{}".'.format(
            os.path.splitext(args.dataset)[1], args.dataset))
        return False

    callback.update_status(('data.max', len(rows)))
    callback.update_status(('data.current', 0))
    callback.update_status('processing', True)

    result_csv_filename = os.path.join(args.outdir, args.outfile)
    with open(result_csv_filename, 'w', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        with data_iterator() as di:
            index = 0
            while index < di.size:
                data = di.next()
                result, outputs = _forward(args, index, config, data,
                                           di.variables)
                if index == 0:
                    for name, dim in zip(result.names, result.dims):
                        if dim == 1:
                            if e.repeat_evaluation_type == "std":
                                name = "Uncertainty(Std)"
                            row0.append(name)
                        else:
                            for d in range(dim):
                                row0.append(name + '__' + str(d))
                    writer.writerow(row0)
                for i, output in enumerate(outputs):
                    if index + i < len(rows):
                        import copy
                        row = copy.deepcopy(rows[orders[index + i]])
                        row.extend(output)
                        writer.writerow(row)
                index += len(outputs)

                callback.update_status(('data.current', min([index,
                                                             len(rows)])))
                callback.update_forward_time()
                callback.update_status()

                logger.log(
                    99, 'data {} / {}'.format(min([index, len(rows)]),
                                              len(rows)))

    callback.process_evaluation_result(args.outdir, result_csv_filename)

    logger.log(99, 'Forward Completed.')
    progress(None)

    callback.update_status(('output_result.csv_header', ','.join(row0)))
    callback.update_status(('output_result.column_num', len(row0)))
    callback.update_status(('output_result.data_num', len(rows)))
    callback.update_status('finished')

    return True
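
For the ".cache" branch above, order.csv is what lets the output CSV line up with rows from original.csv. A small standalone sketch of that bookkeeping, assuming each row of order.csv is an (original_index, shuffled_index) pair as written by the cache creator's "Create order.csv" step; the path is a placeholder.

import csv
import os

cache_dir = '/path/to/dataset.cache'  # placeholder

orders = {}
with open(os.path.join(cache_dir, 'order.csv')) as f:
    for original, shuffled in ((int(a), int(b)) for a, b in csv.reader(f)):
        orders[original] = shuffled

# The forward loop above then writes rows[orders[index + i]], pairing each
# output with the original.csv row recorded for that cache position.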
Example #25
0
def compare_with_cpu_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))

    class TrainConfig:
        pass

    class OptConfig:
        pass

    class MonConfig:
        pass

    # Load config with current context
    files = []
    files.append(args.config)

    with nn.parameter_scope('current'):
        info = load.load(files)
        parameters = get_parameters(grad_only=False)

    config = TrainConfig()
    config.global_config = info.global_config
    config.training_config = info.training_config

    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    # Load config with cpu context
    files = []
    files.append(args.config2)

    with nn.parameter_scope('cpu'):
        info_cpu = load.load(files)
        cpu_parameters = get_parameters(grad_only=False)

    config_cpu = TrainConfig()
    config_cpu.global_config = info_cpu.global_config
    config_cpu.training_config = info_cpu.training_config

    config_cpu.optimizers = OrderedDict()
    for name, opt in info_cpu.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config_cpu.optimizers[name] = o

    config_cpu.monitors = OrderedDict()
    for name, mon in info_cpu.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config_cpu.monitors[name] = m

    result_array = [['1-Correl']]

    # Profile Optimizer
    with ExitStack() as stack:
        for name, o in config.optimizers.items():
            o.data_iterator = stack.enter_context(
                o.optimizer.data_iterator())
        for name, o in config_cpu.optimizers.items():
            o.data_iterator = stack.enter_context(
                o.optimizer.data_iterator())
        result_array = compare_optimizer(
            config, parameters, config_cpu, cpu_parameters, result_array)

    # Write profiling result
    import csv
    with open(args.outdir + os.sep + 'compare_with_cpu.csv', 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerows(result_array)

    logger.log(99, 'Compare with CPU Completed.')
    progress(None)
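
The '1-Correl' header suggests the comparison metric is one minus the correlation between matching arrays from the 'current' and 'cpu' parameter scopes; compare_optimizer itself is not shown here, so the sketch below only illustrates that assumed metric with NumPy.

import numpy as np


def one_minus_correl(a, b):
    # One minus the Pearson correlation of the flattened arrays.
    a = np.asarray(a, dtype=np.float64).ravel()
    b = np.asarray(b, dtype=np.float64).ravel()
    return 1.0 - np.corrcoef(a, b)[0, 1]


# Toy stand-ins for a parameter's .d array in both scopes.
gpu_param = np.random.randn(4, 3)
cpu_param = gpu_param + 1e-6 * np.random.randn(4, 3)
print(one_minus_correl(gpu_param, cpu_param))  # close to 0.0 when they agree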
Example #26
0
    def _save_cache(self, args):
        position = args[0]
        cache_csv = args[1]
        # conv dataset
        cache_data = [tuple(self._process_row(row)) for row in cache_csv]

        start_position = position + 1 - len(cache_data)
        end_position = position
        cache_filename = os.path.join(
            self._cache_dir, '{}_{:08d}_{:08d}{}'.format(self._cache_file_name_prefix,
                                                         start_position,
                                                         end_position,
                                                         self._cache_file_format))

        logger.info('Creating cache file {}'.format(cache_filename))

        data = collections.OrderedDict(
            [(n, []) for n in self._variables])
        for _, cd in enumerate(cache_data):
            for i, n in enumerate(self._variables):
                if isinstance(cd[i], numpy.ndarray):
                    d = cd[i]
                else:
                    d = numpy.array(cd[i]).astype(numpy.float32)
                data[n].append(d)

        try:
            if self._cache_file_format == ".h5":
                h5 = h5py.File(cache_filename, 'w')
                for k, v in data.items():
                    h5.create_dataset(k, data=v)
                h5.close()
            else:
                retry_count = 1
                is_create_cache_incomplete = True
                while is_create_cache_incomplete:
                    try:
                        with open(cache_filename, 'wb') as f:
                            for v in data.values():
                                numpy.save(f, v)
                        is_create_cache_incomplete = False
                    except OSError:
                        retry_count += 1
                        if retry_count > 10:
                            raise
                        logger.info(
                            'Creating cache retry {}/10'.format(retry_count))
        except:
            logger.critical(
                'An error occurred while creating cache file from dataset.')
            for k, v in data.items():
                size = v[0].shape
                for d in v:
                    if size != d.shape:
                        logger.critical('The sizes of data "{}" are not the same. ({} != {})'.format(
                            k, size, d.shape))
            raise

        self.current_cache_position += 1
        if single_or_rankzero():
            if self.current_cache_position % int(self.num_of_cache_file/20+1) == 0:
                progress('Create cache', self.current_cache_position /
                         self.num_of_cache_file)
        return cache_filename, len(cache_data)
Example #27
0
def _evaluate(args, config, monitoring_report, best_error):
    error_str = ''
    valid_error = 0.0
    for name, mon in config.monitors.items():
        m = mon.monitor
        error_sum_monitor = 0.0
        error_count = 0
        di = mon.data_iterator
        dp_epoch = di.epoch
        while dp_epoch == di.epoch:
            # Set data to variable
            datas = di.next()
            for v, d in m.dataset_assign.items():
                dest_context = config.global_config.default_context if not m.forward_sequence or v not in m.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance, datas[
                                     di.variables.index(d)], ctx=dest_context)

            # Generate data
            for v, generator in m.generator_assign.items():
                dest_context = config.global_config.default_context if not m.forward_sequence or v not in m.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance,
                                     data=generator(v.shape), ctx=dest_context)

            # Sum error before forward to prepare input data while processing
            # on GPU
            if error_count > 0:
                for v in m.monitor_variables:
                    error_sum_monitor += np.mean(v.variable_instance.d)
                progress('Evaluating "{0}"'.format(
                    name) + ' : error={0:0.6f}'.format(
                    error_sum_monitor / error_count),
                    di.position * 1.0 / di.size)
            error_count += 1

            # Forward recursive
            m.network.forward(m.forward_sequence)

        # Sum error at the end of dataset
        for v in m.monitor_variables:
            error_sum_monitor += np.mean(v.variable_instance.d)

        error = error_sum_monitor / error_count
        monitoring_report.append('  {}: {}\n'.format(name, error))
        if error_str != '':
            error_str += ', '
        else:
            error_str = ' {'
        error_str += '{}={:.6f}'.format(name, error)
        if name == 'valid_error':
            valid_error = error
    if error_str != '':
        error_str += '}'

    # Save Parameters
    if (not config.training_config.save_best) or \
       (not best_error) or \
       (best_error is not None and valid_error <= best_error):
        best_error = valid_error
        save_parameters(os.path.join(args.outdir, 'parameters.h5'))

    return best_error, error_str
Example #28
0
def _evaluate(args, config, monitoring_report, best_error):
    error_str = ''
    valid_error = 0.0
    for name, mon in config.monitors.items():
        m = mon.monitor
        error_sum_monitor = 0.0
        error_count = 0
        di = mon.data_iterator
        dp_epoch = di.epoch
        while dp_epoch == di.epoch:
            # Set data to variable
            datas = di.next()
            for v, d in m.dataset_assign.items():
                dest_context = config.global_config.default_context if not m.forward_sequence or v not in m.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance,
                                     datas[di.variables.index(d)],
                                     ctx=dest_context)

            # Generate data
            for v, generator in m.generator_assign.items():
                dest_context = config.global_config.default_context if not m.forward_sequence or v not in m.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance,
                                     data=generator(v.shape),
                                     ctx=dest_context)

            # Sum error before forward to prepare input data while processing
            # on GPU
            if error_count > 0:
                for v in m.monitor_variables:
                    error_sum_monitor += np.mean(v.variable_instance.d)
                progress(
                    'Evaluating "{0}"'.format(name) +
                    ' : error={0:0.6f}'.format(
                        error_sum_monitor / error_count),
                    di.position * 1.0 / di.size)
            error_count += 1

            # Forward recursive
            m.network.forward(m.forward_sequence)

        # Sum error at the end of dataset
        for v in m.monitor_variables:
            error_sum_monitor += np.mean(v.variable_instance.d)

        error = error_sum_monitor / error_count
        monitoring_report.append('  {}: {}\n'.format(name, error))
        if error_str != '':
            error_str += ', '
        else:
            error_str = ' {'
        error_str += '{}={:.6f}'.format(name, error)
        if name == 'valid_error':
            valid_error = error
    if error_str != '':
        error_str += '}'

    # Save Parameters
    if (not config.training_config.save_best) or \
       (not best_error) or \
       (best_error is not None and valid_error <= best_error):
        best_error = valid_error
        save_parameters(os.path.join(args.outdir, 'parameters.h5'))

    return best_error, error_str
Example #29
0
def forward_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)
    batch_size = args.batch_size
    if batch_size < 1:
        batch_size = None

    class ForwardConfig:
        pass

    config = ForwardConfig
    info = load.load(files, prepare_data_iterator=False, batch_size=batch_size)
    config.global_config = info.global_config

    config.executors = info.executors.values()

    config.networks = []
    for e in config.executors:
        if e.network.name in info.networks.keys():
            config.networks.append(info.networks[e.network.name])
        else:
            logger.critical('Network {} is not found.'.format(
                e.network.name))
            return False

    normalize = True
    for d in info.datasets.values():
        if d.uri == args.dataset:
            normalize = d.normalize
    for e in config.executors:
        normalize = normalize and not e.no_image_normalization

    data_iterator = (lambda: data_iterator_csv_dataset(uri=args.dataset,
                                                       batch_size=config.
                                                       networks[0].batch_size,
                                                       shuffle=False,
                                                       normalize=normalize,
                                                       with_memory_cache=False,
                                                       with_file_cache=False))

    # load dataset as csv
    filereader = FileReader(args.dataset)
    with filereader.open(textmode=True) as f:
        rows = [row for row in csv.reader(f)]
    row0 = rows.pop(0)
    root_path = os.path.dirname(args.dataset)
    root_path = os.path.abspath(root_path.replace('/|\\', os.path.sep))
    rows = list(
        map(
            lambda row: list(
                map(
                    lambda x: x
                    if is_float(x) else compute_full_path(root_path, x), row)),
            rows))

    with open(os.path.join(args.outdir, 'output_result.csv'), 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        with data_iterator() as di:
            index = 0
            while index < di.size:
                data = di.next()
                result, outputs = _forward(args, index, config, data,
                                           di.variables)
                if index == 0:
                    for name, dim in zip(result.names, result.dims):
                        if dim == 1:
                            row0.append(name)
                        else:
                            for d in range(dim):
                                row0.append(name + '__' + str(d))
                    writer.writerow(row0)
                for i, output in enumerate(outputs):
                    if index + i < len(rows):
                        import copy
                        row = copy.deepcopy(rows[index + i])
                        row.extend(output)
                        writer.writerow(row)
                index += len(outputs)
                logger.log(
                    99, 'data {} / {}'.format(min([index, len(rows)]),
                                              len(rows)))

    logger.log(99, 'Forward Completed.')
    progress(None)
    return True
Example #30
0
def _evaluate(args, config, monitoring_report, best_error, epoch):
    comm = current_communicator()
    error_str = ''
    valid_error = 0.0

    def _sum_error(sum, error):
        ret = None
        if comm:
            # logger.log(99, "Calc error with communicator")
            var = [nn.NdArray()]
            var[0].data = error
            _all_reduce(comm, var, division=False, inplace=True)
            ret = sum + var[0].data
        else:
            ret = sum + error
        return ret

    for name, mon in config.monitors.items():
        m = mon.monitor
        error_sum_monitor = 0.0
        error_count = 0
        data_size = max([di.size for di in mon.data_iterators])
        batch_size = max([di.batch_size for di in mon.data_iterators])

        for i in range(data_size // batch_size):
            # Load dataset
            data = OrderedDict()
            for di in mon.data_iterators:
                data.update(zip(di.variables, di.next()))

            # Set data to variable
            for v, d in m.dataset_assign.items():
                dest_context = config.global_config.default_context if not m.forward_sequence or v not in m.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance,
                                     data[d],
                                     ctx=dest_context,
                                     data_name=d,
                                     variable_name=v.name)

            # Generate data
            for v, generator in m.generator_assign.items():
                dest_context = config.global_config.default_context if not m.forward_sequence or v not in m.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance,
                                     data=generator(v.shape),
                                     ctx=dest_context,
                                     variable_name=v.name)

            # Sum error before forward to prepare input data while processing
            # on GPU
            if error_count > 0:
                error_sum = 0.0
                for v in m.monitor_variables:
                    error_sum += np.mean(v.variable_instance.d)
                    # v.variable_instance.data.zero()
                error_sum_monitor = _sum_error(error_sum_monitor, error_sum)
                if single_or_rankzero():
                    progress(
                        'Evaluating "{0}"'.format(name) +
                        ' : error={0:0.6f}'.format(
                            error_sum_monitor / error_count),
                        di.position * 1.0 / di.size)
            error_count += comm.size if comm else 1

            # Forward recursive
            m.network.forward(m.forward_sequence)

        # Sum error at the end of dataset
        error_sum = 0.0
        for v in m.monitor_variables:
            error_sum += np.mean(v.variable_instance.d)
            # v.variable_instance.data.zero()
        error_sum_monitor = _sum_error(error_sum_monitor, error_sum)

        if error_count == 0:
            error = 0
        else:
            error = error_sum_monitor / error_count

        if np.isnan(error) or np.isinf(error):
            logger.log(99, 'Validation error is NaN or Inf.')
            error = 0.0

        monitoring_report.append('  {}: {}\n'.format(name, error))

        callback.update_status((['monitoring_report', epoch, name], error))
        callback.update_status((['last', name], error))  # save last value

        if error_str != '':
            error_str += ', '
        else:
            error_str = ' {'
        error_str += '{}={:.6f}'.format(name, error)
        if name == 'valid_error':
            valid_error = error

    if error_str != '':
        error_str += '}'

    # Save Parameters
    if single_or_rankzero():
        if (not config.training_config.save_best) or \
           (not best_error) or \
           (best_error is not None and valid_error <= best_error):
            best_error = valid_error
            callback.update_status(('best.valid_error', best_error))
            callback.update_status(('best.epoch', epoch))
            _save_parameters(args, 'best', epoch, config, True)

    return best_error, error_str
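
The distributed bookkeeping above reduces to a plain mean: each iteration adds an all-reduced (summed, division=False) error and advances error_count by comm.size, so the final error is the average over all per-worker measurements. A tiny single-process illustration with made-up numbers:

import numpy as np

workers = 4        # stands in for comm.size
iterations = 3
per_worker_errors = np.random.rand(iterations, workers)  # made-up measurements

error_sum_monitor = 0.0
error_count = 0
for it in range(iterations):
    error_sum_monitor += per_worker_errors[it].sum()  # stands in for the all-reduce
    error_count += workers                            # error_count += comm.size
print(error_sum_monitor / error_count)  # equals per_worker_errors.mean()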
Example #31
0
def forward_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)

    class ForwardConfig:
        pass
    config = ForwardConfig
    info = load.load(files, prepare_data_iterator=False)
    config.global_config = info.global_config

    config.executors = info.executors.values()

    config.networks = []
    for e in config.executors:
        if e.network.name in info.networks.keys():
            config.networks.append(info.networks[e.network.name])
        else:
            logger.critical('Network {} is not found.'.format(
                e.network.name))
            return

    normalize = True
    for d in info.datasets.values():
        if d.uri == args.dataset:
            normalize = d.normalize
    data_iterator = (lambda: data_iterator_csv_dataset(
        args.dataset, config.networks[0].batch_size, False, padding=True, normalize=normalize))

    # load dataset as csv
    with open(args.dataset, 'rt') as f:
        rows = [row for row in csv.reader(f)]
    row0 = rows.pop(0)
    root_path = os.path.dirname(args.dataset)
    root_path = os.path.abspath(root_path.replace('/|\\', os.path.sep))
    rows = list(map(lambda row: list(map(lambda x: x if is_float(
        x) else compute_full_path(root_path, x), row)), rows))

    with data_iterator() as di:
        index = 0
        while index < di.size:
            data = di.next()
            result, outputs = forward(args, index, config, data, di.variables)
            if index == 0:
                for name, dim in zip(result.names, result.dims):
                    if dim == 1:
                        row0.append(name)
                    else:
                        for d in range(dim):
                            row0.append(name + '__' + str(d))
            for i, output in enumerate(outputs):
                if index + i < len(rows):
                    rows[index + i].extend(output)
            index += len(outputs)
            logger.log(
                99, 'data {} / {}'.format(min([index, len(rows)]), len(rows)))

    with open(os.path.join(args.outdir, 'output_result.csv'), 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(row0)
        writer.writerows(rows)

    logger.log(99, 'Forward Completed.')
    progress(None)
Example #32
0
def train_command(args):
    callback.update_status(args)

    if single_or_rankzero():
        configure_progress(os.path.join(args.outdir, 'progress.txt'))

    info = load.load([args.config],
                     prepare_data_iterator=None,
                     exclude_parameter=True)

    # Check dataset uri is empty.
    dataset_error = False
    for dataset in info.datasets.values():
        if dataset.uri.strip() == '':
            dataset_error = True
    if dataset_error:
        logger.log(99, 'Fatal error. Dataset URI is empty.')
        return False

    class TrainConfig:
        pass

    config = TrainConfig()
    config.timelimit = -1
    if args.param:
        load.load([args.param], parameter_only=True)

    config.timelimit = callback.get_timelimit(args)

    config.global_config = info.global_config
    config.training_config = info.training_config

    if single_or_rankzero():
        logger.log(99, 'Train with contexts {}'.format(available_contexts))

    class OptConfig:
        pass

    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterators = []
        config.optimizers[name] = o

    class MonConfig:
        pass

    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterators = []
        config.monitors[name] = m

    # Training
    comm = current_communicator()
    config.training_config.iter_per_epoch //= comm.size if comm else 1
    max_iteration = config.training_config.max_epoch * \
        config.training_config.iter_per_epoch

    global _save_parameter_info
    _save_parameter_info = {}
    _, config_ext = os.path.splitext(args.config)
    if config_ext == '.prototxt' or config_ext == '.nntxt':
        _save_parameter_info['config'] = args.config
    elif config_ext == '.nnp':
        with zipfile.ZipFile(args.config, 'r') as nnp:
            for name in nnp.namelist():
                _, ext = os.path.splitext(name)
                if ext == '.nntxt' or ext == '.prototxt':
                    nnp.extract(name, args.outdir)
                    _save_parameter_info['config'] = os.path.join(
                        args.outdir, name)

    result = False
    restart = False
    if max_iteration > 0:
        rng = np.random.RandomState(comm.rank if comm else 0)
        with ExitStack() as stack:
            # Create data_iterator instance only once for each dataset in optimizers
            optimizer_data_iterators = {}
            for name, o in config.optimizers.items():
                for di in o.optimizer.data_iterators.values():
                    if di not in optimizer_data_iterators:
                        di_instance = stack.enter_context(di())
                        if comm and comm.size > 1:
                            di_instance = di_instance.slice(
                                rng, comm.size, comm.rank)
                        optimizer_data_iterators[di] = di_instance
                    else:
                        di_instance = optimizer_data_iterators[di]
                    o.data_iterators.append(di_instance)

            # Create data_iterator instance only once for each dataset in monitors
            monitor_data_iterators = {}
            for name, m in config.monitors.items():
                for di in m.monitor.data_iterators.values():
                    if di not in monitor_data_iterators:
                        di_instance = stack.enter_context(di())
                        if comm and comm.size > 1:
                            di_instance = di_instance.slice(
                                rng, comm.size, comm.rank)
                        monitor_data_iterators[di] = di_instance
                    else:
                        di_instance = monitor_data_iterators[di]
                    m.data_iterators.append(di_instance)
            monitor_data_iterators.update(optimizer_data_iterators)

            result, restart = _train(args, config)
    else:
        # save parameters without training (0 epoch learning)
        logger.log(99, '0 epoch learning. (Just save parameter.)')
        if single_or_rankzero():
            _save_parameters(args, None, 0, config, True)
        result = True

    if single_or_rankzero() and not restart:
        if result:
            logger.log(99, 'Training Completed.')
            callback.update_status('finished')
        else:
            logger.log(99, 'Training Incomplete.')
            callback.update_status('failed')
    if single_or_rankzero():
        progress(None)
    return True
Example #33
0
def profile_command(args):
    callback.update_status(args)

    configure_progress(os.path.join(args.outdir, 'progress.txt'))

    class TrainConfig:
        pass

    config = TrainConfig()
    info = load.load(args.config)

    config.global_config = info.global_config
    config.training_config = info.training_config

    class OptConfig:
        pass

    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterators = []
        config.optimizers[name] = o

    class MonConfig:
        pass

    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterators = []
        config.monitors[name] = m

    ext_module = import_extension_module(
        config.global_config.default_context.backend[0].split(':')[0])

    def synchronize():
        return ext_module.synchronize(
            device_id=config.global_config.default_context.device_id)

    result_array = [['time in ms']]

    callback.update_status('processing', True)

    # Profile Optimizer
    with ExitStack() as stack:
        # Create data_iterator instance only once for each dataset in optimizers
        optimizer_data_iterators = {}
        for name, o in config.optimizers.items():
            for di in o.optimizer.data_iterators.values():
                if di not in optimizer_data_iterators:
                    di_instance = stack.enter_context(di())
                    optimizer_data_iterators[di] = di_instance
                else:
                    di_instance = optimizer_data_iterators[di]
                o.data_iterators.append(di_instance)
        result_array = profile_optimizer(config, result_array, synchronize)

    # Write profiling result
    import csv
    with open(args.outdir + os.sep + 'profile.csv', 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerows(result_array)

    logger.log(99, 'Profile Completed.')
    progress(None)
    callback.update_status('finished')
    return True