def __init__(self, input_csv_filename, rng=None, shuffle=False):
    self._cache_size = int(
        nnabla_config.get('DATA_ITERATOR', 'data_source_file_cache_size'))
    logger.info('Cache size is {}'.format(self._cache_size))

    self._filereader = FileReader(input_csv_filename)
    self._original_source_uri = input_csv_filename
    if rng is None:
        self._rng = numpy.random.RandomState(313)
    else:
        self._rng = rng
    self._shuffle = shuffle

    # Binary mode is required to use the seek and tell functions.
    self._file = open(input_csv_filename, 'rb')

    self._line_positions = []
    line = self._file.readline().decode('utf-8')
    csvreader = csv.reader([line])
    self._process_header(next(csvreader))

    # Store the file position of each data row.
    self._size = 0
    while True:
        self._line_positions.append(self._file.tell())
        line = self._file.readline()
        if line is None or len(line) == 0:
            break
        self._size += 1

    # Rewind.
    self._file.seek(0)

    self._cache_file_order = []
    self._cache_file_data_orders = []
    self._cache_file_names = []

    # Adjust the data size to the reset position. In most cases this means
    # a multiple of the bunch (mini-batch) size.
    num_of_cache_files = int(
        numpy.ceil(float(self._size) / self._cache_size))
    self._cache_file_order = self._cache_file_order[0:num_of_cache_files]
    self._cache_file_data_orders = self._cache_file_data_orders[
        0:num_of_cache_files]
    if self._size % self._cache_size != 0:
        self._cache_file_data_orders[num_of_cache_files - 1] = self._cache_file_data_orders[
            num_of_cache_files - 1][0:self._size % self._cache_size]

    self._original_order = list(range(self._size))
    self._order = list(range(self._size))
    self._variables = tuple(self._variables_dict.keys())

    # Shuffle.
    if self._shuffle:
        self._order = list(self._rng.permutation(list(range(self._size))))
    else:
        self._order = list(range(self._size))
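# Illustrative sketch (not part of the library): the cache-file partitioning
# arithmetic used by the constructor above. Only the math mirrors the code;
# the function name and return value are hypothetical.
import math


def split_into_cache_chunks(size, cache_size):
    """Return per-cache-file row-index lists: full chunks of `cache_size`
    rows, with the last chunk truncated to `size % cache_size` rows."""
    num_of_cache_files = int(math.ceil(float(size) / cache_size))
    return [list(range(i * cache_size, min((i + 1) * cache_size, size)))
            for i in range(num_of_cache_files)]


# Example: 10 rows with a cache size of 4 -> chunks of 4, 4 and 2 rows.
assert [len(c) for c in split_into_cache_chunks(10, 4)] == [4, 4, 2]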
def __init__(self, input_csv_filename, rng=None, shuffle=False,
             num_of_threads=None):
    self._cache_size = int(
        nnabla_config.get('DATA_ITERATOR', 'data_source_file_cache_size'))
    logger.info('Cache size is {}'.format(self._cache_size))

    self._filereader = FileReader(input_csv_filename)
    self._original_source_uri = input_csv_filename
    if rng is None:
        self._rng = numpy.random.RandomState(313)
    else:
        self._rng = rng
    self._shuffle = shuffle

    # Read index.csv.
    self._file = open(input_csv_filename, 'r', encoding='utf-8')
    csvreader = csv.reader(self._file)
    header = next(csvreader)

    # Read and keep all data rows.
    self._csv_data = list(csvreader)
    self._size = len(self._csv_data)
    self._file.close()

    self._remove_comment_cols(header, self._csv_data)
    self._process_header(header)
    self._variables = tuple(self._variables_dict.keys())

    self._original_order = list(range(self._size))

    # Shuffle: the order determines in which sequence the CSV rows are processed.
    if self._shuffle:
        self._order = list(self._rng.permutation(list(range(self._size))))
    else:
        self._order = list(range(self._size))

    if num_of_threads:
        self._num_of_threads = num_of_threads
    else:
        self._num_of_threads = int(
            nnabla_config.get('DATA_ITERATOR',
                              'data_source_file_cache_num_of_threads'))
    logger.info('Num of thread is {}'.format(self._num_of_threads))
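# Illustrative sketch (not part of the library): a plausible reading of the
# `_remove_comment_cols` call above, assuming that -- as in the CSV handling
# further below -- a header cell starting with '#' marks a comment column.
# The helper name and in-place semantics are assumptions, not the library API.
def remove_comment_cols(header, rows):
    comment_idx = [i for i, name in enumerate(header) if name.startswith('#')]
    for i in reversed(comment_idx):
        del header[i]
        for row in rows:
            del row[i]


header = ['x:image', '#memo', 'y:label']
rows = [['0.png', 'note', '7']]
remove_comment_cols(header, rows)
assert header == ['x:image', 'y:label'] and rows == [['0.png', '7']]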
def __init__(self, input_csv_filename, rng=None, shuffle=False,
             process_num=None):
    self._cache_size = int(
        nnabla_config.get('DATA_ITERATOR', 'data_source_file_cache_size'))
    logger.info('Cache size is {}'.format(self._cache_size))

    self._filereader = FileReader(input_csv_filename)
    self._original_source_uri = input_csv_filename
    if rng is None:
        self._rng = numpy.random.RandomState(313)
    else:
        self._rng = rng
    self._shuffle = shuffle

    # Read index.csv.
    self._file = open(input_csv_filename, 'r')
    csvreader = csv.reader(self._file)
    self._process_header(next(csvreader))
    self._variables = tuple(self._variables_dict.keys())

    # Read and keep all data rows.
    self._csv_data = list(csvreader)
    self._size = len(self._csv_data)
    self._file.close()

    self._original_order = list(range(self._size))

    # Shuffle: the order determines in which sequence the CSV rows are processed.
    if self._shuffle:
        self._order = list(self._rng.permutation(list(range(self._size))))
    else:
        self._order = list(range(self._size))

    # Number of worker processes.
    if process_num:
        self._process_num = process_num
    else:
        self._process_num = multiprocessing.cpu_count()
    logger.info('Num of process is {}'.format(self._process_num))
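# Illustrative sketch (not part of the library): the row-ordering logic shared
# by the three constructors above. When no rng is supplied, the fixed seed 313
# makes the shuffled order reproducible across runs. The function name is
# hypothetical; only the logic mirrors the code.
import numpy


def make_order(size, shuffle, rng=None):
    if rng is None:
        rng = numpy.random.RandomState(313)
    if shuffle:
        return list(rng.permutation(list(range(size))))
    return list(range(size))


# Two calls with the default rng yield the same permutation.
assert make_order(5, shuffle=True) == make_order(5, shuffle=True)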
def forward_command(args):
    callback.update_status(args)
    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)
    batch_size = args.batch_size
    if batch_size < 1:
        batch_size = None

    class ForwardConfig:
        pass
    config = ForwardConfig
    info = load.load(files, prepare_data_iterator=False, batch_size=batch_size)
    config.global_config = info.global_config
    config.executors = info.executors.values()

    config.networks = []
    for e in config.executors:
        if e.network.name in info.networks.keys():
            config.networks.append(info.networks[e.network.name])
        else:
            logger.critical('Network {} is not found.'.format(
                config.executor.network.name))
            return False

    normalize = True
    for d in info.datasets.values():
        if d.uri == args.dataset or d.cache_dir == args.dataset:
            normalize = d.normalize
    for e in config.executors:
        normalize = normalize and not e.no_image_normalization

    orders = {}
    # With CSV
    if os.path.splitext(args.dataset)[1] == '.csv':
        data_iterator = (lambda: data_iterator_csv_dataset(
            uri=args.dataset,
            batch_size=config.networks[0].batch_size,
            shuffle=False,
            normalize=normalize,
            with_memory_cache=False,
            with_file_cache=False))

        # Load the dataset as CSV.
        filereader = FileReader(args.dataset)
        with filereader.open(textmode=True, encoding='utf-8-sig') as f:
            rows = [row for row in csv.reader(f)]
        row0 = rows.pop(0)
        if args.replace_path:
            root_path = os.path.dirname(args.dataset)
            root_path = os.path.abspath(root_path.replace('/|\\', os.path.sep))
        else:
            root_path = '.'
        rows = [row for row in rows if len(row)]
        rows = list(
            map(
                lambda row: list(
                    map(
                        lambda i, x: x if row0[i][0] == '#' or is_float(
                            x) else compute_full_path(root_path, x),
                        range(len(row)),
                        row)),
                rows))
        for i in range(len(rows)):
            orders[i] = i
    # With Cache
    elif os.path.splitext(args.dataset)[1] == '.cache':
        data_iterator = (lambda: data_iterator_cache(
            uri=args.dataset,
            batch_size=config.networks[0].batch_size,
            shuffle=False,
            normalize=normalize))

        # Get the original CSV.
        original_csv = os.path.join(args.dataset, 'original.csv')
        try:
            # Load the dataset as CSV.
            filereader = FileReader(original_csv)
            with filereader.open(textmode=True, encoding='utf-8-sig') as f:
                rows = [row for row in csv.reader(f)]
            row0 = rows.pop(0)
            root_path = '.'
            rows = list(
                map(
                    lambda row: list(
                        map(
                            lambda x: x if is_float(x) else compute_full_path(
                                root_path, x),
                            row)),
                    rows))
        except:
            print('Cannot open', original_csv)
            pass

        # Get the original data order.
        order_csv = os.path.join(args.dataset, 'order.csv')
        try:
            filereader = FileReader(order_csv)
            with filereader.open(textmode=True) as f:
                for original, shuffled in [[int(x) for x in row]
                                           for row in csv.reader(f)]:
                    orders[original] = shuffled
        except:
            print('Cannot open', order_csv)
            for i in range(len(rows)):
                orders[i] = i
    else:
        print('Unsupported extension "{}" in "{}".'.format(
            os.path.splitext(args.dataset)[1], args.dataset))

    callback.update_status(('data.max', len(rows)))
    callback.update_status(('data.current', 0))
    callback.update_status('processing', True)

    result_csv_filename = os.path.join(args.outdir, args.outfile)
    with open(result_csv_filename, 'w', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        with data_iterator() as di:
            index = 0
            while index < di.size:
                data = di.next()
                result, outputs = _forward(
                    args, index, config, data, di.variables)
                if index == 0:
                    for name, dim in zip(result.names, result.dims):
                        if dim == 1:
                            if e.repeat_evaluation_type == "std":
                                name = "Uncertainty(Std)"
                            row0.append(name)
                        else:
                            for d in range(dim):
                                row0.append(name + '__' + str(d))
                    writer.writerow(row0)
                for i, output in enumerate(outputs):
                    if index + i < len(rows):
                        import copy
                        row = copy.deepcopy(rows[orders[index + i]])
                        row.extend(output)
                        writer.writerow(row)
                index += len(outputs)
                callback.update_status(('data.current',
                                        min([index, len(rows)])))
                callback.update_forward_time()
                callback.update_status()
                logger.log(
                    99, 'data {} / {}'.format(min([index, len(rows)]),
                                              len(rows)))

    callback.process_evaluation_result(args.outdir, result_csv_filename)

    logger.log(99, 'Forward Completed.')
    progress(None)

    callback.update_status(('output_result.csv_header', ','.join(row0)))
    callback.update_status(('output_result.column_num', len(row0)))
    callback.update_status(('output_result.data_num', len(rows)))
    callback.update_status('finished')

    return True
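# Illustrative sketch (not part of the library): how the `orders` mapping built
# above aligns forward outputs with input rows. The forward loop writes
# rows[orders[k]] next to the k-th output; for CSV datasets (and when order.csv
# cannot be read) the mapping is the identity. The helper name is hypothetical.
def resolve_row(rows, orders, output_index):
    return rows[orders[output_index]]


rows = [['a.png', '0'], ['b.png', '1'], ['c.png', '2']]
orders = {0: 2, 1: 0, 2: 1}                  # mapping read from order.csv
assert resolve_row(rows, orders, 0) == ['c.png', '2']
orders = {i: i for i in range(len(rows))}    # identity fallback
assert resolve_row(rows, orders, 0) == ['a.png', '0']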
def forward_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)
    batch_size = args.batch_size
    if batch_size < 1:
        batch_size = None

    class ForwardConfig:
        pass
    config = ForwardConfig
    info = load.load(files, prepare_data_iterator=False, batch_size=batch_size)
    config.global_config = info.global_config
    config.executors = info.executors.values()

    config.networks = []
    for e in config.executors:
        if e.network.name in info.networks.keys():
            config.networks.append(info.networks[e.network.name])
        else:
            logger.critical('Network {} is not found.'.format(
                config.executor.network.name))
            return False

    normalize = True
    for d in info.datasets.values():
        if d.uri == args.dataset:
            normalize = d.normalize
    for e in config.executors:
        normalize = normalize and not e.no_image_normalization

    data_iterator = (lambda: data_iterator_csv_dataset(
        uri=args.dataset,
        batch_size=config.networks[0].batch_size,
        shuffle=False,
        normalize=normalize,
        with_memory_cache=False,
        with_file_cache=False))

    # Load the dataset as CSV.
    filereader = FileReader(args.dataset)
    with filereader.open(textmode=True) as f:
        rows = [row for row in csv.reader(f)]
    row0 = rows.pop(0)
    root_path = os.path.dirname(args.dataset)
    root_path = os.path.abspath(root_path.replace('/|\\', os.path.sep))
    rows = list(
        map(
            lambda row: list(
                map(
                    lambda x: x if is_float(x) else compute_full_path(
                        root_path, x),
                    row)),
            rows))

    with open(os.path.join(args.outdir, 'output_result.csv'), 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        with data_iterator() as di:
            index = 0
            while index < di.size:
                data = di.next()
                result, outputs = _forward(
                    args, index, config, data, di.variables)
                if index == 0:
                    for name, dim in zip(result.names, result.dims):
                        if dim == 1:
                            row0.append(name)
                        else:
                            for d in range(dim):
                                row0.append(name + '__' + str(d))
                    writer.writerow(row0)
                for i, output in enumerate(outputs):
                    if index + i < len(rows):
                        import copy
                        row = copy.deepcopy(rows[index + i])
                        row.extend(output)
                        writer.writerow(row)
                index += len(outputs)
                logger.log(
                    99, 'data {} / {}'.format(min([index, len(rows)]),
                                              len(rows)))

    logger.log(99, 'Forward Completed.')
    progress(None)
    return True
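# Illustrative sketch (not part of the library): how both forward_command
# variants extend the CSV header, one column per scalar output and
# `name__<d>` columns for multi-dimensional outputs. The helper name and the
# sample variable names are hypothetical.
def extend_header(row0, names, dims):
    row0 = list(row0)
    for name, dim in zip(names, dims):
        if dim == 1:
            row0.append(name)
        else:
            for d in range(dim):
                row0.append(name + '__' + str(d))
    return row0


assert extend_header(['x:image', 'y:label'], ["y'"], [3]) == \
    ['x:image', 'y:label', "y'__0", "y'__1", "y'__2"]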