def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """Load the files of every category and store them stacked in ``extracted``.

    See :meth:`.Extractor.extract`.

    The file root is read from ``reader`` only while ``self.cached_file_root``
    is still ``None``; afterwards the cached value is reused. For each
    category, the paths read from ``reader`` are joined onto the file root,
    loaded with ``self.load_fn``, and stacked along a new last axis. Unless
    ``self.ignore_indexing`` is set, the stacked array is then indexed with
    the expression found under ``defs.KEY_INDEX_EXPR`` in ``params``.
    """
    # Both parameters are looked up before any reading takes place.
    indexing = params[defs.KEY_INDEX_EXPR]  # type: expr.IndexExpression
    subject_expr = expr.IndexExpression(params[defs.KEY_SUBJECT_INDEX])

    if self.cached_file_root is None:
        raw_root = reader.read(defs.LOC_FILES_ROOT)
        self.cached_file_root = byte_converter.convert_to_string(raw_root)
    root = self.cached_file_root

    for category in self.categories:
        location = defs.LOC_FILES_PLACEHOLDER.format(category)
        paths = byte_converter.convert_to_string(
            reader.read(location, subject_expr))

        arrays = [
            self.load_fn(os.path.join(root, path), category) for path in paths
        ]
        stacked = np.stack(arrays, axis=-1)

        extracted[category] = (
            stacked if self.ignore_indexing else stacked[indexing.expression]
        )
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """Store the subject index and the matching subject entry in ``extracted``.

    See :meth:`.Extractor.extract`.

    The index found under ``defs.KEY_SUBJECT_INDEX`` in ``params`` is copied
    to ``extracted`` unchanged. The value read from ``defs.LOC_SUBJECT`` at
    that index is passed through ``byte_converter.convert_to_string`` and
    stored under ``defs.KEY_SUBJECT``.
    """
    subject_index = params[defs.KEY_SUBJECT_INDEX]
    # The index is written first, before the reader is touched.
    extracted[defs.KEY_SUBJECT_INDEX] = subject_index

    subject_expr = expr.IndexExpression(subject_index)
    raw_subject = reader.read(defs.LOC_SUBJECT, subject_expr)
    extracted[defs.KEY_SUBJECT] = byte_converter.convert_to_string(raw_subject)
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """Copy the entries produced by ``self._extract`` into ``extracted``.

    See :meth:`.Extractor.extract`.

    When ``self.cache`` is truthy and a previous result is stored in
    ``self.cached_result``, that result is reused. Otherwise
    ``self._extract(reader)`` is called and its result stored on the
    instance. Every value is passed through
    ``byte_converter.convert_to_string`` before being written.
    """
    if self.cache and self.cached_result is not None:
        entries = self.cached_result
    else:
        entries = self._extract(reader)
        # Stored even when caching is disabled; the flag only gates reuse.
        self.cached_result = entries

    for key, raw_value in entries.items():
        extracted[key] = byte_converter.convert_to_string(raw_value)
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """Store the file root and the per-category file entries in ``extracted``.

    See :meth:`.Extractor.extract`.

    The file root is taken from ``self.cached_file_root`` when ``self.cache``
    is truthy and a value is already cached. Otherwise it is read from
    ``defs.LOC_FILES_ROOT`` and stored on the instance. For each category,
    the entry read at the subject index is converted with
    ``byte_converter.convert_to_string`` and written under the
    category-specific ``defs.KEY_PLACEHOLDER_FILES`` key.
    """
    subject_expr = expr.IndexExpression(params[defs.KEY_SUBJECT_INDEX])

    if self.cache and self.cached_file_root is not None:
        root = self.cached_file_root
    else:
        root = reader.read(defs.LOC_FILES_ROOT)
        # Stored even when caching is disabled; the flag only gates reuse.
        self.cached_file_root = root

    extracted[defs.KEY_FILE_ROOT] = byte_converter.convert_to_string(root)

    for category in self.categories:
        location = defs.LOC_FILES_PLACEHOLDER.format(category)
        raw_files = reader.read(location, subject_expr)
        files_key = defs.KEY_PLACEHOLDER_FILES.format(category)
        extracted[files_key] = byte_converter.convert_to_string(raw_files)
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """Extract the data of ``self.category``, optionally restricted to a selection.

    See :meth:`.Extractor.extract`.

    If the names of the category are not yet in ``extracted``, a
    :class:`NamesExtractor` is created on first use and run to add them.
    The data is read at the subject entry and index expression given in
    ``params``. Without a selection, the data is stored after conversion
    with ``byte_converter.convert_to_string``. With a selection, the entries
    whose names are selected are taken along the last axis, and the selected
    names are stored as well.

    Raises:
        ValueError: If the reader has no data location for the category, or
            if a selected name does not occur in the category's names.
    """
    names_key = defs.KEY_PLACEHOLDER_NAMES.format(self.category)
    if names_key not in extracted:
        if self.names_extractor is None:
            self.names_extractor = NamesExtractor(
                cache=True, categories=(self.category, ))
        self.names_extractor.extract(reader, {}, extracted)

    if self.subject_entries is None:
        self.subject_entries = reader.get_subject_entries()

    data_location = defs.LOC_DATA_PLACEHOLDER.format(self.category)
    if not reader.has(data_location):
        raise ValueError(
            f'SelectiveDataExtractor requires {self.category} to exist')

    subject_index = params[defs.KEY_SUBJECT_INDEX]
    index_expr = params[defs.KEY_INDEX_EXPR]
    subject_entry = self.subject_entries[subject_index]

    data = reader.read(f'{data_location}/{subject_entry}', index_expr)
    available_names = extracted[names_key]  # type: list

    if self.selection is None:
        extracted[self.category] = byte_converter.convert_to_string(data)
        return

    # Map every selected name to its position among the available names.
    positions = np.array(
        [available_names.index(name) for name in self.selection])
    extracted[self.category] = np.take(data, positions, axis=-1)
    if isinstance(data, list):
        # np.take returned an array; hand back a list as was read.
        as_list = extracted[self.category].tolist()
        extracted[self.category] = byte_converter.convert_to_string(as_list)

    selected_key = defs.KEY_PLACEHOLDER_NAMES_SELECTED.format(self.category)
    extracted[selected_key] = list(self.selection)
def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
    """Read the data of every category for one subject into ``extracted``.

    See :meth:`.Extractor.extract`.

    The subject entries are fetched from ``reader`` on first use and kept on
    the instance. For each category, the data stored under the subject's
    entry is read, either in full when ``self.ignore_indexing`` is set or
    with the index expression from ``params``. It is then stored after
    conversion with ``byte_converter.convert_to_string``.
    """
    if self.subject_entries is None:
        self.subject_entries = reader.get_subject_entries()

    subject_index = params[defs.KEY_SUBJECT_INDEX]
    index_expr = params[defs.KEY_INDEX_EXPR]
    subject_entry = self.subject_entries[subject_index]

    for category in self.categories:
        location = '{}/{}'.format(
            defs.LOC_DATA_PLACEHOLDER.format(category), subject_entry)
        raw_data = (
            reader.read(location)
            if self.ignore_indexing
            else reader.read(location, index_expr)
        )
        extracted[category] = byte_converter.convert_to_string(raw_data)
def get_subjects(self) -> list:
    """Return the subjects stored at ``defs.LOC_SUBJECT``.

    See :meth:`.Reader.get_subjects`.

    The value read is passed through ``byte_converter.convert_to_string``
    before it is returned.
    """
    raw_subjects = self.read(defs.LOC_SUBJECT)
    return byte_converter.convert_to_string(raw_subjects)