Пример #1
0
    def extract(self, reader: rd.Reader, params: dict,
                extracted: dict) -> None:
        """Load every category's files from disk and store the stacked data.

        See :meth:`.Extractor.extract`.

        Args:
            reader: Used to read the files root and, per category, the
                relative file paths of the requested subject.
            params: Must contain ``defs.KEY_INDEX_EXPR`` and
                ``defs.KEY_SUBJECT_INDEX``.
            extracted: Output dictionary; one entry per category in
                ``self.categories`` is written.
        """
        slicing = params[defs.KEY_INDEX_EXPR]  # type: expr.IndexExpression
        subject_expr = expr.IndexExpression(params[defs.KEY_SUBJECT_INDEX])

        # The files root is read only once and reused on subsequent calls.
        if self.cached_file_root is None:
            raw_root = reader.read(defs.LOC_FILES_ROOT)
            self.cached_file_root = byte_converter.convert_to_string(raw_root)
        root_dir = self.cached_file_root

        for category in self.categories:
            location = defs.LOC_FILES_PLACEHOLDER.format(category)
            relative_paths = byte_converter.convert_to_string(
                reader.read(location, subject_expr))

            arrays = [
                self.load_fn(os.path.join(root_dir, relative), category)
                for relative in relative_paths
            ]
            # Each loaded file occupies one position along a new last axis.
            stacked = np.stack(arrays, axis=-1)
            extracted[category] = (stacked if self.ignore_indexing else
                                   stacked[slicing.expression])
Пример #2
0
 def extract(self, reader: rd.Reader, params: dict,
             extracted: dict) -> None:
     """Store the subject index and the subject read at that index.

     See :meth:`.Extractor.extract`.

     Args:
         reader: Used to read ``defs.LOC_SUBJECT`` for the given index.
         params: Must contain ``defs.KEY_SUBJECT_INDEX``.
         extracted: Output dictionary; receives ``defs.KEY_SUBJECT_INDEX``
             and ``defs.KEY_SUBJECT``.
     """
     subject_index = params[defs.KEY_SUBJECT_INDEX]
     # The index is passed through unchanged before anything is read.
     extracted[defs.KEY_SUBJECT_INDEX] = subject_index

     subject_expr = expr.IndexExpression(subject_index)
     raw_subject = reader.read(defs.LOC_SUBJECT, subject_expr)
     extracted[defs.KEY_SUBJECT] = byte_converter.convert_to_string(
         raw_subject)
Пример #3
0
    def extract(self, reader: rd.Reader, params: dict,
                extracted: dict) -> None:
        """Copy the entries produced by ``self._extract`` into ``extracted``.

        See :meth:`.Extractor.extract`.

        The result of ``self._extract`` is always remembered in
        ``self.cached_result``, but it is only reused when ``self.cache`` is
        truthy. ``params`` is not used here.
        """
        if self.cache and self.cached_result is not None:
            entries = self.cached_result
        else:
            entries = self._extract(reader)
            self.cached_result = entries

        # Every value goes through the byte-to-string conversion on each call.
        for key, raw_value in entries.items():
            extracted[key] = byte_converter.convert_to_string(raw_value)
Пример #4
0
    def extract(self, reader: rd.Reader, params: dict,
                extracted: dict) -> None:
        """Store the files root and the per-category file entries.

        See :meth:`.Extractor.extract`.

        Args:
            reader: Used to read ``defs.LOC_FILES_ROOT`` and the file entries
                of each category for the requested subject.
            params: Must contain ``defs.KEY_SUBJECT_INDEX``.
            extracted: Output dictionary; receives ``defs.KEY_FILE_ROOT`` and
                one ``defs.KEY_PLACEHOLDER_FILES`` entry per category.
        """
        subject_expr = expr.IndexExpression(params[defs.KEY_SUBJECT_INDEX])

        # The root is stored on every read but only reused when caching is on.
        if self.cache and self.cached_file_root is not None:
            root = self.cached_file_root
        else:
            root = reader.read(defs.LOC_FILES_ROOT)
            self.cached_file_root = root
        extracted[defs.KEY_FILE_ROOT] = byte_converter.convert_to_string(root)

        for category in self.categories:
            raw_files = reader.read(
                defs.LOC_FILES_PLACEHOLDER.format(category), subject_expr)
            converted_files = byte_converter.convert_to_string(raw_files)
            extracted[defs.KEY_PLACEHOLDER_FILES.format(
                category)] = converted_files
Пример #5
0
    def extract(self, reader: rd.Reader, params: dict,
                extracted: dict) -> None:
        """Read the category's data and keep only the selected entries.

        See :meth:`.Extractor.extract`.

        Args:
            reader: Used to read the subject entries and the category data.
            params: Must contain ``defs.KEY_SUBJECT_INDEX`` and
                ``defs.KEY_INDEX_EXPR``.
            extracted: Output dictionary; receives ``self.category`` and, when
                a selection is set, the list of selected names.

        Raises:
            ValueError: If the reader has no data for ``self.category``, or if
                a selected name is not among the category's entry names.
        """
        names_key = defs.KEY_PLACEHOLDER_NAMES.format(self.category)
        if names_key not in extracted:
            # Names are needed to resolve the selection; fetch them with a
            # lazily created extractor if nobody provided them yet.
            if self.names_extractor is None:
                self.names_extractor = NamesExtractor(
                    cache=True, categories=(self.category, ))
            self.names_extractor.extract(reader, {}, extracted)

        if self.subject_entries is None:
            self.subject_entries = reader.get_subject_entries()

        data_location = defs.LOC_DATA_PLACEHOLDER.format(self.category)
        if not reader.has(data_location):
            raise ValueError(
                f'SelectiveDataExtractor requires {self.category} to exist')

        subject_index = params[defs.KEY_SUBJECT_INDEX]
        index_expr = params[defs.KEY_INDEX_EXPR]

        subject_entry = self.subject_entries[subject_index]
        data = reader.read('{}/{}'.format(data_location, subject_entry),
                           index_expr)
        entry_names = extracted[names_key]  # type: list

        if self.selection is None:
            # No selection: hand over everything that was read.
            extracted[self.category] = byte_converter.convert_to_string(data)
            return

        positions = np.array(
            [entry_names.index(name) for name in self.selection])
        picked = np.take(data, positions, axis=-1)
        extracted[self.category] = picked
        if isinstance(data, list):
            # The input was a list, so the result is converted back to one.
            extracted[self.category] = byte_converter.convert_to_string(
                picked.tolist())

        selected_key = defs.KEY_PLACEHOLDER_NAMES_SELECTED.format(
            self.category)
        extracted[selected_key] = list(self.selection)
Пример #6
0
    def extract(self, reader: rd.Reader, params: dict,
                extracted: dict) -> None:
        """Read the data of every category for the requested subject.

        See :meth:`.Extractor.extract`.

        Args:
            reader: Used to read the subject entries and the category data.
            params: Must contain ``defs.KEY_SUBJECT_INDEX`` and
                ``defs.KEY_INDEX_EXPR``.
            extracted: Output dictionary; one entry per category in
                ``self.categories`` is written.
        """
        if self.subject_entries is None:
            self.subject_entries = reader.get_subject_entries()

        subject_index = params[defs.KEY_SUBJECT_INDEX]
        index_expr = params[defs.KEY_INDEX_EXPR]
        subject_entry = self.subject_entries[subject_index]

        for category in self.categories:
            location = '{}/{}'.format(
                defs.LOC_DATA_PLACEHOLDER.format(category), subject_entry)
            # With indexing ignored, the read is issued without an index
            # expression.
            raw = (reader.read(location) if self.ignore_indexing else
                   reader.read(location, index_expr))
            extracted[category] = byte_converter.convert_to_string(raw)
Пример #7
0
 def get_subjects(self) -> list:
     """Return the content of ``defs.LOC_SUBJECT`` converted to strings.

     See :meth:`.Reader.get_subjects`.
     """
     raw_subjects = self.read(defs.LOC_SUBJECT)
     return byte_converter.convert_to_string(raw_subjects)