Python FeatureReader: примеры использования

Язык программирования: Python

Пространство имен/Пакет: pyemma.coordinates.data

Класс/Тип: FeatureReader

Примеров на hotexamples.com: 3

Python FeatureReader — найдено 3 примера. Это лучшие примеры Python-кода для pyemma.coordinates.data.FeatureReader, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

FeatureReader(3)

supports_format(2)

Основные методы

FeatureReader (3)

supports_format (2)

Пример #1

Показать файл

def create_file_reader(input_files,
                       topology,
                       featurizer,
                       chunk_size=1000,
                       **kw):
    r"""
    Creates a (possibly featured) file reader by a number of input files and either a topology file or a featurizer.

    The reader type is picked from the suffix of the first input file:
    suffixes registered in mdtraj's ``FormatRegistry`` yield a ``FeatureReader``,
    ``.npy``/``.npz`` yield a ``NumPyFileReader`` and anything else is handed to
    ``PyCSVReader``. A list/tuple containing nested lists/tuples is passed to
    ``FragmentedTrajectoryReader`` without further validation here.

    Parameters
    ----------
    :param input_files:
        A single input file or a list of input files.
    :param topology:
        A topology file. If given, the featurizer argument can be None.
    :param featurizer:
        A featurizer. If given, the topology file can be None.
    :param chunk_size:
        The chunk size with which the corresponding reader gets initialized.
    :param kw:
        Extra keyword arguments; only forwarded to ``PyCSVReader``.
    :return: Returns the reader.
    :raises ValueError:
        If the input is empty, is not a string or list of strings, mixes file
        suffixes, names files that do not exist, or consists of MD files while
        neither a featurizer nor a topology is given.
    """
    from pyemma.coordinates.data.numpy_filereader import NumPyFileReader
    from pyemma.coordinates.data.py_csv_reader import PyCSVReader
    from pyemma.coordinates.data import FeatureReader
    from pyemma.coordinates.data.fragmented_trajectory_reader import FragmentedTrajectoryReader

    is_sequence = isinstance(input_files, (list, tuple))

    # fragmented trajectories: at least one element is itself a list/tuple
    if is_sequence and any(isinstance(item, (list, tuple)) for item in input_files):
        return FragmentedTrajectoryReader(input_files, topology, chunk_size,
                                          featurizer)

    # normal trajectories: normalize the input to a list of file names
    if isinstance(input_files, string_types):
        input_list = [input_files]
    elif is_sequence and len(input_files) == 0:
        raise ValueError("The passed input list should not be empty.")
    elif is_sequence and any(isinstance(item, string_types) for item in input_files):
        if not all(isinstance(item, string_types) for item in input_files):
            raise ValueError(
                "The passed list did not exclusively contain strings or was a list of lists "
                "(fragmented trajectory).")
        input_list = input_files
    else:
        # wrap in a 1-tuple so that a tuple-valued input is formatted as a
        # whole instead of being unpacked as %-format arguments
        raise ValueError("Input \"%s\" was no string or list of strings." %
                         (input_files,))

    # TODO: this does not handle suffixes like .xyz.gz (rare)
    _, suffix = os.path.splitext(input_list[0])

    # check: do all files have the same file type? If not: raise ValueError.
    if not all(item.endswith(suffix) for item in input_list):
        raise ValueError(
            "Not all elements in the input list were of the type %s!" %
            suffix)

    # do all the files exist? If not: raise ValueError listing every offender
    missing = [item for item in input_list if not os.path.isfile(item)]
    if missing:
        err_msg = "\n".join("File %s did not exist or was no file" % item
                            for item in missing)
        raise ValueError(
            "Some of the given input files were directories"
            " or did not exist:\n%s" % err_msg)

    from mdtraj.formats.registry import FormatRegistry

    # CASE 1.1: file types are MD files
    if suffix in FormatRegistry.loaders.keys():
        # MD files need either a featurizer or a topology file name
        if not featurizer and not topology:
            raise ValueError(
                "The input files were MD files which makes it mandatory to have either a "
                "featurizer or a topology file.")
        return FeatureReader(input_list,
                             featurizer=featurizer,
                             topologyfile=topology,
                             chunksize=chunk_size)

    if suffix in ['.npy', '.npz']:
        return NumPyFileReader(input_list, chunksize=chunk_size)

    # otherwise we assume that given files are ascii tabulated data
    return PyCSVReader(input_list, chunksize=chunk_size, **kw)

Пример #2

Показать файл

Файл: test_random_access_stride.py Проект: yuxuanzhuang/PyEMMA

 def _get_reader_instance(self, instance_number):
     if instance_number == 0:
         return DataInMemory(self.data)
     elif instance_number == 1:
         return FeatureReader(self.data_feature_reader,
                              topologyfile=self.topfile)

Пример #3

Показать файл

def create_file_reader(input_files, topology, featurizer, chunksize=None, **kw):
    r"""
    Creates a (possibly featured) file reader by a number of input files and either a topology file or a featurizer.

    The reader type is picked from the suffix of the first input file.
    ``.h5``/``.hdf5`` files without featurizer and topology yield an
    ``H5Reader``; suffixes accepted by ``FeatureReader.supports_format`` yield
    a ``FeatureReader``; ``.npy``/``.npz`` yield a ``NumPyFileReader``; anything
    else is handed to ``PyCSVReader``. A list/tuple containing nested
    lists/tuples is passed to ``FragmentedTrajectoryReader`` without further
    validation here.

    The reader classes are imported only on the branch that needs them, so
    invalid input is rejected before any reader module is loaded.

    Parameters
    ----------
    :param input_files:
        A single input file or a list of input files.
    :param topology:
        A topology file. If given, the featurizer argument can be None.
    :param featurizer:
        A featurizer. If given, the topology file can be None.
    :param chunksize:
        The chunk size with which the corresponding reader gets initialized.
    :param kw:
        Extra keyword arguments; only forwarded to ``H5Reader`` and ``PyCSVReader``.
    :return: Returns the reader.
    :raises ValueError:
        If the input is empty, is not a string/path or list of them, mixes file
        suffixes, names files that do not exist, consists of MD files while
        neither a featurizer nor a topology is given, or consists of PDB files.
    """
    is_sequence = isinstance(input_files, (list, tuple))

    # fragmented trajectories: at least one element is itself a list/tuple
    if is_sequence and any(isinstance(item, (list, tuple)) for item in input_files):
        from pyemma.coordinates.data.fragmented_trajectory_reader import FragmentedTrajectoryReader
        return FragmentedTrajectoryReader(input_files, topology, chunksize, featurizer)

    # normal trajectories: normalize the input to a list of file names
    if isinstance(input_files, (Path, str)):
        input_list = [input_files]
    elif is_sequence and len(input_files) == 0:
        raise ValueError("The passed input list should not be empty.")
    elif is_sequence and any(isinstance(item, (Path, str)) for item in input_files):
        if not all(isinstance(item, (Path, str)) for item in input_files):
            raise ValueError("The passed list did not exclusively contain strings or was a list of lists "
                             "(fragmented trajectory).")
        input_list = input_files
    else:
        raise ValueError('Input "{}" was no string or list of strings.'.format(input_files))

    # convert to list of paths
    input_list = [Path(f) for f in input_list]

    # TODO: this does not handle suffixes like .xyz.gz (rare)
    suffix = input_list[0].suffix

    # check: do all files have the same file type? If not: raise ValueError.
    if not all(item.suffix == suffix for item in input_list):
        raise ValueError('Not all elements in the input list were of the type %s!' % suffix)

    # do all the files exist? If not: raise ValueError listing every offender
    missing = [item for item in input_list if not item.is_file()]
    if missing:
        err_msg = '\n'.join('File %s did not exist or was no file' % item for item in missing)
        raise ValueError('Some of the given input files were directories'
                         ' or did not exist:\n%s' % err_msg)

    featurizer_or_top_provided = featurizer is not None or topology is not None

    # we need to check for h5 first, because of mdtraj custom HDF5 traj format (which is deprecated).
    if suffix in ('.h5', '.hdf5') and not featurizer_or_top_provided:
        # Inspecting every file to tell the two HDF5 flavours apart is potentially
        # expensive (and re-opens each file), so we simply require that no
        # featurizer/topology option is given.
        from pyemma.coordinates.data.h5_reader import H5Reader
        # NOTE(review): this passes the raw ``input_files`` rather than the
        # Path-converted ``input_list`` - confirm that is what H5Reader expects.
        return H5Reader(filenames=input_files, chunk_size=chunksize, **kw)

    from pyemma.coordinates.data import FeatureReader

    # CASE 1.1: file types are MD files
    if FeatureReader.supports_format(suffix):
        # MD files need either a featurizer or a topology file name
        if not featurizer_or_top_provided:
            raise ValueError('The input files were MD files which makes it mandatory to have either a '
                             'Featurizer or a topology file.')

        # NOTE(review): Path.suffix only holds the last extension, so the
        # '.pdb.gz' entry can never match here (see the TODO above).
        if suffix in ('.pdb', '.pdb.gz'):
            raise ValueError('PyEMMA can not read PDB-fake-trajectories. '
                             'Please consider using a sane trajectory format (e.g. xtc, dcd).')

        return FeatureReader(input_list, featurizer=featurizer, topologyfile=topology,
                             chunksize=chunksize)

    if suffix in ('.npy', '.npz'):
        from pyemma.coordinates.data.numpy_filereader import NumPyFileReader
        return NumPyFileReader(input_list, chunksize=chunksize)

    # otherwise we assume that given files are ascii tabulated data
    from pyemma.coordinates.data.py_csv_reader import PyCSVReader
    return PyCSVReader(input_list, chunksize=chunksize, **kw)