Example #1
0
    def _check_registered_widgets(self, *args):
        """Factory helper function"""
        candidate_widget_types = list()
        for key in self.registry:

            if self.registry[key](*args):
                candidate_widget_types.append(key)

        n_matches = len(candidate_widget_types)
        if n_matches == 0:
            # There is no default client
            raise NoMatchError("This query was not understood by any clients. Did you miss an OR?")
        elif n_matches == 2:
            # If two clients have reported they understand this query, and one
            # of them is the VSOClient, then we ignore VSOClient.
            if VSOClient in candidate_widget_types:
                candidate_widget_types.remove(VSOClient)

        # Finally check that we only have one match.
        if len(candidate_widget_types) > 1:
            candidate_names = [cls.__name__ for cls in candidate_widget_types]
            raise MultipleMatchError("The following clients matched this query. "
                                     "Please make your query more specific.\n"
                                     "{}".format(candidate_names))

        return candidate_widget_types
Example #2
0
    def _check_registered_widgets(self, data, meta, **kwargs):

        candidate_widget_types = list()

        for key in self.registry:

            # Call the registered validation function for each registered class
            if self.registry[key](data, meta, **kwargs):
                candidate_widget_types.append(key)

        n_matches = len(candidate_widget_types)

        if n_matches == 0:
            if self.default_widget_type is None:
                raise NoMatchError("No types match specified arguments and no default is set.")
            else:
                candidate_widget_types = [self.default_widget_type]
        elif n_matches > 1:
            raise MultipleMatchError("Too many candidate types identified ({0})."
                                     "Specify enough keywords to guarantee unique type"
                                     "identification.".format(n_matches))

        # Only one is found
        WidgetType = candidate_widget_types[0]

        return WidgetType(data, meta, **kwargs)
Example #3
0
    def _parse_file(cls, filepath):
        """
        Parses a file - to be implemented in any subclass that may use files.

        Parameters
        ----------
        filepath : `str`
            The path to the file you want to parse.

        Raises
        ------
        NoMatchError
            Always raised by this base implementation; subclasses that
            support file input must override this method.
        """
        message = f'Could not find any timeseries sources to parse {filepath}'
        raise NoMatchError(message)
Example #4
0
    def _check_registered_widgets(self, *args):
        """Factory helper function"""
        candidate_widget_types = list()
        for key in self.registry:
            if self.registry[key](*args):
                candidate_widget_types.append(key)

        n_matches = len(candidate_widget_types)
        if n_matches == 0:
            # There is no default client
            raise NoMatchError("This query was not understood by any clients. Did you miss an OR?")

        return candidate_widget_types
Example #5
0
    def _parse_path(self, path, **kwargs):
        """
        Parse a single path into a list of timeseries.

        ``parse_path`` yields items that may be an already-built
        `GenericTimeSeries`, a `pathlib.Path` (a file the generic reader
        could not handle), or a list of (data, header) pairs, one per HDU.
        """
        results = parse_path(path, self._read_file)
        all_ts = []

        # r can be either a TimeSeries, path, or a data, header pair
        for r in results:
            if isinstance(r, GenericTimeSeries):
                all_ts += [r]
            elif isinstance(r, pathlib.Path):
                # The generic reader could not parse this file; hand it to
                # the factory so a source-specific class can claim it.
                all_ts += [
                    self._check_registered_widgets(filepath=r, **kwargs)
                ]
            else:
                pairs = r
                # Pairs may be x long where x is the number of HDUs in the file.
                headers = [pair.header for pair in pairs]

                # Collect every source class that claims one of the headers,
                # skipping the catch-all GenericTimeSeries match.
                types = []
                for header in headers:
                    try:
                        match = self._get_matching_widget(meta=header,
                                                          **kwargs)
                        if not match == GenericTimeSeries:
                            types.append(match)
                    except (MultipleMatchError, NoMatchError):
                        # An unclaimed or ambiguous header is skipped here;
                        # the checks below decide whether that is fatal.
                        continue

                if not types:
                    # If no specific classes have been found we can read the data
                    # if we only have one data header pair:
                    if len(pairs) == 1:
                        # NOTE(review): this reads the private ``_data``
                        # attribute of the pair object — confirm the pair
                        # type exposes it; other call sites use ``.data``.
                        all_ts += [
                            GenericTimeSeries(pairs[0]._data, pairs[0].header)
                        ]
                        continue
                    else:
                        raise NoMatchError(
                            "Input read by sunpy.io can not find a "
                            "matching class for reading multiple HDUs")
                if len(set(types)) > 1:
                    raise MultipleMatchError(
                        "Multiple HDUs return multiple matching classes.")

                cls = types[0]

                # The single matching class splits the HDUs into
                # (data, header, units) tuples, which are then parsed
                # into timeseries.
                data_header_unit_tuple = cls._parse_hdus(pairs)
                all_ts += self._parse_arg(data_header_unit_tuple)

        return all_ts
Example #6
0
    def _get_matching_widget(self, **kwargs):
        candidate_widget_types = list()

        for key in self.registry:
            # Call the registered validation function for each registered class
            if self.registry[key](**kwargs):
                candidate_widget_types.append(key)

        n_matches = len(candidate_widget_types)

        if n_matches == 0:
            if self.default_widget_type is None:
                raise NoMatchError("No types match specified arguments and no default is set.")
            else:
                candidate_widget_types = [self.default_widget_type]
        elif n_matches > 1:
            raise MultipleMatchError("Too many candidate types identified ({0}).  Specify enough keywords to guarantee unique type identification.".format(n_matches))

        # Only one suitable source class is found
        return candidate_widget_types[0]
Example #7
0
    def __call__(self, *args, **kwargs):
        """ Method for running the factory. Takes arbitrary arguments and
        keyword arguments and passes them to a sequence of pre-registered types
        to determine which is the correct TimeSeries source type to build.

        Arguments args and kwargs are passed through to the validation
        function and to the constructor for the final type.  For TimeSeries
        types, validation function must take a data-header pair as an argument.

        Parameters
        ----------

        silence_errors : `bool`, optional
            If set, ignore data-header pairs which cause an exception.

        Notes
        -----
        Extra keyword arguments are passed through to `sunpy.io.read_file` such
        as `memmap` for FITS files.
        """

        # Hack to get around Python 2.x not backporting PEP 3102.
        silence_errors = kwargs.pop('silence_errors', False)

        (data_header_unit_tuples, data_header_pairs,
         already_timeseries, filepaths) = self._parse_args(*args, **kwargs)

        new_timeseries = list()

        # The filepaths for unreadable files
        for filepath in filepaths:
            try:
                new_ts = self._check_registered_widgets(filepath=filepath, **kwargs)
            except (NoMatchError, MultipleMatchError, ValidationFunctionError):
                if not silence_errors:
                    raise
                # BUG FIX: previously the append below ran even when the
                # lookup failed, appending an unbound or stale ``new_ts``.
                continue

            new_timeseries.append(new_ts)

        # data_header_pairs is a list of HDUs as read by sunpy.io
        # For each set of HDus find the matching class and read the
        # data_header_unit_tuples by calling the _parse_hdus method
        # of the class.
        for pairs in data_header_pairs:
            # Pairs may be x long where x is the number of HDUs in the file.
            headers = [pair.header for pair in pairs]

            # Source classes (other than the generic fallback) that claim
            # at least one of the headers.
            types = []
            for header in headers:
                try:
                    match = self._get_matching_widget(meta=header, **kwargs)
                    if not match == GenericTimeSeries:
                        types.append(match)
                except (MultipleMatchError, NoMatchError):
                    continue

            if not types:
                # If no specific classes have been found we can read the data
                # if we only have one data header pair:
                if len(pairs) == 1:
                    already_timeseries.append(GenericTimeSeries(pairs[0].data,
                                                                pairs[0].header))
                    # BUG FIX: without this ``continue`` the ``types[0]``
                    # below raised IndexError on the empty list.
                    continue
                else:
                    raise NoMatchError("Input read by sunpy.io can not find a "
                                       "matching class for reading multiple HDUs")
            if len(set(types)) > 1:
                raise MultipleMatchError("Multiple HDUs return multiple matching classes.")

            cls = types[0]

            data_header_unit_tuples.append(cls._parse_hdus(pairs))

        # Loop over each registered type and check to see if WidgetType
        # matches the arguments.  If it does, use that type
        for triple in data_header_unit_tuples:
            data, header, units = triple
            # Make a MetaDict from various input types
            meta = header
            if isinstance(meta, astropy.io.fits.header.Header):
                meta = sunpy.io.header.FileHeader(meta)
            meta = MetaDict(meta)

            try:
                new_ts = self._check_registered_widgets(data=data, meta=meta,
                                                        units=units, **kwargs)
            except (NoMatchError, MultipleMatchError, ValidationFunctionError):
                if not silence_errors:
                    raise
                # BUG FIX: skip this tuple instead of appending a stale result.
                continue

            new_timeseries.append(new_ts)

        new_timeseries += already_timeseries

        # Concatenate the timeseries into one if specified.
        concatenate = kwargs.get('concatenate', False)
        if concatenate:
            # Merge all these timeseries into one.
            full_timeseries = new_timeseries.pop(0)
            for timeseries in new_timeseries:
                full_timeseries = full_timeseries.concatenate(timeseries)

            new_timeseries = [full_timeseries]

        # Sanitize any units OrderedDict details
        for timeseries in new_timeseries:
            timeseries._sanitize_units()

        # Only return single time series, not in a list if we only have one.
        if len(new_timeseries) == 1:
            return new_timeseries[0]
        return new_timeseries
Example #8
0
    def _parse_args(self, *args, **kwargs):
        """
        Parses an args list for data-header pairs.  args can contain any
        mixture of the following entries:
        * tuples of (data, header, unit) (1)
        * data, header not in a tuple (1)
        * filename, which will be read
        * directory, from which all files will be read
        * glob, from which all files will be read
        * url, which will be downloaded and read
        * lists containing any of the above.

        (1) Note that header/unit are optional and in either order, but data
        must be the first entry in each group.

        Example
        -------
        self._parse_args(data, header,
                         (data, header),
                         ['file1', 'file2', 'file3'],
                         'file4',
                         'directory1',
                         '*.fits')

        """

        data_header_unit_tuples = list()
        data_header_pairs = list()
        already_timeseries = list()
        filepaths = list()

        # Take source kwarg if defined
        # NOTE(review): ``source`` is currently unused in this method;
        # kept for interface stability — confirm whether it can be dropped.
        source = kwargs.get('source', None)

        # Account for nested lists of items. Simply outputs a single list of
        # items, nested lists are expanded to element level.
        args = expand_list(args)

        # For each of the arguments, handle each of the cases
        i = 0
        while i < len(args):
            arg = args[i]

            # Data-header pair in a tuple
            if (isinstance(arg, (np.ndarray, Table, pd.DataFrame))):
                # Assume a Pandas Dataframe is given
                data = arg
                units = OrderedDict()
                meta = MetaDict()

                # Convert the data argument into a Pandas DataFrame if needed.
                if isinstance(data, Table):
                    # We have an AstroPy Table:
                    data, meta, units = self._from_table(data)
                elif isinstance(data, np.ndarray):
                    # We have a numpy ndarray. We assume the first column is a dt index
                    data = pd.DataFrame(data=data[:,1:], index=Time(data[:,0]))

                # If there are 1 or 2 more arguments:
                for _ in range(2):
                    if (len(args) > i+1):
                        # If that next argument isn't data but is metadata or units:
                        if not isinstance(args[i+1], (np.ndarray, Table, pd.DataFrame)):
                            if self._validate_units(args[i+1]):
                                units.update(args[i+1])
                                i += 1  # an extra increment to account for the units
                            elif self._validate_meta(args[i+1]):
                                # if we have an astropy.io FITS header then convert
                                # to preserve multi-line comments
                                if isinstance(args[i+1], astropy.io.fits.header.Header):
                                    args[i+1] = MetaDict(sunpy.io.header.FileHeader(args[i+1]))
                                meta.update(args[i+1])
                                i += 1  # an extra increment to account for the meta

                # Add a 3-tuple for this TimeSeries.
                data_header_unit_tuples.append((data, meta, units))

            # Filepath
            elif (isinstance(arg, six.string_types) and
                  os.path.isfile(os.path.expanduser(arg))):

                path = os.path.expanduser(arg)

                read, result = self._read_file(path, **kwargs)

                if read:
                    data_header_pairs.append(result)
                else:
                    filepaths.append(result)

            # Directory
            elif (isinstance(arg, six.string_types) and
                  os.path.isdir(os.path.expanduser(arg))):

                path = os.path.expanduser(arg)
                files = [os.path.join(path, elem) for elem in os.listdir(path)]
                for afile in files:
                    # returns a boolean telling us if it were read and either a
                    # tuple or the original filepath for reading by a source
                    read, result = self._read_file(afile, **kwargs)
                    if read:
                        data_header_pairs.append(result)
                    else:
                        filepaths.append(result)

            # Glob
            elif (isinstance(arg, six.string_types) and '*' in arg):

                files = glob.glob(os.path.expanduser(arg))

                for afile in files:
                    # returns a boolean telling us if it were read and either a
                    # tuple or the original filepath for reading by a source
                    read, result = self._read_file(afile, **kwargs)
                    if read:
                        data_header_pairs.append(result)
                    else:
                        filepaths.append(result)

            # Already a TimeSeries
            elif isinstance(arg, GenericTimeSeries):
                already_timeseries.append(arg)

            # A URL
            elif (isinstance(arg,six.string_types) and
                  _is_url(arg)):
                default_dir = sunpy.config.get("downloads", "download_dir")
                url = arg
                path = download_file(url, default_dir)
                # BUG FIX: route the downloaded file exactly like the local
                # file branches above.  Previously the (read, result) tuple
                # was indexed blindly (``pairs[1]``), so successfully-read
                # downloads were misfiled into ``filepaths``.
                read, result = self._read_file(path, **kwargs)
                if read:
                    data_header_pairs.append(result)
                else:
                    filepaths.append(result)

            else:
                raise NoMatchError("File not found or invalid input")
            i += 1

        # TODO:
        # In the end, if there are already TimeSeries it should be put in the
        # same order as the input, currently they are not.
        return data_header_unit_tuples, data_header_pairs, already_timeseries, filepaths
Example #9
0
 def _parse_arg(self, arg, **kwargs):
     """
     Parse a single arg and return a list of timeseries.

     This base implementation recognises no input types and always raises.
     """
     raise NoMatchError("File not found or invalid input")