def _check_registered_widgets(self, *args): """Factory helper function""" candidate_widget_types = list() for key in self.registry: if self.registry[key](*args): candidate_widget_types.append(key) n_matches = len(candidate_widget_types) if n_matches == 0: # There is no default client raise NoMatchError("This query was not understood by any clients. Did you miss an OR?") elif n_matches == 2: # If two clients have reported they understand this query, and one # of them is the VSOClient, then we ignore VSOClient. if VSOClient in candidate_widget_types: candidate_widget_types.remove(VSOClient) # Finally check that we only have one match. if len(candidate_widget_types) > 1: candidate_names = [cls.__name__ for cls in candidate_widget_types] raise MultipleMatchError("The following clients matched this query. " "Please make your query more specific.\n" "{}".format(candidate_names)) return candidate_widget_types
def _check_registered_widgets(self, data, meta, **kwargs): candidate_widget_types = list() for key in self.registry: # Call the registered validation function for each registered class if self.registry[key](data, meta, **kwargs): candidate_widget_types.append(key) n_matches = len(candidate_widget_types) if n_matches == 0: if self.default_widget_type is None: raise NoMatchError("No types match specified arguments and no default is set.") else: candidate_widget_types = [self.default_widget_type] elif n_matches > 1: raise MultipleMatchError("Too many candidate types identified ({0})." "Specify enough keywords to guarantee unique type" "identification.".format(n_matches)) # Only one is found WidgetType = candidate_widget_types[0] return WidgetType(data, meta, **kwargs)
def _parse_file(cls, filepath):
    """
    Parses a file - to be implemented in any subclass that may use files.

    This base implementation always raises; a subclass that supports
    reading files is expected to override it.

    Parameters
    ----------
    filepath : `str`
        The path to the file you want to parse.

    Raises
    ------
    NoMatchError
        Always, since no timeseries source has claimed the file here.
    """
    raise NoMatchError(f'Could not find any timeseries sources to parse {filepath}')
def _check_registered_widgets(self, *args): """Factory helper function""" candidate_widget_types = list() for key in self.registry: if self.registry[key](*args): candidate_widget_types.append(key) n_matches = len(candidate_widget_types) if n_matches == 0: # There is no default client raise NoMatchError("This query was not understood by any clients. Did you miss an OR?") return candidate_widget_types
def _parse_path(self, path, **kwargs):
    """
    Read ``path`` via ``parse_path`` and turn every result into timeseries.

    Each entry produced by ``parse_path`` is one of: an existing
    `GenericTimeSeries`, a `pathlib.Path` to hand to a registered source
    class, or a list of (data, header) pairs (one per HDU in a file).

    Returns
    -------
    list
        The timeseries built from every readable result.

    Raises
    ------
    NoMatchError
        If a multi-HDU file matches no source-specific class.
    MultipleMatchError
        If the HDUs of one file match more than one specific class.
    """
    collected = []
    for result in parse_path(path, self._read_file):
        if isinstance(result, GenericTimeSeries):
            collected.append(result)
            continue
        if isinstance(result, pathlib.Path):
            collected.append(self._check_registered_widgets(filepath=result, **kwargs))
            continue

        # Otherwise ``result`` is a list of (data, header) pairs, one
        # entry per HDU in the file.
        hdu_pairs = result
        matched = []
        for pair in hdu_pairs:
            try:
                widget = self._get_matching_widget(meta=pair.header, **kwargs)
                if not widget == GenericTimeSeries:
                    matched.append(widget)
            except (MultipleMatchError, NoMatchError):
                continue

        if not matched:
            # No source-specific class claimed the file; a single
            # data-header pair can still be read generically.
            if len(hdu_pairs) == 1:
                collected.append(GenericTimeSeries(hdu_pairs[0]._data,
                                                   hdu_pairs[0].header))
                continue
            raise NoMatchError("Input read by sunpy.io can not find a "
                               "matching class for reading multiple HDUs")

        if len(set(matched)) > 1:
            raise MultipleMatchError("Multiple HDUs return multiple matching classes.")

        source_cls = matched[0]
        collected += self._parse_arg(source_cls._parse_hdus(hdu_pairs))
    return collected
def _get_matching_widget(self, **kwargs): candidate_widget_types = list() for key in self.registry: # Call the registered validation function for each registered class if self.registry[key](**kwargs): candidate_widget_types.append(key) n_matches = len(candidate_widget_types) if n_matches == 0: if self.default_widget_type is None: raise NoMatchError("No types match specified arguments and no default is set.") else: candidate_widget_types = [self.default_widget_type] elif n_matches > 1: raise MultipleMatchError("Too many candidate types identified ({0}). Specify enough keywords to guarantee unique type identification.".format(n_matches)) # Only one suitable source class is found return candidate_widget_types[0]
def __call__(self, *args, **kwargs):
    """
    Method for running the factory.

    Takes arbitrary arguments and keyword arguments and passes them to a
    sequence of pre-registered types to determine which is the correct
    TimeSeries source type to build.  Arguments args and kwargs are
    passed through to the validation function and to the constructor for
    the final type.  For TimeSeries types, validation function must take
    a data-header pair as an argument.

    Parameters
    ----------
    silence_errors : `bool`, optional
        If set, ignore data-header pairs which cause an exception.

    Notes
    -----
    Extra keyword arguments are passed through to `sunpy.io.read_file`
    such as `memmap` for FITS files.
    """
    # Hack to get around Python 2.x not backporting PEP 3102.
    silence_errors = kwargs.pop('silence_errors', False)

    (data_header_unit_tuples, data_header_pairs,
     already_timeseries, filepaths) = self._parse_args(*args, **kwargs)

    new_timeseries = list()

    # The filepaths for unreadable files
    for filepath in filepaths:
        try:
            new_ts = self._check_registered_widgets(filepath=filepath, **kwargs)
        except (NoMatchError, MultipleMatchError, ValidationFunctionError):
            if not silence_errors:
                raise
        else:
            # Append only on success: previously the append ran even when
            # the exception was silenced, using a stale (or unbound)
            # ``new_ts`` from an earlier iteration.
            new_timeseries.append(new_ts)

    # data_header_pairs is a list of HDUs as read by sunpy.io.
    # For each set of HDUs find the matching class and build the
    # data_header_unit_tuples by calling the _parse_hdus method
    # of the class.
    for pairs in data_header_pairs:
        # Pairs may be x long where x is the number of HDUs in the file.
        headers = [pair.header for pair in pairs]

        types = []
        for header in headers:
            try:
                match = self._get_matching_widget(meta=header, **kwargs)
                if not match == GenericTimeSeries:
                    types.append(match)
            except (MultipleMatchError, NoMatchError):
                continue

        if not types:
            # If no specific classes have been found we can read the data
            # if we only have one data header pair:
            if len(pairs) == 1:
                already_timeseries.append(GenericTimeSeries(pairs[0].data,
                                                            pairs[0].header))
                # Done with this file; without this guard the code below
                # would index into the empty ``types`` list.
                continue
            raise NoMatchError("Input read by sunpy.io can not find a "
                               "matching class for reading multiple HDUs")

        if len(set(types)) > 1:
            raise MultipleMatchError("Multiple HDUs return multiple matching classes.")

        cls = types[0]
        data_header_unit_tuples.append(cls._parse_hdus(pairs))

    # Loop over each registered type and check to see if WidgetType
    # matches the arguments. If it does, use that type.
    for triple in data_header_unit_tuples:
        data, header, units = triple

        # Make a MetaDict from various input types
        meta = header
        if isinstance(meta, astropy.io.fits.header.Header):
            meta = sunpy.io.header.FileHeader(meta)
        meta = MetaDict(meta)

        try:
            new_ts = self._check_registered_widgets(data=data, meta=meta,
                                                    units=units, **kwargs)
        except (NoMatchError, MultipleMatchError, ValidationFunctionError):
            if not silence_errors:
                raise
        else:
            new_timeseries.append(new_ts)

    new_timeseries += already_timeseries

    # Concatenate the timeseries into one if specified.
    concatenate = kwargs.get('concatenate', False)
    if concatenate:
        # Merge all these timeseries into one.
        full_timeseries = new_timeseries.pop(0)
        for timeseries in new_timeseries:
            full_timeseries = full_timeseries.concatenate(timeseries)
        new_timeseries = [full_timeseries]

    # Sanitize any units OrderedDict details
    for timeseries in new_timeseries:
        timeseries._sanitize_units()

    # Only return single time series, not in a list, if we only have one.
    if len(new_timeseries) == 1:
        return new_timeseries[0]
    return new_timeseries
def _parse_args(self, *args, **kwargs):
    """
    Parses an args list for data-header pairs.

    args can contain any mixture of the following entries:

    * tuples of (data, header, unit) (1)
    * data, header not in a tuple (1)
    * filename, which will be read
    * directory, from which all files will be read
    * glob, from which all files will be read
    * url, which will be downloaded and read
    * lists containing any of the above.

    (1) Note that header/unit are optional and in either order, but data
    must be the first entry in each group.

    Example
    -------
    self._parse_args(data, header,
                     (data, header),
                     ['file1', 'file2', 'file3'],
                     'file4',
                     'directory1',
                     '*.fits')
    """
    data_header_unit_tuples = list()
    data_header_pairs = list()
    already_timeseries = list()
    filepaths = list()

    # Take source kwarg if defined.
    # NOTE(review): ``source`` is read but never used in this method -
    # presumably consumed elsewhere via kwargs; confirm before removing.
    source = kwargs.get('source', None)

    # Account for nested lists of items. Simply outputs a single list of
    # items, nested lists are expanded to element level.
    args = expand_list(args)

    # For each of the arguments, handle each of the cases
    i = 0
    while i < len(args):
        arg = args[i]

        # Data given directly as an array, table or dataframe, optionally
        # followed by metadata and/or units arguments.
        if (isinstance(arg, (np.ndarray, Table, pd.DataFrame))):
            data = arg
            units = OrderedDict()
            meta = MetaDict()

            # Convert the data argument into a Pandas DataFrame if needed.
            if isinstance(data, Table):
                # We have an AstroPy Table:
                data, meta, units = self._from_table(data)
            elif isinstance(data, np.ndarray):
                # We have a numpy ndarray; we assume the first column is
                # a datetime-like index.
                data = pd.DataFrame(data=data[:,1:], index=Time(data[:,0]))

            # If there are 1 or 2 more arguments:
            for _ in range(2):
                if (len(args) > i+1):
                    # If that next argument isn't data but is metadata or units:
                    if not isinstance(args[i+1], (np.ndarray, Table, pd.DataFrame)):
                        if self._validate_units(args[i+1]):
                            units.update(args[i+1])
                            i += 1  # an extra increment to account for the units
                        elif self._validate_meta(args[i+1]):
                            # if we have an astropy.io FITS header then convert
                            # to preserve multi-line comments
                            if isinstance(args[i+1], astropy.io.fits.header.Header):
                                args[i+1] = MetaDict(sunpy.io.header.FileHeader(args[i+1]))
                            meta.update(args[i+1])
                            i += 1  # an extra increment to account for the meta

            # Add a 3-tuple for this TimeSeries.
            data_header_unit_tuples.append((data, meta, units))

        # Filepath
        elif (isinstance(arg, six.string_types) and
              os.path.isfile(os.path.expanduser(arg))):
            path = os.path.expanduser(arg)
            read, result = self._read_file(path, **kwargs)
            if read:
                data_header_pairs.append(result)
            else:
                filepaths.append(result)

        # Directory
        elif (isinstance(arg, six.string_types) and
              os.path.isdir(os.path.expanduser(arg))):
            path = os.path.expanduser(arg)
            files = [os.path.join(path, elem) for elem in os.listdir(path)]
            for afile in files:
                # returns a boolean telling us if it were read and either a
                # tuple or the original filepath for reading by a source
                read, result = self._read_file(afile, **kwargs)
                if read:
                    data_header_pairs.append(result)
                else:
                    filepaths.append(result)

        # Glob
        elif (isinstance(arg, six.string_types) and '*' in arg):
            files = glob.glob(os.path.expanduser(arg))
            for afile in files:
                # returns a boolean telling us if it were read and either a
                # tuple or the original filepath for reading by a source
                read, result = self._read_file(afile, **kwargs)
                if read:
                    data_header_pairs.append(result)
                else:
                    filepaths.append(result)

        # Already a TimeSeries
        elif isinstance(arg, GenericTimeSeries):
            already_timeseries.append(arg)

        # A URL
        elif (isinstance(arg,six.string_types) and
              _is_url(arg)):
            default_dir = sunpy.config.get("downloads", "download_dir")
            url = arg
            path = download_file(url, default_dir)
            pairs = self._read_file(path, **kwargs)
            # NOTE(review): ``_read_file`` returns a (read, result) pair;
            # appending ``pairs[1]`` unconditionally assumes the downloaded
            # file was NOT directly readable - the already-read case is
            # never handled here. Confirm this is intended.
            filepaths.append(pairs[1])

        else:
            raise NoMatchError("File not found or invalid input")
        i += 1

    # TODO:
    # In the end, if there are already TimeSeries it should be put in the
    # same order as the input, currently they are not.
    return data_header_unit_tuples, data_header_pairs, already_timeseries, filepaths
def _parse_arg(self, arg, **kwargs):
    """
    Parse a single arg and return a list of timeseries.

    This implementation always raises; presumably the real handling is
    registered/dispatched per argument type elsewhere, so reaching this
    body means the input was not recognised - TODO confirm against the
    dispatch mechanism.

    Raises
    ------
    NoMatchError
        Always, for unrecognised input.
    """
    raise NoMatchError("File not found or invalid input")