class DKISTQueryResponseTable(QueryResponseTable):
    """
    Results of a DKIST Dataset search.
    """

    # Define some class properties to better format the results table.
    # NOTE: the spelling "Recipie" below is part of the column names produced
    # by ``key_map``; the entries here must match those names exactly.
    hide_keys: List[str] = [
        "Storage Bucket",
        "Full Stokes",
        "asdf Filename",
        "Recipie Instance ID",
        "Recipie Run ID",
        "Recipe ID",
        "Movie Filename",
        "Level 0 Frame count",
        "Creation Date",
        "Last Updated",
        "Experiment IDs",
        "Proposal IDs",
        "Preview URL",
    ]

    # These keys are shown in the repr and str representations of this class.
    _core_keys = TableAttribute(default=["Start Time", "End Time", "Instrument", "Wavelength"])

    # Map the keys in the response to human friendly ones.
    key_map: Mapping[str, str] = DefaultMap(None, {
        "asdfObjectKey": "asdf Filename",
        "boundingBox": "Bounding Box",
        "browseMovieObjectKey": "Movie Filename",
        "browseMovieUrl": "Preview URL",
        "bucket": "Storage Bucket",
        "contributingExperimentIds": "Experiment IDs",
        "contributingProposalIds": "Proposal IDs",
        "createDate": "Creation Date",
        "datasetId": "Dataset ID",
        "datasetSize": "Dataset Size",
        "embargoEndDate": "Embargo End Date",
        "endTime": "End Time",
        "experimentDescription": "Experiment Description",
        "exposureTime": "Exposure Time",
        "filterWavelengths": "Filter Wavelengths",
        "frameCount": "Number of Frames",
        "hasAllStokes": "Full Stokes",
        "instrumentName": "Instrument",
        "isDownloadable": "Downloadable",
        "isEmbargoed": "Embargoed",
        "observables": "Observables",
        "originalFrameCount": "Level 0 Frame count",
        "primaryExperimentId": "Primary Experiment ID",
        "primaryProposalId": "Primary Proposal ID",
        "qualityAverageFriedParameter": "Average Fried Parameter",
        "qualityAveragePolarimetricAccuracy": "Average Polarimetric Accuracy",
        "recipeId": "Recipe ID",
        "recipeInstanceId": "Recipie Instance ID",
        "recipeRunId": "Recipie Run ID",
        "startTime": "Start Time",
        "stokesParameters": "Stokes Parameters",
        "targetTypes": "Target Types",
        "updateDate": "Last Updated",
        "wavelengthMax": "Wavelength Max",
        "wavelengthMin": "Wavelength Min",
    })

    @staticmethod
    def _process_table(results: "DKISTQueryResponseTable") -> "DKISTQueryResponseTable":
        """
        Convert raw columns of ``results`` to rich types, in place.

        Time columns are converted to `Time` (only when they contain no
        `None`), unit-bearing columns are wrapped in `Quantity`, and the
        ``"Wavelength Min"`` / ``"Wavelength Max"`` pair is merged into a
        single two-element ``"Wavelength"`` column.

        Parameters
        ----------
        results : DKISTQueryResponseTable
            The table to convert. It is modified in place.

        Returns
        -------
        DKISTQueryResponseTable
            The same ``results`` object that was passed in.
        """
        times = ["Creation Date", "End Time", "Start Time", "Last Updated", "Embargo End Date"]
        units = {
            "Exposure Time": u.s,
            "Wavelength Min": u.nm,
            "Wavelength Max": u.nm,
            "Dataset Size": u.Gibyte,
            "Filter Wavelengths": u.nm,
        }

        for colname in times:
            if colname not in results.colnames:
                continue  # pragma: no cover
            # A column holding any None is left unconverted.
            if not any(v is None for v in results[colname]):
                results[colname] = Time(results[colname])

        for colname, unit in units.items():
            if colname not in results.colnames:
                continue  # pragma: no cover
            results[colname] = u.Quantity(results[colname], unit=unit)

        # Only merge the wavelength limits when both are actually present;
        # every other column above is optional, so these are treated the same
        # way instead of raising KeyError on a response that lacks them.
        wave_cols = ("Wavelength Min", "Wavelength Max")
        if results and all(name in results.colnames for name in wave_cols):
            results["Wavelength"] = u.Quantity(
                [results["Wavelength Min"], results["Wavelength Max"]]).T
            results.remove_columns(wave_cols)

        return results

    @classmethod
    def from_results(cls, results: Iterable[Mapping[str, Any]], *,
                     client: "DKISTDatasetClient") -> "DKISTQueryResponseTable":
        """
        Construct the results table from the API results.

        Parameters
        ----------
        results : iterable of mapping
            One mapping per returned record; each key is translated through
            ``key_map`` to obtain the column name.
        client : DKISTDatasetClient
            Stored on the table as its ``client`` attribute.

        Returns
        -------
        DKISTQueryResponseTable
            The processed table with the ``_core_keys`` columns first and
            all-`None` columns removed.
        """
        # Pivot the per-record mappings into one list of values per column.
        new_results = defaultdict(list)
        for result in results:
            for key, value in result.items():
                new_results[cls.key_map[key]].append(value)

        data = cls._process_table(cls(new_results, client=client))
        data = data._reorder_columns(cls._core_keys.default, remove_empty=True)

        return data
class QueryResponseTable(QTable):
    __doc__ = QTable.__doc__

    Row = QueryResponseRow
    Column = QueryResponseColumn

    client = TableAttribute()
    display_keys = TableAttribute(default=slice(None))
    hide_keys = TableAttribute()

    # Name of the column holding file sizes; subclasses set this to make
    # ``total_size`` usable.
    size_column = None

    def unhide_columns(self):
        """
        Modify this table so that all columns are displayed.

        Returns
        -------
        QueryResponseTable
            This same instance, to allow chaining.
        """
        self.display_keys = slice(None)
        self.hide_keys = None
        return self

    def _reorder_columns(self, first_columns, remove_empty=True):
        """
        Generate a new version of this table with ``first_columns`` at the start.

        Parameters
        ----------
        first_columns : list
            The column names to put at the start of the table.
        remove_empty : bool, optional
            Remove columns where all values are `None`.
            Defaults to ``True``.

        Returns
        -------
        new_table : QueryResponseTable
            A sliced version of this table instance so that the columns are
            reordered.
        """
        all_cols = list(self.colnames)
        # Requested names that are not in the table are silently ignored.
        first_names = [n for n in first_columns if n in all_cols]
        extra_cols = [col for col in all_cols if col not in first_names]
        all_cols = first_names + extra_cols
        new_table = self[[col for col in all_cols if self[col] is not None]]

        if remove_empty:
            # Only object-dtype columns can hold None in every row.
            empty_cols = [
                col.info.name for col in self.itercols()
                if col.info.dtype.kind == 'O' and all(val is None for val in col)
            ]
            new_table.remove_columns(empty_cols)

        return new_table

    @property
    def _display_table(self):
        """
        Apply the display_keys and hide_keys attributes to the table.

        This removes any keys in hide keys and then slices by any keys in
        display_keys to return the correct table.
        """
        keys = list(self.colnames)
        hidden = self.hide_keys
        if hidden:
            # Keep only the keys not in hide keys, preserving column order.
            keys = [key for key in keys if key not in hidden]

        if self.display_keys != slice(None):
            keys = [dk for dk in self.display_keys if dk in keys]

        table = self[keys]
        # The slicing operation resets display and hide keys to default, but we
        # have already applied it
        table.unhide_columns()

        return table

    def __str__(self):
        """Print out human-readable summary of records retrieved"""
        return '\n'.join(self._display_table.pformat(show_dtype=False))

    def __repr__(self):
        """Print out human-readable summary of records retrieved"""
        return object.__repr__(self) + "\n" + str(self._display_table)

    def _repr_html_(self):
        """Return the HTML representation of the display table."""
        return QTable._repr_html_(self._display_table)

    def show(self, *cols):
        """
        Return a table with only ``cols`` present.

        If no ``cols`` are specified, all columns will be shown, including any
        hidden by default.

        This differs slightly from ``QueryResponseTable[cols]`` as it allows
        keys which are not in the table to be requested.
        """
        table = self.copy()
        table.unhide_columns()

        if len(cols) == 0:
            return table

        # Requested columns that do not exist are dropped rather than raising.
        valid_cols = [col for col in cols if col in table.colnames]
        table = table[valid_cols]

        # The slicing operation resets display and hide keys to default, but we
        # want to bypass it here.
        table.unhide_columns()
        return table

    def path_format_keys(self):
        """
        Returns all the names that can be used to format filenames.

        Only names present in the ``response_block_map`` of every row are
        returned. An empty table yields an empty set.

        Each one corresponds to a single column in the table, and the format
        syntax should match the dtype of that column, i.e. for a ``Time``
        object or a ``Quantity``.
        """
        if len(self) == 0:
            return set()

        rbp = set(self[0].response_block_map.keys())
        for row in self[1:]:
            # ``set.intersection`` returns a new set; update in place so the
            # narrowing is actually kept.
            rbp.intersection_update(row.response_block_map.keys())
        return rbp

    def total_size(self):
        """
        Returns the total size of all files in a query.

        Derived classes must set the 'size_column' class attribute to make use
        of this.

        Returns
        -------
        Quantity
            The sum of all positive sizes, expressed with the SI byte prefix
            matching its magnitude and rounded to 3 decimals, or ``nan`` bytes
            when there is no size column or no positive size.
        """
        if self.size_column not in self.colnames:
            return np.nan * u.byte

        sizes = self[self.size_column]
        # Strip negative filesizes
        total = np.nansum(sizes[sizes > 0])
        if not (total > 0 * u.byte):
            return np.nan * u.byte

        # Find the largest power of 1000 that does not exceed the total filesize
        power = 10**(np.floor(np.log10(total.to_value(u.byte)) // 3) * 3)
        # Create mapping from prefix value to prefix name
        prefix_dict = {p[2]: p[0][0] for p in u.si_prefixes}
        # A total below 1000 bytes gives power == 1, which has no SI prefix;
        # fall back to plain bytes instead of raising KeyError.
        prefix_unit = u.Unit(f"{prefix_dict.get(power, '')}byte")
        return total.to(prefix_unit).round(3)
class QueryResponseTable(QTable):
    __doc__ = QTable.__doc__

    Row = QueryResponseRow
    Column = QueryResponseColumn

    client = TableAttribute()
    display_keys = TableAttribute(default=slice(None))
    hide_keys = TableAttribute()

    # This is a work around for https://github.com/astropy/astropy/pull/11217
    # TODO Remove when min astropy version is > 4.2.1
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Re-apply any keyword that names a TableAttribute on this class.
        # Iterate over a copy of the keys because entries are popped.
        for attr in list(kwargs):
            descr = getattr(self.__class__, attr, None)
            if isinstance(descr, TableAttribute):
                setattr(self, attr, kwargs.pop(attr))

    def unhide_columns(self):
        """
        Modify this table so that all columns are displayed.

        Returns
        -------
        QueryResponseTable
            This same instance, to allow chaining.
        """
        self.display_keys = slice(None)
        self.hide_keys = None
        return self

    def _reorder_columns(self, first_columns, remove_empty=True):
        """
        Generate a new version of this table with ``first_columns`` at the start.

        Parameters
        ----------
        first_columns : list
            The column names to put at the start of the table.
        remove_empty : bool, optional
            Remove columns where all values are `None`.
            Defaults to ``True``.

        Returns
        -------
        new_table : QueryResponseTable
            A sliced version of this table instance so that the columns are
            reordered.
        """
        all_cols = list(self.colnames)
        # Requested names that are not in the table are silently ignored.
        first_names = [n for n in first_columns if n in all_cols]
        extra_cols = [col for col in all_cols if col not in first_names]
        all_cols = first_names + extra_cols
        new_table = self[[col for col in all_cols if self[col] is not None]]

        if remove_empty:
            # Only object-dtype columns can hold None in every row.
            empty_cols = [
                col.info.name for col in self.itercols()
                if col.info.dtype.kind == 'O' and all(val is None for val in col)
            ]
            new_table.remove_columns(empty_cols)

        return new_table

    @property
    def _display_table(self):
        """
        Apply the display_keys and hide_keys attributes to the table.

        This removes any keys in hide keys and then slices by any keys in
        display_keys to return the correct table.
        """
        keys = list(self.colnames)
        hidden = self.hide_keys
        if hidden:
            # Keep only the keys not in hide keys, preserving column order.
            keys = [key for key in keys if key not in hidden]

        if self.display_keys != slice(None):
            keys = [dk for dk in self.display_keys if dk in keys]

        table = self[keys]
        # The slicing operation resets display and hide keys to default, but we
        # have already applied it
        table.unhide_columns()

        return table

    def __str__(self):
        """Print out human-readable summary of records retrieved"""
        return '\n'.join(self._display_table.pformat(show_dtype=False))

    def __repr__(self):
        """Print out human-readable summary of records retrieved"""
        return object.__repr__(self) + "\n" + str(self._display_table)

    def _repr_html_(self):
        """Return the HTML representation of the display table."""
        return QTable._repr_html_(self._display_table)

    def show(self, *cols):
        """
        Return a table with only ``cols`` present.

        If no ``cols`` are specified, all columns will be shown, including any
        hidden by default.

        This differs slightly from ``QueryResponseTable[cols]`` as it allows
        keys which are not in the table to be requested.
        """
        table = self.copy()
        table.unhide_columns()

        if len(cols) == 0:
            return table

        # Requested columns that do not exist are dropped rather than raising.
        valid_cols = [col for col in cols if col in table.colnames]
        table = table[valid_cols]

        # The slicing operation resets display and hide keys to default, but we
        # want to bypass it here.
        table.unhide_columns()
        return table

    def path_format_keys(self):
        """
        Returns all the names that can be used to format filenames.

        Only names present in the ``response_block_map`` of every row are
        returned. An empty table yields an empty set.

        Each one corresponds to a single column in the table, and the format
        syntax should match the dtype of that column, i.e. for a ``Time``
        object or a ``Quantity``.
        """
        if len(self) == 0:
            return set()

        rbp = set(self[0].response_block_map.keys())
        for row in self[1:]:
            # ``set.intersection`` returns a new set; update in place so the
            # narrowing is actually kept.
            rbp.intersection_update(row.response_block_map.keys())
        return rbp
class VSOQueryResponseTable(QueryResponseTable):
    # Keys listed here keep their original (lower-case) spelling as column
    # names; every other top-level key is capitalised.
    hide_keys = ['fileid', 'fileurl']

    errors = TableAttribute(default=[])

    @classmethod
    def from_zeep_response(cls, response, *, client, _sort=True):
        """
        Construct a table response from the zeep response.

        Parameters
        ----------
        response
            The records to convert. Passed through ``iter_sort_response``
            when ``_sort`` is true, otherwise iterated as given.
        client
            Stored on the table as its ``client`` attribute.
        _sort : bool, optional
            Private switch, see the comment below.

        Returns
        -------
        VSOQueryResponseTable
            The table with the most useful columns first and all-`None`
            columns removed.
        """
        # _sort is a hack to be able to convert from a legacy QueryResponse to
        # a table response.
        if _sort:
            records = iter_sort_response(response)
        else:
            records = response

        data = []
        for record in records:
            # Keys missing from a record read back as None.
            row = defaultdict(lambda: None)
            for key, value in serialize_object(record).items():
                if not isinstance(value, Mapping):
                    if key == "size":
                        # The raw value is interpreted as kibibytes here and
                        # converted to mebibytes, rounded to 5 decimals.
                        value = (value * u.Kibyte).to(u.Mibyte).round(5)

                    key = key.capitalize() if key not in cls.hide_keys else key
                    row[key] = value
                else:
                    if key == "wave":
                        # Some records in the VSO have 'kev' which astropy
                        # doesn't recognise as a unit, so fix it.
                        waveunit = value['waveunit']
                        waveunit = 'keV' if waveunit == 'kev' else waveunit
                        row["Wavelength"] = None
                        if value['wavemin'] is not None and value['wavemax'] is not None:
                            row["Wavelength"] = u.Quantity(
                                [float(value['wavemin']), float(value['wavemax'])],
                                unit=waveunit)

                        row["Wavetype"] = value['wavetype']
                        continue
                    # Flatten any other nested mapping into "<Key> <Subkey>"
                    # columns; non-None times become "<Subkey> Time" instead.
                    for subkey, subvalue in value.items():
                        key_template = f"{key.capitalize()} {subkey.capitalize()}"
                        if key == "time" and subvalue is not None:
                            key_template = f"{subkey.capitalize()} {key.capitalize()}"
                            subvalue = parse_time(subvalue)
                            # Change the display to the 'T'-less version
                            subvalue.format = 'iso'
                        row[key_template] = subvalue
            data.append(row)

        # Reorder the columns to put the most useful ones first.
        data = cls(data, client=client)
        return data._reorder_columns(['Start Time', 'End Time', 'Source',
                                      'Instrument', 'Type', 'Wavelength'],
                                     remove_empty=True)

    def total_size(self):
        """
        Return the sum of the file sizes in this table, ignoring NaNs.

        ``from_zeep_response`` stores the size under the capitalised column
        name ``'Size'``; the lower-case ``'size'`` is still accepted for
        tables built some other way.

        Returns
        -------
        Quantity or float
            The summed size, or ``nan`` when the table has no size column.
        """
        for name in ('Size', 'size'):
            if name in self.colnames:
                return np.nansum(self[name])
        return np.nan

    def add_error(self, exception):
        """Append ``exception`` to this table's ``errors`` list."""
        self.errors.append(exception)
class Features(Table):
    """A class for holding PAHFIT features and their associated
    parameter information.  Note that each parameter has an associated
    `kind', and that each kind has an associated set of allowable
    parameters (see _kind_params, below).
    """

    TableFormatter = BoundedParTableFormatter
    MaskedColumn = BoundedMaskedColumn

    param_covar = TableAttribute(default=[])

    # Allowed parameter names for each feature kind.
    _kind_params = {'starlight': {'temperature', 'tau'},
                    'dust_continuum': {'temperature', 'tau'},
                    'line': {'wavelength', 'power'},  # 'fwhm', Instrument Pack detail!
                    'dust_feature': {'wavelength', 'fwhm', 'power'},
                    'attenuation': {'model', 'tau', 'geometry'},
                    'absorption': {'wavelength', 'fwhm', 'tau', 'geometry'}}

    _units = {'temperature': u.K, 'wavelength': u.um, 'fwhm': u.um}
    _group_attrs = {'bounds', 'features', 'kind'}  # group-level attributes
    _param_attrs = {'value', 'bounds'}  # Each parameter can have these attributes
    _no_bounds = {'name', 'group', 'geometry', 'model'}  # String attributes (no bounds)

    @classmethod
    def read(cls, file, *args, **kwargs):
        """Read a table from file.

        If reading a YAML file, read it in as a science pack and
        return the new table. Otherwise, use astropy's normal Table
        reader.

        Arguments:
        ----------

          file: the file to read.  Names ending in ``.yaml`` or ``.yml``
            are treated as science packs; path-like objects are accepted
            as well as strings.

        Returns:
        --------

          table: the table that was read.
        """
        # str() lets path-like objects through the suffix test as well.
        if str(file).endswith((".yaml", ".yml")):
            return cls._read_scipack(file)
        else:
            return super().read(file, *args, **kwargs)

    @classmethod
    def _read_scipack(cls, file):
        """Read a science pack specification from YAML file.

        Arguments:
        ----------

          file: the name of the file, either a full valid path, or
            named file in the PAHFIT science_packs directory.

        Returns:
        --------

          table: A filled pahfit.features.Features table.

        Raises:
        -------

          PAHFITFeatureError: if the file cannot be read, or its
            contents do not form a valid science pack.
        """
        feat_tables = {}

        # Fall back to the packaged science packs for bare file names.
        if not os.path.isfile(file):
            pack_path = resource_filename("pahfit", "packs/science")
            file = os.path.join(pack_path, file)
        try:
            with open(file) as fd:
                # NOTE(review): UniqueKeyLoader is defined elsewhere; confirm
                # it derives from a safe loader if packs can come from
                # untrusted sources.
                scipack = yaml.load(fd, Loader=UniqueKeyLoader)
        except IOError as e:
            raise PAHFITFeatureError("Error reading science pack file\n"
                                     f"\t{file}\n\t{repr(e)}") from e
        for (name, elem) in scipack.items():
            try:
                # A live view: it reflects the pops performed below.
                keys = elem.keys()
            except AttributeError as e:
                raise PAHFITFeatureError("Invalid science pack"
                                         f" format at {name}\n\t{file}") from e

            try:
                kind = elem.pop('kind')
            except KeyError as e:
                raise PAHFITFeatureError(f"No kind found for {name}\n\t{file}") from e

            try:
                valid_params = cls._kind_params[kind]
            except KeyError as e:
                raise PAHFITFeatureError(
                    f"Unknown kind {kind} for {name}\n\t{file}") from e
            unknown_params = [x for x in keys
                              if not (x in valid_params or x in cls._group_attrs)]
            if unknown_params:
                raise PAHFITFeatureError(f"Unknown {kind} parameters:"
                                         f" {', '.join(unknown_params)}\n\t{file}")

            hasFeatures = 'features' in elem
            # A parameter given as a list/tuple, or as a dict that is not a
            # {value, bounds} attribute dict, marks a "shortcut" group.
            hasLists = any(k not in cls._group_attrs
                           and (isinstance(v, (tuple, list))
                                or (isinstance(v, dict)
                                    and cls._param_attrs.isdisjoint(v.keys())))
                           for (k, v) in elem.items())
            if hasFeatures and hasLists:
                raise PAHFITFeatureError("A single group cannot contain both 'features'"
                                         f" and parameter list(s): {name}\n\t{file}")
            isGroup = (hasFeatures or hasLists)
            bounds = None
            if isGroup:  # A named group of features
                if 'bounds' in elem:
                    if not isinstance(elem['bounds'], dict):
                        for p in cls._no_bounds:
                            if p in elem:
                                raise PAHFITFeatureError(f"Parameter {p} cannot have "
                                                         f"bounds: {name}\n\t{file}")
                        if sum(p in elem for p in valid_params) > 1:
                            raise PAHFITFeatureError("Groups with simple bounds "
                                                     "can only specify a single "
                                                     f"parameter: {name}\n\t{file}")
                        if hasFeatures:
                            raise PAHFITFeatureError("Groups with simple bounds "
                                                     "cannot specify "
                                                     f"'features': {name}\n\t{file}")
                    bounds = elem.pop('bounds')
                if hasFeatures:  # our group uses a features dict
                    for n, v in elem['features'].items():
                        if bounds and 'bounds' not in v:  # inherit bounds
                            v['bounds'] = bounds
                        cls._add_feature(kind, feat_tables, n, group=name, **v)
                elif hasLists:  # a "shortcut" feature group, using lists
                    llen = []
                    for k, v in elem.items():
                        if k in cls._group_attrs:
                            continue
                        if not isinstance(v, (tuple, list, dict)):
                            raise PAHFITFeatureError(f"All non-group parameters in {name} "
                                                     f"must be lists or dicts:\n\t{file}")
                        llen.append(len(v))

                    if not all(x == llen[0] for x in llen):
                        raise PAHFITFeatureError(f"All parameter lists in group {name} "
                                                 f"must be the same length:\n\t{file}")
                    ngroup = llen[0]
                    feat_names = None
                    # Only values of existing keys are replaced, so the dict
                    # does not change size while being iterated.
                    for k, v in elem.items():
                        if isinstance(v, dict):
                            if not feat_names:  # First names win
                                feat_names = list(v.keys())
                            elem[k] = list(v.values())  # turn back into a value list
                    if not feat_names:  # no names: construct one for each group feature
                        feat_names = [f"{name}{x:02}" for x in range(ngroup)]
                    for i in range(ngroup):  # Iterate over list(s) adding feature
                        v = {k: elem[k][i] for k in valid_params if k in elem}
                        cls._add_feature(kind, feat_tables, feat_names[i],
                                         group=name, bounds=bounds, **v)
                else:
                    raise PAHFITFeatureError(f"Group {name} needs either 'features' or "
                                             f"parameter list(s):\n\t{file}")
            else:  # Just one standalone feature
                cls._add_feature(kind, feat_tables, name, **elem)
        return cls._construct_table(feat_tables)

    @classmethod
    def _add_feature(cls, kind: str, t: dict, name: str, *,
                     bounds=None, group='_none_', **pars):
        """Adds an individual feature to the passed dictionary t.

        Arguments:
        ----------

          kind: the feature kind, a key of _kind_params.

          t: the dictionary to fill, keyed by kind and then by feature
            name.  It is modified in place.

          name: the feature name.

          bounds: feature-level bounds, either a dictionary keyed by
            parameter name, or a single simple bounds value applied to
            every bounded parameter.

          group: the name of the group the feature belongs to.

          pars: the parameters, each either a bare value or a
            dictionary with 'value' and optionally 'bounds'.
            Parameters not valid for this kind are ignored.

        Raises:
        -------

          PAHFITFeatureError: on unknown parameter attributes, a
            missing 'value', bounds on a parameter which cannot have
            them, or a value/bounds pair rejected by value_bounds.
        """
        if kind not in t:
            t[kind] = {}  # group by kind
        if name not in t[kind]:
            t[kind][name] = {}
        t[kind][name]['group'] = group
        t[kind][name]['kind'] = kind
        for (param, val) in pars.items():
            if param not in cls._kind_params[kind]:
                continue
            # Start each parameter from the feature-level bounds, so bounds
            # given for one parameter never carry over to the next one.
            param_bounds = bounds
            if isinstance(val, dict):  # A param attribute dictionary
                unknown_attrs = [x for x in val.keys() if x not in cls._param_attrs]
                if unknown_attrs:
                    raise PAHFITFeatureError("Unknown parameter attributes for"
                                             f" {name} ({kind}, {group}): "
                                             f"{', '.join(unknown_attrs)}")
                if 'value' not in val:
                    raise PAHFITFeatureError("Missing 'value' attribute for "
                                             f"{name} ({kind}, {group})")
                value = val['value']
                if 'bounds' in val:  # individual param bounds
                    if param in cls._no_bounds:
                        raise PAHFITFeatureError(f"Parameter {param} cannot have bounds: "
                                                 f"{name} ({kind}, {group})")
                    param_bounds = val['bounds']
            else:
                value = val  # a bare value
            if isinstance(param_bounds, dict):
                b = param_bounds.get(param)
                if b and param in cls._no_bounds:
                    raise PAHFITFeatureError(f"Parameter {param} cannot have bounds: "
                                             f"{name} ({kind}, {group})")
            else:  # Simple bounds
                b = param_bounds
            try:
                t[kind][name][param] = (value if param in cls._no_bounds
                                        else value_bounds(value, b))
            except ValueError as e:
                raise PAHFITFeatureError("Error initializing value and bounds for"
                                         f" {name} ({kind}, {group}):\n\t{e}") from e

    @classmethod
    def _construct_table(cls, inp: dict):
        """Construct a masked table with units from input dictionary
        INP.  INP is a dictionary keyed by feature kind; each value is
        a dictionary with feature names as the key, and a dictionary of
        feature parameters as value.  Each value in the feature
        parameter dictionary is either a bare value, or whatever
        value_bounds returned for it.

        Returns:
        --------

          table: the per-kind tables stacked into one, with units set
            and an index on 'name'.
        """
        tables = []
        for (kind, features) in inp.items():
            kind_params = cls._kind_params[kind]  # All params for this kind
            rows = []
            for (name, params) in features.items():
                # Fill in any parameter the feature did not specify.
                for missing in kind_params - params.keys():
                    if missing in cls._no_bounds:
                        params[missing] = 0.0
                    else:
                        params[missing] = value_bounds(0.0, bounds=(0.0, None))
                rows.append(dict(name=name, **params))
            table_columns = rows[0].keys()
            t = cls(rows, names=table_columns)
            for p in cls._kind_params[kind]:
                if p not in cls._no_bounds:
                    t[p].info.format = "0.4g"  # Nice format (customized by Formatter)
            tables.append(t)
        tables = vstack(tables)
        for cn, col in tables.columns.items():
            if cn in cls._units:
                col.unit = cls._units[cn]
        tables.add_index('name')
        return tables
class VSOQueryResponseTable(QueryResponseTable):
    # Keys listed here keep their original (lower-case) spelling as column
    # names; every other top-level key is capitalised.
    hide_keys = ['fileid', 'fileurl']

    errors = TableAttribute(default=[])
    size_column = 'Size'

    @classmethod
    def from_zeep_response(cls, response, *, client, _sort=True):
        """
        Build a table response out of a zeep response.

        Parameters
        ----------
        response
            The records to convert. Passed through ``iter_sort_response``
            when ``_sort`` is true, otherwise iterated as given.
        client
            Stored on the table as its ``client`` attribute.
        _sort : bool, optional
            Private switch that exists so a legacy QueryResponse can be
            converted to a table response without re-sorting.

        Returns
        -------
        VSOQueryResponseTable
            The table with parsed time columns, the most useful columns
            first and all-`None` columns removed.
        """
        records = iter_sort_response(response) if _sort else response

        rows = []
        for record in records:
            # Keys missing from a record read back as None.
            row = defaultdict(lambda: None)
            for key, value in serialize_object(record).items():
                if isinstance(value, Mapping):
                    if key == "wave":
                        # Some VSO records carry 'kev', which astropy does not
                        # recognise as a unit, so spell it 'keV'.
                        waveunit = value['waveunit']
                        if waveunit == 'kev':
                            waveunit = 'keV'
                        row["Wavelength"] = None
                        if value['wavemin'] is not None and value['wavemax'] is not None:
                            limits = [float(value['wavemin']), float(value['wavemax'])]
                            row["Wavelength"] = u.Quantity(limits, unit=waveunit)
                        row["Wavetype"] = value['wavetype']
                        continue

                    # Any other nested mapping is flattened to
                    # "<Key> <Subkey>" columns; non-None times are named
                    # "<Subkey> Time" instead.
                    for subkey, subvalue in value.items():
                        if key == "time" and subvalue is not None:
                            parts = (subkey, key)
                        else:
                            parts = (key, subkey)
                        column = " ".join(part.capitalize() for part in parts)
                        row[column] = subvalue
                else:
                    if key == "size":
                        # The raw value is interpreted as kibibytes here and
                        # converted to mebibytes, rounded to 5 decimals.
                        value = (value * u.Kibyte).to(u.Mibyte).round(5)
                    if key not in cls.hide_keys:
                        key = key.capitalize()
                    row[key] = value
            rows.append(row)

        data = cls(rows, client=client)

        # Parse times
        for col in data.colnames:
            if not col.endswith('Time'):
                continue
            try:
                # Try the vectorised call to parse_time first.
                data[col] = parse_time(data[col])
            except Exception:
                # If that fails, parse the dates one at a time. This is needed
                # when VSO returns a variety of different date format strings.
                parsed = []
                masked_rows = []
                for index, raw in enumerate(data[col]):
                    if raw is None:
                        # Insert a dummy time and mask it afterwards.
                        parsed.append(Time(val=0, format='mjd'))
                        masked_rows.append(index)
                    else:
                        parsed.append(parse_time(raw))
                data[col] = Time(parsed)
                data[col][masked_rows] = np.ma.masked
            data[col].format = 'iso'

        # Reorder the columns to put the most useful ones first.
        leading = ['Start Time', 'End Time', 'Source', 'Instrument', 'Type', 'Wavelength']
        return data._reorder_columns(leading, remove_empty=True)

    def add_error(self, exception):
        """Append ``exception`` to this table's ``errors`` list."""
        self.errors.append(exception)