Exemplo n.º 1
0
def flatten_run_metadata(md):
    """Return run metadata ``md`` flattened with '.' as key separator.

    The run-defaults key, 'sub_run_spec' and 'tags' are passed to
    ``strax.flatten_dict`` through its ``keep`` argument (presumably
    these entries are left un-flattened -- confirm against
    ``strax.flatten_dict``).
    """
    # NOTE(review): an earlier comment here stated that the tags field is
    # flattened and set to an empty string when missing. Nothing in this
    # function does that; verify whether the caller or flatten_dict does.
    kept_keys = [strax.RUN_DEFAULTS_KEY, 'sub_run_spec', 'tags']
    return strax.flatten_dict(md, separator='.', keep=kept_keys)
Exemplo n.º 2
0
 def run_metadata(self, run_id, projection=None):
     """Load the metadata of run ``run_id`` from its JSON file.

     :param run_id: Run whose metadata file is located through
     ``self._run_meta_path``.
     :param projection: Optional collection of '.'-separated field names.
     When given (and non-empty), the metadata is flattened and only the
     listed fields are returned; otherwise the full document is returned.
     :raises strax.RunMetadataNotAvailable: if the metadata file does
     not exist.
     """
     path = self._run_meta_path(run_id)

     # Bail out early when there is nothing to read
     if not osp.exists(path):
         raise strax.RunMetadataNotAvailable(
             f"No file at {path}, cannot find run metadata for {run_id}")

     with open(path, mode='r') as handle:
         metadata = json.load(handle, object_hook=json_util.object_hook)

     if not projection:
         return metadata

     # Projection keys address nested fields by their flattened name
     flat = strax.flatten_dict(metadata, separator='.')
     return {field: value
             for field, value in flat.items()
             if field in projection}
Exemplo n.º 3
0
    def scan_runs(self, check_available=tuple(), store_fields=tuple()):
        """Update and return self.runs with runs currently available
        in all storage frontends.

        :param check_available: Check whether these data types are available.
        Availability of xxx is stored as a boolean in the xxx_available
        column.
        :param store_fields: Additional fields from run doc to include
        as columns in the dataframe.
        :return: self.runs, a pandas DataFrame with one row per run doc
        found. Runs already reported by an earlier frontend (matched by
        name) are not added again. If there are no storage frontends,
        the dataframe is empty.
        :raises ValueError: if a run doc has neither a name nor a number,
        or if the number has to be derived from a name that is not an
        integer.

        The context options check_available and store_run_fields list
        data types and run fields, respectively, that will always be scanned.
        """
        # Merge explicitly requested fields with the always-present ones
        # and the context defaults, dropping duplicates.
        store_fields = tuple(
            {*strax.to_str_tuple(store_fields),
             'name', 'number', 'tags', 'mode',
             *self.context_config['store_run_fields']})
        check_available = tuple(
            {*strax.to_str_tuple(check_available),
             *self.context_config['check_available']})

        docs = None
        for sf in self.storage:
            _temp_docs = []
            for doc in sf._scan_runs(store_fields=store_fields):
                # If there is no number, make one from the name
                if 'number' not in doc:
                    if 'name' not in doc:
                        raise ValueError(f"Invalid run doc {doc}, contains "
                                         f"neither name nor number.")
                    doc['number'] = int(doc['name'])

                # If there is no name, make one from the number
                doc.setdefault('name', str(doc['number']))

                doc.setdefault('mode', '')

                # Flatten the tags field, if it exists
                doc['tags'] = ','.join(
                    [t['name'] for t in doc.get('tags', [])])

                # Flatten the rest of the doc (mainly in case the mode field
                # is something deeply nested)
                doc = strax.flatten_dict(doc, separator='.')

                _temp_docs.append(doc)

            if _temp_docs:
                new_docs = pd.DataFrame(_temp_docs)
            else:
                # Keep the expected columns even when this frontend
                # reported no runs
                new_docs = pd.DataFrame([], columns=store_fields)

            if docs is None:
                docs = new_docs
            else:
                # Keep only new runs (not found by earlier frontends)
                is_known = np.isin(new_docs['name'], docs['name'])
                docs = pd.concat([docs, new_docs[~is_known]], sort=False)

        if docs is None:
            # No storage frontends at all: fall back to an empty dataframe
            # so the availability columns below can still be assigned and
            # callers always receive a DataFrame rather than None.
            docs = pd.DataFrame([], columns=store_fields)

        self.runs = docs

        for d in tqdm(check_available, desc='Checking data availability'):
            self.runs[d + '_available'] = np.isin(self.runs.name.values,
                                                  self.list_available(d))

        return self.runs