def trigger(self):
    # save file stash file name
    self._result.clear()
    for idx, (name, reading) in enumerate(super().read().items()):
        # Save the actual reading['value'] to disk and create a record
        # in FileStore.
        np.save('{}_{}.npy'.format(self._path_stem, idx), reading['value'])
        datum_id = new_uid()
        self.fs.insert_datum(self._resource_id, datum_id, dict(index=idx))
        # And now change the reading in place, replacing the value with
        # a reference to FileStore.
        reading['value'] = datum_id
        self._result[name] = reading
    delay_time = self.exposure_time
    if delay_time:
        if self.loop.is_running():
            st = SimpleStatus()
            self.loop.call_later(delay_time, st._finished)
            return st
        else:
            ttime.sleep(delay_time)
    return NullStatus()

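# A minimal, self-contained sketch (the toy class and names below are
# hypothetical, not part of the code above) of the status pattern trigger()
# relies on: return a status object immediately and mark it finished later via
# loop.call_later, so the caller can wait on completion without blocking the
# event loop.
import asyncio


class ToyStatus:
    """Toy stand-in for an ophyd-style status object."""

    def __init__(self):
        self.done = False

    def _finished(self):
        # Called by the event loop once the 'exposure' delay has elapsed.
        self.done = True


async def toy_trigger(delay):
    st = ToyStatus()
    asyncio.get_running_loop().call_later(delay, st._finished)
    # Poll until the status resolves; a real caller would register a callback.
    while not st.done:
        await asyncio.sleep(0.01)
    return st


asyncio.run(toy_trigger(0.1))
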
def test_dots_not_allowed_in_keys():
    doc = {'time': 0, 'uid': new_uid()}
    schema_validators[DocumentNames.start].validate(doc)
    # Add a legal key.
    doc.update({'b': 'c'})
    schema_validators[DocumentNames.start].validate(doc)
    # Now add illegal key.
    doc.update({'b.': 'c'})
    with pytest.raises(jsonschema.ValidationError):
        schema_validators[DocumentNames.start].validate(doc)

    doc = {'time': 0,
           'uid': new_uid(),
           'data_keys': {'a': {'source': '',
                               'dtype': 'number',
                               'shape': []}},
           'run_start': new_uid()}
    schema_validators[DocumentNames.descriptor].validate(doc)
    # Add a legal key.
    doc.update({'b': 'c'})
    schema_validators[DocumentNames.descriptor].validate(doc)
    # Now add illegal key.
    doc.update({'b.c': 'd'})
    with pytest.raises(jsonschema.ValidationError):
        schema_validators[DocumentNames.descriptor].validate(doc)

    doc = {'time': 0,
           'uid': new_uid(),
           'exit_status': 'success',
           'reason': '',
           'run_start': new_uid()}
    schema_validators[DocumentNames.stop].validate(doc)
    # Add a legal key.
    doc.update({'b': 'c'})
    schema_validators[DocumentNames.stop].validate(doc)
    # Now add illegal key.
    doc.update({'.b': 'c'})
    with pytest.raises(jsonschema.ValidationError):
        schema_validators[DocumentNames.stop].validate(doc)

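# A short standalone sketch of the mechanism the test above exercises. The
# exact schema shipped by event-model is not reproduced here; this is an
# assumed jsonschema pattern that rejects any top-level key containing a dot.
import jsonschema

no_dots_schema = {
    'type': 'object',
    'patternProperties': {'^([^./]+)$': {}},
    'additionalProperties': False,
}
jsonschema.validate({'plain_key': 1}, no_dots_schema)  # accepted
try:
    jsonschema.validate({'bad.key': 1}, no_dots_schema)
except jsonschema.ValidationError:
    pass  # rejected, matching the behavior asserted in the test
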
def start(self, doc, _md=None):
    """Receive a raw start document, re-emit it for the modified stream"""
    self._stream_start_uid = new_uid()
    _md = _md or dict()
    # Create a new start document with a new uid, start time, and the uid
    # of the original start document. Preserve the rest of the metadata
    # that we retrieved from the start document
    md = ChainMap({'uid': self._stream_start_uid,
                   'original_run_uid': doc['uid'],
                   'time': ttime.time()},
                  _md, doc)
    # Dispatch the start document for anyone subscribed to our Dispatcher
    self.emit(DocumentNames.start, dict(md))
    super().start(doc)

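# A minimal sketch (with made-up values) of how the ChainMap layering above
# resolves key collisions: the first mapping wins, so the re-emitted start
# document carries the new uid and time while inheriting every other field
# from the original document.
from collections import ChainMap

original = {'uid': 'raw-uid', 'time': 1.0, 'plan_name': 'count'}
merged = dict(ChainMap({'uid': 'new-uid',
                        'original_run_uid': original['uid'],
                        'time': 2.0},
                       {},        # stands in for the optional _md layer
                       original))
assert merged['uid'] == 'new-uid'        # new layer shadows the raw uid
assert merged['plan_name'] == 'count'    # untouched metadata passes through
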
def trigger(self):
    # save file stash file name
    self._result.clear()
    for idx, (name, reading) in enumerate(super().read().items()):
        # Save the actual reading['value'] to disk and create a record
        # in FileStore.
        np.save('{}_{}.npy'.format(self._path_stem, idx), reading['value'])
        datum_id = new_uid()
        self.fs.insert_datum(self._resource_id, datum_id, dict(index=idx))
        # And now change the reading in place, replacing the value with
        # a reference to FileStore.
        reading['value'] = datum_id
        self._result[name] = reading
    return NullStatus()

def trigger(self):
    # save file stash file name
    self._result.clear()
    for idx, (name, reading) in enumerate(super().read().items()):
        # Save the actual reading['value'] to disk and create a record
        # in FileStore.
        self.save_func('{}_{}.{}'.format(self._path_stem, idx, self.save_ext),
                       reading['value'])
        datum_id = new_uid()
        self.fs.insert_datum(self._resource_id, datum_id, dict(index=idx))
        # And now change the reading in place, replacing the value with
        # a reference to FileStore.
        reading['value'] = datum_id
        self._result[name] = reading
    return NullStatus()

def stop(self, doc, _md=None):
    """Receive a raw stop document, re-emit it for the modified stream"""
    # Create a new stop document with a new uid, pointing to the correct
    # start document uid, and tally the number of events we have emitted.
    # The rest of the stop information is passed on to the next callback
    _md = _md or dict()
    num_events = dict((stream, len(self._descriptors[stream]))
                      for stream in self._descriptors.keys())
    md = ChainMap(dict(run_start=self._stream_start_uid,
                       time=ttime.time(),
                       uid=new_uid(),
                       num_events=num_events),
                  doc)
    self.emit(DocumentNames.stop, dict(md))
    # Clear the local caches for the run
    self.seq_count = 0
    self.raw_descriptors.clear()
    self._descriptors.clear()
    self._stream_start_uid = None
    super().stop(doc)

def test_push_start_document(capsys):
    """
    Pass the start document to BEC and verify if the scan information
    is printed correctly
    """
    bec = BestEffortCallback()

    uid = new_uid()
    time = ttime.time()
    scan_id = 113435  # Just some arbitrary number

    # Include minimum information needed to print the header
    bec("start", {"scan_id": scan_id, "time": time, "uid": uid})

    captured = capsys.readouterr()
    assert f"Transient Scan ID: {scan_id}" in captured.out, \
        "BestEffortCallback: Scan ID is not printed correctly"
    tt = datetime.fromtimestamp(time).utctimetuple()
    assert f"Time: {ttime.strftime('%Y-%m-%d %H:%M:%S', tt)}" in captured.out, \
        "BestEffortCallback: Scan time is not printed correctly"
    assert f"Persistent Unique Scan ID: '{uid}'" in captured.out, \
        "BestEffortCallback: Scan UID is not printed correctly"

def trigger(self):
    if self.shutter and self._dark_fields and \
            self.shutter.read()['rad']['value'] == 0:
        read_v = {field: {'value': func(), 'timestamp': ttime.time()}
                  for field, func in self._dark_fields.items()
                  if field in self.read_attrs}
        self._result.clear()
        for idx, (name, reading) in enumerate(read_v.items()):
            # Save the actual reading['value'] to disk and create a record
            # in FileStore.
            np.save('{}_{}.npy'.format(self._path_stem, idx),
                    reading['value'])
            datum_id = new_uid()
            self.fs.insert_datum(self._resource_id, datum_id,
                                 dict(index=idx))
            # And now change the reading in place, replacing the value with
            # a reference to FileStore.
            reading['value'] = datum_id
            self._result[name] = reading
        delay_time = self.exposure_time
        if delay_time:
            if self.loop.is_running():
                st = be.SimpleStatus()
                self.loop.call_later(delay_time, st._finished)
                return st
            else:
                ttime.sleep(delay_time)
        return be.NullStatus()
    else:
        return super().trigger()

def parse_hdf5(fn):
    """Parse hdf5 file from the PAL-XFEL beamline into an event stream

    Parameters
    ----------
    fn : str
        The path to the hdf5 file

    Yields
    ------
    name : str
        The name of the document
    doc : dict
        The event model document
    """
    f = h5py.File(fn, 'r')
    suid = new_uid()
    # loop through the scans
    for scans in f.keys():
        # Create start doc
        start_doc = {
            'uid': suid,
            'i0thresh': 2.e9,
            'i0amp': 1.e10,
            'calibration_md': calib_config_dict,
            'time': time.time(),
            'sideloaded': True,
            'composition_string': 'Au',
            'experimenters': ['Tadesse', 'Assefa', 'Jane', 'Doe',
                              'Richard', 'Roe'],
            'bt_wavelength': (12.398 / 9.70803 * 1.0e-10)}
        yield 'start', start_doc

        # Create most of the descriptor
        duid = new_uid()
        descriptor_doc = {
            'uid': duid,
            'name': 'primary',
            'run_start': suid,
            'data_keys': {
                'delay_timestamp': {'source': 'delay_stage',
                                    'dtype': 'float',
                                    'shape': [],
                                    'unit': 'ps'},
                'shot_number': {'source': 'sidewinder',
                                'dtype': 'int',
                                'shape': [],
                                'unit': 'NA'}},
            'time': time.time()}
        yielded_descriptor = False

        # loop through the delay points
        for i, delay_point in enumerate(f[scans].keys()):
            delay = f[format_keys(scans, delay_point)].attrs[
                'delay_time_readback']
            events = []
            timestamps = list(
                f[format_keys(scans, delay_point, 'RayMX')].keys())
            # loop through the shots per delay point
            for shot_number, timestamp in enumerate(timestamps):
                ts = timestamp.strip('timestamp_')
                event = {'uid': new_uid(),
                         'descriptor': duid,
                         'filled': {'image': True},
                         'data': {'shot_number': shot_number,
                                  'delay_timestamp': delay},
                         'timestamps': {'shot_number': ts,
                                        'delay_timestamp': ts},
                         'seq_num': i,
                         'time': time.time()}
                # loop through the detectors
                for data_source in [
                        ff for ff in f[format_keys(scans, delay_point)].keys()
                        if ff in ['RayMX', 'photodiode']]:
                    data_key = format_keys(scans, delay_point, data_source,
                                           timestamp)
                    v = f[data_key].value
                    # fill in missing descriptor info
                    if isinstance(v, np.ndarray):
                        s = v.shape
                        if s == (1,):
                            v = float(v)
                            s = []
                        else:
                            v = v.astype('float32')
                    else:
                        s = []
                    event['data'][key_data_map[data_source]] = v
                    event['timestamps'][key_data_map[data_source]] = int(ts)
                    if not yielded_descriptor:
                        dtype = str(getattr(v, 'dtype', type(v)))
                        descriptor_doc['data_keys'][
                            key_data_map[data_source]] = {
                                'source': data_source,
                                'dtype': dtype,
                                'shape': s}
                events.append(event)
            if not yielded_descriptor:
                yielded_descriptor = True
                yield 'descriptor', descriptor_doc
            # yield the events
            for i, e in enumerate(events):
                print(i, e['data']['shot_number'])
                pprint(e)
                assert i == e['data']['shot_number']
                yield 'event', e
    yield 'stop', {'uid': new_uid(), 'run_start': suid, 'time': time.time()}

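# Hypothetical usage sketch for the parser above: the generator yields
# (name, doc) pairs in event-model order, so any consumer that follows that
# convention can iterate it directly. The file path is a placeholder.
def print_document_names(gen):
    for name, doc in gen:
        print(name, doc.get('uid'))


# print_document_names(parse_hdf5('/path/to/pal_xfel_file.h5'))
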
def parse(file_dir):
    """Parse a folder full of GSAS and FullProf files from the NOMAD
    beamline into an event stream

    Parameters
    ----------
    file_dir : str
        The path to the folder containing the data

    Yields
    ------
    name : str
        The name of the document
    doc : dict
        The event model document
    """
    gsas_root = os.path.join(file_dir, 'GSAS')
    gsas_files = [f for f in os.listdir(gsas_root) if f.endswith('.gsa')]
    for gsas_file in gsas_files:
        suid = new_uid()
        start_doc = {'facility': 'NOMAD',
                     'uid': suid,
                     'sideloaded': True,
                     'time': time.time(),
                     'filename': os.path.splitext(gsas_file)[0]}
        full_prof_root = os.path.join(file_dir, 'fullprof')
        if '_' in gsas_file:
            a = gsas_file.split('_')
        else:
            a = gsas_file.split('.')
        with open(os.path.join(gsas_root, gsas_file), 'r') as f:
            start_doc.update(gsas_header_subparser(f.read()))
        bank_info = {}
        with open(os.path.join(gsas_root, gsas_file), 'r') as f:
            data = f.read().split('\n')
            for bank in range(1, 7):
                bank_info[bank - 1] = parse_bank_data(
                    data[data.index('# Data for spectrum '
                                    ':{}'.format(bank)) - 1])
        print(a)
        start_doc['sample_name'] = a[1]
        start_doc['composition_string'] = a[1]
        if 'gas' in a:
            start_doc.update({'gas': a[3]})
        if 'dry' in a:
            start_doc.update({'dry': True})
        if len(a) > 6 and 'C' in a[6]:
            start_doc.update({'temperature': a[6].replace('C', '')})
        if 'cycle' in gsas_file:
            start_doc.update(
                {'cycle': a[-1].split('cycle')[1].split('.')[0]})
        yield 'start', start_doc

        for bank in range(6):
            duid = new_uid()
            descriptor_doc = {'uid': duid,
                              'name': 'bank {}'.format(bank),
                              'run_start': suid,
                              'data_keys': {
                                  'tof': {'source': 'file',
                                          'dtype': 'array',
                                          'unit': 'time'},
                                  'intensity': {'source': 'file',
                                                'dtype': 'array',
                                                'unit': 'arb'},
                                  'error': {'source': 'file',
                                            'dtype': 'array',
                                            'unit': 'arb'}},
                              'time': time.time()}
            descriptor_doc.update(bank_info[bank])
            yield 'descriptor', descriptor_doc

            full_prof_file_name = gsas_file.replace(
                '.gsa', '-{}.dat'.format(bank))
            tof, intensity, err = np.loadtxt(
                os.path.join(full_prof_root, full_prof_file_name)).T
            event = {'uid': new_uid(),
                     'descriptor': duid,
                     'filled': {'tof': True,
                                'intensity': True,
                                'error': True},
                     'data': {'tof': tof,
                              'intensity': intensity,
                              'error': err},
                     'timestamps': {'tof': time.time(),
                                    'intensity': time.time(),
                                    'error': time.time()},
                     # Each bank descriptor gets exactly one event.
                     'seq_num': 1,
                     'time': time.time()}
            yield 'event', event
        yield 'stop', {'uid': new_uid(),
                       'run_start': suid,
                       'time': time.time()}

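# Hypothetical usage sketch for the parser above: tally how many documents of
# each type a sideloaded folder produces. The directory path is a placeholder.
from collections import Counter


def summarize_stream(gen):
    return Counter(name for name, _doc in gen)


# print(summarize_stream(parse('/path/to/nomad_folder')))
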
def process_event(self, doc, stream_name='primary', id_args=None,
                  config=None):
    """
    Process a modified event document then emit it for the modified stream

    This will pass an Event document to the dispatcher. If we have received
    a new event descriptor from the original stream, or we have received a
    new set of `id_args` or `descriptor_id`, a new descriptor document is
    first issued and passed through to the dispatcher. When issuing a new
    event, the new descriptor is given a new source field.

    Parameters
    ----------
    doc : event
    stream_name : str, optional
        String identifier for a particular stream
    id_args : tuple, optional
        Additional tuple of hashable objects to identify the stream
    config : dict, optional
        Additional configuration information to be included in the event
        descriptor

    Notes
    -----
    Any callback subscribed to the `Dispatcher` will receive these event
    streams. If nothing is subscribed, these documents will not go anywhere.
    """
    id_args = id_args or (doc['descriptor'],)
    config = config or dict()
    # Determine the descriptor id
    desc_id = frozenset((tuple(doc['data'].keys()), stream_name, id_args))
    # If we haven't described this configuration,
    # send a new descriptor document to our subscribers
    if (stream_name not in self._descriptors
            or desc_id not in self._descriptors[stream_name]):
        # Create a new description document for the output of the stream
        data_keys = dict()
        # Parse the event document creating a new description. If the key
        # existed in the original source description, just assume that it
        # is the same type, units and shape. Otherwise do some
        # investigation
        raw_desc = self.raw_descriptors.get(doc['descriptor'], {})
        for key, val in doc['data'].items():
            # Described previously
            if key in raw_desc.get('data_keys', {}):
                key_desc = raw_desc['data_keys'][key]
            # String key
            elif isinstance(val, str):
                key_desc = {'dtype': 'string', 'shape': []}
            # Iterable
            elif isinstance(val, Iterable):
                key_desc = {'dtype': 'array', 'shape': np.shape(val)}
            # Number
            else:
                key_desc = {'dtype': 'number', 'shape': []}
            # Modify the source
            key_desc['source'] = 'Stream'
            # Store in our new descriptor
            data_keys[key] = key_desc
        # Create our complete description document
        desc = ChainMap({'uid': new_uid(),
                         'time': ttime.time(),
                         'run_start': self._stream_start_uid,
                         'data_keys': data_keys,
                         'configuration': config,
                         'object_keys': {'stream': list(data_keys.keys())}},
                        raw_desc)
        # Store information about our descriptors
        desc = dict(desc)
        if stream_name not in self._descriptors:
            self._descriptors[stream_name] = dict()
        self._descriptors[stream_name][desc_id] = desc
        # Emit the document to all subscribers
        self.emit(DocumentNames.descriptor, desc)

    # Clean the Event document produced by the graph network. The data is
    # left untouched, but the relevant uids, timestamps and seq_num are
    # modified so that this event is not confused with the raw data stream
    self.seq_count += 1
    desc_uid = self._descriptors[stream_name][desc_id]['uid']
    current_time = ttime.time()
    evt = ChainMap({'uid': new_uid(),
                    'descriptor': desc_uid,
                    'timestamps': dict((key, current_time)
                                       for key in doc['data'].keys()),
                    'seq_num': self.seq_count,
                    'time': current_time},
                   doc)
    # Emit the event document
    self.emit(DocumentNames.event, dict(evt))

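# A minimal illustration (with made-up values) of the descriptor cache key
# computed above: events carrying the same data keys, stream name and id_args
# produce equal frozensets, so only one descriptor is emitted per unique
# combination.
key_a = frozenset((('x', 'y'), 'primary', ('raw-descriptor-uid',)))
key_b = frozenset((('x', 'y'), 'primary', ('raw-descriptor-uid',)))
assert key_a == key_b   # same combination -> same cached descriptor
key_c = frozenset((('x', 'z'), 'primary', ('raw-descriptor-uid',)))
assert key_c != key_a   # different data keys -> a new descriptor is emitted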