def find_clips_for_keyword(keyword, use_only_video=False):
    """Build a VGrid widget that captions the annotations of matching recordings.

    Each entry of the module-level ``video_metadata`` that has an annotation
    file is opened with ``eeghdf.Eeghdf``.  A recording is kept when
    ``keyword`` is falsy or occurs (case-insensitively) in at least one of
    its annotation texts.  For a kept recording *every* annotation becomes a
    caption interval, not only the ones that matched.

    Args:
        keyword: Substring to search for in the annotation texts.  A falsy
            value (``""`` or ``None``) selects every readable recording.
        use_only_video: When true, entries whose ``"only_video"`` value is
            falsy are skipped.

    Returns:
        A ``VGridWidget`` built from the collected interval sets.

    Note:
        An annotation file that fails to open is printed and then DELETED
        from disk.  Only ``Exception`` subclasses trigger this, so pressing
        Ctrl-C (``KeyboardInterrupt``) or a ``SystemExit`` while a file is
        being opened no longer removes it.
    """
    ism = {}
    # Lower-case the keyword once instead of once per annotation text.
    needle = keyword.lower() if keyword else None

    for vm in tqdm(video_metadata):
        if not vm["annotation_filename"] or (not vm["only_video"]
                                             and use_only_video):
            continue
        try:
            h5 = eeghdf.Eeghdf(vm["annotation_filename"])
        except Exception:
            # Best-effort cleanup kept from the original: report the
            # unreadable file, remove it, and move on to the next entry.
            print(vm["annotation_filename"])
            os.remove(vm["annotation_filename"])
            continue

        # Annotation starts are stored in units of 100 ns; convert to seconds.
        starts = [start / 10**7 for start in h5._annotation_start100ns]
        texts = h5._annotation_text

        if needle is None or any(needle in text.lower() for text in texts):
            interval_set = IntervalSet([
                Interval(
                    Bounds3D(start, start + 5),  # we set the duration
                    {
                        'spatial_type': SpatialType_Caption(">>" + text + "\n"),
                        'metadata': {}
                    })
                for start, text in zip(starts, texts)
            ])
            ism[vm["id"]] = interval_set

    print(
        f"Found {len(ism)} videos with keyword {keyword} in the annotations.")

    vgrid_spec = VGridSpec(video_meta=video_metadata_wrapper,
                           vis_format=VideoBlockFormat(imaps=[('bboxes', ism)]),
                           video_endpoint='http://localhost:8080')
    return VGridWidget(vgrid_spec=vgrid_spec.to_json_compressed())
def test_reader_duration():
    """Check the reported duration against samples-per-channel / sample rate.

    Opens ``EEGFILE2`` and asserts that the squared difference between
    ``duration_seconds`` and the value computed from the sample count and
    the sample frequency is below 1.0.
    """
    reader = eeghdf.Eeghdf(EEGFILE2)
    dur = reader.duration_seconds
    # example: 446000/200 = 2230
    calc_dur = reader.number_samples_per_channel / reader.sample_frequency
    delta = dur - calc_dur
    print('dur:', dur, 'calc_dur:', calc_dur)
    # Squaring makes the tolerance check independent of the sign of delta.
    assert delta * delta < 1.0
def test_phys_signals_non_zero_offset_all():
    """Compare ``phys_signals`` indexing with the synthetic reference array.

    A synthetic file is created, its writer handle is closed, and the file
    'synthetic.eeg.h5' is re-opened through ``eeghdf.Eeghdf``.  For every
    index expression in ``indexing2D_tests_constrained`` the result must
    have the same shape as, and lie within 0.1 of, the reference values.
    """
    synthetic = create_synthetic_eeghdf_file()
    expected = synthetic['U']  # original "phys_signals"
    # synthetic['D'] would be the original "digital" samples (not used here)
    synthetic['hf'].close()

    reader = eeghdf.Eeghdf('synthetic.eeg.h5')
    for ss in indexing2D_tests_constrained:
        res = reader.phys_signals[ss]
        tar = expected[ss]
        print('index:', ss, 'res.shape:', res.shape, 'tar.shape:', tar.shape)
        print('tar:', tar)
        print('res:', res)
        shapes_agree = np.all(res.shape == tar.shape)
        assert shapes_agree
        values_agree = np.all(np.abs(res - tar) < 0.1)
        assert values_agree
def test_min_maxes():
    """Check the physical and digital min/max arrays read from ``EEGFILE1``.

    The physical mins and maxs must all be truthy (non-zero), and the
    digital maxs/mins must equal 36 copies of 32767 / -32768 respectively.
    """
    reader = eeghdf.Eeghdf(EEGFILE1)
    n_expected = 36  # length of the expected digital min/max arrays

    assert np.all(reader.signal_physical_mins)
    assert np.all(reader.signal_physical_maxs)

    expected_maxs = np.array([32767] * n_expected)
    assert np.all(reader.signal_digital_maxs == expected_maxs)

    expected_mins = np.array([-32768] * n_expected)
    assert np.all(reader.signal_digital_mins == expected_mins)
import stacklineplot  # local copy of eegvis.stacklineplot

# -

# Make all the figures bigger and easier to see in this notebook
# matplotlib.rcParams['figure.figsize'] = (18.0, 12.0)
FIGSIZE = (12.0, 8.0)  # use with %matplotlib inline
matplotlib.rcParams["figure.figsize"] = FIGSIZE

# ### Access via eeghdf library
# We have written a helper library, eeghdf, to conveniently access these hdf5 files.
# Note that you are not required to use it: all the data can also be accessed via hdf5 libraries.

# +
# first open the hdf5 file
eegf = eeghdf.Eeghdf("../data/absence_epilepsy.eeghdf")
# show the groups at the root of the tree as a list
# NOTE(review): no statement follows the comment above in this cell.
# -

# We can focus on the patient group and access it via hdf['patient'] as if it was a python dictionary. Here are the key,value pairs in that group. Note that the patient information has been anonymized. Everyone is given the same set of birthdays. This shows that this file is for Subject 2619, who is male.

# +
# here is some basic info
print(f"eegf.file_name: {eegf.file_name}")
print(f"eegf.age_years: {eegf.age_years}")
print(f"eegf.number_channels: {eegf.number_channels}")
# label fixed: was misspelled "sample_frquency" and lacked the "eegf." prefix
print(f"eegf.sample_frequency: {eegf.sample_frequency}")
print(f"eegf.patient: {eegf.patient}")
# current testing based upon an anaconda=5.2 conda environment
# note: if you get an h5py deprecation warning, updating h5py to version 2.8 may prevent it
# for example: conda update -c anaconda h5py OR conda install -c anaconda h5py=2.8

# %%
# `display` and `HTML` are imported from the public IPython.display module;
# importing `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML
# widen the notebook container to the full browser width
display(HTML("<style>.container { width:100% !important; }</style>"))

# %%
ARCHIVEDIR = r'../../eeghdf/data/'
#EEGFILE = ARCHIVEDIR + 'spasms.eeghdf'
EEGFILE = ARCHIVEDIR + 'absence_epilepsy.eeghdf'

# %%
hf = eeghdf.Eeghdf(EEGFILE)

# %% slideshow={"slide_type": "slide"}
eegbrow = nb_eegview.EeghdfBrowser(hf,
                                   montage='double banana',
                                   start_seconds=1385,
                                   plot_width=1800,
                                   plot_height=800)

# %%
eegbrow.show()

# %%
f = eegbrow._highpass_cache['5 Hz']  # grab one of the filters

# %%
# import eegvis.stacklineplot import eegvis.montageview as montageview import eegvis.stackplot_bokeh as sbokplot from bokeh.io import output_notebook, push_notebook import bokeh.plotting as bplt from bokeh.plotting import show output_notebook() ARCHIVEDIR = r'../../eeghdf/data' EEGFILE = os.path.join(ARCHIVEDIR, 'spasms.eeghdf') # %% slideshow={"slide_type": "slide"} #hdf = h5py.File('./archive/YA2741BS_1-1+.eeghdf') # 5mo boy print(EEGFILE) hf = eeghdf.Eeghdf(EEGFILE) # absence 10yo # %% hf.electrode_labels # %% hf.shortcut_elabels # %% # %% slideshow={"slide_type": "slide"} tmp = sbokplot.IpyHdfEegPlot( hf, page_width_seconds=15, showchannels=(0, 19)) # doing this just to make the labels # %% [markdown] slideshow={"slide_type": "slide"}
import scikits.samplerate as sk_samplerate
import eeghdf
import eegvis.stacklineplot as stackplot

#%%
# check versions
# Imported here so the version lookup works even if only `plt` is bound.
import matplotlib

print('scikits.samplerate (Secret Rabbit code) version:', sk_samplerate.__version__)
print('scipy:', scipy.__version__)
# was print('matplotlib.__version__'), which printed the literal text
print('matplotlib:', matplotlib.__version__)

#%%
plt.rcParams['figure.figsize'] = (24, 9)

#%%
hf = eeghdf.Eeghdf('/home/clee/eegml/eeg-hdfstorage/data/absence_epilepsy.eeghdf')
hf.phys_signals.shape  # bare expression: only displayed when run interactively
# take the first 30 rows and first 100000 columns of the physical signals
eegsig = hf.phys_signals[0:30, 0:100000]
eegsigt = eegsig.transpose()

#%%
fs0 = hf.sample_frequency  # usually 200
fs1 = 156
fs2 = 100
fs3 = 50
A = 0
B = 5

#%%
stackplot.stackplot_t(eegsigt[0:int(20000), 0:5])
cur_data = data[:,n*sfreq*2:(n+1)*sfreq*2] segments[:,n,:] = cur_data kurt[:,n] = univariate.compute_kurtosis(cur_data) s[:,n] = univariate.compute_line_length(cur_data) H[:,n] = univariate.compute_spect_entropy(sfreq, cur_data, psd_method='welch') #Average across channels kurt_avg = np.mean(kurt,axis=0) s_avg = np.mean(s,axis=0)*np.power(10,6) H_avg = np.mean(H,axis=0) return kurt_avg, s_avg, H_avg #Abnormal Recording raw_fname = 'CA7551E5_1-3+.eeghdf' hf = eeghdf.Eeghdf(raw_fname) raw, info, channels = convert.hdf2mne(hf) #Extract actual data data, times = raw[:] sfreq = int(np.rint(raw.info['sfreq'])) ab_kurt, ab_length, ab_ent = feature_extraction(data, sfreq) print('Abnormal Done') #Normal Recording raw_fname = 'CA84303Q_1-1+.eeghdf' hf = eeghdf.Eeghdf(raw_fname)
# work on adding annotations starts_sec = [1e-7 * t100ns for t100ns in hf._annotation_start100ns] mne_annot = mne.Annotations(onset=starts_sec, duration=hf._annotation_durations_sec, description=hf._annotation_text) customraw = mne.io.RawArray(data, info) customraw.set_annotations(mne_annot) return customraw, info, useful_channels if __name__ == '__main__': #%% hf = eeghdf.Eeghdf(DATAPATH + '/absence_epilepsy.eeghdf') channel_number, num_samples = hf.phys_signals.shape print('original shape:', (channel_number, num_samples)) print('number channels:', hf.number_channels) #%% # the eeghdf annotations currently only have useful info about start # time and description. The duration field seems to always be cut # off but that may not always be the case # oaccording to the edf spec # the durations are stored as text (ascii) numbers in seconds # if they are null I will consider duration = 0 # hf.hdf['record-0']['edf_annotations'] raw, info, chans = ehdf2mne(hf)
# invert recon = pywt.idwt(ws1a, ws1d, wtype) plt.plot(recon) #%% error = np.sqrt((recon - s1)**2) plt.plot(error) #%% f'error.max(): {error.max()}' #%% #raw = mne.io.read_raw_fif('connectivity/bects_raw.fif', preload=True) print('will need to change this filename to appropriate one') FILENAME = r'C:/Users/clee/code/eegml/eeg-hdfstorage/data/spasms.eeghdf' hf = eeghdf.Eeghdf(FILENAME) #%% arr = hf.phys_signals[5, 3000:3000 + 10 * 200] #%% plt.plot(arr) plt.show() #%% print(f'find transformd and coef') ch5A, ch5D = pywt.dwt(arr, wtype) #%% plt.subplot(2, 1, 1) plt.plot(ch5A)
def test_calc_sample_units():
    """Check that ``_calc_sample2units`` leaves only truthy values in ``_s2u``.

    Opens ``EEGFILE1``, runs the private conversion-factor calculation and
    asserts that every entry of ``_s2u`` is non-zero.
    """
    reader = eeghdf.Eeghdf(EEGFILE1)
    reader._calc_sample2units()
    scale_factors = reader._s2u
    assert np.all(scale_factors)
def test_reader_open():
    """Check that ``EEGFILE1`` can be opened with ``eeghdf.Eeghdf``.

    The constructor raising is the real failure mode; the assertion only
    confirms that an object was returned.
    """
    eeg = eeghdf.Eeghdf(EEGFILE1)
    # Identity comparison: `!= None` would go through any custom __ne__/__eq__.
    assert eeg is not None
import eeghdf ## module level globals try: ROOT = path.dirname(__file__) except NameError: ROOT = path.curdir ARFILE1 = path.join(ROOT, r"../data/absence_epilepsy.eeghdf") ARFILE2 = path.join(ROOT, r"../data/spasms.eeghdf") EEGFILE1 = path.normpath(ARFILE1) EEGFILE2 = path.normpath(ARFILE2) #print(ARFILE1) eeg = eeghdf.Eeghdf(EEGFILE1) ####### # assume original shape at least (4,10) indexing2D_test_fancy_list1 = [1, 3, 4] indexing2D_test_fancy_list2 = [2] indexing2D_tests_constrained = [ (slice(0, 2), slice(0, 10)), # arr[0:2,0:10] (1, slice(2, 4)), # arr[1,2:4] (slice(2, 4), 1), # arr[2:4,1] (3, 2), # arr[3,2] (indexing2D_test_fancy_list1, slice(0, 9)), (indexing2D_test_fancy_list2, slice(0, 9)), (slice(0, 3), indexing2D_test_fancy_list1), (slice(0, 3), indexing2D_test_fancy_list2),
# In[23]:

# check versions
# Imported here so the version lookup works even if only `plt` is bound.
import matplotlib

print('scikits.samplerate (Secret Rabbit code) version:',
      sk_samplerate.__version__)
print('scipy:', scipy.__version__)
# was print('matplotlib.__version__'), which printed the literal text
print('matplotlib:', matplotlib.__version__)

#%%
plt.rcParams['figure.figsize'] = (24, 9)

#%%

# In[24]:

hf = eeghdf.Eeghdf('../../eeg-hdfstorage/data/absence_epilepsy.eeghdf')
print('original shape:', hf.phys_signals.shape)
# take the first 30 rows and first 100000 columns of the physical signals
eegsig = hf.phys_signals[0:30, 0:100000]
eegsigt = eegsig.transpose()

#%%
fs0 = hf.sample_frequency  # usually 200
fs1 = 156
fs2 = 100
fs3 = 50
A = 0
B = 3

# In[25]:
from pprint import pprint
import eegvis.stacklineplot as stacklineplot
import eegvis.montageview as montageview

#%%
# default figure resolution and size for the plots in this notebook
matplotlib.rcParams["figure.dpi"] = 100  # 100 dpi
matplotlib.rcParams["figure.figsize"] = (8, 6)

# %%
# first open an eeghdf file

# %%
# machine-specific absolute path; adjust before running elsewhere
eeg_file_name = "/home/clee/code/eegml/eeghdf/data/absence_epilepsy.eeghdf"

# %%
hf = eeghdf.Eeghdf(eeg_file_name)

# %%
# values set up here for the plotting calls in later cells
signals = hf.phys_signals
goto_sec = 5.0  # the epoch is centered on this time, so to show the first 10 seconds set it to 5 (i.e. epoch_width_sec/2)
epoch_width_sec = 10.0  # seconds
FS = hf.sample_frequency
chstart = 0
chstop = 19
ylabels = hf.electrode_labels
yscale = 1.0

# %% [markdown]
# ```
# Signature:
# stacklineplot.show_epoch_centered(
eegvis.stacklineplot.stackplot( S[:, t0 * fs:fs * (t0 + int(15))], seconds=T, start_time=t0, ylabels=ef.get_signal_text_labels(), yscale=1.5, ) # %% [markdown] {"colab_type": "text", "id": "LtmTa2y9CszR"} # ### Demonstrate how to use eeg hdf5 storage # This form of storage has multiple advantages. It is well defined and supported by virtually all languages. # # It allows for accessing waveform data without reading in the entirety of the image as if it was a continuous array. Automatic conversion to physical units (usually microvolts) is available as well, again simulating a numpy like array interface. # %% {"colab": {}, "colab_type": "code", "id": "tEGiMlkNFVSa"} hf = eeghdf.Eeghdf('/mnt/data1/eegdbs/stevenson_neonatal_eeg/hdf/eeg10.eeg.h5') # %% {"colab": {"base_uri": "https://localhost:8080/", "height": 34}, "colab_type": "code", "id": "4sMBuzC6Fa-K", "outputId": "4498d06c-9288-48af-cfdb-b3914327c8ea"} hf.age_years # %% {"colab": {"base_uri": "https://localhost:8080/", "height": 34}, "colab_type": "code", "id": "QcKawC4AF0ca", "outputId": "ece261b9-e619-47ca-8707-906d6ae2c918"} hf.duration_seconds_float # %% {"colab": {"base_uri": "https://localhost:8080/", "height": 374}, "colab_type": "code", "id": "5zuD5kUpGetx", "outputId": "371a3b1b-54a1-4c76-9504-7a116f95c89a"} hf.physical_dimensions # %% {"colab": {"base_uri": "https://localhost:8080/", "height": 374}, "colab_type": "code", "id": "GqT_DrtEGoOi", "outputId": "8abc51c5-2ade-405f-be0e-c7d8aae1f6ad"} hf.electrode_labels # %% {"colab": {"base_uri": "https://localhost:8080/", "height": 374}, "colab_type": "code", "id": "Y3CAg-ZSGKc6", "outputId": "58ffa55f-69c1-4860-dc7d-e42dc401a4c4"} hf.shortcut_elabels