def get_all_data(self, channels=None):
    """
    Load every sample of the requested channels as one TimeSeries.

    Parameters
    ----------
    channels: {array_like},optional
        Indices of the channels to load.  When None, all
        ``self.nchannels`` channels are loaded.

    Returns
    -------
    A TimeSeries instance with dimensions (channels,time).
    """
    # default to every channel
    if channels is None:
        channels = np.arange(self.nchannels)

    # load the whole recording as one "event" starting at sample 0
    n_samp = self.nsamples
    loaded = self._load_data(channels, [0], n_samp, 0)

    # remove the events dimension (only one event was requested)
    loaded = loaded[:, 0, :]

    # time axis: one value per sample, one sample period apart
    step = 1. / self.samplerate
    t_first = 0 * step
    t_last = t_first + (n_samp - 1) * step
    time_axis = np.linspace(t_first, t_last, n_samp)

    # wrap the raw array as a TimeSeries
    dims = [Dim(self.channels[channels], 'channels'),
            Dim(time_axis, 'time')]
    return TimeSeries(np.asarray(loaded), 'time', self.samplerate,
                      dims=dims)
def get_data(self, channel, dur, offset, buf, resampled_rate=None,
             filt_freq=None, filt_type='stop',
             filt_order=4, keep_buffer=False):
    """
    Return the requested range of data for each event by using the
    proper data retrieval mechanism for each event.

    Events are grouped by their source (the 'esrc' field) so that
    each source is asked for its data only once, via
    ``src.get_event_data``.

    Parameters
    ----------
    channel: passed through unchanged to ``src.get_event_data``.
    dur, offset, buf: passed through unchanged to
        ``src.get_event_data``.
    resampled_rate, filt_freq, filt_type, filt_order, keep_buffer:
        passed through unchanged to ``src.get_event_data``.

    Returns
    -------
    A TimeSeries instance with dimensions (events,time).  The events
    are ordered source by source (in the order returned by
    ``np.unique``), not necessarily in their original order.
    """
    # get ready to load data
    eventdata = []
    events = []

    # speed up by getting unique event sources first
    # (np.unique1d was removed from NumPy; np.unique is its
    # replacement and returns the same sorted unique values)
    usources = np.unique(self['esrc'])

    # loop over unique sources
    for src in usources:
        # get the event offsets from that source
        ind = np.atleast_1d(self['esrc'] == src)

        if len(ind) == 1:
            # NOTE(review): presumably guards the single-record case
            # where boolean indexing of self is not possible -- confirm
            event_offsets = self['eoffset']
            events.append(self)
        else:
            event_offsets = self[ind]['eoffset']
            events.append(self[ind])

        # get the timeseries for those events
        eventdata.append(src.get_event_data(channel,
                                            event_offsets,
                                            dur,
                                            offset,
                                            buf,
                                            resampled_rate,
                                            filt_freq,
                                            filt_type,
                                            filt_order,
                                            keep_buffer))

    # concatenate (must eventually check that dims match)
    tdim = eventdata[0]['time']
    srate = eventdata[0].samplerate
    events = np.concatenate(events).view(self.__class__)
    eventdata = TimeSeries(np.concatenate(eventdata),
                           'time', srate,
                           dims=[Dim(events, 'events'), tdim])

    return eventdata
def get_event_data(self, channels, events,
                   start_time, end_time, buffer_time=0.0,
                   resampled_rate=None,
                   filt_freq=None, filt_type='stop', filt_order=4,
                   keep_buffer=False,
                   loop_axis=None, num_mp_procs=0, eoffset='eoffset',
                   eoffset_in_time=True):
    """
    Return a TimeSeries containing the data around each event, with
    dimensions (channels,events,time).

    Parameters
    ----------
    channels: {int}, {str}, {array_like}, {dict} or None
        Channels from which to load data.  A string is looked up in
        ``self.channels['name']``.  For a dict, only its first key is
        used: the key names a field of ``self.channels`` and the
        value lists the entries of that field to load.  None or an
        empty sequence selects all channels.  The resulting channel
        indices are sorted before loading; the caller's object is not
        modified.
    events: {array_like} or {recarray}
        Array/list of event offsets (in time or samples as
        specified by eoffset_in_time; in time by default) into
        the data, specifying each event onset time.  If it has named
        fields (or columns), the offsets are read from the field
        named by `eoffset`.
    start_time: {float}
        Start of epoch to retrieve (in time-unit of the data).
    end_time: {float}
        End of epoch to retrieve (in time-unit of the data).
    buffer_time: {float},optional
        Extra buffer to add on either side of the event in order
        to avoid edge effects when filtering (in time unit of the
        data).
    resampled_rate: {float},optional
        New samplerate to resample the data to after loading.
    filt_freq: {array_like},optional
        The range of frequencies to filter (depends on the filter
        type.)
    filt_type = {scipy.signal.band_dict.keys()},optional
        Filter type.
    filt_order = {int},optional
        The order of the filter.
    keep_buffer: {boolean},optional
        Whether to keep the buffer when returning the data.
    loop_axis, num_mp_procs: optional
        Passed through to ``TimeSeries.resampled``.
    eoffset: {string},optional
        Name of the field/column of `events` holding the offsets.
    eoffset_in_time: {boolean},optional
        If True, the unit of the event offsets is taken to be
        time (unit of the data), otherwise samples.

    Raises
    ------
    ValueError
        If `eoffset` is not a field of `events`, the duration is
        negative, an event offset is negative, or the requested
        offset/buffer reaches before the start of the data.
    """
    # translate back to dur and offset
    dur = end_time - start_time
    offset = start_time
    buf = buffer_time

    # get the event offsets
    if ((not (hasattr(events, 'dtype') or hasattr(events, 'columns'))) or
            (hasattr(events, 'dtype') and events.dtype.names is None)):
        # they just passed in a list (or a plain array)
        event_offsets = events
    elif ((hasattr(events, 'dtype') and (eoffset in events.dtype.names)) or
          (hasattr(events, 'columns') and (eoffset in events.columns))):
        event_offsets = events[eoffset]
    else:
        raise ValueError(eoffset + ' must be a valid fieldname ' +
                         'specifying the offset for the data.')

    # Sanity checks:
    if (dur < 0):
        raise ValueError('Duration must not be negative! ' +
                         'Specified duration: ' + str(dur))
    if (np.min(event_offsets) < 0):
        raise ValueError('Event offsets must not be negative!')

    # make sure the events are an actual array:
    event_offsets = np.asarray(event_offsets)
    if eoffset_in_time:
        # convert to samples
        event_offsets = np.atleast_1d(
            np.int64(np.round(event_offsets * self.samplerate)))

    # set event durations from rate
    # get the samplesize
    samplesize = 1. / self.samplerate

    # get the number of buffer samples
    buf_samp = int(np.ceil(buf / samplesize))

    # calculate the offset samples that contains the desired offset
    offset_samp = int(
        np.ceil((np.abs(offset) - samplesize * .5) / samplesize) *
        np.sign(offset))

    # finally get the duration necessary to cover the desired span
    dur_samp = (int(np.ceil(
        (dur + offset - samplesize * .5) / samplesize)) -
        offset_samp + 1)

    # add in the buffer
    dur_samp += 2 * buf_samp
    offset_samp -= buf_samp

    # check that we have all the data we need before every event:
    if (np.min(event_offsets + offset_samp) < 0):
        bad_evs = ((event_offsets + offset_samp) < 0)
        raise ValueError('The specified values for offset and buffer ' +
                         'require more data than is available before ' +
                         str(np.sum(bad_evs)) + ' of all ' +
                         str(len(bad_evs)) + ' events.')

    # process the channels
    if isinstance(channels, dict):
        # turn into indices; next(iter(...)) gives the first key on
        # both Python 2 and 3 (dict.keys() is not indexable on 3)
        ch_info = self.channels
        key = next(iter(channels))
        channels = [
            np.nonzero(ch_info[key] == c)[0][0] for c in channels[key]
        ]
    elif isinstance(channels, str):
        # find that channel by name
        channels = np.nonzero(self.channels['name'] == channels)[0][0]
    if channels is None or len(np.atleast_1d(channels)) == 0:
        channels = np.arange(self.nchannels)
    # np.atleast_1d returns the caller's own array when it is already
    # an ndarray, so sort into a copy instead of sorting in place
    channels = np.sort(np.atleast_1d(channels))

    # load the timeseries (this must be implemented by subclasses)
    eventdata = self._load_data(channels, event_offsets,
                                dur_samp, offset_samp)

    # calc the time range
    samp_start = offset_samp * samplesize
    samp_end = samp_start + (dur_samp - 1) * samplesize
    time_range = np.linspace(samp_start, samp_end, dur_samp)

    # make it a timeseries
    dims = [
        Dim(self.channels[channels], 'channels'),  # can index into channels
        Dim(events, 'events'),
        Dim(time_range, 'time')
    ]
    eventdata = TimeSeries(np.asarray(eventdata),
                           'time',
                           self.samplerate, dims=dims)

    # filter if desired
    if not (filt_freq is None):
        # filter that data
        eventdata = eventdata.filtered(filt_freq,
                                       filt_type=filt_type,
                                       order=filt_order)

    # resample if desired
    if (not (resampled_rate is None) and
            not (resampled_rate == eventdata.samplerate)):
        # resample the data
        eventdata = eventdata.resampled(resampled_rate,
                                        loop_axis=loop_axis,
                                        num_mp_procs=num_mp_procs)

    # remove the buffer and set the time range
    if buf > 0 and not (keep_buffer):
        # remove the buffer
        eventdata = eventdata.remove_buffer(buf)

    # return the timeseries
    return eventdata
def get_data(self, channels, start_time, end_time, buffer_time=0.0,
             resampled_rate=None,
             filt_freq=None, filt_type='stop', filt_order=4,
             keep_buffer=False, esrc='esrc', eoffset='eoffset'):
    """
    Return the requested range of data for each event by using the
    proper data retrieval mechanism for each event.

    Parameters
    ----------
    channels: {list,int,None}
        Channels from which to load data.
    start_time: {float}
        Start of epoch to retrieve (in time-unit of the data).
    end_time: {float}
        End of epoch to retrieve (in time-unit of the data).
    buffer_time: {float},optional
        Extra buffer to add on either side of the event in order
        to avoid edge effects when filtering (in time unit of the
        data).
    resampled_rate: {float},optional
        New samplerate to resample the data to after loading.
    filt_freq: {array_like},optional
        The range of frequencies to filter (depends on the filter
        type.)
    filt_type = {scipy.signal.band_dict.keys()},optional
        Filter type.
    filt_order = {int},optional
        The order of the filter.
    keep_buffer: {boolean},optional
        Whether to keep the buffer when returning the data.
    esrc : {string},optional
        Name for the field containing the source for the time
        series data corresponding to the event.
    eoffset: {string},optional
        Name for the field containing the offset (in samples) for
        the event within the specified source.

    Returns
    -------
    A TimeSeries instance with dimensions (channels,events,time).
    Events are ordered source by source (in the order returned by
    ``np.unique``), not necessarily in their original order.

    Raises
    ------
    ValueError
        If `esrc` or `eoffset` is not a field of the events.
    """
    # check for necessary fields
    if not (esrc in self.dtype.names and
            eoffset in self.dtype.names):
        raise ValueError(esrc+' and '+eoffset+' must be valid fieldnames '+
                         'specifying source and offset for the data.')

    # get ready to load data
    events = []
    eventdata = None

    # speed up by getting unique event sources first
    usources = np.unique(self[esrc])

    # loop over unique sources
    for src in usources:
        # get the event offsets from that source
        ind = np.atleast_1d(self[esrc] == src)

        if len(ind) == 1:
            # NOTE(review): presumably guards the single-record case
            # where boolean indexing of self is not possible -- confirm
            event_offsets = self[eoffset]
            events.append(self)
        else:
            event_offsets = self[ind][eoffset]
            events.append(self[ind])

        # get the timeseries for those events
        newdat = src.get_event_data(channels,
                                    event_offsets,
                                    start_time,
                                    end_time,
                                    buffer_time,
                                    resampled_rate,
                                    filt_freq,
                                    filt_type,
                                    filt_order,
                                    keep_buffer)
        if eventdata is None:
            eventdata = newdat
        else:
            # Keep the extended result: the return value used to be
            # thrown away, which loses the new source's data whenever
            # extend() returns a new object.  An extend() that works
            # in place and returns None is still handled.
            extended = eventdata.extend(newdat, axis=1)
            if extended is not None:
                eventdata = extended

    # concatenate (must eventually check that dims match)
    tdim = eventdata['time']
    cdim = eventdata['channels']
    srate = eventdata.samplerate
    events = np.concatenate(events).view(self.__class__)
    eventdata = TimeSeries(eventdata,
                           'time', srate,
                           dims=[cdim, Dim(events, 'events'), tdim])

    return eventdata
def get_data(self, channels, start_time, end_time, buffer_time=0.0,
             resampled_rate=None,
             filt_freq=None, filt_type='stop', filt_order=4,
             keep_buffer=False, esrc='esrc', eoffset='eoffset',
             loop_axis=None, num_mp_procs=0, eoffset_in_time=True,
             **kwds):
    """
    Return the requested range of data for each event by using the
    proper data retrieval mechanism for each event.

    Data is loaded source by source and then put back into the
    original order of the events.

    Parameters
    ----------
    channels: {list,int,None}
        Channels from which to load data.
    start_time: {float}
        Start of epoch to retrieve (in time-unit of the data).
    end_time: {float}
        End of epoch to retrieve (in time-unit of the data).
    buffer_time: {float},optional
        Extra buffer to add on either side of the event in order
        to avoid edge effects when filtering (in time unit of the
        data).
    resampled_rate: {float},optional
        New samplerate to resample the data to after loading.
    filt_freq: {array_like},optional
        The range of frequencies to filter (depends on the filter
        type.)
    filt_type = {scipy.signal.band_dict.keys()},optional
        Filter type.
    filt_order = {int},optional
        The order of the filter.
    keep_buffer: {boolean},optional
        Whether to keep the buffer when returning the data.
    esrc : {string},optional
        Name for the field containing the source for the time
        series data corresponding to the event.
    eoffset: {string},optional
        Name for the field containing the offset (in seconds) for
        the event within the specified source.
    loop_axis, num_mp_procs: optional
        Passed through to each source's ``get_event_data``.
    eoffset_in_time: {boolean},optional
        If True, the unit of the event offsets is taken to be
        time (unit of the data), otherwise samples.
    verbose: {bool}
        Keyword-only (via **kwds).  Turns on verbose printout of
        the function - e.g. timing information will be output to
        the screen.
    return_both:
        Keyword-only (via **kwds).  Accepted for compatibility but
        currently ignored; only the TimeSeries is returned.

    Returns
    -------
    A TimeSeries instance with dimensions (channels,events,time).

    Raises
    ------
    ValueError
        If `esrc` or `eoffset` is not a field of the events.
    """
    # local import: the rest of the file may not import time
    import time
    start = time.time()

    verbose = kwds.get('verbose', False)

    # check for necessary fields
    if not (esrc in self.dtype.names and
            eoffset in self.dtype.names):
        raise ValueError(esrc + ' and ' + eoffset +
                         ' must be valid fieldnames ' +
                         'specifying source and offset for the data.')

    events = []
    newdat_list = []

    # Speed up by getting unique event sources first.  np.unique
    # returns the sources in sorted order, which need not match the
    # order in which they appear in the events, so the original
    # position of every event is recorded and restored further down.
    usources = np.unique(self[esrc])

    ordered_indices = np.arange(len(self))
    event_indices_list = []

    # loop over unique sources
    for s, src in enumerate(usources):
        # get the event offsets from that source
        ind = np.atleast_1d(self[esrc] == src)
        event_indices_list.append(ordered_indices[ind])

        if verbose and not s % 10:
            # single-argument print(...) emits the same text on
            # Python 2 and Python 3
            print('Reading event %d' % s)

        if len(ind) == 1:
            # NOTE(review): presumably guards the single-record case
            # where boolean indexing of self is not possible -- confirm
            event_offsets = self[eoffset]
            events.append(self)
        else:
            event_offsets = self[ind][eoffset]
            events.append(self[ind])

        # get the timeseries for those events
        newdat = src.get_event_data(channels,
                                    event_offsets,
                                    start_time,
                                    end_time,
                                    buffer_time,
                                    resampled_rate,
                                    filt_freq,
                                    filt_type,
                                    filt_order,
                                    keep_buffer,
                                    loop_axis,
                                    num_mp_procs,
                                    eoffset,
                                    eoffset_in_time)
        newdat_list.append(newdat)

    # positions that put the source-grouped events back in order
    event_indices_array = np.hstack(event_indices_list)
    event_indices_restore_sort_order_array = event_indices_array.argsort()

    # join the per-source data along the events axis in one call
    start_extend_time = time.time()
    eventdata = newdat_list[0]
    eventdata = eventdata.extend(newdat_list[1:], axis=1)
    end_extend_time = time.time()

    # concatenate (must eventually check that dims match)
    tdim = eventdata['time']
    cdim = eventdata['channels']
    srate = eventdata.samplerate
    events = np.concatenate(events).view(self.__class__)

    # Restore the original event ordering on the plain ndarray.
    # np.asarray gives the underlying data without depending on how
    # many view layers (.base.base.base) extend() happens to produce.
    eventdata_raw_array_sorted = np.asarray(eventdata)[
        :, event_indices_restore_sort_order_array, :]
    events_sorted = events[event_indices_restore_sort_order_array]

    eventdata = TimeSeries(eventdata_raw_array_sorted,
                           'time', srate,
                           dims=[cdim, Dim(events_sorted, 'events'), tdim])

    end = time.time()
    if verbose:
        print('get_data tuntime= %s s' % (end - start))
        print('extend_time = = %s s' %
              (end_extend_time - start_extend_time))

    return eventdata
def _load_timeseries(self, channel, eventOffsets, dur_samp, offset_samp): """ """ # determine the file eegfname = '%s.%03i' % (self.dataroot, channel) if os.path.isfile(eegfname): efile = open(eegfname, 'rb') else: # try unpadded lead eegfname = '%s.%i' % (self.dataroot, channel) if os.path.isfile(eegfname): efile = open(eegfname, 'rb') else: raise IOError( 'EEG file not found for channel %i and file root %s\n' % (channel, self.dataroot)) # loop over events eventdata = [] # # get the eventOffsets # if isinstance(eventInfo,TsEvents): # eventOffsets = eventInfo['eegoffset'] # else: # eventOffsets = eventInfo # eventOffsets = np.asarray(eventOffsets) # if len(eventOffsets.shape)==0: # eventOffsets = [eventOffsets] for evOffset in eventOffsets: # seek to the position in the file thetime = offset_samp + evOffset efile.seek(self.nBytes * thetime, 0) # read the data data = efile.read(int(self.nBytes * dur_samp)) # convert from string to array based on the format # hard codes little endian data = np.array( struct.unpack('<' + str(len(data) / self.nBytes) + self.fmtStr, data)) # make sure we got some data if len(data) < dur_samp: raise IOError( 'Event with offset %d is outside the bounds of file %s.\n' % (evOffset, eegfname)) # append it to the events eventdata.append(data) # calc the time range sampStart = offset_samp * samplesize sampEnd = sampStart + (dur_samp - 1) * samplesize timeRange = np.linspace(sampStart, sampEnd, dur_samp) # make it a timeseries if isinstance(eventInfo, TsEvents): dims = [ Dim('event', eventInfo.data, 'event'), Dim('time', timeRange) ] else: dims = [ Dim('eventOffsets', eventOffsets, 'samples'), Dim('time', timeRange) ] eventdata = TimeSeries(np.array(eventdata), dims, tdim='time', self.samplerate) # multiply by the gain eventdata *= self.gain return eventdata
def get_event_data(self, channels, event_offsets,
                   start_time, end_time, buffer_time=0.0,
                   resampled_rate=None,
                   filt_freq=None, filt_type='stop', filt_order=4,
                   keep_buffer=False):
    """
    Load the data around each event offset and return it as a
    TimeSeries with dimensions (channels,event_offsets,time).

    Parameters
    ----------
    channels: {int}, {array_like} or None
        Channels from which to load data; None or an empty sequence
        selects all ``self.nchannels`` channels.
    event_offsets: {array_like}
        Array/list of event offsets (in samples) into the data,
        specifying each event onset time.
    start_time: {float}
        Start of epoch to retrieve (in time-unit of the data).
    end_time: {float}
        End of epoch to retrieve (in time-unit of the data).
    buffer_time: {float},optional
        Extra buffer to add on either side of the event in order
        to avoid edge effects when filtering (in time unit of the
        data).
    resampled_rate: {float},optional
        New samplerate to resample the data to after loading.
    filt_freq: {array_like},optional
        The range of frequencies to filter (depends on the filter
        type.)
    filt_type = {scipy.signal.band_dict.keys()},optional
        Filter type.
    filt_order = {int},optional
        The order of the filter.
    keep_buffer: {boolean},optional
        Whether to keep the buffer when returning the data.

    Raises
    ------
    ValueError
        If the duration is negative, an event offset is negative, or
        the requested offset/buffer reaches before the start of the
        data.
    """
    # validate the request before touching any data
    span = end_time - start_time
    if span < 0:
        raise ValueError('Duration must not be negative! ' +
                         'Specified duration: ' + str(span))
    if np.min(event_offsets) < 0:
        raise ValueError('Event offsets must not be negative!')

    # work with a real array from here on
    event_offsets = np.asarray(event_offsets)

    # length of one sample in the time unit of the data
    step = 1. / self.samplerate

    # samples needed for the buffer on each side
    n_buf = int(np.ceil(buffer_time / step))

    # sample containing the requested start of the epoch
    first_samp = int(
        np.ceil((np.abs(start_time) - step * .5) / step) *
        np.sign(start_time))

    # number of samples needed to cover the requested span
    n_samp = (int(np.ceil(
        (span + start_time - step * .5) / step)) -
        first_samp + 1)

    # widen the window by the buffer on both sides
    n_samp += 2 * n_buf
    first_samp -= n_buf

    # every window must start at or after the first sample of the data
    window_starts = event_offsets + first_samp
    if np.min(window_starts) < 0:
        too_early = window_starts < 0
        raise ValueError('The specified values for offset and buffer ' +
                         'require more data than is available before ' +
                         str(np.sum(too_early)) + ' of all ' +
                         str(len(too_early)) + ' events.')

    # resolve the channels; nothing requested means all of them
    if channels is None or len(np.atleast_1d(channels)) == 0:
        channels = np.arange(self.nchannels)
    channels = np.atleast_1d(channels)

    # load the raw data (this must be implemented by subclasses)
    raw = self._load_data(channels, event_offsets, n_samp, first_samp)

    # time axis of the loaded window
    t_first = first_samp * step
    t_last = t_first + (n_samp - 1) * step
    time_axis = np.linspace(t_first, t_last, n_samp)

    # wrap it as a timeseries
    dims = [Dim(channels, 'channels'),
            Dim(event_offsets, 'event_offsets'),
            Dim(time_axis, 'time')]
    result = TimeSeries(np.asarray(raw), 'time', self.samplerate,
                        dims=dims)

    # optional filtering
    if filt_freq is not None:
        result = result.filtered(filt_freq,
                                 filt_type=filt_type,
                                 order=filt_order)

    # optional resampling, skipped when the rate is already right
    if (resampled_rate is not None and
            not (resampled_rate == result.samplerate)):
        result = result.resampled(resampled_rate)

    # strip the buffer unless the caller asked to keep it
    if buffer_time > 0 and not keep_buffer:
        result = result.remove_buffer(buffer_time)

    return result