def change_time_base(input_data, new_time_base):
    """Interpolate *input_data* onto *new_time_base* (new from SH...).

    For a DataSet, the filter is applied to each member and a new DataSet
    is returned; members without a ``change_time_base`` method are skipped
    with a warning.  For a single data item, a deep copy is returned whose
    signal has been linearly interpolated onto the new timebase.

    Parameters
    ----------
    input_data : TimeseriesData or DataSet
    new_time_base : 1-d array of sample times to interpolate onto.
    """
    from pyfusion.data.base import DataSet
    from pyfusion.data.timeseries import Signal, Timebase

    if isinstance(input_data, DataSet):
        output_dataset = DataSet(input_data.label + '_new_time_base')
        for data in input_data:
            try:
                output_dataset.append(data.change_time_base(new_time_base))
            except AttributeError:
                pyfusion.logger.warning("Data filter 'change_time_base' not applied to item in dataset")
        return output_dataset

    # Build a copy with signal/timebase arrays sized for the new base.
    new_data = copy.deepcopy(input_data)
    n_channels = input_data.signal.shape[0]
    new_data.signal = Signal(np.zeros((n_channels, new_time_base.shape[0]),
                                      dtype=np.float32))
    new_data.timebase = Timebase(new_time_base)
    # Linear interpolation, channel by channel, onto the new timebase.
    for i in range(n_channels):
        new_data.signal[i, :] = np.interp(new_time_base, input_data.timebase,
                                          input_data.signal[i, :])
    return new_data
def test_remove_noncontiguous(self):
    """remove_noncontiguous should drop exactly the members whose
    timebase is no longer contiguous."""
    tb1 = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    tb2 = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    tb3 = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    # nonzero signal mean
    chans = ChannelList(Channel('ch_01', Coords('dummy', (0, 0, 0))))
    tsd1 = TimeseriesData(timebase=tb1, signal=Signal(np.arange(len(tb1))),
                          channels=chans)
    tsd2 = TimeseriesData(timebase=tb2, signal=Signal(np.arange(len(tb2))),
                          channels=chans)
    tsd3 = TimeseriesData(timebase=tb3, signal=Signal(np.arange(len(tb3))),
                          channels=chans)

    for tb in (tb1, tb2, tb3):
        self.assertTrue(tb.is_contiguous())

    # Break the second timebase: shift the last 50 samples by 1 second.
    tsd2.timebase[-50:] += 1.0
    self.assertFalse(tb2.is_contiguous())

    ds = DataSet('ds')
    for tsd in (tsd1, tsd2, tsd3):
        ds.add(tsd)
    for tsd in (tsd1, tsd2, tsd3):
        self.assertTrue(tsd in ds)

    filtered_ds = ds.remove_noncontiguous()
    for tsd in (tsd1, tsd3):
        self.assertTrue(tsd in filtered_ds)
    self.assertFalse(tsd2 in filtered_ds)
def test_dataset_label(self):
    """A DataSet keeps its label, and a saved set is retrievable by label."""
    test_ds = DataSet('test_ds_1')
    test_ds.save()
    self.assertEqual(test_ds.label, 'test_ds_1')
    if pyfusion.orm_manager.IS_ACTIVE:
        session = pyfusion.orm_manager.Session()
        db_ods = session.query(DataSet).filter_by(label='test_ds_1')
        # Previously the query result was built but never checked; assert
        # that exactly one dataset with this label was persisted.
        self.assertEqual(db_ods.count(), 1)
def downsample(input_data, skip=10, chan=None, copy=False):
    """ downsample by a factor 'skip' default 10
    Good example of filter that changes the size of the data.

    Parameters
    ----------
    input_data : TimeseriesData or DataSet
        for a DataSet, each member is downsampled in turn.
    skip : int
        keep every skip-th sample (default 10).
    chan : NOTE(review) - accepted but never read in this body; confirm
        intent against callers before removing.
    copy : NOTE(review) - accepted but never read in this body; confirm
        intent against callers before removing.
    """
    from .base import DataSet
    from .timeseries import TimeseriesData
    if isinstance(input_data, DataSet):
        # Apply to every member; items without a .downsample method
        # (non-timeseries) are skipped with a warning.
        output_dataset = DataSet()
        for ii, data in enumerate(input_data):
            try:
                output_dataset.update(data.downsample(skip))
            except AttributeError:
                pyfusion.logger.warning(
                    "Data filter 'downsample' not applied to item in dataset")
        return output_dataset

    # python3 check this
    # Strided slicing keeps every skip-th sample; 1-d and 2-d signals
    # need different slice expressions.
    if input_data.signal.ndim == 1:
        tmp_data = TimeseriesData(timebase=input_data.timebase[::skip],
                                  signal=input_data.signal[::skip],
                                  channels=input_data.channels,
                                  bypass_length_check=True)
    else:
        tmp_data = TimeseriesData(timebase=input_data.timebase[::skip],
                                  signal=input_data.signal[:, ::skip],
                                  channels=input_data.channels,
                                  bypass_length_check=True)

    tmp_data.meta = input_data.meta.copy()
    tmp_data.history = input_data.history  # bdb - may be redundant now meta is copied
    # Preserve the utc attribute when the source data carries one.
    if hasattr(input_data, 'utc'):
        tmp_data.utc = input_data.utc
    return tmp_data
def normalise(input_data, method=None, separate=False):
    """Normalise signal amplitudes in place and return the data.

    method=None -> default ('rms'), method=0 -> DON'T normalise
    Supported methods: 'rms'/'r', 'peak'/'p', 'var'/'variance'/'v'.
    separate=True normalises each channel by its own value; otherwise one
    value (the max over channels) is used for every channel.

    Records the scale factors in .scales, the method in .norm_method, and
    appends both to .history.

    Raises ValueError for an unrecognised method (previously an obscure
    NameError on norm_value).
    """
    from numpy import mean, sqrt, max, abs, var, atleast_2d
    from pyfusion.data.base import DataSet
    # this allows method='0'(or 0) to prevent normalisation for cleaner code
    # elsewhere
    if pyfusion.DBG() > 3:
        print('separate = %d' % (separate))
    if (method == 0) or (method == '0'):
        return (input_data)
    if (method is None) or (method.lower() == "none"):
        method = 'rms'
    if isinstance(input_data, DataSet):
        output_dataset = DataSet(input_data.label + "_normalise")
        for d in input_data:
            output_dataset.add(normalise(d, method=method, separate=separate))
        return output_dataset
    if method.lower() in ['rms', 'r']:
        if input_data.signal.ndim == 1:
            norm_value = sqrt(mean(input_data.signal**2))
        else:
            rms_vals = sqrt(mean(input_data.signal**2, axis=1))
            if separate == False:
                rms_vals = max(rms_vals)
            norm_value = atleast_2d(rms_vals).T
    elif method.lower() in ['peak', 'p']:
        if input_data.signal.ndim == 1:
            norm_value = abs(input_data.signal).max(axis=0)
        else:
            max_vals = abs(input_data.signal).max(axis=1)
            if separate == False:
                max_vals = max(max_vals)
            norm_value = atleast_2d(max_vals).T
    elif method.lower() in ['var', 'variance', 'v']:
        # this is strange because it over-compensates - if we use
        # sqrt(var()) it would be the same as RMS
        if input_data.signal.ndim == 1:
            norm_value = var(input_data.signal)
        else:
            var_vals = var(input_data.signal, axis=1)
            if separate == False:
                var_vals = max(var_vals)
            norm_value = atleast_2d(var_vals).T
    else:
        # bug fix: an unknown method used to fall through and raise
        # NameError on norm_value below - fail fast with a clear message.
        raise ValueError("normalise: unknown method '{0}'".format(method))
    input_data.signal = input_data.signal / norm_value
    norm_hist = ','.join(
        ["{0:.2g}".format(float(v)) for v in norm_value.flatten()])
    input_data.history += "\n:: norm_value =[{0}]".format(norm_hist)
    input_data.history += ", method={0}, separate={1}".format(method, separate)
    input_data.scales = norm_value
    input_data.norm_method = method + ':' + ['all', 'sep'][separate]
    debug_(pyfusion.DEBUG, level=2, key='normalise',
           msg='about to return from normalise')
    return input_data
def test_dataset(self):
    """Segmenting a two-member dataset of 100-sample signals into
    10-sample pieces should give 20 segments in total."""
    tb = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    npts = len(tb)
    tsd_1 = TimeseriesData(
        timebase=tb,
        signal=Signal(np.resize(np.arange(3 * npts), (3, npts))),
        channels=get_n_channels(3))
    tsd_2 = TimeseriesData(
        timebase=tb,
        signal=Signal(np.resize(np.arange(3 * npts + 1), (3, npts))),
        channels=get_n_channels(3))
    input_dataset = DataSet('test_dataset')
    input_dataset.add(tsd_1)
    input_dataset.add(tsd_2)
    seg_dataset = input_dataset.segment(n_samples=10)
    self.assertTrue(len(seg_dataset) == 20)
def subtract_mean(input_data):
    """Subtract the mean (per channel for 2-d signals) in place and
    return the data; DataSets get a new set of processed members."""
    from pyfusion.data.base import DataSet
    if isinstance(input_data, DataSet):
        result = DataSet(input_data.label + "_subtract_mean")
        for member in input_data:
            result.add(subtract_mean(member))
        return result
    if input_data.signal.ndim == 1:
        offset = mean(input_data.signal)
    else:
        # One mean per channel, expanded to the full signal shape.
        channel_means = mean(input_data.signal, axis=1)
        offset = resize(repeat(channel_means, input_data.signal.shape[1]),
                        input_data.signal.shape)
    input_data.signal -= offset
    return input_data
def test_dataset(self):
    # Build two 5-channel timeseries on a shared timebase, add both to a
    # DataSet and verify membership.  NOTE(review): new_times is prepared
    # but never used - the reduce_time call appears to have been removed.
    ch = get_n_channels(5)
    new_times = [-0.25, 0.25]
    tb = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    tsd_1 = TimeseriesData(timebase=tb,
                           signal=Signal(np.resize(np.arange(5 * len(tb)), (5, len(tb)))),
                           channels=ch)
    tsd_2 = TimeseriesData(timebase=tb,
                           signal=Signal(np.resize(np.arange(5 * len(tb)) + 1, (5, len(tb)))),
                           channels=ch)
    test_dataset = DataSet('test_ds_1')
    test_dataset.add(tsd_1)
    test_dataset.add(tsd_2)
    self.assertTrue(tsd_1 in test_dataset)
    # NOTE(review): stray triple-quote below likely pairs with another one
    # later in the file (commented-out region); left untouched.
    """
def segment(input_data, n_samples, overlap=1.0, datalist=0):
    """Break into segments length n_samples.

    Overlap of 2.0 starts a new segment halfway into previous, overlap=1
    is no overlap.  overlap should divide into n_samples.  Probably should
    consider a nicer definition such as in pyfusion 0

    if datalist = 0 returns a DataSet object
    otherwise, returns a OrderedDataSet object

    Bug fix: segment start indices are cast to int - n_samples/overlap is
    a float, and float slice indices are an error under python3/numpy.
    """
    from pyfusion.data.base import DataSet, OrderedDataSet
    from pyfusion.data.timeseries import TimeseriesData
    if isinstance(input_data, DataSet):
        output_dataset = DataSet()
        for ii, data in enumerate(input_data):
            try:
                output_dataset.update(data.segment(n_samples))
            except AttributeError:
                pyfusion.logger.warning("Data filter 'segment' not applied to item in dataset")
        return output_dataset
    # SH modification in case ordering is important... i.e. you are
    # processing two different arrays at the same time (in different
    # Timeseries objects) and you don't want to lose the time relationship
    # between them
    if datalist:
        output_data = OrderedDataSet('segmented_%s, %d samples, %.3f overlap'
                                     % (datetime.now(), n_samples, overlap))
    else:
        output_data = DataSet('segmented_%s, %d samples, %.3f overlap'
                              % (datetime.now(), n_samples, overlap))
    # SH : 24May2013 fixed bug here - before, the index was allowed to go
    # past the length of samples, giving smaller length data towards the
    # end - fixed to finish the last time we can get n_samples length
    for el in arange(0, len(input_data.timebase) - n_samples,
                     n_samples / overlap):
        el = int(el)  # arange with a float step yields float indices
        if input_data.signal.ndim == 1:
            tmp_data = TimeseriesData(timebase=input_data.timebase[el:el + n_samples],
                                      signal=input_data.signal[el:el + n_samples],
                                      channels=input_data.channels,
                                      bypass_length_check=True)
        else:
            tmp_data = TimeseriesData(timebase=input_data.timebase[el:el + n_samples],
                                      signal=input_data.signal[:, el:el + n_samples],
                                      channels=input_data.channels,
                                      bypass_length_check=True)
        tmp_data.meta = input_data.meta.copy()
        if datalist:
            output_data.append(tmp_data)
        else:
            output_data.add(tmp_data)
    return output_data
def normalise(input_data, method=None, separate=False):
    """Normalise signal amplitudes in place and return the data.

    method=None -> default ('rms'), method=0 -> DON'T normalise
    Supported methods: 'rms'/'r', 'peak'/'p', 'var'/'variance'/'v'.
    separate=True normalises each channel by its own value; otherwise one
    value (the max over channels) is used for every channel.

    Raises ValueError for an unrecognised method (previously an obscure
    NameError on norm_value).
    """
    from numpy import mean, sqrt, max, abs, var, atleast_2d
    from pyfusion.data.base import DataSet
    # this allows method='0'(or 0) to prevent normalisation for cleaner code
    # elsewhere
    if pyfusion.DEBUG > 3:
        print('separate = %d' % (separate))
    if (method == 0) or (method == '0'):
        return (input_data)
    if (method is None) or (method.lower() == "none"):  # 'is None', not '== None'
        method = 'rms'
    if isinstance(input_data, DataSet):
        output_dataset = DataSet(input_data.label + "_normalise")
        for d in input_data:
            output_dataset.add(normalise(d, method=method, separate=separate))
        return output_dataset
    if method.lower() in ['rms', 'r']:
        if input_data.signal.ndim == 1:
            norm_value = sqrt(mean(input_data.signal**2))
        else:
            rms_vals = sqrt(mean(input_data.signal**2, axis=1))
            if separate == False:
                rms_vals = max(rms_vals)
            norm_value = atleast_2d(rms_vals).T
    elif method.lower() in ['peak', 'p']:
        if input_data.signal.ndim == 1:
            norm_value = abs(input_data.signal).max(axis=0)
        else:
            max_vals = abs(input_data.signal).max(axis=1)
            if separate == False:
                max_vals = max(max_vals)
            norm_value = atleast_2d(max_vals).T
    elif method.lower() in ['var', 'variance', 'v']:
        if input_data.signal.ndim == 1:
            norm_value = var(input_data.signal)
        else:
            var_vals = var(input_data.signal, axis=1)
            if separate == False:
                var_vals = max(var_vals)
            norm_value = atleast_2d(var_vals).T
    else:
        # bug fix: an unknown method used to fall through and raise
        # NameError on norm_value below - fail fast with a clear message.
        raise ValueError("normalise: unknown method '{0}'".format(method))
    input_data.signal = input_data.signal / norm_value
    norm_hist = ','.join(["{0:.2g}".format(v) for v in norm_value.flatten()])
    input_data.history += "\n:: norm_value =[{0}]".format(norm_hist)
    input_data.history += ", method={0}, separate={1}".format(method, separate)
    input_data.scales = norm_value
    debug_(pyfusion.DEBUG, key='normalise', msg='about to return from normalise')
    return input_data
def subtract_mean(input_data):
    """Subtract the mean (per channel for 2-d signals) in place,
    recording the removed value(s) in the data's history."""
    from pyfusion.data.base import DataSet
    if isinstance(input_data, DataSet):
        result = DataSet(input_data.label + "_subtract_mean")
        for member in input_data:
            result.add(subtract_mean(member))
        return result
    if input_data.signal.ndim == 1:
        mean_value = mean(input_data.signal)
        input_data.history += "\n:: mean_value\n%s" % (mean_value)
    else:
        # One mean per channel, logged then broadcast to the signal shape.
        mean_vector = mean(input_data.signal, axis=1)
        input_data.history += "\n:: mean_vector\n%s" % (mean_vector)
        mean_value = resize(repeat(mean_vector, input_data.signal.shape[1]),
                            input_data.signal.shape)
    input_data.signal -= mean_value
    return input_data
def reduce_time(input_data, new_time_range, fftopt=False):
    """ reduce the time range of the input data in place(copy=False) or the
    returned Dataset (copy=True - default at present).

    if fftopt, then extend time if possible, or if not reduce it so that
    ffts run reasonably fast.

    Should consider moving this to actual filters?  But this way users can
    obtain optimum fft even without filters.  The fftopt is only visited
    when it is a dataset, and this isn't happening
    """
    from pyfusion.data.base import DataSet
    if pyfusion.VERBOSE > 1:
        print('Entering reduce_time, fftopt={0}, isinst={1}'.format(
            fftopt, isinstance(input_data, DataSet)))
    pyfusion.logger.warning("Testing: can I see this?")
    if isinstance(input_data, DataSet):
        if fftopt:
            new_time_range = get_optimum_time_range(input_data, new_time_range)
        print('****new time range={n}'.format(n=new_time_range))
        output_dataset = DataSet(input_data.label + '_reduce_time')
        for data in input_data:
            try:
                output_dataset.append(data.reduce_time(new_time_range))
            except AttributeError:
                pyfusion.logger.warning(
                    "Data filter 'reduce_time' not applied to item in dataset")
        return output_dataset
    # ??? this should not need to be here - should only be called from
    # above when passed as a dataset (more efficient)
    if fftopt:
        new_time_range = get_optimum_time_range(input_data, new_time_range)
    # searchsorted gives the index bounds of the new range in the timebase.
    new_time_args = searchsorted(input_data.timebase, new_time_range)
    input_data.timebase = input_data.timebase[
        new_time_args[0]:new_time_args[1]]
    if input_data.signal.ndim == 1:
        input_data.signal = input_data.signal[
            new_time_args[0]:new_time_args[1]]
    else:
        input_data.signal = input_data.signal[
            :, new_time_args[0]:new_time_args[1]]
    if pyfusion.VERBOSE > 1:
        # fix: removed a stray python2-style trailing comma after print();
        # in python3 it silently built a discarded 1-tuple.
        print('reduce_time to length {l}'.format(
            l=np.shape(input_data.signal)))
    return input_data
def segment(input_data, n_samples, overlap=DEFAULT_SEGMENT_OVERLAP):
    """Break into segments length n_samples.

    Overlap of 2.0 starts a new segment halfway into previous, overlap=1
    is no overlap.  overlap should divide into n_samples.  Probably should
    consider a nicer definition such as in pyfusion 0

    n_samples < 1 implies a time interval, which is adjusted to suit fft
    otherwise n_samples is taken literally and not adjusted.
    fractional n_samples>1 allows the step size to be fine-tuned.
    """
    from .base import DataSet
    from .timeseries import TimeseriesData

    # Bug fix: handle DataSet first.  A DataSet has no .timebase, so the
    # n_samples < 1 adjustment below used to raise AttributeError when a
    # DataSet was passed with a fractional n_samples.
    if isinstance(input_data, DataSet):
        output_dataset = DataSet()
        for ii, data in enumerate(input_data):
            try:
                output_dataset.update(data.segment(n_samples))
            except AttributeError:
                pyfusion.logger.warning(
                    "Data filter 'segment' not applied to item in dataset")
        return output_dataset

    if n_samples < 1:  # treat as a time interval; round to an fft-friendly count
        dt = np.average(np.diff(input_data.timebase))
        n_samples = next_nice_number(n_samples / dt)
        if pyfusion.VERBOSE > 0:
            print('used {n} sample segments'.format(n=n_samples))

    output_data = DataSet('segmented_%s, %d samples, %.3f overlap'
                          % (datetime.now(), n_samples, overlap))
    # FP loop variable allows locking to a precise frequency by use of a
    # non-integral number of samples; indices are rounded per segment.
    for el in arange(0, len(input_data.timebase), n_samples / float(overlap)):
        nsint = int(n_samples)
        el = int(el + 0.5)
        if input_data.signal.ndim == 1:
            tmp_data = TimeseriesData(timebase=input_data.timebase[el:el + nsint],
                                      signal=input_data.signal[el:el + nsint],
                                      channels=input_data.channels,
                                      bypass_length_check=True)
        else:
            tmp_data = TimeseriesData(
                timebase=input_data.timebase[el:el + nsint],
                signal=input_data.signal[:, el:el + nsint],
                channels=input_data.channels,
                bypass_length_check=True)
        tmp_data.meta = input_data.meta.copy()
        tmp_data.history = input_data.history  # bdb - may be redundant now meta is copied
        output_data.add(tmp_data)
    return output_data
def test_ORM_flucstrucs(self): """ check that flucstrucs can be saved to database""" n_ch = 10 n_samples = 1024 multichannel_data = get_multimode_test_data( channels=get_n_channels(n_ch), timebase=Timebase(np.arange(n_samples) * 1.e-6), noise=0.01) # produce a dataset of flucstrucs #print ">> ", multichannel_data.channels fs_data = multichannel_data.flucstruc(min_dphase=-2 * np.pi) print type(fs_data) #print list(fs_data)[0].dphase[0].channel_1 #print '---' # save our dataset to the database fs_data.save() if pyfusion.orm_manager.IS_ACTIVE: session = pyfusion.orm_manager.Session() d1 = DataSet('test_dataset_1') d1.save() d2 = DataSet('test_dataset_2') d2.save() # get our dataset from database our_dataset = session.query(DataSet).order_by("id").first() self.assertEqual(our_dataset.created, fs_data.created) self.assertEqual(len([i for i in our_dataset.data]), len(our_dataset)) #check flucstrucs have freq, t0 and d_phase.. #for i in our_dataset.data: # print i #print 'w' #assert False #our guinea pig flucstruc: test_fs = our_dataset.pop() self.assertTrue(isinstance(test_fs.freq, float)) self.assertTrue(isinstance(test_fs.t0, float)) # now, are the phase data correct? self.assertTrue(isinstance(test_fs.dphase, BaseOrderedDataSet)) self.assertEqual(len(test_fs.dphase), n_ch - 1) # what if we close the session and try again? session.close() session = pyfusion.orm_manager.Session() ds_again = session.query(DataSet).order_by("id").first() fs_again = list(ds_again)[0] """
def reduce_time(input_data, new_time_range):
    """Trim the timebase and signal to new_time_range.  Single items are
    modified in place; a DataSet yields a new DataSet of trimmed members."""
    from pyfusion.data.base import DataSet
    if isinstance(input_data, DataSet):
        reduced = DataSet(input_data.label + '_reduce_time')
        for member in input_data:
            try:
                reduced.append(member.reduce_time(new_time_range))
            except AttributeError:
                pyfusion.logger.warning("Data filter 'reduce_time' not applied to item in dataset")
        return reduced
    # Index bounds of the requested range in the (sorted) timebase.
    bounds = searchsorted(input_data.timebase, new_time_range)
    lo, hi = bounds[0], bounds[1]
    input_data.timebase = input_data.timebase[lo:hi]
    if input_data.signal.ndim == 1:
        input_data.signal = input_data.signal[lo:hi]
    else:
        input_data.signal = input_data.signal[:, lo:hi]
    return input_data
def test_single_channel_fakedata(self):
    """Compare normalise() output against hand-computed rms/peak/var
    scalings, for a single channel and for a one-member DataSet."""
    test_acq = FakeDataAcquisition('test_fakedata')

    def fetch():
        # Fresh copy each time: normalise works in place.
        return test_acq.getdata(self.shot_number, "test_timeseries_channel_2")

    channel_data = fetch()
    channel_data_norm_no_arg = fetch().normalise()
    channel_data_rms_norm_by_arg = fetch().normalise(method='rms')
    channel_data_peak_norm_by_arg = fetch().normalise(method='peak')
    channel_data_var_norm_by_arg = fetch().normalise(method='var')

    rms_value = np.sqrt(np.mean(channel_data.signal**2))
    peak_value = max(abs(channel_data.signal))
    var_value = np.var(channel_data.signal)

    assert_array_almost_equal(channel_data.signal / rms_value,
                              channel_data_rms_norm_by_arg.signal)
    assert_array_almost_equal(channel_data.signal / peak_value,
                              channel_data_peak_norm_by_arg.signal)
    assert_array_almost_equal(channel_data.signal / var_value,
                              channel_data_var_norm_by_arg.signal)
    # check that default is peak
    assert_array_almost_equal(channel_data_peak_norm_by_arg.signal,
                              channel_data_norm_no_arg.signal)
    # try for dataset
    channel_data_for_set = fetch()
    test_dataset = DataSet('test_dataset')
    test_dataset.add(channel_data_for_set)
    test_dataset.normalise(method='rms')
    for d in test_dataset:
        assert_array_almost_equal(channel_data.signal / rms_value, d.signal)
def segment(input_data, n_samples, overlap=DEFAULT_SEGMENT_OVERLAP):
    """Break into segments length n_samples.

    Overlap of 2.0 starts a new segment halfway into previous, overlap=1
    is no overlap.  overlap should divide into n_samples.  Probably should
    consider a nicer definition such as in pyfusion 0

    Bug fix: segment start indices are cast to int - n_samples/overlap is
    a float for non-integral overlap, and float slice indices raise
    TypeError under python3/numpy.
    """
    from pyfusion.data.base import DataSet
    from pyfusion.data.timeseries import TimeseriesData
    if isinstance(input_data, DataSet):
        output_dataset = DataSet()
        for ii, data in enumerate(input_data):
            try:
                output_dataset.update(data.segment(n_samples))
            except AttributeError:
                pyfusion.logger.warning("Data filter 'segment' not applied to item in dataset")
        return output_dataset
    output_data = DataSet('segmented_%s, %d samples, %.3f overlap'
                          % (datetime.now(), n_samples, overlap))
    for el in arange(0, len(input_data.timebase), n_samples / overlap):
        el = int(el)  # arange with a float step yields float indices
        if input_data.signal.ndim == 1:
            tmp_data = TimeseriesData(timebase=input_data.timebase[el:el + n_samples],
                                      signal=input_data.signal[el:el + n_samples],
                                      channels=input_data.channels,
                                      bypass_length_check=True)
        else:
            tmp_data = TimeseriesData(timebase=input_data.timebase[el:el + n_samples],
                                      signal=input_data.signal[:, el:el + n_samples],
                                      channels=input_data.channels,
                                      bypass_length_check=True)
        tmp_data.meta = input_data.meta.copy()
        tmp_data.history = input_data.history  # bdb - may be redundant now meta is copied
        output_data.add(tmp_data)
    return output_data
def normalise(input_data, method='peak', separate=False):
    """Normalise signal amplitudes in place and return the data.

    Supported methods: 'rms'/'r', 'peak'/'p' (default), 'var'/'variance'/'v';
    method=0 or '0' skips normalisation entirely.
    separate=True normalises each channel by its own value; otherwise one
    value (the max over channels) is used for every channel.

    Raises ValueError for an unrecognised method (previously an obscure
    NameError on norm_value).
    """
    from numpy import mean, sqrt, max, abs, var, atleast_2d
    from pyfusion.data.base import DataSet
    # this allows method='0'(or 0) to prevent normalisation for cleaner code
    # elsewhere
    if (method == 0) or (method == '0'):
        return (input_data)
    if isinstance(input_data, DataSet):
        output_dataset = DataSet(input_data.label + "_normalise")
        for d in input_data:
            output_dataset.add(normalise(d, method=method, separate=separate))
        return output_dataset
    if method.lower() in ['rms', 'r']:
        if input_data.signal.ndim == 1:
            norm_value = sqrt(mean(input_data.signal**2))
        else:
            rms_vals = sqrt(mean(input_data.signal**2, axis=1))
            if separate == False:
                rms_vals = max(rms_vals)
            norm_value = atleast_2d(rms_vals).T
    elif method.lower() in ['peak', 'p']:
        if input_data.signal.ndim == 1:
            norm_value = abs(input_data.signal).max(axis=0)
        else:
            max_vals = abs(input_data.signal).max(axis=1)
            if separate == False:
                max_vals = max(max_vals)
            norm_value = atleast_2d(max_vals).T
    elif method.lower() in ['var', 'variance', 'v']:
        if input_data.signal.ndim == 1:
            norm_value = var(input_data.signal)
        else:
            var_vals = var(input_data.signal, axis=1)
            if separate == False:
                var_vals = max(var_vals)
            norm_value = atleast_2d(var_vals).T
    else:
        # bug fix: an unknown method used to fall through and raise
        # NameError on norm_value below - fail fast with a clear message.
        raise ValueError("normalise: unknown method '{0}'".format(method))
    input_data.signal = input_data.signal / norm_value
    input_data.scales = norm_value
    return input_data
def reduce_time(input_data, new_time_range, fftopt=False):
    """ reduce the time range of the input data in place(copy=False) or the
    returned Dataset (copy=True - default at present).

    if fftopt, then extend time if possible, or if not reduce it so that
    ffts run reasonably fast.

    Should consider moving this to actual filters?  But this way users can
    obtain optimum fft even without filters.  The fftopt is only visited
    when it is a dataset, and this isn't happening

    Note: single (non-DataSet) items are modified IN PLACE and returned;
    a DataSet input produces a new DataSet of reduced members.
    """
    from pyfusion.data.base import DataSet
    if pyfusion.VERBOSE > 1:
        print('Entering reduce_time, fftopt={0}, isinst={1}'
              .format(fftopt, isinstance(input_data, DataSet)))
    pyfusion.logger.warning("Testing: can I see this?")
    if isinstance(input_data, DataSet):
        if fftopt:
            # adjust the window to an fft-friendly sample count
            new_time_range = get_optimum_time_range(input_data, new_time_range)
        #output_dataset = input_data.copy()
        #output_dataset.clear()
        print('****new time range={n}'.format(n=new_time_range))
        output_dataset = DataSet(input_data.label + '_reduce_time')
        for data in input_data:
            try:
                output_dataset.append(data.reduce_time(new_time_range))
            except AttributeError:
                pyfusion.logger.warning("Data filter 'reduce_time' not applied to item in dataset")
        return output_dataset
    #??? this should not need to be here - should only be called from
    # above when passed as a dataset (more efficient)
    if fftopt:
        new_time_range = get_optimum_time_range(input_data, new_time_range)
    # searchsorted gives the index bounds of the new range in the timebase
    new_time_args = searchsorted(input_data.timebase, new_time_range)
    input_data.timebase = input_data.timebase[new_time_args[0]:new_time_args[1]]
    if input_data.signal.ndim == 1:
        input_data.signal = input_data.signal[new_time_args[0]:new_time_args[1]]
    else:
        input_data.signal = input_data.signal[:, new_time_args[0]:new_time_args[1]]
    if pyfusion.VERBOSE > 1:
        # NOTE(review): the trailing comma is a python2 leftover; in python3
        # it just builds a discarded 1-tuple - harmless, confirm and remove.
        print('reduce_time to length {l}'
              .format(l=np.shape(input_data.signal))),
    return input_data
def test_reduce_time_dataset(self):
    """reduce_time on a DataSet should run over all of its members."""
    new_times = [-0.25, 0.25]
    tb = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    npts = len(tb)
    members = []
    for offset in (0, 1):
        sig = Signal(np.resize(np.arange(5 * npts) + offset, (5, npts)))
        members.append(TimeseriesData(timebase=tb, signal=sig,
                                      channels=get_n_channels(5)))
    test_dataset = DataSet('test_dataset')
    for member in members:
        test_dataset.add(member)
    test_dataset.reduce_time(new_times)
def flucstruc(input_data, min_dphase=-pi, group=fs_group_geometric,
              method='rms', separate=True, label=None, segment=0,
              segment_overlap=DEFAULT_SEGMENT_OVERLAP):
    """If segment is 0, then we dont segment the data (assume already done)"""
    from pyfusion.data.base import DataSet
    from pyfusion.data.timeseries import FlucStruc

    if label:
        fs_dataset = DataSet(label)
    else:
        fs_dataset = DataSet('flucstrucs_%s' % datetime.now())

    if segment > 0:
        # Segment first, then flucstruc each segment without re-segmenting.
        for seg in input_data.segment(segment, overlap=segment_overlap):
            fs_dataset.update(seg.flucstruc(min_dphase=min_dphase,
                                            group=group, method=method,
                                            separate=separate, label=label,
                                            segment=0))
        return fs_dataset

    svd_data = input_data.subtract_mean().normalise(method, separate).svd()
    for fs_gr in group(svd_data):
        fs = FlucStruc(svd_data, fs_gr, input_data.timebase,
                       min_dphase=min_dphase,
                       phase_pairs=input_data.__dict__.get("phase_pairs", None))
        fs.meta = input_data.meta
        fs.history = svd_data.history
        fs.scales = svd_data.scales
        fs.norm_method = svd_data.norm_method
        fs_dataset.add(fs)
    return fs_dataset
def test_ORM_flucstrucs(self): """ check that flucstrucs can be saved to database""" n_ch = 10 n_samples = 1024 multichannel_data = get_multimode_test_data(channels=get_n_channels(n_ch), timebase = Timebase(np.arange(n_samples)*1.e-6), noise = 0.01) # produce a dataset of flucstrucs #print ">> ", multichannel_data.channels fs_data = multichannel_data.flucstruc(min_dphase = -2*np.pi) print type(fs_data) #print list(fs_data)[0].dphase[0].channel_1 #print '---' # save our dataset to the database fs_data.save() if pyfusion.orm_manager.IS_ACTIVE: session = pyfusion.orm_manager.Session() d1 = DataSet('test_dataset_1') d1.save() d2 = DataSet('test_dataset_2') d2.save() # get our dataset from database our_dataset = session.query(DataSet).order_by("id").first() self.assertEqual(our_dataset.created, fs_data.created) self.assertEqual(len([i for i in our_dataset.data]), len(our_dataset)) #check flucstrucs have freq, t0 and d_phase.. #for i in our_dataset.data: # print i #print 'w' #assert False #our guinea pig flucstruc: test_fs = our_dataset.pop() self.assertTrue(isinstance(test_fs.freq, float)) self.assertTrue(isinstance(test_fs.t0, float)) # now, are the phase data correct? self.assertTrue(isinstance(test_fs.dphase, BaseOrderedDataSet)) self.assertEqual(len(test_fs.dphase), n_ch-1) # what if we close the session and try again? session.close() session = pyfusion.orm_manager.Session() ds_again = session.query(DataSet).order_by("id").first() fs_again = list(ds_again)[0] """
def segment(input_data, n_samples, overlap=DEFAULT_SEGMENT_OVERLAP):
    """Break into segments length n_samples.

    Overlap of 2.0 starts a new segment halfway into previous, overlap=1
    is no overlap.  overlap should divide into n_samples.  Probably should
    consider a nicer definition such as in pyfusion 0

    Bug fix: segment start indices are cast to int - n_samples/overlap is
    a float for non-integral overlap, and float slice indices raise
    TypeError under python3/numpy.
    """
    from pyfusion.data.base import DataSet
    from pyfusion.data.timeseries import TimeseriesData
    if isinstance(input_data, DataSet):
        output_dataset = DataSet()
        for ii, data in enumerate(input_data):
            try:
                output_dataset.update(data.segment(n_samples))
            except AttributeError:
                pyfusion.logger.warning(
                    "Data filter 'segment' not applied to item in dataset")
        return output_dataset
    output_data = DataSet('segmented_%s, %d samples, %.3f overlap'
                          % (datetime.now(), n_samples, overlap))
    for el in arange(0, len(input_data.timebase), n_samples / overlap):
        el = int(el)  # arange with a float step yields float indices
        if input_data.signal.ndim == 1:
            tmp_data = TimeseriesData(
                timebase=input_data.timebase[el:el + n_samples],
                signal=input_data.signal[el:el + n_samples],
                channels=input_data.channels,
                bypass_length_check=True)
        else:
            tmp_data = TimeseriesData(
                timebase=input_data.timebase[el:el + n_samples],
                signal=input_data.signal[:, el:el + n_samples],
                channels=input_data.channels,
                bypass_length_check=True)
        tmp_data.meta = input_data.meta.copy()
        tmp_data.history = input_data.history  # bdb - may be redundant now meta is copied
        output_data.add(tmp_data)
    return output_data
def flucstruc(input_data, min_dphase=-pi, group=fs_group_geometric,
              method='rms', separate=True, label=None):
    """Extract fluctuation structures from the SVD of the mean-subtracted,
    normalised data, returning them as a DataSet."""
    from pyfusion.data.base import DataSet
    from pyfusion.data.timeseries import FlucStruc

    if label:
        fs_dataset = DataSet(label)
    else:
        fs_dataset = DataSet('flucstrucs_%s' % datetime.now())

    svd_data = input_data.subtract_mean().normalise(method, separate).svd()
    for fs_gr in group(svd_data):
        fs = FlucStruc(svd_data, fs_gr, input_data.timebase,
                       min_dphase=min_dphase)
        fs.meta = input_data.meta
        fs_dataset.add(fs)
    return fs_dataset
def flucstruc(input_data, min_dphase=-pi, group=fs_group_geometric,
              method='rms', separate=True, label=None, segment=0,
              segment_overlap=DEFAULT_SEGMENT_OVERLAP):
    """If segment is 0, then we dont segment the data (assume already done)"""
    from pyfusion.data.base import DataSet
    from pyfusion.data.timeseries import FlucStruc

    if label:
        fs_dataset = DataSet(label)
    else:
        fs_dataset = DataSet('flucstrucs_%s' % datetime.now())

    if segment > 0:
        # Segment first, then flucstruc each segment without re-segmenting.
        for seg in input_data.segment(segment, overlap=segment_overlap):
            fs_dataset.update(seg.flucstruc(min_dphase=min_dphase,
                                            group=group, method=method,
                                            separate=separate, label=label,
                                            segment=0))
        return fs_dataset

    svd_data = input_data.subtract_mean().normalise(method, separate).svd()
    for fs_gr in group(svd_data):
        fs = FlucStruc(svd_data, fs_gr, input_data.timebase,
                       min_dphase=min_dphase,
                       phase_pairs=input_data.__dict__.get("phase_pairs", None))
        fs.meta = input_data.meta
        fs.history = svd_data.history
        fs.scales = svd_data.scales
        fs_dataset.add(fs)
    return fs_dataset
def reduce_time(input_data, new_time_range, fftopt=0, copy=True):
    """ reduce the time range of the input data in place(copy=False) or the
    returned Dataset (copy=True - default at present).

    if fftopt>0, then extend time if possible, or if not reduce it so that
    ffts run reasonably fast (fftopt<0 is experimental - see comment below).

    Should consider moving this to actual filters?  But this way users can
    obtain optimum fft even without filters.

    Raises LookupError if no samples fall within new_time_range.
    """
    if not copy:
        raise ValueError("copy=False not supported now")
    from pyfusion.data.base import DataSet
    if pyfusion.VERBOSE > 1:
        print('Entering reduce_time, fftopt={0}, isinst={1}'.format(
            fftopt, isinstance(input_data, DataSet)))
    pyfusion.logger.warning("Testing: can I see this?")
    # not clear that this helps.  Bug fix: the early-out must be skipped for
    # a DataSet - a DataSet has no .timebase, so this check used to raise
    # AttributeError before the DataSet branch below was ever reached.
    if (not isinstance(input_data, DataSet)
            and np.nanmin(input_data.timebase) >= new_time_range[0]
            and np.nanmax(input_data.timebase) <= new_time_range[1]):
        print('time range is already reduced')
        return (input_data)
    if fftopt > 0:
        new_time_range = get_optimum_time_range(input_data, new_time_range,
                                                fftopt)
    # the -1 option doesn't really do what I want yet
    elif fftopt < 0:
        new_time_range = get_optimum_time_range(input_data, new_time_range,
                                                try_more=fftopt)
    if isinstance(input_data, DataSet):
        print('****new time range={n}'.format(n=new_time_range))
        output_dataset = DataSet(input_data.label + '_reduce_time')
        for data in input_data:
            try:
                output_dataset.append(data.reduce_time(new_time_range))
            except AttributeError:
                pyfusion.logger.warning(
                    "Data filter 'reduce_time' not applied to item in dataset")
        return output_dataset
    # single-item path: trim timebase and signal in place
    new_time_args = searchsorted(input_data.timebase, new_time_range)
    input_data.timebase = input_data.timebase[
        new_time_args[0]:new_time_args[1]]
    if input_data.signal.ndim == 1:
        input_data.signal = input_data.signal[
            new_time_args[0]:new_time_args[1]]
    else:
        input_data.signal = input_data.signal[
            :, new_time_args[0]:new_time_args[1]]
    if pyfusion.VERBOSE > 1:
        # fix: removed a stray python2-style trailing comma after print();
        # in python3 it silently built a discarded 1-tuple.
        print('reduce_time to length {l}'.format(
            l=np.shape(input_data.signal)))
    if len(input_data.signal) == 0:
        raise LookupError('no samples in time_range of {trg} in {nm}'.format(
            trg=str(new_time_range), nm=input_data.config_name))
    return input_data