def segment(input_data, n_samples, overlap=DEFAULT_SEGMENT_OVERLAP):
    """Break a TimeseriesData (or each member of a DataSet) into segments.

    Overlap of 2.0 starts a new segment halfway into the previous one,
    overlap=1 is no overlap.  overlap should divide into n_samples.
    Probably should consider a nicer definition such as in pyfusion 0.

    Parameters
    ----------
    input_data : TimeseriesData or DataSet
    n_samples : int
        segment length in samples
    overlap : float
        2.0 -> 50% overlap, 1 -> contiguous segments

    Returns a DataSet of TimeseriesData segments.
    """
    from pyfusion.data.base import DataSet
    from pyfusion.data.timeseries import TimeseriesData
    if isinstance(input_data, DataSet):
        output_dataset = DataSet()
        for ii, data in enumerate(input_data):
            try:
                output_dataset.update(data.segment(n_samples))
            except AttributeError:
                pyfusion.logger.warning(
                    "Data filter 'segment' not applied to item in dataset")
        return output_dataset
    output_data = DataSet('segmented_%s, %d samples, %.3f overlap'
                          % (datetime.now(), n_samples, overlap))
    # BUG FIX: under true division (python3) n_samples/overlap is a float,
    # so arange yields float start positions which are not valid slice
    # indices (TypeError).  Round each start index and use an int length.
    nsint = int(n_samples)
    for el in arange(0, len(input_data.timebase), n_samples / float(overlap)):
        start = int(el + 0.5)  # round to the nearest sample
        if input_data.signal.ndim == 1:
            tmp_data = TimeseriesData(
                timebase=input_data.timebase[start:start + nsint],
                signal=input_data.signal[start:start + nsint],
                channels=input_data.channels, bypass_length_check=True)
        else:
            tmp_data = TimeseriesData(
                timebase=input_data.timebase[start:start + nsint],
                signal=input_data.signal[:, start:start + nsint],
                channels=input_data.channels, bypass_length_check=True)
        tmp_data.meta = input_data.meta.copy()
        tmp_data.history = input_data.history  # bdb - may be redundant now meta is copied
        output_data.add(tmp_data)
    return output_data
def test_remove_noncontiguous(self):
    """remove_noncontiguous drops exactly the member whose timebase was broken."""
    timebases = [generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
                 for _ in range(3)]
    tb1, tb2, tb3 = timebases

    # nonzero signal mean
    def make_tsd(tb):
        return TimeseriesData(
            timebase=tb, signal=Signal(np.arange(len(tb))),
            channels=ChannelList(Channel('ch_01', Coords('dummy', (0, 0, 0)))))

    tsd1, tsd2, tsd3 = [make_tsd(tb) for tb in timebases]
    for tb in timebases:
        self.assertTrue(tb.is_contiguous())

    # break the middle timebase by shifting its second half
    tsd2.timebase[-50:] += 1.0
    self.assertFalse(tb2.is_contiguous())

    ds = DataSet('ds')
    members = [tsd1, tsd2, tsd3]
    for tsd in members:
        ds.add(tsd)
    for tsd in members:
        self.assertTrue(tsd in ds)

    filtered_ds = ds.remove_noncontiguous()
    self.assertTrue(tsd1 in filtered_ds)
    self.assertFalse(tsd2 in filtered_ds)
    self.assertTrue(tsd3 in filtered_ds)
def test_single_channel_fakedata(self):
    """normalise: rms/peak/var scaling on one channel; default equals peak."""
    test_acq = FakeDataAcquisition('test_fakedata')

    def fetch():
        # fresh copy each time -- normalise modifies the data in place
        return test_acq.getdata(self.shot_number, "test_timeseries_channel_2")

    channel_data = fetch()
    channel_data_norm_no_arg = fetch().normalise()
    normed = {}
    for m in ('rms', 'peak', 'var'):
        normed[m] = fetch().normalise(method=m)

    rms_value = np.sqrt(np.mean(channel_data.signal**2))
    peak_value = max(abs(channel_data.signal))
    var_value = np.var(channel_data.signal)

    assert_array_almost_equal(channel_data.signal / rms_value,
                              normed['rms'].signal)
    assert_array_almost_equal(channel_data.signal / peak_value,
                              normed['peak'].signal)
    assert_array_almost_equal(channel_data.signal / var_value,
                              normed['var'].signal)
    # check that default is peak
    assert_array_almost_equal(normed['peak'].signal,
                              channel_data_norm_no_arg.signal)
    # try for dataset
    channel_data_for_set = fetch()
    test_dataset = DataSet('test_dataset')
    test_dataset.add(channel_data_for_set)
    test_dataset.normalise(method='rms')
    for d in test_dataset:
        assert_array_almost_equal(channel_data.signal / rms_value, d.signal)
def normalise(input_data, method=None, separate=False):
    """Normalise the signal in place and return the data.

    method=None -> default ('rms'); method=0 (or '0') -> DON'T normalise.
    Accepted methods: 'rms'/'r', 'peak'/'p', 'var'/'variance'/'v'.
    separate=True scales each channel by its own value; otherwise the
    largest per-channel value scales all channels.

    The scale factors are recorded in input_data.scales, the method in
    input_data.norm_method, and a summary is appended to the history.

    Raises ValueError for an unrecognised method.
    """
    from numpy import mean, sqrt, max, abs, var, atleast_2d
    from pyfusion.data.base import DataSet
    # this allows method='0'(or 0) to prevent normalisation for cleaner code
    # elsewhere
    if pyfusion.DBG() > 3:
        print('separate = %d' % (separate))
    if (method == 0) or (method == '0'):
        return (input_data)
    if (method is None) or (method.lower() == "none"):
        method = 'rms'
    if isinstance(input_data, DataSet):
        output_dataset = DataSet(input_data.label + "_normalise")
        for d in input_data:
            output_dataset.add(normalise(d, method=method, separate=separate))
        return output_dataset
    if method.lower() in ['rms', 'r']:
        if input_data.signal.ndim == 1:
            norm_value = sqrt(mean(input_data.signal**2))
        else:
            rms_vals = sqrt(mean(input_data.signal**2, axis=1))
            if separate == False:
                rms_vals = max(rms_vals)
            norm_value = atleast_2d(rms_vals).T
    elif method.lower() in ['peak', 'p']:
        if input_data.signal.ndim == 1:
            norm_value = abs(input_data.signal).max(axis=0)
        else:
            max_vals = abs(input_data.signal).max(axis=1)
            if separate == False:
                max_vals = max(max_vals)
            norm_value = atleast_2d(max_vals).T
    elif method.lower() in ['var', 'variance', 'v']:
        # this is strange because it over-compensates - if we use
        # sqrt(var()) it would be the same as RMS
        if input_data.signal.ndim == 1:
            norm_value = var(input_data.signal)
        else:
            var_vals = var(input_data.signal, axis=1)
            if separate == False:
                var_vals = max(var_vals)
            norm_value = atleast_2d(var_vals).T
    else:
        # BUG FIX: previously an unknown method fell through all branches
        # and raised a confusing NameError on norm_value below
        raise ValueError("unknown normalise method '%s'" % (method,))
    input_data.signal = input_data.signal / norm_value
    #print('norm_value = %s' % norm_value)
    norm_hist = ','.join(
        ["{0:.2g}".format(float(v)) for v in norm_value.flatten()])
    input_data.history += "\n:: norm_value =[{0}]".format(norm_hist)
    input_data.history += ", method={0}, separate={1}".format(method, separate)
    input_data.scales = norm_value
    input_data.norm_method = method + ':' + ['all', 'sep'][separate]
    debug_(pyfusion.DEBUG, level=2, key='normalise',
           msg='about to return from normalise')
    return input_data
def segment(input_data, n_samples, overlap=DEFAULT_SEGMENT_OVERLAP):
    """Break into segments length n_samples.

    Overlap of 2.0 starts a new segment halfway into previous, overlap=1
    is no overlap.  overlap should divide into n_samples.  Probably should
    consider a nicer definition such as in pyfusion 0.

    n_samples < 1 implies a time interval, which is adjusted to suit fft
    otherwise n_samples is taken literally and not adjusted.
    fractional n_samples>1 allows the step size to be fine-tuned.
    """
    from .base import DataSet
    from .timeseries import TimeseriesData
    # BUG FIX: the DataSet case must be handled *before* the n_samples<1
    # adjustment -- a DataSet has no .timebase, so np.diff(input_data.timebase)
    # raised AttributeError for fractional n_samples.  Each member now does
    # its own adjustment via the recursive data.segment() call.
    if isinstance(input_data, DataSet):
        output_dataset = DataSet()
        for ii, data in enumerate(input_data):
            try:
                output_dataset.update(data.segment(n_samples))
            except AttributeError:
                pyfusion.logger.warning(
                    "Data filter 'segment' not applied to item in dataset")
        return output_dataset
    if n_samples < 1:  # interpret as a time interval
        dt = np.average(np.diff(input_data.timebase))
        n_samples = next_nice_number(n_samples / dt)
        if pyfusion.VERBOSE > 0:
            print('used {n} sample segments'.format(n=n_samples))
    output_data = DataSet('segmented_%s, %d samples, %.3f overlap'
                          % (datetime.now(), n_samples, overlap))
    # python3 check this
    ## for el in range(0,len(input_data.timebase), int(n_samples/overlap)):
    ## prior to python3 was
    ## for el in arange(0,len(input_data.timebase), n_samples/overlap):
    # this FP version allows locking to a precise frequency by use of a
    # non-integral number of samples
    for el in arange(0, len(input_data.timebase), n_samples / float(overlap)):
        nsint = int(n_samples)
        el = int(el + 0.5)  # round the float start position to a sample index
        if input_data.signal.ndim == 1:
            tmp_data = TimeseriesData(
                timebase=input_data.timebase[el:el + nsint],
                signal=input_data.signal[el:el + nsint],
                channels=input_data.channels, bypass_length_check=True)
        else:
            tmp_data = TimeseriesData(
                timebase=input_data.timebase[el:el + nsint],
                signal=input_data.signal[:, el:el + nsint],
                channels=input_data.channels, bypass_length_check=True)
        tmp_data.meta = input_data.meta.copy()
        tmp_data.history = input_data.history  # bdb - may be redundant now meta is copied
        output_data.add(tmp_data)
    return output_data
def test_reduce_time_dataset(self):
    """reduce_time on a DataSet should run over every member without error."""
    new_times = [-0.25, 0.25]
    tb = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    n_ch = 5
    ramp = np.arange(n_ch * len(tb))
    test_dataset = DataSet('test_dataset')
    # two members with slightly different (offset) signals
    for offset in (0, 1):
        test_dataset.add(
            TimeseriesData(timebase=tb,
                           signal=Signal(np.resize(ramp + offset,
                                                   (n_ch, len(tb)))),
                           channels=get_n_channels(n_ch)))
    test_dataset.reduce_time(new_times)
def test_dataset(self):
    """segment(): two 100-sample members in 10-sample pieces -> 20 segments."""
    tb = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    shape = (3, len(tb))
    sig_a = Signal(np.resize(np.arange(3 * len(tb)), shape))
    sig_b = Signal(np.resize(np.arange(3 * len(tb) + 1), shape))
    input_dataset = DataSet('test_dataset')
    input_dataset.add(TimeseriesData(timebase=tb, signal=sig_a,
                                     channels=get_n_channels(3)))
    input_dataset.add(TimeseriesData(timebase=tb, signal=sig_b,
                                     channels=get_n_channels(3)))
    seg_dataset = input_dataset.segment(n_samples=10)
    self.assertTrue(len(seg_dataset) == 20)
def subtract_mean(input_data):
    """Subtract the (per-channel) mean from the signal, in place; return the data."""
    from pyfusion.data.base import DataSet
    if isinstance(input_data, DataSet):
        # recurse over members, collecting results in a fresh DataSet
        demeaned = DataSet(input_data.label + "_subtract_mean")
        for member in input_data:
            demeaned.add(subtract_mean(member))
        return demeaned
    sig = input_data.signal
    if sig.ndim == 1:
        offset = mean(sig)
    else:
        # per-channel means, expanded back to the full signal shape
        per_channel = mean(sig, axis=1)
        offset = resize(repeat(per_channel, sig.shape[1]), sig.shape)
    input_data.signal -= offset
    return input_data
def test_dataset(self):
    # Build two 5-channel TimeseriesData objects on the same timebase and
    # check membership after adding them to a DataSet.
    ch=get_n_channels(5)
    # NOTE(review): new_times is unused here -- looks left over from a
    # reduce_time test; confirm before removing.
    new_times = [-0.25, 0.25]
    tb = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    tsd_1 = TimeseriesData(timebase=tb, signal=Signal(np.resize(np.arange(5*len(tb)),(5,len(tb)))), channels=ch)
    # second member offset by 1 so the two signals differ
    tsd_2 = TimeseriesData(timebase=tb, signal=Signal(np.resize(np.arange(5*len(tb))+1, (5,len(tb)))), channels=ch)
    test_dataset = DataSet('test_ds_1')
    test_dataset.add(tsd_1)
    test_dataset.add(tsd_2)
    self.assertTrue(tsd_1 in test_dataset)
    # NOTE(review): the trailing triple-quote below appears to open a string
    # spanning code outside this view -- kept unchanged.
    """
def normalise(input_data, method=None, separate=False):
    """Normalise the signal in place and return the data.

    method=None -> default ('rms'); method=0 (or '0') -> DON'T normalise.
    Accepted methods: 'rms'/'r', 'peak'/'p', 'var'/'variance'/'v'.
    separate=True scales each channel by its own value; otherwise the
    largest per-channel value scales all channels.

    Scale factors are recorded in input_data.scales, the method in
    input_data.norm_method, and a summary is appended to the history.

    Raises ValueError for an unrecognised method.
    """
    from numpy import mean, sqrt, max, abs, var, atleast_2d
    from pyfusion.data.base import DataSet
    # this allows method='0'(or 0) to prevent normalisation for cleaner code
    # elsewhere
    if pyfusion.DEBUG > 3:
        print('separate = %d' % (separate))
    if (method == 0) or (method == '0'):
        return(input_data)
    # idiom fix: compare with None using 'is', not '=='
    if (method is None) or (method.lower() == "none"):
        method = 'rms'
    if isinstance(input_data, DataSet):
        output_dataset = DataSet(input_data.label + "_normalise")
        for d in input_data:
            output_dataset.add(normalise(d, method=method, separate=separate))
        return output_dataset
    if method.lower() in ['rms', 'r']:
        if input_data.signal.ndim == 1:
            norm_value = sqrt(mean(input_data.signal**2))
        else:
            rms_vals = sqrt(mean(input_data.signal**2, axis=1))
            if separate == False:
                rms_vals = max(rms_vals)
            norm_value = atleast_2d(rms_vals).T
    elif method.lower() in ['peak', 'p']:
        if input_data.signal.ndim == 1:
            norm_value = abs(input_data.signal).max(axis=0)
        else:
            max_vals = abs(input_data.signal).max(axis=1)
            if separate == False:
                max_vals = max(max_vals)
            norm_value = atleast_2d(max_vals).T
    elif method.lower() in ['var', 'variance', 'v']:
        if input_data.signal.ndim == 1:
            norm_value = var(input_data.signal)
        else:
            var_vals = var(input_data.signal, axis=1)
            if separate == False:
                var_vals = max(var_vals)
            norm_value = atleast_2d(var_vals).T
    else:
        # BUG FIX: previously an unknown method fell through all branches
        # and raised a confusing NameError on norm_value below
        raise ValueError("unknown normalise method '%s'" % (method,))
    input_data.signal = input_data.signal / norm_value
    #print('norm_value = %s' % norm_value)
    # float(v) guards against numpy-scalar formatting differences
    norm_hist = ','.join(
        ["{0:.2g}".format(float(v)) for v in norm_value.flatten()])
    input_data.history += "\n:: norm_value =[{0}]".format(norm_hist)
    input_data.history += ", method={0}, separate={1}".format(method, separate)
    input_data.scales = norm_value
    # record how the normalisation was done (consistent with newer version)
    input_data.norm_method = method + ':' + ['all', 'sep'][separate]
    debug_(pyfusion.DEBUG, key='normalise',
           msg='about to return from normalise')
    return input_data
def flucstruc(input_data, min_dphase = -pi, group=fs_group_geometric, method='rms', separate=True, label=None):
    """Extract fluctuation structures from input_data via an SVD.

    The data is de-meaned, normalised (method/separate), SVD'd, and the
    grouping function partitions the result; one FlucStruc per group is
    returned in a DataSet.
    """
    from pyfusion.data.base import DataSet
    from pyfusion.data.timeseries import FlucStruc

    # default label is timestamped so repeated calls stay distinct
    if label:
        fs_dataset = DataSet(label)
    else:
        fs_dataset = DataSet('flucstrucs_%s' % datetime.now())

    svd_data = input_data.subtract_mean().normalise(method, separate).svd()
    for fs_gr in group(svd_data):
        fs = FlucStruc(svd_data, fs_gr, input_data.timebase,
                       min_dphase=min_dphase)
        fs.meta = input_data.meta
        fs_dataset.add(fs)
    return fs_dataset
def subtract_mean(input_data):
    """Remove the DC offset (per channel for 2D signals), logging it to history."""
    from pyfusion.data.base import DataSet
    if isinstance(input_data, DataSet):
        result = DataSet(input_data.label + "_subtract_mean")
        for member in input_data:
            result.add(subtract_mean(member))
        return result
    if input_data.signal.ndim == 1:
        dc = mean(input_data.signal)
        input_data.history += "\n:: mean_value\n%s" % (dc)
        input_data.signal -= dc
    else:
        dc_rows = mean(input_data.signal, axis=1)
        input_data.history += "\n:: mean_vector\n%s" % (dc_rows)
        # expand the per-channel means to the full signal shape
        input_data.signal -= resize(repeat(dc_rows, input_data.signal.shape[1]),
                                    input_data.signal.shape)
    return input_data
def flucstruc(input_data, min_dphase=-pi, group=fs_group_geometric,
              method='rms', separate=True, label=None, segment=0,
              segment_overlap=DEFAULT_SEGMENT_OVERLAP):
    """If segment is 0, then we dont segment the data (assume already done)"""
    from pyfusion.data.base import DataSet
    from pyfusion.data.timeseries import FlucStruc

    # default label is timestamped so repeated calls stay distinct
    if label:
        fs_dataset = DataSet(label)
    else:
        fs_dataset = DataSet('flucstrucs_%s' % datetime.now())

    # segment > 0: split the data first, then recurse on each piece with
    # segment=0 so the analysis below runs once per segment
    if segment > 0:
        for seg in input_data.segment(segment, overlap=segment_overlap):
            fs_dataset.update(
                seg.flucstruc(min_dphase=min_dphase, group=group,
                              method=method, separate=separate, label=label,
                              segment=0))
        return fs_dataset

    # de-mean, normalise, then SVD; group() partitions the SVD result
    svd_data = input_data.subtract_mean().normalise(method, separate).svd()
    for fs_gr in group(svd_data):
        # phase_pairs is optional on input_data; default to None if absent
        tmp = FlucStruc(svd_data, fs_gr, input_data.timebase,
                        min_dphase=min_dphase,
                        phase_pairs=input_data.__dict__.get(
                            "phase_pairs", None))
        tmp.meta = input_data.meta
        # carry provenance and scaling info from the svd stage
        tmp.history = svd_data.history
        tmp.scales = svd_data.scales
        tmp.norm_method = svd_data.norm_method
        fs_dataset.add(tmp)
    return fs_dataset
def normalise(input_data, method='peak', separate=False):
    """Normalise the signal in place and return the data.

    method: 'rms'/'r', 'peak'/'p' (default), 'var'/'variance'/'v';
    method=0 (or '0') skips normalisation entirely.
    separate=True scales each channel by its own value; otherwise the
    largest per-channel value scales all channels.
    The scale factors are recorded in input_data.scales.
    """
    from numpy import mean, sqrt, max, abs, var, atleast_2d
    from pyfusion.data.base import DataSet
    # this allows method='0'(or 0) to prevent normalisation for cleaner code
    # elsewhere
    if (method == 0) or (method == '0'):
        return(input_data)
    if isinstance(input_data, DataSet):
        # recurse over members, keeping the label lineage
        output_dataset = DataSet(input_data.label+"_normalise")
        for d in input_data:
            output_dataset.add(normalise(d, method=method, separate=separate))
        return output_dataset
    if method.lower() in ['rms', 'r']:
        if input_data.signal.ndim == 1:
            norm_value = sqrt(mean(input_data.signal**2))
        else:
            rms_vals = sqrt(mean(input_data.signal**2, axis=1))
            if separate == False:
                rms_vals = max(rms_vals)
            # column vector so division broadcasts across samples
            norm_value = atleast_2d(rms_vals).T
    elif method.lower() in ['peak', 'p']:
        if input_data.signal.ndim == 1:
            norm_value = abs(input_data.signal).max(axis=0)
        else:
            max_vals = abs(input_data.signal).max(axis=1)
            if separate == False:
                max_vals = max(max_vals)
            norm_value = atleast_2d(max_vals).T
    elif method.lower() in ['var', 'variance', 'v']:
        if input_data.signal.ndim == 1:
            norm_value = var(input_data.signal)
        else:
            var_vals = var(input_data.signal, axis=1)
            if separate == False:
                var_vals = max(var_vals)
            norm_value = atleast_2d(var_vals).T
    # NOTE(review): an unrecognised method falls through all branches and
    # raises NameError on norm_value below -- confirm whether callers rely
    # on that before tightening.
    input_data.signal = input_data.signal / norm_value
    #print('norm_value = %s' % norm_value)
    input_data.scales = norm_value
    return input_data
def segment(input_data, n_samples, overlap=DEFAULT_SEGMENT_OVERLAP):
    """Break into segments length n_samples.

    Overlap of 2.0 starts a new segment halfway into previous, overlap=1
    is no overlap.  overlap should divide into n_samples.  Probably should
    consider a nicer definition such as in pyfusion 0.

    Returns a DataSet of TimeseriesData segments; a DataSet input is
    segmented member by member.
    """
    from pyfusion.data.base import DataSet
    from pyfusion.data.timeseries import TimeseriesData
    if isinstance(input_data, DataSet):
        output_dataset = DataSet()
        for ii, data in enumerate(input_data):
            try:
                output_dataset.update(data.segment(n_samples))
            except AttributeError:
                pyfusion.logger.warning(
                    "Data filter 'segment' not applied to item in dataset")
        return output_dataset
    output_data = DataSet('segmented_%s, %d samples, %.3f overlap'
                          % (datetime.now(), n_samples, overlap))
    # BUG FIX: under true division (python3) n_samples/overlap is a float,
    # so arange yields float start positions which are not valid slice
    # indices (TypeError).  Round each start index and use an int length.
    nsint = int(n_samples)
    for el in arange(0, len(input_data.timebase), n_samples / float(overlap)):
        start = int(el + 0.5)  # round to the nearest sample
        if input_data.signal.ndim == 1:
            tmp_data = TimeseriesData(
                timebase=input_data.timebase[start:start + nsint],
                signal=input_data.signal[start:start + nsint],
                channels=input_data.channels, bypass_length_check=True)
        else:
            tmp_data = TimeseriesData(
                timebase=input_data.timebase[start:start + nsint],
                signal=input_data.signal[:, start:start + nsint],
                channels=input_data.channels, bypass_length_check=True)
        tmp_data.meta = input_data.meta.copy()
        tmp_data.history = input_data.history  # bdb - may be redundant now meta is copied
        output_data.add(tmp_data)
    return output_data
def flucstruc(input_data, min_dphase = -pi, group=fs_group_geometric, method='rms', separate=True, label=None, segment=0, segment_overlap=DEFAULT_SEGMENT_OVERLAP):
    """If segment is 0, then we dont segment the data (assume already done)"""
    from pyfusion.data.base import DataSet
    from pyfusion.data.timeseries import FlucStruc

    # default label is timestamped so repeated calls stay distinct
    if label:
        fs_dataset = DataSet(label)
    else:
        fs_dataset = DataSet('flucstrucs_%s' %datetime.now())

    # segment > 0: split the data first, then recurse on each piece with
    # segment=0 so the analysis below runs once per segment
    if segment > 0:
        for seg in input_data.segment(segment, overlap=segment_overlap):
            fs_dataset.update(seg.flucstruc(min_dphase=min_dphase, group=group,
                                            method=method, separate=separate,
                                            label=label, segment=0))
        return fs_dataset

    # de-mean, normalise, then SVD; group() partitions the SVD result
    svd_data = input_data.subtract_mean().normalise(method, separate).svd()
    for fs_gr in group(svd_data):
        # phase_pairs is optional on input_data; default to None if absent
        tmp = FlucStruc(svd_data, fs_gr, input_data.timebase,
                        min_dphase=min_dphase,
                        phase_pairs=input_data.__dict__.get("phase_pairs",None))
        tmp.meta = input_data.meta
        # carry provenance and scaling info from the svd stage
        tmp.history = svd_data.history
        tmp.scales = svd_data.scales
        fs_dataset.add(tmp)
    return fs_dataset