Example #1
 def test_abs(self):
     a = TimeSeries([ (1, -3), (2, 3.3), (3, -5) ])
     a = abs(a)
     self.assertTrue(isinstance(a, TimeSeries))
     self.assertListEqual(a.values, [ 3, 3.3, 5 ])
Example #2
        wave,
        location='/Users/ireland/Data/AIA_Data/shutdownfun/' + choice,
        derotate=False)

#wave = '171'
#dc, location = aia_specific.rn4(wave, Xrange=[-201, -1])

# Get some properties of the datacube
ny = dc.shape[0]
nx = dc.shape[1]
nt = dc.shape[2]

# Create a time series object
dt = 12.0
t = dt * np.arange(0, nt)
tsdummy = TimeSeries(t, t)
iobs = np.zeros(tsdummy.PowerSpectrum.Npower.shape)
logiobs = np.zeros(tsdummy.PowerSpectrum.Npower.shape)
nposfreq = len(iobs)

# Result # 1 - add up all the emission and do the analysis on the full FOV
# Also, make a histogram of all the power spectra to get an idea of the
# variation present

# Sum over all the spatial locations
full_data = sum_over_space(dc)

#
#full_data = tsutils.fix_nonfinite(dc[10, 10, :])

# Average emission over all the data
Example #3
    def get_event_data(self,
                       channels,
                       events,
                       start_time,
                       end_time,
                       buffer_time=0.0,
                       resampled_rate=None,
                       filt_freq=None,
                       filt_type='stop',
                       filt_order=4,
                       keep_buffer=False,
                       loop_axis=None,
                       num_mp_procs=0,
                       eoffset='eoffset',
                       eoffset_in_time=True):
        """
        Return a TimeSeries containing data for the specified channels
        in the form [events,duration].

        Parameters
        ----------
        channels: {int} or {dict}
            Channels from which to load data.
        events: {array_like} or {recarray}
            Array/list of event offsets (in time or samples as
            specified by eoffset_in_time; in time by default) into
            the data, specifying each event onset time.
        start_time: {float}
            Start of epoch to retrieve (in time-unit of the data).
        end_time: {float}
            End of epoch to retrieve (in time-unit of the data).
        buffer_time: {float},optional
            Extra buffer to add on either side of the event in order
            to avoid edge effects when filtering (in time unit of the
            data).
        resampled_rate: {float},optional
            New samplerate to resample the data to after loading.
        filt_freq: {array_like},optional
            The range of frequencies to filter (depends on the filter
            type.)
        filt_type = {scipy.signal.band_dict.keys()},optional
            Filter type.
        filt_order = {int},optional
            The order of the filter.
        keep_buffer: {boolean},optional
            Whether to keep the buffer when returning the data.
        eoffset_in_time: {boolean},optional        
            If True, the unit of the event offsets is taken to be
            time (unit of the data), otherwise samples.
        """

        # translate back to dur and offset
        dur = end_time - start_time
        offset = start_time
        buf = buffer_time

        # get the event offsets
        if ((not (hasattr(events, 'dtype') or hasattr(events, 'columns')))
                or (hasattr(events, 'dtype') and events.dtype.names is None)):
            # they just passed in a list
            event_offsets = events
        elif ((hasattr(events, 'dtype') and (eoffset in events.dtype.names))
              or (hasattr(events, 'columns') and (eoffset in events.columns))):
            event_offsets = events[eoffset]
        else:
            raise ValueError(eoffset + ' must be a valid fieldname ' +
                             'specifying the offset for the data.')

        # Sanity checks:
        if (dur < 0):
            raise ValueError('Duration must not be negative! ' +
                             'Specified duration: ' + str(dur))
        if (np.min(event_offsets) < 0):
            raise ValueError('Event offsets must not be negative!')

        # make sure the events are an actual array:
        event_offsets = np.asarray(event_offsets)
        if eoffset_in_time:
            # convert to samples
            event_offsets = np.atleast_1d(
                np.int64(np.round(event_offsets * self.samplerate)))

        # set event durations from rate
        # get the samplesize
        samplesize = 1. / self.samplerate

        # get the number of buffer samples
        buf_samp = int(np.ceil(buf / samplesize))

        # calculate the offset samples that contains the desired offset
        offset_samp = int(
            np.ceil((np.abs(offset) - samplesize * .5) / samplesize) *
            np.sign(offset))

        # finally get the duration necessary to cover the desired span
        #dur_samp = int(np.ceil((dur - samplesize*.5)/samplesize))
        dur_samp = (int(np.ceil(
            (dur + offset - samplesize * .5) / samplesize)) - offset_samp + 1)

        # add in the buffer
        dur_samp += 2 * buf_samp
        offset_samp -= buf_samp

        # check that we have all the data we need before every event:
        if (np.min(event_offsets + offset_samp) < 0):
            bad_evs = ((event_offsets + offset_samp) < 0)
            raise ValueError('The specified values for offset and buffer ' +
                             'require more data than is available before ' +
                             str(np.sum(bad_evs)) + ' of all ' +
                             str(len(bad_evs)) + ' events.')

        # process the channels
        if isinstance(channels, dict):
            # turn into indices
            ch_info = self.channels
            key = list(channels.keys())[0]
            channels = [
                np.nonzero(ch_info[key] == c)[0][0] for c in channels[key]
            ]
        elif isinstance(channels, str):
            # find that channel by name
            channels = np.nonzero(self.channels['name'] == channels)[0][0]
        if channels is None or len(np.atleast_1d(channels)) == 0:
            channels = np.arange(self.nchannels)
        channels = np.atleast_1d(channels)
        channels.sort()

        # load the timeseries (this must be implemented by subclasses)
        eventdata = self._load_data(channels, event_offsets, dur_samp,
                                    offset_samp)

        # calc the time range
        # get the samplesize
        samp_start = offset_samp * samplesize
        samp_end = samp_start + (dur_samp - 1) * samplesize
        time_range = np.linspace(samp_start, samp_end, dur_samp)

        # when channels is an array of channel labels, i.e. strings like '002', '003', ...
        # we need to use xray arrays to do fancy indexing
        if channels.dtype.char == 'S':
            try:
                # perhaps we should vectorize it...
                selector_array = [
                    np.where(self.channels.name == channel)[0][0]
                    for channel in channels
                ]

                selected_channels = self.channels[selector_array]

                # from xray import DataArray
                # self.channels_xray = DataArray(self.channels.number,coords=[self.channels.name],dims=['name'])
                # self.channels_xray = self.channels_xray.loc[channels]
                #
                # self.channels_xray=np.rec.fromarrays([self.channels_xray.values,self.channels_xray.coords['name'].values],names='number,name')

            except ImportError:
                pass

            dims = [
                Dim(selected_channels, 'channels'),  # can index into channels
                Dim(events, 'events'),
                Dim(time_range, 'time')
            ]

            # dims = [Dim(self.channels_xray,'channels'),  # can index into channels
            #         Dim(events,'events'),
            #         Dim(time_range,'time')]

        else:

            # make it a timeseries
            # ORIGINAL CODE
            dims = [
                Dim(self.channels[channels],
                    'channels'),  # can index into channels
                Dim(events, 'events'),
                Dim(time_range, 'time')
            ]

        # # make it a timeseries
        # dims = [Dim(self.channels[channels],'channels'),  # can index into channels
        #         Dim(events,'events'),
        #         Dim(time_range,'time')]

        eventdata = TimeSeries(np.asarray(eventdata),
                               'time',
                               self.samplerate,
                               dims=dims)

        # filter if desired
        if not (filt_freq is None):
            # filter that data
            eventdata = eventdata.filtered(filt_freq,
                                           filt_type=filt_type,
                                           order=filt_order)

        # resample if desired
        if (not (resampled_rate is None)
                and not (resampled_rate == eventdata.samplerate)):
            # resample the data
            eventdata = eventdata.resampled(resampled_rate,
                                            loop_axis=loop_axis,
                                            num_mp_procs=num_mp_procs)

        # remove the buffer and set the time range
        if buf > 0 and not (keep_buffer):
            # remove the buffer
            eventdata = eventdata.remove_buffer(buf)

        # return the timeseries
        return eventdata
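
A minimal usage sketch for get_event_data above; the reader instance, channel indices and event times are hypothetical and only illustrate the call shape implied by the signature and docstring.

# Hedged usage sketch; `reader` is a hypothetical instance of the class above.
channels = [0, 1, 2]                 # channel indices to load
events = [12.5, 48.0, 95.25]         # event onsets in seconds (eoffset_in_time=True)
epochs = reader.get_event_data(channels,
                               events,
                               start_time=-0.5,
                               end_time=1.5,
                               buffer_time=1.0,
                               filt_freq=[48.0, 52.0],
                               filt_type='stop')
# epochs is a TimeSeries with dims (channels, events, time)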
Example #4
app = Flask(__name__)

database_info = {
    "url": "localhost",
    "port": "27017",
    "database": "Sibyl"
}
label = "analyzer"
db_manager = DatabaseManager(database_info, label)

global pewma
global cl
global ts
cl = StaticControlLimits()
pewma_model = Pewma()
ts = TimeSeries()


@app.route('/static_control_limits', methods=['POST'])
def static_control_limits():
    """ Function used to check if data is above set threshold

        Args:
            data (dict): the raw data

        Returns:
            dict
    """
    try:
        content = request.get_json()
        try:
Example #5
# Create a time series object
dt = 12.0
t = dt * np.arange(0, nt)

# Fix the input datacube for any non-finite data
for i in range(0, nx):
    for j in range(0, ny):
        d = dc[j, i, :].flatten()
        # Fix the data for any non-finite entries
        d = tsutils.fix_nonfinite(d)
        # Remove the mean
        d = d - np.mean(d)
        dc[j, i, :] = d

# Time series datacube
dcts = TimeSeries(t, dc)
dcts.name = 'AIA ' + str(wave) + '-' + stype + ': ' + location

# Get the Fourier power
pwr = dcts.ppower

# Arithmetic mean of the Fourier power
iobs = np.mean(pwr, axis=(0, 1))

# Sigma for the fit to the power
sigma = np.std(pwr, axis=(0, 1))

# Result # 1 - add up all the emission and do the analysis on the full FOV
# Also, make a histogram of all the power spectra to get an idea of the
# variation present
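
For reference, the quantity taken from dcts.ppower above can be sketched with plain numpy, assuming ppower holds the positive-frequency Fourier power of each pixel (the exact normalisation may differ).

# Sketch only: per-pixel Fourier power with numpy, assuming dcts.ppower is the
# positive-frequency power of each pixel up to normalisation.
freqs = np.fft.rfftfreq(nt, d=dt)          # positive frequencies in Hz (for reference)
pwr_np = np.abs(np.fft.rfft(dc, axis=2)) ** 2
iobs_np = np.mean(pwr_np, axis=(0, 1))     # arithmetic mean over the field of view
sigma_np = np.std(pwr_np, axis=(0, 1))     # spread used as the sigma for the fit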
Example #6
dt = 12.0
nt = 1800
period = 300.0
window = 31
np.random.seed(seed=1)

t = dt * np.arange(0, nt)

noise = np.random.normal(size=nt)

amplitude = 1.0
data1 = amplitude * np.sin(2 * np.pi * t / period) + noise + 10

data2 = tsutils.movingaverage(data1, window)

ts1 = TimeSeries(t, data1)

ts2 = TimeSeries(t, data2)

ts3 = TimeSeries(t, data1 - data2)

plt.figure(1)
plt.plot(t, data1, label='original time series')
plt.plot(t, data2, label='moving average')
plt.plot(t, data1 - data2, label='original - moving average')
plt.xlabel('time')
plt.ylabel('emission (arbitrary units)')
plt.legend()

window = window // 2
w1 = 1.0 / (2 * window + 1)
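
If tsutils.movingaverage is a plain boxcar of the given width (an assumption; tsutils is not shown here), the smoothing above can be reproduced with numpy alone.

# Hedged sketch: boxcar moving average with numpy, assuming
# tsutils.movingaverage(data1, window) is an unweighted window of 31 samples.
win = 31
data2_np = np.convolve(data1, np.ones(win) / win, mode='same')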
Example #7
    def get_data(self,
                 channels,
                 start_time,
                 end_time,
                 buffer_time=0.0,
                 resampled_rate=None,
                 filt_freq=None,
                 filt_type='stop',
                 filt_order=4,
                 keep_buffer=False,
                 esrc='esrc',
                 eoffset='eoffset',
                 loop_axis=None,
                 num_mp_procs=0,
                 eoffset_in_time=True):
        """
        Return the requested range of data for each event by using the
        proper data retrieval mechanism for each event.

        Parameters
        ----------
        channels: {list,int,None}
            Channels from which to load data.
        start_time: {float}
            Start of epoch to retrieve (in time-unit of the data).
        end_time: {float}
            End of epoch to retrieve (in time-unit of the data).
        buffer_time: {float},optional
            Extra buffer to add on either side of the event in order
            to avoid edge effects when filtering (in time unit of the
            data).
        resampled_rate: {float},optional
            New samplerate to resample the data to after loading.
        filt_freq: {array_like},optional
            The range of frequencies to filter (depends on the filter
            type.)
        filt_type = {scipy.signal.band_dict.keys()},optional
            Filter type.
        filt_order = {int},optional
            The order of the filter.
        keep_buffer: {boolean},optional
            Whether to keep the buffer when returning the data.
        esrc : {string},optional
            Name for the field containing the source for the time
            series data corresponding to the event.
        eoffset: {string},optional
            Name for the field containing the offset (in seconds) for
            the event within the specified source.
        eoffset_in_time: {boolean},optional        
            If True, the unit of the event offsets is taken to be
            time (unit of the data), otherwise samples.
        
        Returns
        -------
        A TimeSeries instance with dimensions (channels,events,time).
        """

        # check for necessary fields
        if not (esrc in self.dtype.names and eoffset in self.dtype.names):
            raise ValueError(esrc + ' and ' + eoffset +
                             ' must be valid fieldnames ' +
                             'specifying source and offset for the data.')

        # get ready to load data
        eventdata = []
        events = []

        # speed up by getting unique event sources first
        usources = np.unique(self[esrc])

        # loop over unique sources
        eventdata = None
        for src in usources:
            # get the eventOffsets from that source
            ind = np.atleast_1d(self[esrc] == src)

            if len(ind) == 1:
                event_offsets = self[eoffset]
                events.append(self)
            else:
                event_offsets = self[ind][eoffset]
                events.append(self[ind])

            #print "Loading %d events from %s" % (ind.sum(),src)
            # get the timeseries for those events
            newdat = src.get_event_data(channels, event_offsets, start_time,
                                        end_time, buffer_time, resampled_rate,
                                        filt_freq, filt_type, filt_order,
                                        keep_buffer, loop_axis, num_mp_procs,
                                        eoffset, eoffset_in_time)
            if eventdata is None:
                eventdata = newdat
            else:
                eventdata = eventdata.extend(newdat, axis=1)

        # concatenate (must eventually check that dims match)
        tdim = eventdata['time']
        cdim = eventdata['channels']
        srate = eventdata.samplerate
        events = np.concatenate(events).view(self.__class__)
        eventdata = TimeSeries(eventdata,
                               'time',
                               srate,
                               dims=[cdim, Dim(events, 'events'), tdim])

        return eventdata
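
A hedged usage sketch for get_data above; `events` stands for an instance of the recarray-like class that defines the method, carrying the default 'esrc' and 'eoffset' fields.

# Hedged usage sketch; `events` is a hypothetical instance of the class above.
eeg = events.get_data(channels=[0, 1],
                      start_time=-0.2,
                      end_time=0.8,
                      buffer_time=1.0,
                      resampled_rate=200.0,
                      filt_freq=[58.0, 62.0],
                      filt_type='stop')
# eeg is a TimeSeries with dimensions (channels, events, time)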
Example #8
 def test_tuple_list_init(self):
     series = TimeSeries([ (1, 2), (3, 4), (5, 6) ])
     self.assertListEqual(series.timestamps, [1, 3, 5])
     self.assertListEqual(series.values, [2, 4, 6])
     self.assertEqual(len(series), 3)
Example #9
 def test_invalid_trend(self):
     series = TimeSeries([])
     with self.assertRaises(ArithmeticError):
         series.trend()
Example #10
 def test_linear_trend(self):
     series = TimeSeries([ (1, 32), (2, 55), (3, 40) ])
     trend = series.trend(order=TimeSeries.LINEAR).round()
     self.assertListEqual(trend.timestamps, [1, 2, 3])
     self.assertListEqual(trend.values, [38, 42, 46])
Example #11
 def test_quadratic_trend(self):
     series = TimeSeries([ (1, 32), (2, 55), (3, 40), (4, 100) ])
     trend = series.trend(order=TimeSeries.QUADRATIC).round()
     self.assertListEqual(trend.timestamps, [1, 2, 3, 4])
     self.assertListEqual(trend.values, [38, 38, 57, 94])
Example #12
 def test_group_abs(self):
     a = TimeSeries([ (1, -1), (2, -3), (3, 3.3) ])
     group = DataFrame(a=a)
     group = abs(group)
     self.assertListEqual(group['a'].values, [ 1, 3, 3.3 ])
Example #13
 def test_group_timestamps(self):
     a = TimeSeries([ (1, 3), (2, 3), (3, 3) ])
     b = TimeSeries([ (0, 2), (1, 3), (2, 2), (3, 1), (4, 1) ])
     c = TimeSeries([ (5, 1), (6, 1) ])
     group = DataFrame(a=a, b=b, c=c)
     self.assertListEqual(group.timestamps, [ 0, 1, 2, 3, 4, 5, 6 ])
Example #14
 def test_initial_sort(self):
     points = [ (3, 54), (2, 100), (4, 32) ]
     series = TimeSeries(points)
     self.assertListEqual(series.timestamps, [2, 3, 4])
     self.assertListEqual(series.values, [100, 54, 32])
Example #15
    def get_event_data(self,
                       channels,
                       event_offsets,
                       start_time,
                       end_time,
                       buffer_time=0.0,
                       resampled_rate=None,
                       filt_freq=None,
                       filt_type='stop',
                       filt_order=4,
                       keep_buffer=False):
        """
        Return a TimeSeries containing data for the specified channels
        in the form [events,duration].

        Parameters
        ----------
        channels: {int}
            Channels from which to load data.
        event_offsets: {array_like}
            Array/list of event offsets (in samples) into the data,
            specifying each event onset time.
        start_time: {float}
            Start of epoch to retrieve (in time-unit of the data).
        end_time: {float}
            End of epoch to retrieve (in time-unit of the data).
        buffer_time: {float},optional
            Extra buffer to add on either side of the event in order
            to avoid edge effects when filtering (in time unit of the
            data).
        resampled_rate: {float},optional
            New samplerate to resample the data to after loading.
        filt_freq: {array_like},optional
            The range of frequencies to filter (depends on the filter
            type.)
        filt_type = {scipy.signal.band_dict.keys()},optional
            Filter type.
        filt_order = {int},optional
            The order of the filter.
        keep_buffer: {boolean},optional
            Whether to keep the buffer when returning the data.
        """

        # translate back to dur and offset
        dur = end_time - start_time
        offset = start_time
        buf = buffer_time

        # Sanity checks:
        if (dur < 0):
            raise ValueError('Duration must not be negative! ' +
                             'Specified duration: ' + str(dur))
        if (np.min(event_offsets) < 0):
            raise ValueError('Event offsets must not be negative!')

        # make sure the events are an actual array
        event_offsets = np.asarray(event_offsets)

        # set event durations from rate
        # get the samplesize
        samplesize = 1. / self.samplerate

        # get the number of buffer samples
        buf_samp = int(np.ceil(buf / samplesize))

        # calculate the offset samples that contains the desired offset
        offset_samp = int(
            np.ceil((np.abs(offset) - samplesize * .5) / samplesize) *
            np.sign(offset))

        # finally get the duration necessary to cover the desired span
        #dur_samp = int(np.ceil((dur - samplesize*.5)/samplesize))
        dur_samp = (int(np.ceil(
            (dur + offset - samplesize * .5) / samplesize)) - offset_samp + 1)

        # add in the buffer
        dur_samp += 2 * buf_samp
        offset_samp -= buf_samp

        # check that we have all the data we need before every event:
        if (np.min(event_offsets + offset_samp) < 0):
            bad_evs = ((event_offsets + offset_samp) < 0)
            raise ValueError('The specified values for offset and buffer ' +
                             'require more data than is available before ' +
                             str(np.sum(bad_evs)) + ' of all ' +
                             str(len(bad_evs)) + ' events.')

        # process the channels
        if channels is None or len(np.atleast_1d(channels)) == 0:
            channels = np.arange(self.nchannels)
        channels = np.atleast_1d(channels)

        # load the timeseries (this must be implemented by subclasses)
        eventdata = self._load_data(channels, event_offsets, dur_samp,
                                    offset_samp)

        # calc the time range
        # get the samplesize
        samp_start = offset_samp * samplesize
        samp_end = samp_start + (dur_samp - 1) * samplesize
        time_range = np.linspace(samp_start, samp_end, dur_samp)

        # make it a timeseries
        dims = [
            Dim(channels, 'channels'),
            Dim(event_offsets, 'event_offsets'),
            Dim(time_range, 'time')
        ]
        eventdata = TimeSeries(np.asarray(eventdata),
                               'time',
                               self.samplerate,
                               dims=dims)

        # filter if desired
        if not (filt_freq is None):
            # filter that data
            eventdata = eventdata.filtered(filt_freq,
                                           filt_type=filt_type,
                                           order=filt_order)

        # resample if desired
        if (not (resampled_rate is None)
                and not (resampled_rate == eventdata.samplerate)):
            # resample the data
            eventdata = eventdata.resampled(resampled_rate)

        # remove the buffer and set the time range
        if buf > 0 and not (keep_buffer):
            # remove the buffer
            eventdata = eventdata.remove_buffer(buf)

        # return the timeseries
        return eventdata
Example #16
 def test_indexing(self):
     series = TimeSeries([ (1, 3), (2, 3), (3, 3) ])
     self.assertEqual(series[1], 3)
     self.assertEqual(series[2], 3)
     with self.assertRaises(KeyError):
         foo = series[4]
Example #17
def test_stand():
    t1 = TimeSeries([1, 2, 3, 4], [40, 50, 60, 70])
    val = _corr.stand(np.array(t1.values()), 55.0, 10)
    assert (list(val) == [-1.5, -0.5, 0.5, 1.5])
Example #18
 def test_iteration(self):
     points = [ (1, 2), (3, 4), (5, 6) ]
     series = TimeSeries(points)
     self.assertListEqual([ s for s in series ], points)
Example #19
    plt.grid(True)
    plt.show()


if __name__ == "__main__":
    events = queue.Queue()  # synchronized queue

    status = dict()  # information to keep across ticks
    status["is_sim"] = True

    portfolio = PortfolioLocal(status)

    execution = SimulatedExecutionHandler(status)

    timeseries = TimeSeries(status)

    strategy = OLSTIME(status)    
#    strategy = SMAKNN(status)
#    strategy = MARTINRSI(status)
#    strategy = SMABOLPIPRSI(status)
#    strategy = SMABOLPIP(status)    
#    strategy = SMAPIP(status)
#    strategy = SMAPIPRSI(status)
#    strategy = SMAOLSPIP(status)
#    strategy = SMABOL(status)
#    strategy = SMARSIOLS(status)
#    strategy = WMA(status)
#    strategy = SMAOLS(status)
#    strategy = SMA(status)
#    strategy = RSI(status)
Example #20
 def test_map_return_type(self):
     series = TimeSeries([ (1, 2), (3, 4), (5, 6) ])
     double = series.map(lambda y: y * 2)
     self.assertTrue(isinstance(double, TimeSeries))
     self.assertListEqual([ (1, 4), (3, 8), (5, 12) ], double.points)
Example #21
        event_list = np.concatenate((event_list, tmp)).flatten()

# Event list
np.random.shuffle(event_list)

# Poisson mean time between events
lam = 1.0

print('Number of events %i' % len(event_list))

#
t = 0.01 * np.arange(lam * event_list.size)
tmin = np.min(t)
tmax = np.max(t)
emission = np.zeros_like(t)
start_time = 0.0
for ev in event_list:
    T = (1.0 * ev)**(-power)
    energy = T**(1.0 + gamma)
    start_time = np.random.uniform(low=tmin, high=tmax)
    emnew = event(t, start_time, energy, T)
    if np.max(emnew) >= 0.0:
        emission = emission + emnew
    else:
        print('Not big enough')

ts = TimeSeries(t[::10], emission[::10])
ts.peek_ps()
plt.loglog()
plt.show()
Example #22
    def insert_ts(self, pk, ts):    
        try:
            pk = str(pk)
        except:
            raise ValueError("Primary keys must be string-compatible")
        if ':' in pk:
            raise ValueError("Primary keys may not include the ':' character") 
        if not isinstance(ts, TimeSeries):
            raise ValueError('Must insert a TimeSeries object')

        if pk not in self.rows:
            self.rows[pk] = {self.pkfield:pk}
        else:
            raise ValueError('Duplicate primary key found during insert')
        if pk not in self.rows_SAX:
            self.rows_SAX[pk] = {self.pkfield:pk}
        else:
            raise ValueError('Duplicate primary key found during insert')

        # Save timeseries as a 2d numpy array
        if self.tslen is None:
            self.tslen = len(ts)
        elif len(ts) != self.tslen:
            raise ValueError('All timeseries must be of same length')
        if not os.path.exists(self.dbname+"_ts"):
            os.makedirs(self.dbname+"_ts")
        np.save(self.dbname+"_ts/"+pk+"_ts.npy", np.vstack((ts.time, ts.data)))
        
        x1 = np.linspace(min(ts.time),max(ts.time), self.tslen_SAX)
        ts_SAX_data = interp1d(ts.time, ts.data)(x1)
        ts_SAX_time = x1
        ts_SAX = TimeSeries(ts_SAX_time,ts_SAX_data)
        if not os.path.exists(self.dbname+"_ts_SAX"):
            os.makedirs(self.dbname+"_ts_SAX")
        np.save(self.dbname+"_ts_SAX/"+pk+"_ts_SAX.npy", np.vstack((ts_SAX.time, ts_SAX.data)))

        # Save a record in the database file
        if self.overwrite or not os.path.exists(self.dbname):
            fd = open(self.dbname, 'w')
            self.overwrite = False
        else:
            fd = open(self.dbname, 'a')
        fd.write(pk+':'+self.pkfield+':'+pk+'\n')
        if 'vp' in self.schema:
            fd.write(pk+':vp:False\n')
        fd.close()

        self.rows[pk]['ts'] = ts  
        if 'vp' in self.schema:
            self.rows[pk]['vp'] = False

        self.rows_SAX[pk]['ts'] = ts_SAX  
        rep = isax_indb(ts_SAX,self.card,self.wordlength)
        self.SAX_tree.insert(pk, rep)
        if 'vp' in self.schema:
            self.rows_SAX[pk]['vp'] = False

        for vp in self.vps:
            ts1 = self.rows[vp]['ts']
            self.upsert_meta(pk, {'d_vp-'+vp : self.dist(ts1,ts)})

        self.update_indices(pk)
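
A minimal usage sketch for insert_ts above, assuming `db` is an instance of the enclosing class and that this project's TimeSeries takes (times, values), as in the np.vstack((ts.time, ts.data)) save call.

# Hedged usage sketch; `db` is a hypothetical instance of the enclosing class.
times = [0, 1, 2, 3, 4]
values = [0.0, 2.0, 4.0, 6.0, 8.0]
db.insert_ts('ts-1', TimeSeries(times, values))
# Re-inserting the same primary key raises ValueError('Duplicate primary key found during insert')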
Example #23
 def _load_ts(self, pk):
     filepath = 'documents/ts/' + pk + '.json'
     with open(filepath, 'r+') as f:
         time_series = json.load(f)
     time_series['ts'] = TimeSeries(*time_series['ts'])
     return time_series
Example #24
    def __init__(self, schema, pkfield, load=False, dbname="db", overwrite=False, dist=procs.corr_indb, threshold = 10, wordlength = 16, tslen = 256, cardinality = 64):
        """
        Parameters
        ----------
        schema : dict
            Key = name of field (e.g. 'ts', 'mean')
            Value = dict of that field's properties.  Recognized keys include:
                'type': Required for all fields except ts.  pkfield must have type str.
                'index': Required for all fields.  
        pkfield : str
            The name of the field which will be the primary key.  Must match a key in schema.
        load : bool
            Whether to populate the database with an existing one on file.
        dbname : str
            Database filename
        overwrite : bool
            If load=False, whether to overwrite an existing database.
        dist : function
            Calculates the distance between two TimeSeries objects, must take arguments (ts1, ts2)
        Attributes
        ----------
        indexes : dict
            Key = fieldname
            Value = binary search tree (if int or float) or dictionary of sets (otherwise) mapping values to pks
        rows : dict
            Key = primary key
            Value = dict of the fields associated with each key
        schema : dict (See above)
        pkfield : str (See above)
        dbname : str (See above)
        tslen : int
            The length of each timeseries in the database, strictly enforced
        """
        # ---- Validating input ---- #
        if not isinstance(pkfield, str):
            raise ValueError("Field name must be of type str")
        if not isinstance(threshold, int):
            raise ValueError("Threshold must be of type int")
        if not isinstance(wordlength, int):
            raise ValueError("Word length must be of type int")
        if threshold <= 0:
            raise ValueError("Threshold must be greater than zero")
        if wordlength <= 0:
            raise ValueError("Word length must be greater than zero")
        if '1' in '{0:b}'.format(wordlength)[1:]:
            raise ValueError("Word length must be a power of two")
        if not isinstance(tslen, int):
            raise ValueError("TimeSeries length must be of type int")
        if tslen < wordlength:
            raise ValueError("TimeSeries length must be greater than or equal to the word length")
        if '1' in '{0:b}'.format(tslen)[1:]:
            raise ValueError("TimeSeries length must be a power of two")
        if not isinstance(cardinality, int):
            raise ValueError("Cardinality must be of type int")
        if cardinality <= 0:
            raise ValueError("Cardinality must be greater than zero")
        if '1' in '{0:b}'.format(cardinality)[1:]:
            raise ValueError("Cardinality must be a power of two")
        if cardinality > 64:
            raise ValueError("Cardinalities greater than 64 are not supported")    
        if not isinstance(load, bool):
            raise ValueError("Load must be of type bool")
        if not isinstance(dbname, str):
            raise ValueError("Database name must be string")
        if not isinstance(overwrite, bool):
            raise ValueError("Overwrite must be of type bool")
        if isinstance(schema, dict):
            for field in schema:
                if field == 'DELETE':
                    raise ValueError("The fieldname 'DELETE' is forbidden")
                if ':' in field:
                    raise ValueError("Field names may not contain the ':' character")
                if field != 'ts':   
                    if 'type' not in schema[field]:
                        raise ValueError("Schema must specify type for each non-ts field")
                    if field == pkfield and schema[field]['type'] != str:
                        raise ValueError("Primary key must be of type str")
                    if schema[field]['type'] not in [int, float, bool, str]:
                        raise ValueError("Only types int, float, bool, and str are supported")
                if field[:5] == 'd_vp-':
                    raise ValueError("Field names beginning with 'd_vp-' are forbidden")
                if field == 'vp' and schema[field]['type'] != bool:
                    raise ValueError("Field 'vp' must be of boolean type")
        else:
            raise ValueError("Schema must be a dictionary")
        if pkfield not in schema:
            raise ValueError("Primary key field must be included in schema")

        # Assign attributes according to schema
        self.indexes = {}
        self.rows = {}
        self.rows_SAX = {}
        self.wordlength = wordlength
        self.threshold = threshold
        self.SAX_tree = Tree_Initializer(threshold = threshold, wordlength = wordlength).tree    
        self.card = cardinality
        self.schema = schema
        self.dbname = dbname
        self.pkfield = pkfield
        self.tslen = None
        self.tslen_SAX = tslen
        self.overwrite = overwrite
        self.dist = dist
        self.vps = []
        for s in schema:
            indexinfo = schema[s]['index']
            if indexinfo is not None:
                if schema[s]['type'] == int or schema[s]['type'] == float:
                    self.indexes[s] = BinarySearchTree()
                else:  # Add a bitmask option for strings?
                    self.indexes[s] = defaultdict(set)

        if load:   
            try:
                fd = open(dbname)
                for l in fd.readlines():
                    [pk, field, val] = l.strip().split(":")
                    if field in self.schema:
                        if pk not in self.rows:
                            self.rows[pk] = {pkfield:pk}
                        else:
                            if self.schema[field]['type'] == bool:
                                if val == 'False': 
                                    self.rows[pk][field] = False
                                else:
                                    self.rows[pk][field] = True
                            else:
                                self.rows[pk][field] = self.schema[field]['type'](val)
                        if pk not in self.rows_SAX:
                            self.rows_SAX[pk] = {pkfield:pk}
                        else:
                            if self.schema[field]['type'] == bool:
                                if val == 'False': 
                                    self.rows_SAX[pk][field] = False
                                else:
                                    self.rows_SAX[pk][field] = True
                            else:
                                self.rows_SAX[pk][field] = self.schema[field]['type'](val)
                        if field == 'vp' and val == 'True':
                            self.vps.append(pk)
                            self.indexes['d_vp-'+pk] = BinarySearchTree()
                    elif field == 'DELETE':
                        if 'vp' in schema and self.rows[pk]['vp'] == True:
                            self.del_vp(pk)
                        del self.rows[pk]
                        del self.rows_SAX[pk]
                    elif field[:5] == 'd_vp-':
                        self.rows[pk][field] = float(val)
                    else:
                        raise IOError("Database is incompatible with input schema")
                fd.close()
                
                # Read in timeseries of non-deleted keys
                for pk in self.rows:
                    tsarray = np.load(self.dbname+"_ts/"+pk+"_ts.npy")
                    self.rows[pk]['ts'] = TimeSeries(tsarray[0,:], tsarray[1,:])
                    self.tslen = tsarray.shape[1]
                    #tsarray2 = np.load(self.dbname+"_ts_SAX/"+pk+"_ts_SAX.npy")
                    x1 = np.linspace(min(tsarray[0,:]),max(tsarray[0,:]), self.tslen_SAX)
                    ts_SAX_data = interp1d(tsarray[0,:], tsarray[1,:])(x1)
                    ts_SAX_time = x1
                    ts_SAX = TimeSeries(ts_SAX_time,ts_SAX_data)
                    self.rows_SAX[pk]['ts'] = ts_SAX
                    rep = isax_indb(ts_SAX,self.card,self.wordlength)
                    self.SAX_tree.insert(pk, rep)
                self.index_bulk(list(self.rows.keys()))
            except:
                raise IOError("Database does not exist or has been corrupted")
        else:
            if os.path.exists(dbname) and overwrite == False:
                raise ValueError("Database of that name already exists. Delete existing db, rename, or set overwrite=True.")
Example #25
def score_all(functionNode):
    """
        score all thresholds again by using the stream implementation
        #works only on context of the class object
    """
    logger = functionNode.get_logger()
    logger.debug("score_all")
    progressNode = functionNode.get_child("control").get_child("progress")
    progressNode.set_value(0)
    model = functionNode.get_model()  # for the model API
    annos = functionNode.get_child("annotations").get_leaves()
    annos = [
        anno for anno in annos if anno.get_child("type").get_value() == "time"
    ]  #only the time annotations
    variableIds = functionNode.get_child(
        "variables").get_leaves_ids()  # the variableids to work on
    try:
        overWrite = functionNode.get_child("overWrite").get_value()
    except:
        overWrite = True

    obj = functionNode.get_parent().get_object()
    obj.reset()  # read the new thresholds into the object!! this also affects parallel streaming processes

    # for each id (variable) that has threshold(s)
    # take the values and times of that variable
    # find out the annotations we need, create the stream data blob, send it over
    progressStep = 1 / float(len(obj.get_thresholds()))
    total = None

    for id, thresholdsInfo in obj.get_thresholds().items(
    ):  # thresholds is a dict of {id: {tag:{"min":0,"max":1}, tag2:{} .. ,id2:{}}
        if id not in variableIds:
            continue  # skip this one, is not selected
        progressNode.set_value(progressNode.get_value() + progressStep)
        var = model.get_node(id)
        data = var.get_time_series()
        times = data["__time"]
        #now produce the interesting states
        blob = {
            "type": "timeseries",
            "data": {
                "__time": times,
                id: data["values"],
                "__states": {}
            }
        }
        for state in thresholdsInfo.keys(
        ):  #iterate over the states where the variable has special thresholds
            myAnnos = mh.filter_annotations(annos, state)
            stateMask = mh.annotations_to_class_vector(myAnnos, data["__time"])
            stateMask = numpy.isfinite(stateMask)
            blob["data"]["__states"][state] = stateMask

        #now we have prepared a data and state blob, we will now score by feeding it into the stream scorer
        #del blob["data"]["__states"]#for test, now
        blob = obj.feed(blob)
        #now the blob contains more entries, e.g. the score variable id and the according scores, that is what we want
        for blobId, values in blob["data"].items():
            if blobId not in ["__time", id, "__states"]:
                #this is the score, overwrite the whole thing
                scoreNode = model.get_node(blobId)
                if scoreNode.get_name() == "_total_score":
                    continue  # this is the combined result of several variables going into the stream scoring, not relevant here

                scoreNode.set_time_series(
                    values=values, times=times
                )  # xxx is set ok here, or do we need "insert" to make sure there has not been changed in the meantime?
                model.notify_observers(scoreNode.get_parent().get_id(),
                                       "children")  # we trigger

                # build the total score:
                # merge in the new times, resample the total score, resample the local score, then merge them
                # the merge function will use the new values wherever there is one (empty fields are set to "nan")
                # for the total score, we need a resampling to avoid the mixing of results, e.g.
                # two sensors have different results during a given interval but at different times; if we just merge
                # we get a True, False, True, False mixture
                # so we build the merge vector: first resample, then merge

                values[numpy.isfinite(
                    values)] = -1  # set -1 for all out of limit
                if total is None:
                    total = TimeSeries(values=values, times=times)
                else:
                    local = TimeSeries(values=values, times=times)
                    total.merge(
                        local
                    )  # the merge resamples the incoming data to the existing time series, NaN will be replaced by new values,
    # finally, write the total
    # if the overWrite is True, we replace, otherwise we merge with the existing, previous result
    totalScoreNode = functionNode.get_parent().get_child("output").get_child(
        "_total_score")
    if overWrite:
        totalScoreNode.set_time_series(values=total.get_values(),
                                       times=total.get_times())
    else:
        totalScoreNode.merge_time_series(values=total.get_values(),
                                         times=total.get_times())

    return True
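
The resample-then-merge step in the loop above, shown in isolation; this sketch assumes the TimeSeries(values=..., times=...) constructor used in score_all and the merge() behaviour described in its comments (incoming values are resampled onto the existing times and fill NaN slots).

# Hedged sketch of the total-score merge; relies on the TimeSeries API used above.
total = TimeSeries(values=numpy.array([-1.0, numpy.nan, numpy.nan]),
                   times=numpy.array([0.0, 1.0, 2.0]))
local = TimeSeries(values=numpy.array([-1.0, -1.0]),
                   times=numpy.array([1.0, 2.0]))
total.merge(local)  # NaN entries of `total` at t=1 and t=2 are filled from `local`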
Example #26
        'index': 1
    },
    'vp': {
        'type': "bool",
        'index': 1
    }
}

orders = [0, 3, 1, 2]
blargs = [1, 1, 2, 2]
times = [0, 1, 2, 3, 4]  # Same time basis
values1 = [0, 2, 4, 6, 8]  # Two example time series values
values2 = [2, 4, 6, 8, 10]
vps = [True, False, False,
       True]  # Vantage points for first and last timeseries
tsrs = [TimeSeries(times, values1 if i < 2 else values2)
        for i in range(4)]  # only two value ranges


def setup_module(module):
    if os.path.exists("documents/"):
        shutil.rmtree('documents/')

    # Extend schema
    for i in range(4):
        if vps[i]:
            schema["d_vp-{}".format(i)] = {'type': "float", 'index': 1}

    # Make db
    db = DocDB('pk', schema)
Example #27
from timeseries import TimeSeries

threes = TimeSeries(range(0,1000,3))
fives = TimeSeries(range(0,1000,5))

s = 0
for i in range(0,1000):
  if i in threes or i in fives:
    s += i

print("sum",s)
Example #28
def test_delete():
    client.insert_ts('test', TimeSeries(times, values1))
    client.delete_ts('test')
Example #29
red_noise = False

dt = 12.0
nt = 300
np.random.seed(seed=1)
model_param = [10.0, 1.77, -100.0]
pls1 = SimplePowerLawSpectrumWithConstantBackground(model_param,
                                                    nt=nt,
                                                    dt=dt)
data = TimeSeriesFromPowerSpectrum(pls1).sample
t = dt * np.arange(0, nt)
amplitude = 0.0
data = data + amplitude * (data.max() - data.min()) * np.sin(2 * np.pi * t / 300.0)

# Create a time series object
ts = TimeSeries(t, data)
ts.label = 'emission'
ts.units = 'arb. units'
ts.name = 'simulated data [n=%4.2f]' % (model_param[1])

# Get the normalized power and the positive frequencies
iobs = ts.PowerSpectrum.ppower
this = ([ts.PowerSpectrum.frequencies.positive, iobs],)

# _____________________________________________________________________________
# -----------------------------------------------------------------------------
# Wavelet transform using a white noise background
# -----------------------------------------------------------------------------
var = ts.data
# Range of periods to average
avg1, avg2 = (150.0, 400.0)
Example #30
 def test_ets_forecast_with_frequency(self):
     series = TimeSeries([ (1, 100), (2, 200), (3, 100), (4, 200), (5, 100) ])
     forecast = series.forecast(3, method=TimeSeries.ETS, frequency=4)
     self.assertTrue(isinstance(forecast, TimeSeries))
     self.assertListEqual(forecast.timestamps, [6, 7, 8])