Exemplo n.º 1
0
    def test_period_op3(self):
        """Test period operation on a time series with 2-dimensional data.

        A regular series of 3005 five-minute samples with 3 columns is
        built, starting at 11:15, so sample index 9 is the first one that
        falls on an hour boundary (12:00).  Known minimum and maximum
        values are planted in every complete hour, the hourly MIN, MAX,
        MEAN and SUM are computed directly from the raw array, and each
        is compared with the output of ``period_op``.
        """
        st = datetime.datetime(year=1990, month=2, day=3, hour=11, minute=15)
        num = 3005
        dimension2 = 3
        delta = time_interval(minutes=5)
        interval = '1 hour'
        # Twelve five-minute samples make up one hourly period.
        samples_per_period = 12

        data = sciarray([[random.uniform(self.min_val, self.max_val)
                          for i in range(dimension2)]
                         for k in range(num)])

        # First index aligned with the calendar hour.
        i0 = 9
        # Number of complete periods available from i0 onwards.  There are
        # exactly (num - i0) samples in data[i0:], so no "+ 1" here; the
        # extra one would over-count when (num - i0 + 1) is a multiple of
        # the period length and break the reshape below.
        num_interval = (num - i0) // samples_per_period

        # Plant the known extreme values inside every complete period.
        for k in range(num_interval):
            index = i0 + k * samples_per_period + 1
            data[index, ] = self.min_val
            data[index + 1, ] = self.max_val

        # Reference results: view the aligned part as (period, sample, column)
        # and reduce along the sample axis.
        nt_data = sciarray(data[i0:samples_per_period * num_interval + i0, ])
        nt_data.shape = (num_interval, samples_per_period, -1)

        nt_mean = sciadd.reduce(nt_data, 1) / samples_per_period
        nt_sum = sciadd.reduce(nt_data, 1)
        nt_min = sciminimum.reduce(nt_data, 1)
        nt_max = scimaximum.reduce(nt_data, 1)

        ts = rts(data, st, delta, {})

        expected_by_op = [(MIN, nt_min), (MAX, nt_max), (MEAN, nt_mean),
                          (SUM, nt_sum)]
        for op, op_data in expected_by_op:
            nt = period_op(ts, interval, op)
            assert_array_equal(
                nt.data, op_data,
                err_msg="two array not equal in average by %s" % (op))
Exemplo n.º 2
0
    def __init__(self, methodName="runTest"):
        """Create the fixtures shared by the interpolation tests.

        Sets the value range and size constants, builds three regular
        series through ``self.create_rts`` (one of which gets nan written
        at a fixed set of positions), one series through
        ``self.create_its`` and the list of interpolation functions
        under test.
        """
        super(TestInterpolate, self).__init__(methodName)

        ## constants read by the fixture builders and the tests
        self.max_val = 1000
        self.min_val = 0.01
        self.large_data_size = 100000
        self.rts1_years = 5
        self.rts1_delta = parse_interval("1day")

        ## three regular series of identical interval and length
        self.rts1 = self.create_rts(self.rts1_delta, self.rts1_years * 365)
        self.rts2 = self.create_rts(self.rts1_delta, self.rts1_years * 365)
        self.rts_has_nan = self.create_rts(self.rts1_delta,
                                           self.rts1_years * 365)

        ## positions where nan is written into rts_has_nan: a scattered
        ## set, one contiguous run and a few isolated late positions
        scattered = sciarray([0, 5, 12, 14, 15, 20, 21, 22, 23, 45, 49, 101,
                              112, 203, 204, 205])
        contiguous = arange(500, 520)
        isolated = sciarray([600, 607, 700, 709])
        candidates = concatenate((scattered, contiguous, isolated))

        ## any index beyond (length - 5) is replaced by (length - 5)
        upper = len(self.rts_has_nan) - 5
        self.nan_indexes = choose(greater(candidates, upper),
                                  (candidates, upper))

        put(self.rts_has_nan.data, self.nan_indexes, nan)

        self.its1 = self.create_its()
        self.function_to_test = [linear, spline, monotonic_spline, rhistinterp]
Exemplo n.º 3
0
def _boxcar(data, nbefore, nafter):
    """ Inside boxcar averaging function doing real works.

    Each output sample is the unweighted mean of the input sample at the
    same position together with `nbefore` samples before it and `nafter`
    samples after it.  Positions that do not have a complete window are
    set to nan.

    Parameters
    -----------

    data: array
        data samples at regular intervals, one or two dimensional.
        For two dimensional data every column is averaged independently
        along the first axis.

    nbefore: int
        number of samples before the point to be averaged
        (not including this point).

    nafter: int
        number of samples after the point to be averaged
        (not including this point).

    Returns
    --------

    Results: array
        a new array of averaged samples with the same shape as `data`.

    Raises
    ------

    ValueError
        If `data` is neither one nor two dimensional.

    """
    ntotal = nbefore + nafter + 1
    ## equal weights summing to one: the boxcar window
    b = [1.0 / ntotal] * ntotal

    ## result starts out as all nan; averaged values are copied in below
    dt = sciarray([nan] * data.size)
    dt = dt.reshape(data.shape)

    if data.ndim == 1:
        columns = None
    elif data.ndim == 2:
        columns = data.shape[1]
    else:
        raise ValueError(
            "_boxcar function can't process data with dimension more than 2")

    ## No sample has a complete window when the data are shorter than the
    ## window itself.  ("valid" convolution would silently swap its
    ## arguments in that case and return a result of the wrong length.)
    if data.shape[0] < ntotal:
        return dt

    if columns is None:
        dd = convolve(data, b, mode="valid")
    else:
        ## convolve can't handle multi-dimension data directly, so each
        ## column is averaged on its own and the results are put back
        ## side by side.
        dd = [convolve(data[:, i], b, mode="valid") for i in range(columns)]
        dd = transpose(sciarray(dd))

    ## "valid" output starts at the first sample having nbefore
    ## predecessors, so it is shifted by nbefore in the result.
    dd_len = dd.shape[0]
    dt[nbefore:dd_len + nbefore, ] = dd[0:dd_len, ]
    return dt
Exemplo n.º 4
0
def _lowpass_cosine_lanczos_filter_coef(cf, m, normalize=True):
    """Return the convolution coefficients of a low pass lanczos filter.

    Parameters
    -----------

    cf: float
      Cutoff frequency expressed as a ratio of a Nyquist frequency.

    m: int
      Half size of the filtering window; the returned window holds
      2*m+1 coefficients.

    normalize: bool, optional
      If true (the default) the coefficients are divided by their sum.

    Returns
    --------

    Results: array
           Symmetric coefficients of the filtering window, with `cf`
           (before normalization) as the center weight.

    """
    ## one side of the window: cosine weight times lanczos sigma factor
    one_side = []
    for k in range(1, m + 1):
        cosine_weight = cf * sin(pi * k * cf) / (pi * k * cf)
        sigma_factor = sin(pi * k / m) / (pi * k / m)
        one_side.append(cosine_weight * sigma_factor)

    ## mirror the side around the center weight
    window = sciarray(one_side[::-1] + [cf] + one_side)
    if not normalize:
        return window
    return window / window.sum()
Exemplo n.º 5
0
    def test_multidimension_tsdata(self):
        """ Test interpolation methods on multi-dimensional data set.

        A 250x4 regular series and the single-column series made from its
        column 1 are interpolated at the same 50 points; the results for
        that column must be identical and the multi-column result must
        keep its number of columns.
        """

        msgstr = "on test_multidimension_tsdata"
        num = 1000
        data = [sin(2.0 * pi * k / 250.0) for k in range(num)]
        ## floor division: reshape needs integer dimensions
        data = sciarray(data).reshape(num // 4, 4)
        ## column compared against its single-dimension counterpart
        column = 1
        ts = rts(data, datetime(year=1990, month=1, day=2),
                 parse_interval("1hour"), {})
        ts_single_dimension = rts(data[:, column],
                                  datetime(year=1990, month=1, day=2),
                                  parse_interval("1hour"), {})
        times = time_sequence(datetime(year=1990, month=1, day=3),
                              parse_interval("1hour"), 50)
        self.assertEqual(
            abs(ts_single_dimension.data - ts.data[:, column]).max(), 0.0)
        ##rhsit won't pass this test
        function_to_test = [linear, spline, monotonic_spline]
        for funcs in function_to_test:
            fail_msg = "test of %s fail %s." % (funcs.__name__, msgstr)
            nts = funcs(ts, times, filter_nan=False)
            nts_single_dimension = funcs(ts_single_dimension, times,
                                         filter_nan=False)
            self.assertEqual(len(nts), len(times), msg=fail_msg)
            self.assertEqual(nts.data.shape[1], ts.data.shape[1],
                             msg=fail_msg)
            self.assertEqual(
                abs(nts_single_dimension.data - nts.data[:, column]).max(),
                0.0)
Exemplo n.º 6
0
    def test_split_op_regular(self):
        """ Test behaviour of split operation on regular TS.

        A two-column regular series is split twice.  With the second
        argument of ``ts_split`` False, the pieces are expected to match
        the source columns and writes to the pieces must not show up in
        the source.  With it True, the pieces are written to first and
        are still expected to equal the source columns afterwards.
        """
        first_column = sciarray(
            [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0])
        second_column = sciarray(
            [7.0, 1.2, 10.5, 3.0, 1.0, 1.0, 9.0, 3.0, 0.0, 0.2])
        ts = rts(sciarray([first_column, second_column]).transpose(),
                 parse_time("1996-2-1"),
                 parse_interval("1hour"))

        ## ---- split with second argument False ----
        ts1, ts2 = ts_split(ts, False)

        for column, piece in enumerate((ts1, ts2)):
            for source_value, piece_value in zip(ts.data[:, column],
                                                 piece.data):
                self.assertEqual(source_value, piece_value)

        ## writes to the pieces are expected to leave the source untouched
        ts1.data[5] = -9999.0
        ts2.data[2] = -9999.0
        self.assertNotEqual(ts1.data[5], ts.data[5, 0])
        self.assertNotEqual(ts2.data[2], ts.data[2, 1])

        for piece in (ts1, ts2):
            self.assertEqual(piece.start, ts.start)
            self.assertEqual(piece.interval, ts.interval)
            self.assertEqual(len(piece), len(ts))

        ## ---- split with second argument True ----
        ts1, ts2 = ts_split(ts, True)

        ## here the pieces are written before the comparison with the source
        ts1.data[5] = -9999.0
        ts2.data[2] = -9999.0

        for column, piece in enumerate((ts1, ts2)):
            for source_value, piece_value in zip(ts.data[:, column],
                                                 piece.data):
                self.assertEqual(source_value, piece_value)

        for piece in (ts1, ts2):
            self.assertEqual(piece.start, ts.start)
            self.assertEqual(piece.interval, ts.interval)
            self.assertEqual(len(piece), len(ts))
Exemplo n.º 7
0
 def test_split_op_irregular(self):
     """ Test behaviour of split operation on irregular TS.

     A two-column irregular series is split twice.  With the second
     argument of ``ts_split`` False, writes to the pieces must not show
     up in the source; with it True, the pieces are written to first and
     are still expected to equal the source columns.  In both cases the
     pieces must carry the same times as the source.
     """
     ## offsets in minutes from the start time, converted to ticks
     minute_offsets = [12, 15, 32, 38, 43, 52, 84, 138, 161, 172]
     start_ticks = ticks(parse_time("1996-2-1"))
     times = sciadd(scimultiply(minute_offsets, ticks_per_minute),
                    start_ticks)

     first_column = sciarray(
         [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0])
     second_column = sciarray(
         [7.0, 1.2, 10.5, 3.0, 1.0, 1.0, 9.0, 3.0, 0.0, 0.2])
     ts = its(times, sciarray([first_column, second_column]).transpose())

     ## ---- split with second argument False ----
     ts1, ts2 = ts_split(ts, False)

     for column, piece in enumerate((ts1, ts2)):
         for source_value, piece_value in zip(ts.data[:, column],
                                              piece.data):
             self.assertEqual(source_value, piece_value)

     ## writes to the pieces are expected to leave the source untouched
     ts1.data[5] = -9999.0
     ts2.data[2] = -9999.0
     self.assertNotEqual(ts1.data[5], ts.data[5, 0])
     self.assertNotEqual(ts2.data[2], ts.data[2, 1])

     for piece in (ts1, ts2):
         for piece_time, source_time in zip(piece.times, ts.times):
             self.assertEqual(piece_time, source_time)

     ## ---- split with second argument True ----
     ts1, ts2 = ts_split(ts, True)
     ts1.data[5] = -9999.0
     ts2.data[2] = -9999.0

     for column, piece in enumerate((ts1, ts2)):
         for source_value, piece_value in zip(ts.data[:, column],
                                              piece.data):
             self.assertEqual(source_value, piece_value)

     for piece in (ts1, ts2):
         for piece_time, source_time in zip(piece.times, ts.times):
             self.assertEqual(piece_time, source_time)
Exemplo n.º 8
0
 def test_bind_multivar(self):
     """ Test behaviour of bind on multivariate ts.

     A single-column and a two-column regular series with identical
     start and interval are bound; the result is expected to hold the
     three columns side by side, in that order.
     """
     start = parse_time("1996-2-1")
     interval = parse_interval("1hour")

     data1 = sciarray([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0])
     ## two variables stored row-wise, transposed to one column each
     data2t = sciarray([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0],
                        [2.0, 2.1, 2.8, 9.1, 3.2, 0.5, 0.1, 8.1, 1.2, 1.1]])
     data2 = data2t.transpose()

     ## expected bound data: data1 followed by both columns of data2
     expected_rows = sciarray(
         [data1[:], data2t[0, :], data2t[1, :]]).transpose()

     ts1 = rts(data1, start, interval, {})
     ts2 = rts(data2, start, interval, {})
     new_ts = ts_bind(ts1, ts2)

     self.assertEqual(len(new_ts), len(ts1))
     self.assertEqual(new_ts.start, ts1.start)
     self.assertEqual(new_ts.interval, interval)

     for bound_row, expected_row in zip(new_ts.data, expected_rows):
         ## unpacking also checks that every row has exactly three values
         got1, got2, got3 = bound_row
         want1, want2, want3 = expected_row
         for got, want in ((got1, want1), (got2, want2), (got3, want3)):
             self.assertEqual(got, want)
Exemplo n.º 9
0
    def test_its_at_regular_points(self):
        """ Test interpolation of irregular timeseries at
            regular time points.

            The same 15-minute target times are passed in three forms:
            the sequence returned by ``time_sequence``, a list of the
            values converted with ``ticks_to_time``, and an array of
            those converted values.  Every form must give a result of
            the requested length, and the last two must give data close
            to the first.
        """
        msgstr = "at test_its_at_regular_points"
        ts = self.its1
        interval = parse_interval("15min")
        start = ts.start + interval

        ## this about 35000 points
        num = number_intervals(start, ts.end - interval, interval)
        times = time_sequence(start, interval, num)

        ## times2 is times converted by ticks_to_time.  It is built as a
        ## real list because its length is needed below (a lazy map
        ## object has none).
        times2 = [ticks_to_time(t) for t in times]

        function_to_test = [linear, spline, monotonic_spline]

        nt1 = []
        for funcs in function_to_test:
            fail_msg = "test of %s fail %s." % (funcs.__name__, msgstr)
            result = funcs(ts, times)
            nt1.append(result)
            self.assertEqual(len(result), len(times), fail_msg)

        ## Repeat same task by datetime list instead of ticks array above.
        nt2 = []
        for funcs, reference in zip(function_to_test, nt1):
            fail_msg = "test of %s fail %s." % (funcs.__name__, msgstr)
            result = funcs(ts, times2)
            nt2.append(result)
            self.assertEqual(len(result), len(times2), fail_msg)
            self.assertTrue(allclose(reference.data, result.data), fail_msg)

        ## Repeat same task again by datetime array.
        times3 = sciarray([ticks_to_time(t) for t in times])
        nt3 = []
        for funcs, reference in zip(function_to_test, nt1):
            fail_msg = "test of %s fail %s." % (funcs.__name__, msgstr)
            result = funcs(ts, times3)
            nt3.append(result)
            self.assertEqual(len(result), len(times3), fail_msg)
            self.assertTrue(allclose(reference.data, result.data), fail_msg)
Exemplo n.º 10
0
    def test_bind_op_irregular(self):
        """ Test behaviour of bind operation on irregular TS.

        Two irregular series built from the same times and data are
        bound; every row of the result is expected to hold the original
        value twice.
        """
        ## offsets in minutes from the start time, converted to ticks
        minute_offsets = [12, 15, 32, 38, 43, 52, 84, 138, 161, 172]
        start_ticks = ticks(parse_time("1996-2-1"))
        times = sciadd(scimultiply(minute_offsets, ticks_per_minute),
                       start_ticks)

        data = sciarray([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0])
        ts1 = its(times, data, {})
        ts2 = its(times, data, {})

        new_ts = ts_bind(ts1, ts2)
        self.assertEqual(len(new_ts), len(ts1))
        self.assertEqual(new_ts.start, ts1.start)

        for bound_row, original in zip(new_ts.data, data):
            ## unpacking also checks that every row has exactly two values
            left, right = bound_row
            self.assertEqual(left, original)
            self.assertEqual(right, original)
Exemplo n.º 11
0
    def test_period_op_large(self):
        """ Run period operation on a very large TS.

            The series has ``self.large_data_size`` hourly samples with
            3 columns, and MIN, MAX, MEAN and SUM are each applied over
            one-day periods.  Nothing is asserted on the results; the
            test only checks that the calls complete.
        """
        st = datetime.datetime(year=10, month=2, day=3, hour=11, minute=15)
        num = self.large_data_size
        delta = time_interval(hours=1)
        dimension2 = 3
        interval = '1 day'

        rows = []
        for k in range(num):
            rows.append([random.uniform(self.min_val, self.max_val)
                         for i in range(dimension2)])
        ts = rts(sciarray(rows), st, delta, {TIMESTAMP: INST})

        for op in (MIN, MAX, MEAN, SUM):
            period_op(ts, interval, op)
Exemplo n.º 12
0
    def test_bind_op_regular(self):
        """ Test behaviour of bind operation on regular TS.

        The same ten-value series is bound with a second series that
        (1) has the same start and interval, (2) starts four hours
        later and so partially overlaps, (3) starts immediately after
        the first one ends, and (4) has a smaller interval of 15
        minutes.  Positions not covered by one of the inputs are
        expected to be nan in the corresponding column.
        """
        start = parse_time("1996-2-1")
        interval = parse_interval("1hour")
        data = sciarray([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0])
        ts1 = rts(data, start, interval, {})

        ## identical start and interval
        ts2 = rts(data, start, interval, {})
        new_ts = ts_bind(ts1, ts2)
        self.assertEqual(len(new_ts), len(ts1))
        self.assertEqual(new_ts.start, ts1.start)
        self.assertEqual(new_ts.interval, interval)
        for (d1, d2), d in zip(new_ts.data, data):
            self.assertEqual(d1, d)
            self.assertEqual(d2, d)

        ## partial overlap
        start2 = parse_time("1996-2-1 4:00")
        ts2 = rts(data, start2, interval, {})
        new_ts = ts_bind(ts1, ts2)
        self.assertEqual(len(new_ts), 14)
        self.assertEqual(new_ts.start, ts1.start)
        self.assertEqual(new_ts.times[-1], ts2.times[-1])
        self.assertEqual(new_ts.interval, interval)
        for i in range(4):
            self.assertTrue(isnan(new_ts.data[i, 1]))
            self.assertTrue(isnan(new_ts.data[i + 10, 0]))
        for i in range(10):
            self.assertEqual(new_ts.data[i, 0], data[i])
            self.assertEqual(new_ts.data[i + 4, 1], data[i])

        ## no overlap, immediately after
        start2 = parse_time("1996-2-1 10:00")
        ts2 = rts(data, start2, interval, {})
        new_ts = ts_bind(ts1, ts2)
        self.assertEqual(len(new_ts), 20)
        self.assertEqual(new_ts.start, ts1.start)
        self.assertEqual(new_ts.times[-1], ts2.times[-1])
        self.assertEqual(new_ts.interval, interval)
        for i in range(10):
            self.assertTrue(isnan(new_ts.data[i, 1]))
            self.assertTrue(isnan(new_ts.data[i + 10, 0]))
        for i in range(10):
            self.assertEqual(new_ts.data[i, 0], data[i])
            self.assertEqual(new_ts.data[i + 10, 1], data[i])

        ## smaller interval
        start2 = parse_time("1996-2-1 8:00")
        interval2 = parse_interval("15min")
        ts2 = rts(data, start2, interval2, {})
        new_ts = ts_bind(ts1, ts2)
        self.assertEqual(len(new_ts), 42)
        self.assertEqual(new_ts.start, ts1.start)
        self.assertEqual(new_ts.times[-1], ts2.times[-1])
        self.assertEqual(new_ts.interval, interval2)
        ## every fourth position carries a value of the hourly ts1 ...
        ts1_id = [4 * x for x in range(10)]
        ## ... and all remaining positions are supposed to hold nan in
        ## its column.  Built as a list so it does not rely on range()
        ## returning a mutable list.
        nan_id = [i for i in range(len(new_ts)) if i not in ts1_id]
        ts1_val = new_ts.data[ts1_id, 0]
        left_val = new_ts.data[nan_id, 0]
        for d1, d2 in zip(ts1_val, data):
            self.assertEqual(d1, d2)
        for d in left_val:
            self.assertTrue(isnan(d))
        ## the ten positions from 32 on hold ts2 in the second column
        ts2_id = list(range(32, 42))
        ts2_val = new_ts.data[ts2_id, 1]
        for d1, d2 in zip(ts2_val, data):
            self.assertEqual(d1, d2)
Exemplo n.º 13
0
    def test_period_op_irregular(self):
        """ Test behaviour of period operation on irregular TS.

        Hourly MEAN, MIN and MAX and a monthly MEAN are computed for
        small hand-made irregular series and compared value by value
        with the expected results.
        """
        start_ticks = ticks(parse_time("1996-2-1"))

        ## hourly mean, first sample after the start of the hour
        minute_offsets = [12, 15, 32, 38, 43, 52, 84, 138, 161, 172]
        times = sciadd(scimultiply(minute_offsets, ticks_per_minute),
                       start_ticks)
        data = sciarray([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0])
        ts = its(times, data, {})
        ts_op = period_op(ts, "1 hour", MEAN)
        self.assertEqual(len(ts_op), 3)
        for position, expected in enumerate([1.0, 2.0, 3.0]):
            self.assertEqual(ts_op.data[position], expected)

        ## hourly mean, samples falling exactly on hour boundaries
        minute_offsets = [0, 15, 32, 38, 43, 52, 60, 120, 138, 161, 180]
        times = sciadd(scimultiply(minute_offsets, ticks_per_minute),
                       start_ticks)
        data = sciarray(
            [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 4.0])
        ts = its(times, data, {})
        ts_op = period_op(ts, "1 hour", MEAN)
        self.assertEqual(len(ts_op), 4)
        for position, expected in enumerate([1.0, 2.0, 3.0, 4.0]):
            self.assertEqual(ts_op.data[position], expected)

        ## hourly min and max on the same times with varying data
        data = sciarray(
            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 2.0, 3.0, 4.0, 5.0, 4.0])
        ts = its(times, data, {})

        ts_op = period_op(ts, "1 hour", MIN)
        self.assertEqual(len(ts_op), 4)
        for position, expected in enumerate([1.0, 2.0, 3.0, 4.0]):
            self.assertEqual(ts_op.data[position], expected)

        ts_op = period_op(ts, "1 hour", MAX)
        self.assertEqual(len(ts_op), 4)
        for position, expected in enumerate([6.0, 2.0, 5.0, 4.0]):
            self.assertEqual(ts_op.data[position], expected)

        ## monthly mean, offsets given in days from 1996-1-1
        day_offsets = [0, 15, 28, 30, 58, 64, 80, 90, 91]
        start_ticks = ticks(parse_time("1996-1-1"))
        times = sciadd(scimultiply(day_offsets, ticks_per_day), start_ticks)
        data = sciarray([1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 4.0])
        ts = its(times, data, {})
        ts_op = period_op(ts, "1 month", MEAN)
        self.assertEqual(len(ts_op), 4)
        for position, expected in enumerate([1.0, 2.0, 3.0, 4.0]):
            self.assertEqual(ts_op.data[position], expected)
Exemplo n.º 14
0
def cosine_lanczos(ts,
                   cutoff_period=None,
                   cutoff_frequency=None,
                   filter_len=None,
                   padtype=None,
                   padlen=None,
                   fill_edge_nan=True):
    """ squared low-pass cosine lanczos  filter on a regular time series.

    The filter coefficients are applied forward and backward with
    ``filtfilt``.  Nan values in the input are replaced by 0.0 before
    filtering and the result is set to nan within 2*m samples of every
    original nan, where m is the filter half-length.

    Parameters
    -----------

    ts : :class:`~vtools.data.timeseries.TimeSeries`
        Must has data of one dimension, and regular.

    cutoff_period : string  or  :ref:`time_interval<time_intervals>`
        Period of cutting off frequency. If input as a string, it must
        be  convertible to :ref:`Time interval<time_intervals>`.
        cutoff_frequency and cutoff_period can't be specified at the same
        time. If neither is given, a message is printed and a cutoff
        period of 40 hours is used.

    cutoff_frequency: float,optional
        Cutoff frequency expressed as a ratio of a Nyquist frequency,
        should within the range (0,1). For example, if the sampling frequency
        is 1 hour, the Nyquist frequency is 1 sample/2 hours. If we want a
        36 hour cutoff period, the frequency is 1/36 or 0.0278 cycles per hour.
        Hence the cutoff frequency argument used here would be
        0.0278/0.5 = 0.056.

    filter_len  : int, time_interval
        Half size m of the lanczos window (the window holds 2*m+1
        coefficients). A time interval is converted to a number of
        intervals of ts. Default is int(1.25*2/cutoff frequency), that is
        1.25 times the number of intervals within the cutoff period.

    padtype : str or None, optional
        Must be 'odd', 'even', 'constant', or None. This determines the type
        of extension to use for the padded signal to which the filter is
        applied. If padtype is None, no padding is used. The default is None.

    padlen : int or None, optional
        The number of elements by which to extend x at both ends of axis
        before applying the filter. This value must be less than
        x.shape[axis]-1. padlen=0 implies no padding. If padtype is not None
        and padlen is not given, padlen is set to 6*m.

    fill_edge_nan: bool,optional
        If padding is not used and fill_edge_nan is true, resulting data on
        the both ends are filled with nan to account for edge effect. This is
        2*m on the either end of the result. Default is true.

    Returns
    -------
    result : :class:`~vtools.data.timeseries.TimeSeries`
        A new regular time series with the same start and interval as ts.
        If no padding is used and fill_edge_nan is true, the beginning and
        ending 2*m resulting data are set to nan to remove edge effect.

    Raises
    --------
    ValueError
        If input timeseries is not regular,
        or, cutoff_period and cutoff_frequency are given at the same time,
        or, the resulting filter half-length is less than 1,
        or, padtype is not "odd","even","constant",or None,
        or, padlen is larger than data size.

    TypeError
        If filter_len is neither None, an int nor a time interval.

    """

    if not ts.is_regular():
        raise ValueError("Only regular time series are supported.")

    if (cutoff_frequency is not None) and (cutoff_period is not None):
        raise ValueError("cutoff_frequency and cutoff_period can't"
                         " be specified simultaneously")

    if (cutoff_frequency is None) and (cutoff_period is None):
        ## single-argument call form prints the same text under the
        ## print statement and the print function
        print("neither cutoff_frequency nor cutoff_period is given, 40 hours is used by defualt")
        cutoff_period = hours(40)

    cf = cutoff_frequency
    if cf is None:
        ## exactly one of the two is given at this point, so
        ## cutoff_period is set; convert it to a ratio of the Nyquist
        ## frequency using ticks
        if not is_interval(cutoff_period):
            cutoff_period = parse_interval(cutoff_period)
        cutoff_frequency_in_ticks = 1.0 / float(ticks(cutoff_period))
        nyquist_frequency = 0.5 / float(ticks(ts.interval))
        cf = cutoff_frequency_in_ticks / nyquist_frequency

    m = filter_len
    if is_interval(m):
        m = int(ticks(m) / ticks(ts.interval))
    elif m is None:
        ## 2/cf is the number of intervals within the cutoff period;
        ## default is 1.25 times that
        m = int(1.25 * 2.0 / cf)
    elif not isinstance(m, int):
        raise TypeError("unkown filter length type")

    ## validate m before it is used to size anything below
    if m < 1:
        raise ValueError("bad input cutoff period or frequency")

    if (padtype is not None) and (padtype not in ["odd", "even", "constant"]):
        raise ValueError("unkown padtype :" + str(padtype))

    if (padlen is None) and (padtype is not None):
        padlen = 6 * m

    ##find out nan location and fill with 0.0. This way we can use the
    ## signal processing filtrations out-of-the box without nans causing trouble
    idx = where(isnan(ts.data))[0]
    data = sciarray(ts.data).copy()

    ## padlen stays None when no padding is requested; only compare
    ## when there is a number to compare
    if (padlen is not None) and (padlen > len(data)):
        raise ValueError("Padding length is more  than data size")

    ## figure out indexes that will be nan after the filtration,which
    ## will "grow" the nan region around the original nan by 2*m
    ## slots in each direction
    has_nan = len(idx) > 0
    if has_nan:
        data[idx] = 0.0
        shifts = arange(-2 * m, 2 * m + 1)
        result_nan_idx = clip(add.outer(shifts, idx), 0, len(ts) - 1).ravel()

    ## get filter coefficients. size of coefs is 2*m+1 in fact
    coefs = _lowpass_cosine_lanczos_filter_coef(cf, m)

    d2 = filtfilt(coefs, [1.0], data, axis=0, padtype=padtype, padlen=padlen)

    if has_nan:
        d2[result_nan_idx] = nan

    ## replace edge points with nan if padding is not used
    if (padtype is None) and fill_edge_nan:
        d2[0:2 * m] = nan
        d2[len(d2) - 2 * m:len(d2)] = nan

    ## copy the source properties, then mark the result as instantaneous
    ## individual values
    prop = dict(ts.props.items())
    prop[TIMESTAMP] = INST
    prop[AGGREGATION] = INDIVIDUAL
    return rts(d2, ts.start, ts.interval, prop)
Exemplo n.º 15
0
    def test_interpolate_ts_nan(self):
        """ Test filling nan within a regular time series.

        Covers leading and trailing nan (expected to stay nan), the
        max_gap option (gaps wider than max_gap expected to stay nan,
        narrower ones expected to be filled), a misspelled keyword
        (expected to raise TypeError) and a series without any nan
        (expected to come back unchanged).
        """
        methods_without_rhist = [
            SPLINE, MONOTONIC_SPLINE, LINEAR, PREVIOUS, NEXT, NEAREST
        ]
        all_methods = methods_without_rhist + [RHIST]

        def fresh_signal():
            ## 50 samples of a smooth sum of three sines, free of nan
            return sciarray([
                0.3 * sin(k * pi / 1200 + pi / 15) +
                0.4 * sin(k * pi / 1100 + pi / 6) +
                1.1 * sin(k * pi / 990 + pi / 18) for k in range(50)
            ])

        ## test ts with leading and trailing nan
        for method in all_methods:
            ts = deepcopy(self.rts_has_nan)
            ts.data[0:5] = numpy.nan
            ts.data[len(ts) - 5:len(ts)] = numpy.nan
            old_len = len(ts)
            ts = interpolate_ts_nan(ts, method=method)
            self.assertEqual(len(ts), old_len)
            self.assertFalse(alltrue(isnan(ts.data)))
            self.assertTrue(alltrue(isnan(ts.data[0:5])))
            self.assertTrue(alltrue(isnan(ts.data[len(ts) - 5:len(ts)])))

        ## test max_gap option

        ## leading and trailing runs of three nan plus a single nan inside
        data = fresh_signal()
        put(data, [0, 1, 2, 12, 47, 48, 49], nan)

        max_gap = 2
        start_time = "1/2/1990"
        interval = "15min"
        ts_less_nans = rts(data, start_time, interval, {})

        for method in methods_without_rhist:
            ts_new = interpolate_ts_nan(ts_less_nans,
                                        max_gap=max_gap,
                                        method=method)
            for position in (0, 1, 2, 47, 48, 49):
                self.assertTrue(isnan(ts_new.data[position]))
            self.assertFalse(isnan(ts_new.data[12]))

        ## one gap of three and one gap of ten with max_gap of four
        data = fresh_signal()
        put(data, [12, 13, 14], nan)
        put(data, [34, 35, 36, 37, 38, 39, 40, 41, 42, 43], nan)

        max_gap = 4
        ts_with_nans = rts(data, start_time, interval, {})

        for method in all_methods:
            ts_new = interpolate_ts_nan(ts_with_nans,
                                        max_gap=max_gap,
                                        method=method)
            for position in (12, 13, 14):
                self.assertFalse(isnan(ts_new.data[position]))
            for position in range(34, 44):
                self.assertTrue(isnan(ts_new.data[position]))

        ## keyword spelled "maxgap" instead of "max_gap"
        for method in all_methods:
            self.assertRaises(TypeError, interpolate_ts_nan,
                              ts_with_nans, maxgap=max_gap, method=method)

        ## test no nan exits
        ts_without_nans = rts(fresh_signal(), start_time, interval, {})
        for method in methods_without_rhist:
            ts_new = interpolate_ts_nan(ts_without_nans,
                                        max_gap=max_gap,
                                        method=method)
            for old_value, new_value in zip(ts_without_nans.data, ts_new.data):
                self.assertTrue(old_value == new_value)

        ## should do nothing for its gap wider than max_gap
        data = fresh_signal()
        put(data, [12, 13, 14], nan)

        max_gap = 2
        ts_less_nans = rts(data, start_time, interval, {})

        for method in methods_without_rhist:
            ts_new = interpolate_ts_nan(ts_less_nans,
                                        max_gap=max_gap,
                                        method=method)
            for position in (12, 13, 14):
                self.assertTrue(isnan(ts_new.data[position]))