Exemplo n.º 1
0
 def _as_range_index(self) -> RangeIndex:
     """Build a RangeIndex covering this index's integer (i8) values.

     The range begins at the first element's ``.value``, advances by
     ``self.freq.delta.value`` and includes the last element's ``.value``.

     The caller must already have checked ``isinstance(self.freq, Tick)``;
     the ``cast`` below only informs the type checker.
     """
     step = cast(Tick, self.freq).delta.value
     first = self[0].value
     last = self[-1].value
     # Push the stop one step past the last value so it is included.
     return RangeIndex(range(first, last + step, step))
Exemplo n.º 2
0
def test_default_causal_cto_w_np_array(rand_data, pre_int_period,
                                       post_int_period):
    """Check the default state of CausalImpact built from a NumPy array.

    The raw values of ``rand_data`` are passed in, and the test verifies the
    stored data and periods, the pre/post slices, the standardized frames,
    the default model arguments, the fitted model's attributes and the
    default inference settings.
    """
    raw = rand_data.values
    ci = CausalImpact(raw, pre_int_period, post_int_period)

    # The inputs are stored as given.
    assert_array_equal(ci.data, raw)
    assert ci.pre_period == pre_int_period
    assert ci.post_period == post_int_period

    # Periods are inclusive on both ends, hence the "+ 1" on the stop.
    pre_rows = slice(pre_int_period[0], pre_int_period[1] + 1)
    expected_pre = pd.DataFrame(raw[pre_rows, :])
    assert_frame_equal(ci.pre_data, expected_pre)

    post_rows = slice(post_int_period[0], post_int_period[1] + 1)
    expected_post = pd.DataFrame(raw[post_rows, :])
    # The post-period index continues where the pre-period index stops.
    expected_post.index = RangeIndex(start=len(expected_pre),
                                     stop=len(rand_data))
    assert_frame_equal(ci.post_data, expected_post)

    assert ci.alpha == 0.05

    # Both periods are standardized with the pre-period statistics.
    expected_normed_pre, (mu, sig) = standardize(expected_pre)
    assert_frame_equal(ci.normed_pre_data, expected_normed_pre)

    expected_normed_post = (expected_post - mu) / sig
    assert_frame_equal(ci.normed_post_data, expected_normed_post)

    assert ci.mu_sig == (mu[0], sig[0])
    assert ci.model_args == {'standardize': True, 'nseasons': []}

    # The first column is the response; the remaining ones are covariates.
    model = ci.model
    assert isinstance(model, UnobservedComponents)
    expected_endog = expected_normed_pre.iloc[:, 0].values.reshape(-1, 1)
    assert_array_equal(model.endog, expected_endog)
    n_covariates = raw.shape[1] - 1
    expected_exog = expected_normed_pre.iloc[:, 1:].values.reshape(
        -1, n_covariates)
    assert_array_equal(model.exog, expected_exog)
    assert model.endog_names == 'y'
    assert model.exog_names == [1, 2]
    assert model.k_endog == 1
    assert model.level
    assert model.trend_specification == 'local level'

    fitted = ci.trained_model
    assert isinstance(fitted, UnobservedComponentsResultsWrapper)
    assert fitted.nobs == len(expected_pre)

    assert ci.inferences is not None
    assert ci.inferences.index.dtype == rand_data.index.dtype
    assert 0 < ci.p_value < 1
    assert ci.n_sims == 1000
Exemplo n.º 3
0
def load_and_merge_ticks(file_path, freq='3s'):
    '''
    Load a tick CSV file and aggregate the ticks into fixed-width bars.

    The ``code``, ``date`` and ``time`` columns are read as strings. ``date``
    and ``time`` are combined into a DatetimeIndex, the rows are resampled
    into left-closed, left-labelled bins of width ``freq`` keeping the last
    tick of each bin, and bins labelled after 11:30 up to and including
    13:00 are dropped. Bins labelled at or before 09:30 are kept only when
    they have no missing values; later bins are forward-filled.

    Parameters
    ----------
    file_path : path of the CSV file to read.
    freq : pandas resampling frequency string (default ``'3s'``).

    Returns
    -------
    DataFrame indexed ``0..len-1`` whose ``time`` column is rebuilt from the
    bin label as an integer and whose ``date`` column is converted to int.

    Raises
    ------
    SystemExit
        If a ``date`` value cannot be converted to int; the offending file
        path is printed first.
    '''

    # load csv file
    data = pd.read_csv(file_path,
                       dtype={
                           'code': 'str',
                           'date': 'str',
                           'time': 'str'
                       })

    # reindex the data with date time: the time string is split as
    # HH:MM:SS.<remaining digits> so it can be parsed together with the date
    mtime = data['time'].apply(
        lambda x: x[:2] + ':' + x[2:4] + ':' + x[4:6] + '.' + x[6:])
    data.index = data['date'].str.cat(mtime.tolist(), sep=' ')
    data.index = pd.DatetimeIndex(data.index)

    # resample the data, then drop the bins labelled in (11:30, 13:00]
    data = data.resample(freq, closed='left', label='left').last()
    data = data[(data.index.time <= datetime.time(11, 30)) |
                (data.index.time > datetime.time(13, 0))]

    # deal with the data before open: keep only complete bins there, and
    # forward-fill the empty bins afterwards
    data_before_open = data[data.index.time <= datetime.time(9, 30)]
    data_before_open = data_before_open.dropna()
    # .ffill() replaces the deprecated fillna(method='pad')
    data = data[data.index.time > datetime.time(9, 30)].ffill()
    data = pd.concat([data_before_open, data])

    # reset the time columns according the latest index
    # NOTE(review): an earlier variant of this format appended "000" instead
    # of "00" -- confirm which width downstream consumers expect.
    data['time'] = data.index.to_series().apply(
        lambda x: int("%02d%02d%02d00" % (x.hour, x.minute, x.second)))
    try:
        data['date'] = data['date'].apply(int)
    except ValueError:
        # Report which file holds the unparsable date before stopping.
        print(file_path)
        # NOTE(review): status 0 is kept from the original exit(0) call even
        # though this is a failure path -- confirm whether callers rely on it.
        raise SystemExit(0)

    data.index = RangeIndex(start=0, stop=len(data))

    return data
Exemplo n.º 4
0
def default_index(n: int) -> RangeIndex:
    """Return the unnamed default index ``0, 1, ..., n - 1``.

    The index is created through ``RangeIndex._simple_new`` from a plain
    ``range`` with ``name=None``.
    """
    return RangeIndex._simple_new(range(n), name=None)
import pandas as pd
from pandas.core.indexes.range import RangeIndex

# Demo script: build two Series with custom indexes and combine them into
# DataFrames, printing each intermediate result with ANSI colour codes.
print('*' * 10, 'Series to DataFrame', '*' * 10)
oranges = [45, 63, 85, 96, 45]
apples = [63, 52, 41, 78, 99]
org_start = max(oranges)
step = 2
org_stop = max(oranges) + len(oranges) * step
# The index starts at the maximum value of the sequence and ends after
# adding (number of elements * step), i.e. one label per element.
ind_org = range(org_start, org_stop, step)
# Build the oranges series from the oranges data (it was previously built
# from `apples`) and attach the index computed above.
oranges_sr = pd.Series(oranges, index=ind_org)
print(oranges_sr)
aple_start = max(apples)
apl_stop = max(apples) + len(apples) * step
# Same construction for apples, reusing `step` instead of a literal 2.
ind_apl = RangeIndex(aple_start, apl_stop, step=step)
apples_sr = pd.Series(apples, index=ind_apl)
print("\u001b[30;1m Printing Apple Series")
print('\u001b[32;1m', apples_sr)

# ********** Merging Series *********

# NOTE(review): passing `oranges` as the index presumably reindexes
# apples_sr by those labels, leaving the value column NaN -- confirm that
# this is the intended demonstration.
mergSr = pd.DataFrame(apples_sr, oranges).reset_index()
print('\u001b[30;1m Merged Series to Dataframe', mergSr)
print('\u001b[33;1m', "Now Merging Series with Dif Index to by using dic")
merg_dic = {'Apples': apples, 'Oranges': oranges}
merg_dic_sr = {'Apple': apples_sr, 'Oranges': oranges_sr}
# NOTE(review): the frame is built from the plain lists (merg_dic);
# merg_dic_sr is defined but unused -- confirm which one was intended.
pdf_seriesMerge = pd.DataFrame(merg_dic)
print('\u001b[32;1m', pdf_seriesMerge)
print('\u001b[32;1m', pdf_seriesMerge.columns)
print('\u001b[32;1m', pdf_seriesMerge.index)