def test_resample_loffset(self):
    """Check that ``loffset`` shifts the labels of a resampled series.

    The same right-closed, right-labelled 5-minute mean is requested with the
    label offset spelled as a ``timedelta``, a frequency string and a
    ``Minute`` offset; all three must agree.  A daily series resampled to
    weekly is then used to check a negative business-day offset.
    """
    rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min')
    s = Series(np.random.randn(14), index=rng)

    result = s.resample('5min', how='mean', closed='right', label='right',
                        loffset=timedelta(minutes=1))
    idx = date_range('1/1/2000', periods=4, freq='5min')
    expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
                      index=idx + timedelta(minutes=1))
    assert_series_equal(result, expected)

    # Same offset given as a frequency string.
    expected = s.resample('5min', how='mean', closed='right', label='right',
                          loffset='1min')
    assert_series_equal(result, expected)

    # Same offset given as a DateOffset.
    expected = s.resample('5min', how='mean', closed='right', label='right',
                          loffset=Minute(1))
    assert_series_equal(result, expected)

    # assertEqual instead of the deprecated ``assert_`` alias (removed in
    # Python 3.12); it also reports both values on failure.
    self.assertEqual(result.index.freq, Minute(5))

    # from daily
    dti = DatetimeIndex(start=datetime(2005, 1, 1),
                        end=datetime(2005, 1, 10), freq='D')
    ser = Series(np.random.rand(len(dti)), dti)

    # to weekly
    result = ser.resample('w-sun', how='last')
    expected = ser.resample('w-sun', how='last', loffset=-bday)
    self.assertEqual(result.index[0] - bday, expected.index[0])
def test_custom_grouper(self):
    """Group a minute-frequency int64 series into 5-minute bins.

    Every cython aggregation is run against both the default grouper and a
    right-closed/right-labelled one; the latter is then checked for group
    count, sums and dtype preservation, on a Series and on a DataFrame.
    """
    dti = DatetimeIndex(freq='Min', start=datetime(2005, 1, 1),
                        end=datetime(2005, 1, 10))
    s = Series(np.array([1] * len(dti)), index=dti, dtype='int64')

    # check all cython functions work, first with the default grouper and
    # then with the right-closed one (which is the one inspected below)
    for options in ({}, {'closed': 'right', 'label': 'right'}):
        b = TimeGrouper(Minute(5), **options)
        g = s.groupby(b)
        for name in ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var']:
            g._cython_agg_general(name)

    self.assertEqual(g.ngroups, 2593)
    self.assertTrue(notnull(g.mean()).all())

    # construct expected val: a single leading observation, then full
    # five-observation bins
    expected_index = dti[0:-1:5].append(dti[-1:])
    expect = Series([1] + [5] * 2592, index=expected_index)

    # GH2763 - return input dtype if we can
    result = g.agg(np.sum)
    assert_series_equal(result, expect)

    df = DataFrame(np.random.rand(len(dti), 10), index=dti, dtype='float64')
    r = df.groupby(b).agg(np.sum)
    self.assertEqual(len(r.columns), 10)
    self.assertEqual(len(r.index), 2593)
def test_custom_grouper(index):
    """Group an int64 series on ``index`` into 5-minute bins with TimeGrouper.

    All cython aggregations must run for both the default and the
    right-closed/right-labelled grouper; the right-closed result is checked
    for group count, sums and dtype, on a Series and on a DataFrame.
    """
    dti = index
    s = Series(np.array([1] * len(dti)), index=dti, dtype='int64')

    def run_cython_aggregations(grouped):
        # check all cython functions work
        for name in ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var']:
            grouped._cython_agg_general(name)

    run_cython_aggregations(s.groupby(TimeGrouper(Minute(5))))

    b = TimeGrouper(Minute(5), closed='right', label='right')
    g = s.groupby(b)
    run_cython_aggregations(g)

    assert g.ngroups == 2593
    assert notna(g.mean()).all()

    # construct expected val
    expect = Series([1] + [5] * 2592, index=dti[0:-1:5].append(dti[-1:]))

    # GH2763 - return input dtype if we can
    result = g.agg(np.sum)
    assert_series_equal(result, expect)

    df = DataFrame(np.random.rand(len(dti), 10), index=dti, dtype='float64')
    r = df.groupby(b).agg(np.sum)
    assert len(r.columns) == 10
    assert len(r.index) == 2593
def test_custom_grouper(index):
    """Group an int64 series on ``index`` into 5-minute bins with ``Grouper``.

    Runs every cython aggregation for the default and the right-closed
    grouper, then verifies group count, sums and dtype for the right-closed
    one on both a Series and a DataFrame.
    """
    dti = index
    s = Series(np.array([1] * len(dti)), index=dti, dtype="int64")
    cython_funcs = ("add", "mean", "prod", "ohlc", "min", "max", "var")

    # check all cython functions work (default bins)
    default_groups = s.groupby(Grouper(freq=Minute(5)))
    for func_name in cython_funcs:
        default_groups._cython_agg_general(func_name)

    # check all cython functions work (right-closed, right-labelled bins)
    b = Grouper(freq=Minute(5), closed="right", label="right")
    g = s.groupby(b)
    for func_name in cython_funcs:
        g._cython_agg_general(func_name)

    assert g.ngroups == 2593
    assert notna(g.mean()).all()

    # construct expected val
    bin_sums = [1] + [5] * 2592
    bin_labels = dti[0:-1:5].append(dti[-1:])
    expect = Series(bin_sums, index=bin_labels)

    # GH2763 - return input dtype if we can
    tm.assert_series_equal(g.agg(np.sum), expect)

    frame = DataFrame(np.random.rand(len(dti), 10), index=dti, dtype="float64")
    summed = frame.groupby(b).agg(np.sum)
    assert len(summed.columns) == 10
    assert len(summed.index) == 2593
def create_flux_ts(thresh_file, bin_width, area, from_dir='data/thresh/'):
    """Create a binned flux time series from a ``.thresh`` file.

    Parameters
    ----------
    thresh_file : str
        File name without the ``.thresh`` extension.
    bin_width : number
        Time bin size in seconds.
    area : number
        Area of the detector in square meters.
    from_dir : str
        Directory prefix (including the trailing separator) of the file.

    Returns
    -------
    pandas.Series
        Series named ``'FLUX'`` holding events per minute per unit area,
        indexed by bin center.  Empty bins and their immediate neighbours
        are replaced by interpolated values.
    """
    # read in data from threshold file
    names = ['id', 'jul', 'RE', 'FE', 'FLUX']
    thresh_path = from_dir + thresh_file + '.thresh'
    skiprows = f.linesToSkip(thresh_path)
    # sep=r'\s+' is the documented equivalent of the deprecated
    # delim_whitespace=True
    df = pd.read_csv(thresh_path, skiprows=skiprows, names=names, sep=r'\s+')

    # sort by date/times instead of julian days
    df['date/times'] = df['jul'] + df['RE']
    # materialize the map: on Python 3 it is an iterator, which
    # pd.to_datetime does not accept
    df['date/times'] = pd.to_datetime(
        list(map(f.get_date_time, df['date/times'])))
    df.index = df['date/times']

    # create time series, sample according to bin_width;
    # 'min' is the minute alias ('T' is deprecated in recent pandas)
    bins = str(int(bin_width / 60)) + 'min'
    flux_ts = pd.Series(data=df['FLUX'], index=df.index)
    flux_ts = flux_ts.resample(bins).count() * (1 / ((bin_width / 60) * area))
    flux_ts.name = 'FLUX'

    # determine offset (basically the bin centers) and add to the index
    start = df['RE'].iloc[0] - 0.5
    total_seconds = int(bin_width / 2) + int(start * 86400)
    offset_hours, remainder = divmod(total_seconds, 3600)
    offset_minutes, offset_seconds = divmod(remainder, 60)
    offset = (offset_hours * Hour() + offset_minutes * Minute()
              + offset_seconds * Second())
    flux_ts.index += offset

    # filter out unfilled bins: an empty bin and both of its neighbours are
    # blanked.  Emptiness is read from a snapshot taken before any value is
    # overwritten, so blanking bin i cannot hide it from bin i + 1.
    empty = (flux_ts == 0).tolist()
    last = len(empty) - 1
    for i, is_empty in enumerate(empty):
        if (is_empty
                or (i > 0 and empty[i - 1])
                or (i < last and empty[i + 1])):
            flux_ts.iloc[i] = float('nan')

    return flux_ts.interpolate()
def restart(hdfname, newstart):
    '''
    Updates STATE values in HSP2 HDF file to start at later newstart date
    from computed values.

    User can extend timeseries by predictive or historic data to continue
    simulation. In this case, the user must set a new stop date!

    Parameters
    ----------
    hdfname : str
        HSP2 HDF5 file.
    newstart : str (in DateTime format for Timestamp)
        DateTime for restarting the simulation.

    Returns
    -------
    None.

    Raises
    ------
    KeyError
        If newstart is earlier than the simulation start stored in the file.
    '''
    with HDFStore(hdfname) as store:
        df = store['CONTROL/OP_SEQUENCE']
        delt = df.loc[0, 'INDELT_minutes']

        df = store['CONTROL/GLOBAL']
        start = Timestamp(df.loc['Start', 'Info'])
        stop = Timestamp(df.loc['Stop', 'Info'])
        dates = date_range(start, stop, freq=Minute(delt))

        # determine new start date for restart; previous date if not exact
        # match.  Index.get_loc(method=...) was removed in pandas 2.0, so
        # get_indexer is used; it reports "nothing at or before" as -1.
        startindx = dates.get_indexer([Timestamp(newstart)], method='pad')[0]
        if startindx == -1:
            raise KeyError(newstart)
        startdate = dates[startindx]

        df.loc['Start', 'Info'] = str(startdate)
        # write through the already-open store rather than re-opening the
        # file by name while it is still open
        df.to_hdf(store, key='CONTROL/GLOBAL', format='table',
                  data_columns=True)

        result_paths = [p[1:] for p in store.keys() if p.startswith('/RESULTS')]
        for path in result_paths:
            _, x, activity = path.split('/')
            operation, segment = x.split('_')
            if (operation, activity) not in states:
                continue

            # state values computed at the restart time step, as one column
            # named after the segment
            df = store[path][states[operation, activity]]
            df = df.iloc[startindx, :].to_frame()
            df.columns = [segment]

            storepath = f'{operation}/{activity}/STATES'
            dff = store[storepath]
            dff.update(df.T)
            dff.to_hdf(store, key=storepath, format='table',
                       data_columns=True)
def flightsUsed(data, WIFIAPTag_list, dateRange, time):
    """Count scheduled flights per WiFi AP tag in windows around ``time``.

    Each ``win`` in ``dateRange`` is a number of 30-minute steps.  A negative
    ``win`` counts flights scheduled in ``[time + 30*win min, time)``; any
    other ``win`` counts those in ``[time, time + 30*win min)``.

    Returns a DataFrame with one row per tag and the columns ``WIFIAPTag``,
    ``time`` and ``"<win>minutes"`` for every window.
    """
    per_tag_rows = []
    for tag in WIFIAPTag_list:
        row = pd.DataFrame([tag], columns=['WIFIAPTag'])
        row['time'] = time

        flights = data[data['WIFIAPTag'] == tag].copy()
        scheduled = flights['scheduled_flt_time']

        for win in dateRange:
            edge = time + 30 * win * Minute()
            # the window always has ``time`` at one end; the sign of ``win``
            # decides which end
            if win < 0:
                lower, upper = edge, time
            else:
                lower, upper = time, edge
            inside = flights[(scheduled >= lower) & (scheduled < upper)]
            row[str(win) + "minutes"] = len(inside)

        per_tag_rows.append(row.copy())

    res = pd.concat(per_tag_rows)
    return res.reset_index().drop(['index'], axis=1)
def compute_activity_levels(self):
    """Classify each epoch of ``self.data`` into an activity level.

    Returns
    -------
    (activity_level, sedentary) : tuple of pandas.Series
        Both are indexed like ``self.data``.  ``activity_level`` holds the
        ``activity_labels`` bin of the ``'Axis1'`` count, overridden with
        ``'sleep'`` where the ``'awake'`` column is falsy, and with
        ``'standing'`` folded into ``'sedentary'``.  ``sedentary`` flags the
        sedentary epochs.  The last epoch is never binned or flagged.

    Raises
    ------
    ValueError
        If ``self.raw_data`` is not sampled at one-minute frequency.
    """
    if self.raw_data.index.freq != Minute():
        # FIXME: if freq | Minute(), we should resample.
        raise ValueError("Activity cut points haven't been validated for "
                         'epoch lengths other than 60s')

    activity_level = pd.Series(index=self.data.index)
    # ``.ix`` was removed in pandas 1.0.  The slice was positional there
    # (assuming self.data has a non-integer index such as the
    # DatetimeIndex implied by the freq check above -- TODO confirm), so
    # select the column and slice it with ``.iloc``.
    activity_level[:-1] = pd.cut(self.data['Axis1'].iloc[:-1], count_bins,
                                 right=False, labels=activity_labels)
    activity_level[~boolify(self.data['awake'], True)] = 'sleep'
    # can't tell standing from sitting w/o activPAL
    activity_level[activity_level == 'standing'] = 'sedentary'

    sedentary = pd.Series(index=self.data.index)
    sedentary[:-1] = boolify(activity_level[:-1] == 'sedentary')
    return activity_level, sedentary
def split_data_chunk(data_chunk: pd.DataFrame, data_period, output_path: str):
    """Write one CSV per 15-minute window of ``data_chunk``.

    For every start time in ``data_period`` the rows labelled from that time
    up to 15 minutes later are written, without the index, to
    ``<output_path>\\<end date>_<end HH-MM>.csv``.  A failure while slicing
    or writing a window is printed and the remaining windows are still
    processed.

    Always returns True.
    """
    # Items are fetched by integer key, exactly as ``data_period[i]`` would,
    # rather than by iterating the container.
    for position in range(len(data_period)):
        start_time = data_period[position]
        end_time = start_time + Minute(15)
        try:
            window = data_chunk[start_time:end_time]
            # 'HH:MM:SS' -> 'HH-MM-SS' -> 'HH-MM'
            clock = str(end_time.time()).replace(':', '-')[:-3]
            part_name = str(end_time.date()) + '_' + clock
            target = output_path + '\\' + part_name + '.csv'
            window.to_csv(target, index=False)
        except Exception as e:
            print(e)
    return True
def set_time_right(self, obj, offset_time=Minute(1)):
    '''
    Shift a timestamp, or the index of a DataFrame, forward by offset_time,
    turning a "start time" label into an "end time" label.

    A datetime/Timestamp yields the shifted value.  A DataFrame has its index
    replaced in place (the index name is kept) and is itself returned.
    '''
    point_in_time = (datetime.datetime, pd.Timestamp)
    assert isinstance(obj, point_in_time + (pd.DataFrame,)), 'obj类型错误'

    if isinstance(obj, point_in_time):
        return obj + offset_time

    original_name = obj.index.name
    # row.name is the row's index label
    obj.index = obj.parallel_apply(lambda row: row.name + offset_time, axis=1)
    obj.index.rename(original_name, inplace=True)
    return obj
def test_union_not_cacheable(self, sort):
    """Union of overlapping slices of a minute-frequency range.

    With ``sort=None`` the union of two slices covering the range must equal
    the range; otherwise the left operand's entries come first.  The union
    with a slice fully contained in the left operand must equal that operand.
    """
    rng = date_range("1/1/2000", periods=50, freq=Minute())

    # right operand starts before the left one
    left, right = rng[10:], rng[:25]
    the_union = left.union(right, sort=sort)
    if sort is None:
        expected = rng
    else:
        expected = pd.DatetimeIndex(list(rng[10:]) + list(rng[:10]))
    tm.assert_index_equal(the_union, expected)

    # right operand lies entirely inside the left one
    left, right = rng[10:], rng[15:35]
    the_union = left.union(right, sort=sort)
    tm.assert_index_equal(the_union, rng[10:])
def chunk_data(data: pd.DataFrame, data_period, cpus=os.cpu_count()):
    """Yield ``data`` in consecutive time chunks for parallel processing.

    ``data_period`` is cut into at most ``cpus * 8`` runs of equal length
    (the last may be shorter).  For each run a pair
    ``(rows, period_starts)`` is yielded, where ``period_starts`` is the
    tuple of that run and ``rows`` is ``data`` sliced from the first start to
    15 minutes past the last one.  A failure while slicing a run is printed
    and that run is skipped.
    """
    whole, leftover = divmod(len(data_period), cpus * 8)
    chunk_size = whole + bool(leftover)  # round up

    remaining = iter(data_period)
    # keep pulling fixed-size runs until an empty one signals exhaustion
    for divide_time in iter(lambda: tuple(islice(remaining, chunk_size)), ()):
        first_start = divide_time[0]
        last_end = divide_time[-1] + Minute(15)
        try:
            yield (data[first_start:last_end], divide_time)
        except Exception as e:
            print(e)
def test_intersection(self):
    """Intersection of slices of a minute-frequency DatetimeIndex.

    Overlapping slices must give the shared span as a DatetimeIndex with the
    frequency of the source range, also when the other operand is passed as
    a view; disjoint slices must give an empty DatetimeIndex.
    """
    rng = date_range("1/1/2000", periods=50, freq=Minute())
    late, early = rng[10:], rng[:25]
    shared = rng[10:25]

    the_int = late.intersection(early)
    tm.assert_index_equal(the_int, shared)
    assert isinstance(the_int, DatetimeIndex)
    assert the_int.freq == rng.freq

    the_int = late.intersection(early.view(DatetimeIndex))
    tm.assert_index_equal(the_int, shared)

    # non-overlapping
    the_int = rng[:10].intersection(rng[10:])
    tm.assert_index_equal(the_int, DatetimeIndex([]))
def monthval(siminfo, monthly):
    ''' returns value at start of month for all times within the month

    siminfo supplies 'start' and 'stop' (timestamps) and 'delt' (time step
    in minutes); monthly holds the 12 monthly values, January first.

    Returns a numpy array with one value per time step from start to stop
    (both inclusive).  Steps shorter than a day repeat the day's value;
    steps longer than a day hold the mean of the days they cover.

    Raises ValueError if monthly does not provide exactly 12 values.
    '''
    start = siminfo['start']
    stop = siminfo['stop']
    freq = Minute(siminfo['delt'])

    # Month starts for every simulated year plus January 1st of the
    # following year.  Without that extra point the daily series would end
    # on December 1st and December 2nd-31st of the last year would be lost.
    years = stop.year - start.year + 1
    dr = date_range(start=f'{start.year}-01-01', end=f'{stop.year + 1}-01-01',
                    freq='MS')
    months = tile(monthly, years + 1).astype(float)
    if len(months) != 12 * (years + 1):
        raise ValueError('monthly must hold exactly 12 values')
    ts = Series(months[:len(dr)], index=dr).resample('D').ffill()

    # ts is daily at this point, so compare the requested step against one
    # day in nanoseconds rather than ordering two offset objects.
    day_nanos = 24 * 60 * 60 * 10**9
    if freq.nanos < day_nanos:      # upsample
        ts = ts.resample(freq).asfreq().ffill()
    elif freq.nanos > day_nanos:    # downsample
        ts = ts.resample(freq).mean()
    return ts.truncate(start, stop).to_numpy()
def test_intersection(self):
    """Intersection of slices of a minute-frequency DatetimeIndex.

    Overlapping slices must give the shared span as a DatetimeIndex carrying
    the offset of the source range, also when the other operand is passed as
    a view; disjoint slices must give an empty DatetimeIndex.
    """
    rng = date_range('1/1/2000', periods=50, freq=Minute())
    upper, lower = rng[10:], rng[:25]
    overlap = rng[10:25]

    the_int = upper.intersection(lower)
    self.assert_index_equal(the_int, overlap)
    tm.assertIsInstance(the_int, DatetimeIndex)
    self.assertEqual(the_int.offset, rng.offset)

    the_int = upper.intersection(lower.view(DatetimeIndex))
    self.assert_index_equal(the_int, overlap)

    # non-overlapping
    the_int = rng[:10].intersection(rng[10:])
    self.assert_index_equal(the_int, DatetimeIndex([]))
def dayval(siminfo, monthly):
    '''broadcasts HSPF monthly data onto timeseries at desired freq with HSPF
    interpolation to day, but constant within day

    siminfo supplies 'start' and 'stop' (timestamps) and 'delt' (time step
    in minutes); monthly holds the 12 monthly values, January first.  Each
    value applies on the first day of its month and days in between are
    interpolated linearly in time; December interpolates towards the
    following January.

    Returns a numpy array with one value per time step from start to stop
    (both inclusive).

    Raises ValueError if monthly does not provide exactly 12 values.
    '''
    start = siminfo['start']
    stop = siminfo['stop']
    freq = Minute(siminfo['delt'])

    # Month starts for every simulated year plus January 1st of the
    # following year.  That extra point gives December a target to
    # interpolate towards; without it the daily series ends on December 1st.
    years = stop.year - start.year + 1
    dr = date_range(start=f'{start.year}-01-01', end=f'{stop.year + 1}-01-01',
                    freq='MS')
    months = tile(monthly, years + 1).astype(float)
    if len(months) != 12 * (years + 1):
        raise ValueError('monthly must hold exactly 12 values')
    ts = Series(months[:len(dr)], index=dr).resample('D').interpolate('time')

    # ts is daily at this point, so compare the requested step against one
    # day in nanoseconds rather than ordering two offset objects.
    day_nanos = 24 * 60 * 60 * 10**9
    if freq.nanos < day_nanos:      # upsample
        ts = ts.resample(freq).ffill()
    elif freq.nanos > day_nanos:    # downsample
        ts = ts.resample(freq).mean()
    return ts.truncate(start, stop).to_numpy()
def _finalize_volume_frame(frame):
    """Cast the numeric columns, index by window start and add the 'id' column (in place)."""
    frame['volume'] = frame['volume'].astype('float')
    frame['tollgate_id'] = frame['tollgate_id'].astype('int')
    frame['direction'] = frame['direction'].astype('int')
    # 'time_window' looks like '[<start>,<end>)'; keep the start only
    frame.index = frame['time_window'].map(
        lambda x: parse(x.split(',')[0][1:]))
    frame['id'] = ('T' + frame.tollgate_id.map(str)
                   + 'D' + frame.direction.map(str))
    return frame


def data_preprocess(volume_file, volume_file_new, test_volume_file):
    """Load and normalize the training and test volume tables.

    The training table read from ``volume_file`` is right-merged onto every
    20-minute window from 2016-09-19 up to (not including) 2016-10-18 for
    the five tollgate/direction pairs, so missing windows appear with zeros;
    the rows of ``volume_file_new`` are then appended.  Both the training
    and the test table get float ``volume``, int ``tollgate_id`` and
    ``direction``, an index holding the window start and an ``id`` column of
    the form ``'T<tollgate>D<direction>'``.

    Returns
    -------
    (volume, test_volume) : tuple of pandas.DataFrame
    """
    volume = pd.read_csv(volume_file)
    volume_new = pd.read_csv(volume_file_new)
    test_volume = pd.read_csv(test_volume_file)

    # date_range(closed='left') was removed in pandas 2.0.  The end date sits
    # exactly on the 20-minute grid, so dropping the last point gives the
    # same left-closed range on every pandas version.
    window_starts = pd.date_range(start=datetime(2016, 9, 19),
                                  end=datetime(2016, 10, 18),
                                  freq='20min')[:-1]
    time_window = window_starts.map(
        lambda x: '[' + str(x) + ',' + str(x + Minute(20)) + ')')

    # one full set of windows per (tollgate, direction) pair:
    # (1, 0), (1, 1), (2, 0), (3, 0), (3, 1)
    n = len(time_window)
    fill_null_dataframe = pd.DataFrame({
        'tollgate_id': n * 2 * [1] + n * [2] + n * 2 * [3],
        'direction': n * [0] + n * [1] + n * [0] + n * [0] + n * [1],
        'time_window': np.tile(time_window, 5)
    })

    # use fill null values
    volume = pd.merge(volume, fill_null_dataframe, how='right').fillna(0)
    volume = volume.sort_values(['tollgate_id', 'direction', 'time_window'])
    volume = pd.concat((volume, volume_new), ignore_index=True)

    return _finalize_volume_frame(volume), _finalize_volume_frame(test_volume)
def next_update_time(last_updated, freq='D', hour=18, minute=0, second=0):
    """Compute the next update time.

    Period codes:
        'S': move to the next second
        'm': move to the next minute
        'H': move to the next hour
        'D': move to the next (business) day
        'W': move to next Monday
        'M': move to the first day of next month
        'Q': move to the first day of the next quarter

    For 'D', 'W', 'M' and 'Q' the time of day is then set to the given
    hour, minute and second.  A null ``last_updated`` yields MARKET_START.
    Any other code raises TypeError.
    """
    if pd.isnull(last_updated):
        return MARKET_START

    # Plain tick steps: the result keeps its own time of day.
    for code, tick in (('S', Second), ('m', Minute), ('H', Hour)):
        if freq == code:
            return last_updated + tick()

    # Calendar steps: normalized to midnight by the offset, then moved to
    # the requested time of day.
    calendar_steps = (
        ('D', BDay, {'n': 1, 'normalize': True}),
        ('W', Week, {'normalize': True, 'weekday': 0}),
        ('M', MonthBegin, {'n': 1, 'normalize': True}),
        ('Q', QuarterBegin, {'normalize': True, 'startingMonth': 1}),
    )
    for code, offset_cls, options in calendar_steps:
        if freq == code:
            res = last_updated + offset_cls(**options)
            return res.replace(hour=hour, minute=minute, second=second)

    raise TypeError('不能识别的周期类型,仅接受{}'.format(
        ('S', 'm', 'H', 'D', 'W', 'M', 'Q')))
def test_resample_ohlc(self):
    """Check 5-minute OHLC resampling of ``self.series``.

    The result must have as many rows as a 5-minute TimeGrouper produces
    and four columns; the second-to-last and the first bar are compared
    against slices of the source series.
    """
    s = self.series

    grouper = TimeGrouper(Minute(5))
    expect = s.groupby(grouper).agg(lambda x: x[-1])
    result = s.resample('5Min', how='ohlc')

    # assertEqual replaces the deprecated assertEquals alias throughout
    # (the alias was removed in Python 3.12)
    self.assertEqual(len(result), len(expect))
    self.assertEqual(len(result.columns), 4)

    # second-to-last bar covers s[-6:-1]
    xs = result.irow(-2)
    self.assertEqual(xs['open'], s[-6])
    self.assertEqual(xs['high'], s[-6:-1].max())
    self.assertEqual(xs['low'], s[-6:-1].min())
    self.assertEqual(xs['close'], s[-2])

    # first bar covers s[:5]
    xs = result.irow(0)
    self.assertEqual(xs['open'], s[0])
    self.assertEqual(xs['high'], s[:5].max())
    self.assertEqual(xs['low'], s[:5].min())
    self.assertEqual(xs['close'], s[4])
def test_resample_ohlc(self):
    """Check 5-minute OHLC resampling of ``self.series`` against right-closed bins.

    The result must have as many rows as a right-closed, right-labelled
    5-minute TimeGrouper produces and four columns; the last and the second
    bar are compared against slices of the source series.
    """
    s = self.series

    grouper = TimeGrouper(Minute(5), closed='right', label='right')
    expect = s.groupby(grouper).agg(lambda x: x[-1])
    result = s.resample('5Min', how='ohlc')

    # assertEqual replaces the deprecated assertEquals alias throughout
    # (the alias was removed in Python 3.12)
    self.assertEqual(len(result), len(expect))
    self.assertEqual(len(result.columns), 4)

    # last bar covers s[-5:]
    xs = result.irow(-1)
    self.assertEqual(xs['open'], s[-5])
    self.assertEqual(xs['high'], s[-5:].max())
    self.assertEqual(xs['low'], s[-5:].min())
    self.assertEqual(xs['close'], s[-1])

    # second bar covers s[1:6]
    xs = result.irow(1)
    self.assertEqual(xs['open'], s[1])
    self.assertEqual(xs['high'], s[1:6].max())
    self.assertEqual(xs['low'], s[1:6].min())
    self.assertEqual(xs['close'], s[5])
def test_resample_basic(self):
    """Basic 5-minute resampling checks.

    A 14-point minute series is averaged into right-closed and left-closed
    bins (both labelled on the right edge) and compared with hand-built
    means; the index name must survive.  ``self.series`` resampled with
    ``how='last'`` must match a left-closed, left-labelled TimeGrouper.
    """
    rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min',
                     name='index')
    s = Series(np.random.randn(14), index=rng)

    # right-closed bins: the first bin holds only the very first point
    result = s.resample('5min', how='mean', closed='right', label='right')
    right_closed_means = [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()]
    right_closed_labels = date_range('1/1/2000', periods=4, freq='5min')
    expected = Series(right_closed_means, index=right_closed_labels)
    assert_series_equal(result, expected)
    self.assertEqual(result.index.name, 'index')

    # left-closed bins, still labelled with the right edge
    result = s.resample('5min', how='mean', closed='left', label='right')
    left_closed_means = [s[:5].mean(), s[5:10].mean(), s[10:].mean()]
    left_closed_labels = date_range('1/1/2000 00:05', periods=3, freq='5min')
    expected = Series(left_closed_means, index=left_closed_labels)
    assert_series_equal(result, expected)

    s = self.series
    result = s.resample('5Min', how='last')
    left_grouper = TimeGrouper(Minute(5), closed='left', label='left')
    expect = s.groupby(left_grouper).agg(lambda x: x[-1])
    assert_series_equal(result, expect)
def test_resample_ohlc(series):
    """Check 5-minute OHLC resampling of ``series``.

    The result must have as many rows as a 5-minute TimeGrouper produces
    and four columns; the second-to-last and the first bar are compared
    against slices of the source series.
    """
    s = series
    expect = s.groupby(TimeGrouper(Minute(5))).agg(lambda x: x[-1])
    result = s.resample('5Min').ohlc()

    assert len(result) == len(expect)
    assert len(result.columns) == 4

    # second-to-last bar is built from s[-6:-1]
    bar = result.iloc[-2]
    window = s[-6:-1]
    assert bar['open'] == s[-6]
    assert bar['high'] == window.max()
    assert bar['low'] == window.min()
    assert bar['close'] == s[-2]

    # first bar is built from s[:5]
    bar = result.iloc[0]
    window = s[:5]
    assert bar['open'] == s[0]
    assert bar['high'] == window.max()
    assert bar['low'] == window.min()
    assert bar['close'] == s[4]
def flightsCount(data, WIFIAPTag_list, dateRange, time, spaceData):
    """Weighted passenger count per WiFi AP tag for upcoming flights.

    For each tag, flights scheduled in ``[time, time + 30*dateRange min)``
    are selected, tagged with ``diffUsed`` (half-hour steps between ``time``
    and the row's ``'time'`` value, rounded), left-merged with
    ``spaceTimeCount`` on ``flight_ID`` (missing values become 0) and with
    ``spaceData`` on ``diffUsed``.  The sum of
    ``sum_Times * passenger_ID`` is stored in the column
    ``"<dateRange>minutes_"``.

    Returns a DataFrame with one row per tag and the columns ``WIFIAPTag``,
    ``time`` and ``"<dateRange>minutes_"``.
    """
    count_column = str(dateRange) + "minutes_"
    per_tag_rows = []
    for wifi in WIFIAPTag_list:
        wifiDf = pd.DataFrame([wifi], columns=['WIFIAPTag'])
        wifiDf['time'] = time

        flights = data[data['WIFIAPTag'] == wifi].copy()
        window_end = time + 30 * dateRange * Minute()
        scheduled = flights['scheduled_flt_time']
        upcoming = flights[(scheduled >= time) & (scheduled < window_end)]

        upcoming['diffUsed'] = [
            np.round((x - time).seconds / 1800) for x in upcoming['time']
        ]
        upcoming = pd.merge(upcoming, spaceTimeCount, on=['flight_ID'],
                            how='left').fillna(0)
        upcoming = pd.merge(upcoming, spaceData, on=['diffUsed'], how='left')

        wifiDf[count_column] = np.sum(
            upcoming['sum_Times'] * upcoming['passenger_ID'])
        per_tag_rows.append(wifiDf.copy())

    res = pd.concat(per_tag_rows)
    return res.reset_index().drop(['index'], axis=1)
def test_resample_ohlc(series):
    """Check 5-minute OHLC resampling of ``series``.

    The result must have as many rows as a 5-minute ``Grouper`` produces and
    four columns; the second-to-last and the first bar are compared against
    slices of the source series.
    """
    s = series
    five_minutes = Grouper(freq=Minute(5))
    expect = s.groupby(five_minutes).agg(lambda x: x[-1])
    result = s.resample("5Min").ohlc()

    assert len(result) == len(expect)
    assert len(result.columns) == 4

    # (row position, observations in that bar, first value, last value)
    cases = [
        (-2, s[-6:-1], s[-6], s[-2]),
        (0, s[:5], s[0], s[4]),
    ]
    for position, observations, first, last in cases:
        xs = result.iloc[position]
        assert xs["open"] == first
        assert xs["high"] == observations.max()
        assert xs["low"] == observations.min()
        assert xs["close"] == last
def slide7():
    """Print a walkthrough of pandas date offsets and shifting.

    Shows offset objects and offset arithmetic, ``date_range`` with
    multiples of a frequency, shifting a series by periods and by
    frequency, adding offsets to a ``datetime``, rolling dates
    forward/back and grouping by a rolled-forward date.

    The Python 2 ``print`` statements were turned into single-argument
    ``print(...)`` calls, which print the same text on Python 2 and are
    valid on Python 3.
    """
    from pandas.tseries.offsets import Hour, Minute
    hour = Hour()
    print(hour)
    four_hours = Hour(4)
    print(four_hours)
    print(pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h'))
    print(Hour(2) + Minute(30))
    print(pd.date_range('1/1/2000', periods=10, freq='1h30min'))

    ts = Series(np.random.randn(4),
                index=pd.date_range('1/1/2000', periods=4, freq='M'))
    print(ts)
    print(ts.shift(2))
    print(ts.shift(-2))
    print('2 M')
    print(ts.shift(2, freq='M'))
    print('3 D')
    print(ts.shift(3, freq='D'))
    print('1 3D')
    print(ts.shift(1, freq='3D'))
    print('1 90T')
    print(ts.shift(1, freq='90T'))

    print('shifting dates with offsets')
    from pandas.tseries.offsets import Day, MonthEnd
    now = datetime(2011, 11, 17)
    print(now + 3 * Day())
    print(now + MonthEnd())
    print(now + MonthEnd(2))

    offset = MonthEnd()
    print(offset)
    print(offset.rollforward(now))
    print(offset.rollback(now))

    ts = Series(np.random.randn(20),
                index=pd.date_range('1/15/2000', periods=20, freq='4d'))
    print(ts.groupby(offset.rollforward).mean())
def create_position_info(self):
    """
    Create daily profit loss using stock data
    not using statement data

    The latest stock close is the reference price, except on the position
    set's stop date with status 'CLOSE', where the close order's net price
    is used instead.  Percentages are relative to the opening cost
    (open net price * quantity) and rounded to two decimals.

    :return: dict
    """
    last_close = self.stocks.last().close
    if self.date == self.position_set.stop_date and self.position_set.status == 'CLOSE':
        last_close = self.close_order.net_price

    stage = self.position_set.get_stage(last_close)
    status = self.position_set.current_status(
        new_price=last_close, old_price=self.stocks.reverse()[1].close
    )

    pl_open = (last_close - self.open_order.net_price) * self.open_order.quantity
    pl_open_pct = round(pl_open / (self.open_order.net_price * self.open_order.quantity) * 100, 2)

    # NOTE(review): the check above reads position_set.stop_date while this
    # branch reads self.stop_date -- confirm both are meant to be the same.
    if self.date == self.start_date:
        # first day: measured from the entry price
        pl_day = (last_close - self.open_order.net_price) * self.open_order.quantity
    elif self.date == self.stop_date:
        # last day: measured from the previous close to the exit price
        pl_day = (self.close_order.net_price - self.stocks.reverse()[1].close) * self.open_order.quantity
    else:
        pl_day = (last_close - self.stocks.reverse()[1].close) * self.open_order.quantity
    pl_day_pct = round(pl_day / (self.open_order.net_price * self.open_order.quantity) * 100, 2)

    return dict(
        stage_id=stage.id,
        stage=stage.stage_name,
        status=status,
        pl_open=round(pl_open, 2),
        pl_open_pct=pl_open_pct,
        pl_day=round(pl_day, 2),
        pl_day_pct=pl_day_pct,
        enter_price=self.open_order.net_price,
        exit_price=self.close_order.net_price if self.close_order else 0.0,
        quantity=self.open_order.quantity,
        holding=self.open_order.net_price * self.open_order.quantity,
        bp_effect=self.position_instruments.last().bp_effect,
        # Timestamp.to_datetime() has been removed from pandas;
        # to_pydatetime() is the equivalent that old and new versions share.
        date=(self.date + Hour(17) + Minute(30)).to_pydatetime().date(),
    )
def sarimax_predict():
    """Predict volumes with the stored SARIMAX models and write the answer CSV.

    For each series id in ``data`` the pickled model is loaded and run
    in-sample over the length of ``sarimax_data.csv``; predictions are
    mapped back with ``exp(x) - 1`` and rounded to two decimals.  Rows from
    2016-10-25 on that fall in 08:00-09:40 or 17:00-18:40 are reshaped to
    (tollgate_id, time_window, direction, volume) and written to
    ``../../answer/prediction_sarimax.csv``.

    Returns None.
    """
    model, prediction = {}, {}
    data = ['T1D0']  # , 'T1D1', 'T2D0', 'T3D0', 'T3D1'
    train_data = pd.read_csv('sarimax_data.csv', index_col=0)
    in_model_pkl = 'SARIMAX_6_0_1_1_0_1_72_%s.pkl'
    in_model_path = '../../data/data_after_process/tmp_file'

    for i in data:
        # NOTE(review): joblib.load unpickles the file; only load model
        # files from a trusted location.
        model[i] = joblib.load(path.join(in_model_path, in_model_pkl % (i)))
        # print(results[td].summary())
        # single-argument print call: same output on Python 2, valid on 3
        print(i + ' model start predicting!')
        prediction[i] = model[i].predict(0, len(train_data) - 1)
        prediction[i] = prediction[i].map(lambda x: np.round(np.exp(x) - 1, 2))

    answer = pd.DataFrame(prediction)['2016-10-25':]
    answer = pd.concat([
        answer.between_time('17:00', '18:40'),
        answer.between_time('8:00', '9:40')
    ]).sort_index()
    answer['time_window'] = answer.index.map(
        lambda x: '[' + str(x) + ',' + str(x + Minute(20)) + ')')
    answer = pd.melt(answer, var_name='tollgate_id', value_name='volume',
                     id_vars=['time_window'])
    # series ids look like 'T<tollgate>D<direction>'
    answer['direction'] = answer['tollgate_id'].map(lambda d: int(d[3]))
    answer['tollgate_id'] = answer['tollgate_id'].map(lambda d: int(d[1]))
    answer = answer[['tollgate_id', 'time_window', 'direction', 'volume']]

    # import time
    # version = time.strftime('%Y-%m-%d_%R', time.localtime(time.time()))
    # answer.to_csv('answer/prediction_'+version+'.csv',float_format='%.2f',header=True,index=False,encoding='utf-8')
    answer.to_csv('../../answer/prediction_sarimax.csv', float_format='%.2f',
                  header=True, index=False, encoding='utf-8')
def get_data_once(self, stock_code, period, end_time=None, count=1):
    '''
    Fetch historical bars for a single security, limited to 5000 records.

    end_time defaults to the current time.  A count that is not an int in
    1..5000 is replaced by 5000.  The server connection is established
    first when the session is not authenticated.

    Returns the bars as a DataFrame indexed by 'date', after passing it to
    set_time_left with a one-minute offset.  Raises ValueError when no data
    comes back.
    '''
    if end_time is None:
        end_time = datetime.datetime.now()

    count_is_valid = isinstance(count, int) and 0 < count <= 5000
    if not count_is_valid:
        count = 5000

    if not self.is_auth():
        self.connect_server()

    request = dict(
        security=stock_code,
        count=count,
        unit=period,
        fields=['date', 'open', 'high', 'low', 'close'],
        include_now=False,
        end_dt=end_time,
        fq_ref_date=None,
        df=True,
    )
    df = jqdatasdk.get_bars(**request)
    if df.empty:
        raise ValueError(f'未能获取数据; {stock_code}, {period}')

    df.set_index('date', inplace=True)
    self.set_time_left(df, Minute(1))
    return df
def test_resample_loffset(loffset):
    """Check that ``loffset`` shifts the labels of a resampled series.

    A right-closed, right-labelled 5-minute mean with the parametrized
    ``loffset`` must equal the hand-built means labelled one minute late and
    keep a 5-minute index frequency.  A daily series resampled to weekly is
    then used to check a negative business-day offset.
    """
    # GH 7687
    rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min')
    s = Series(np.random.randn(14), index=rng)

    resampler = s.resample('5min', closed='right', label='right',
                           loffset=loffset)
    result = resampler.mean()

    bin_means = [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()]
    idx = date_range('1/1/2000', periods=4, freq='5min')
    expected = Series(bin_means, index=idx + timedelta(minutes=1))
    assert_series_equal(result, expected)
    assert result.index.freq == Minute(5)

    # from daily
    dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10),
                     freq='D')
    ser = Series(np.random.rand(len(dti)), dti)

    # to weekly
    result = ser.resample('w-sun').last()
    business_day_offset = BDay()
    shifted = ser.resample('w-sun', loffset=-business_day_offset)
    expected = shifted.last()
    assert result.index[0] - business_day_offset == expected.index[0]
#---------------------------------------------------------------------- # Offset names ("time rules") and related functions from pandas.tseries.offsets import (Day, BDay, Hour, Minute, Second, Milli, Week, Micro, MonthEnd, MonthBegin, BMonthBegin, BMonthEnd, YearBegin, YearEnd, BYearBegin, BYearEnd, QuarterBegin, QuarterEnd, BQuarterBegin, BQuarterEnd) _offset_map = { 'D' : Day(), 'B' : BDay(), 'H' : Hour(), 'T' : Minute(), 'S' : Second(), 'L' : Milli(), 'U' : Micro(), None : None, # Monthly - Calendar 'M' : MonthEnd(), 'MS' : MonthBegin(), # Monthly - Business 'BM' : BMonthEnd(), 'BMS' : BMonthBegin(), # Annual - Calendar 'A-JAN' : YearEnd(month=1),