def test_between_time(self, inclusive_endpoints_fixture, frame_or_series): rng = date_range("1/1/2000", "1/5/2000", freq="5min") ts = DataFrame(np.random.randn(len(rng), 2), index=rng) ts = tm.get_obj(ts, frame_or_series) stime = time(0, 0) etime = time(1, 0) inclusive = inclusive_endpoints_fixture filtered = ts.between_time(stime, etime, inclusive=inclusive) exp_len = 13 * 4 + 1 if inclusive in ["right", "neither"]: exp_len -= 5 if inclusive in ["left", "neither"]: exp_len -= 4 assert len(filtered) == exp_len for rs in filtered.index: t = rs.time() if inclusive in ["left", "both"]: assert t >= stime else: assert t > stime if inclusive in ["right", "both"]: assert t <= etime else: assert t < etime result = ts.between_time("00:00", "01:00") expected = ts.between_time(stime, etime) tm.assert_equal(result, expected) # across midnight rng = date_range("1/1/2000", "1/5/2000", freq="5min") ts = DataFrame(np.random.randn(len(rng), 2), index=rng) ts = tm.get_obj(ts, frame_or_series) stime = time(22, 0) etime = time(9, 0) filtered = ts.between_time(stime, etime, inclusive=inclusive) exp_len = (12 * 11 + 1) * 4 + 1 if inclusive in ["right", "neither"]: exp_len -= 4 if inclusive in ["left", "neither"]: exp_len -= 4 assert len(filtered) == exp_len for rs in filtered.index: t = rs.time() if inclusive in ["left", "both"]: assert (t >= stime) or (t <= etime) else: assert (t > stime) or (t <= etime) if inclusive in ["right", "both"]: assert (t <= etime) or (t >= stime) else: assert (t < etime) or (t >= stime)
def update():
    """Append new 30-minute bars to ``data``, rerun the Chan analysis and redraw.

    Reads the module-level ``dataToNow`` feed, keeps only the 9:30-11:30 and
    13:00-15:00 sessions, resamples them to 30-minute OHLC bars and appends
    each bar to the module-level ``data`` lists. ``data`` is not cleared
    here, so repeated calls keep extending it.
    """
    global itemK,itemBi,itemLine,itemZhongshu

    ticks = DataFrame(index=dataToNow.Times, data=dataToNow.Data[0], columns=['price'])
    morning = ticks.between_time('9:30', '11:30')
    afternoon = ticks.between_time('13:00', '15:00')
    ticks = morning.append(afternoon)
    bars = ticks.resample('30T', how={'price': 'ohlc'}, label='right').dropna()

    for stamp, bar in bars.iterrows():
        ohlc = bar['price']
        data.Times.append(stamp.to_datetime())
        data.Data[0].append(ohlc['open'])
        data.Data[1].append(ohlc['high'])
        data.Data[2].append(ohlc['low'])
        data.Data[3].append(ohlc['close'])
        data.Data[4].append(0)

    # Each quote is [position, open, close, low, high].
    quotes = [
        [pos, data.Data[0][pos], data.Data[3][pos], data.Data[2][pos], data.Data[1][pos]]
        for pos in range(len(data.Times))
    ]

    chan = Chan(data.Data[0], data.Data[1], data.Data[2], data.Data[3], data.Data[4], data.Times)
    for step in (chan.barsMerge, chan.findFenxing, chan.findBi,
                 chan.findLines, chan.findZhongshus, chan.calculate_ta):
        step()

    # NOTE(review): this adds 1 to the local resampled frame, which is not
    # used afterwards -- confirm whether a frame counter was intended.
    bars += 1

    itemK.set_data(quotes)
    # itemBi.set_data(chan.bis)
    # itemLine.set_data(chan.lines)
    # itemZhongshu.set_data(chan.zhongshus)
    app.processEvents()  ## force complete redraw for every plot
def test_between_time_frame(self): rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = DataFrame(np.random.randn(len(rng), 2), index=rng) stime = time(0, 0) etime = time(1, 0) close_open = product([True, False], [True, False]) for inc_start, inc_end in close_open: filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = 13 * 4 + 1 if not inc_start: exp_len -= 5 if not inc_end: exp_len -= 4 self.assertEqual(len(filtered), exp_len) for rs in filtered.index: t = rs.time() if inc_start: self.assertTrue(t >= stime) else: self.assertTrue(t > stime) if inc_end: self.assertTrue(t <= etime) else: self.assertTrue(t < etime) result = ts.between_time('00:00', '01:00') expected = ts.between_time(stime, etime) assert_frame_equal(result, expected) # across midnight rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = DataFrame(np.random.randn(len(rng), 2), index=rng) stime = time(22, 0) etime = time(9, 0) close_open = product([True, False], [True, False]) for inc_start, inc_end in close_open: filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = (12 * 11 + 1) * 4 + 1 if not inc_start: exp_len -= 4 if not inc_end: exp_len -= 4 self.assertEqual(len(filtered), exp_len) for rs in filtered.index: t = rs.time() if inc_start: self.assertTrue((t >= stime) or (t <= etime)) else: self.assertTrue((t > stime) or (t <= etime)) if inc_end: self.assertTrue((t <= etime) or (t >= stime)) else: self.assertTrue((t < etime) or (t >= stime))
def test_between_time_raises(self, frame_or_series):
    """``between_time`` must raise ``TypeError`` when the index is not a DatetimeIndex."""
    # GH#20725
    plain = tm.get_obj(DataFrame([[1, 2, 3], [4, 5, 6]]), frame_or_series)

    # The default integer index is not a DatetimeIndex.
    with pytest.raises(TypeError, match="Index must be DatetimeIndex"):
        plain.between_time(start_time="00:00", end_time="12:00")
def test_between_time_types(self, frame_or_series):
    """Full ``datetime`` objects are rejected as ``between_time`` bounds."""
    # GH11818
    index = date_range("1/1/2000", "1/5/2000", freq="5min")
    target = tm.get_obj(DataFrame({"A": 0}, index=index), frame_or_series)

    bad_start = datetime(2010, 1, 2, 1)
    bad_end = datetime(2010, 1, 2, 5)
    msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"
    with pytest.raises(ValueError, match=msg):
        target.between_time(bad_start, bad_end)
def test_between_time(self, close_open_fixture): rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = DataFrame(np.random.randn(len(rng), 2), index=rng) stime = time(0, 0) etime = time(1, 0) inc_start, inc_end = close_open_fixture filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = 13 * 4 + 1 if not inc_start: exp_len -= 5 if not inc_end: exp_len -= 4 assert len(filtered) == exp_len for rs in filtered.index: t = rs.time() if inc_start: assert t >= stime else: assert t > stime if inc_end: assert t <= etime else: assert t < etime result = ts.between_time('00:00', '01:00') expected = ts.between_time(stime, etime) assert_frame_equal(result, expected) # across midnight rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = DataFrame(np.random.randn(len(rng), 2), index=rng) stime = time(22, 0) etime = time(9, 0) filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = (12 * 11 + 1) * 4 + 1 if not inc_start: exp_len -= 4 if not inc_end: exp_len -= 4 assert len(filtered) == exp_len for rs in filtered.index: t = rs.time() if inc_start: assert (t >= stime) or (t <= etime) else: assert (t > stime) or (t <= etime) if inc_end: assert (t <= etime) or (t >= stime) else: assert (t < etime) or (t >= stime)
def test_between_time(self, close_open_fixture): rng = date_range("1/1/2000", "1/5/2000", freq="5min") ts = DataFrame(np.random.randn(len(rng), 2), index=rng) stime = time(0, 0) etime = time(1, 0) inc_start, inc_end = close_open_fixture filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = 13 * 4 + 1 if not inc_start: exp_len -= 5 if not inc_end: exp_len -= 4 assert len(filtered) == exp_len for rs in filtered.index: t = rs.time() if inc_start: assert t >= stime else: assert t > stime if inc_end: assert t <= etime else: assert t < etime result = ts.between_time("00:00", "01:00") expected = ts.between_time(stime, etime) tm.assert_frame_equal(result, expected) # across midnight rng = date_range("1/1/2000", "1/5/2000", freq="5min") ts = DataFrame(np.random.randn(len(rng), 2), index=rng) stime = time(22, 0) etime = time(9, 0) filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = (12 * 11 + 1) * 4 + 1 if not inc_start: exp_len -= 4 if not inc_end: exp_len -= 4 assert len(filtered) == exp_len for rs in filtered.index: t = rs.time() if inc_start: assert (t >= stime) or (t <= etime) else: assert (t > stime) or (t <= etime) if inc_end: assert (t <= etime) or (t >= stime) else: assert (t < etime) or (t >= stime)
def test_between_time_incorr_arg_inclusive(self):
    """An unrecognised ``inclusive`` string makes ``between_time`` raise ``ValueError``."""
    # GH40245
    index = date_range("1/1/2000", "1/5/2000", freq="5min")
    frame = DataFrame(np.random.randn(len(index), 2), index=index)
    window = (time(0, 0), time(1, 0))

    expected_error = "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
    with pytest.raises(ValueError, match=expected_error):
        frame.between_time(*window, inclusive="bad_string")
def test_between_time_incompatiable_args_given(self, include_start, include_end):
    """Passing the deprecated flags together with ``inclusive`` raises ``ValueError``."""
    # GH40245
    index = date_range("1/1/2000", "1/5/2000", freq="5min")
    frame = DataFrame(np.random.randn(len(index), 2), index=index)
    lower, upper = time(0, 0), time(1, 0)

    msg = (
        "Deprecated arguments `include_start` and `include_end` cannot be "
        "passed if `inclusive` has been given."
    )
    with pytest.raises(ValueError, match=msg):
        frame.between_time(lower, upper, include_start, include_end, inclusive="left")
def test_between_time_types(self):
    """Full ``datetime`` bounds are rejected by the index, frame and series entry points."""
    # GH11818
    index = date_range("1/1/2000", "1/5/2000", freq="5min")
    frame = DataFrame({"A": 0}, index=index)
    series = Series(0, index=index)

    bad_start = datetime(2010, 1, 2, 1)
    bad_end = datetime(2010, 1, 2, 5)
    msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"

    entry_points = (
        index.indexer_between_time,
        frame.between_time,
        series.between_time,
    )
    for select in entry_points:
        with pytest.raises(ValueError, match=msg):
            select(bad_start, bad_end)
def test_between_time_types(self):
    """Full ``datetime`` bounds raise ``ValueError`` for index, frame and series."""
    # GH11818
    stamps = date_range('1/1/2000', '1/5/2000', freq='5min')
    lower = datetime(2010, 1, 2, 1)
    upper = datetime(2010, 1, 2, 5)

    with pytest.raises(ValueError):
        stamps.indexer_between_time(lower, upper)

    as_frame = DataFrame({'A': 0}, index=stamps)
    with pytest.raises(ValueError):
        as_frame.between_time(lower, upper)

    as_series = Series(0, index=stamps)
    with pytest.raises(ValueError):
        as_series.between_time(lower, upper)
def test_all2(self): # more sophisticated comparison of integer vs. # time-based windowing df = DataFrame({'B': np.arange(50)}, index=pd.date_range('20130101', periods=50, freq='H') ) # in-range data dft = df.between_time("09:00", "16:00") r = dft.rolling(window='5H') for f in ['sum', 'mean', 'count', 'median', 'std', 'var', 'kurt', 'skew', 'min', 'max']: result = getattr(r, f)() # we need to roll the days separately # to compare with a time-based roll # finally groupby-apply will return a multi-index # so we need to drop the day def agg_by_day(x): x = x.between_time("09:00", "16:00") return getattr(x.rolling(5, min_periods=1), f)() expected = df.groupby(df.index.day).apply( agg_by_day).reset_index(level=0, drop=True) tm.assert_frame_equal(result, expected)
def test_between_time_axis(self, axis): # issue 8839 rng = date_range("1/1/2000", periods=100, freq="10min") ts = DataFrame(np.random.randn(len(rng), len(rng))) stime, etime = ("08:00:00", "09:00:00") exp_len = 7 if axis in ["index", 0]: ts.index = rng assert len(ts.between_time(stime, etime)) == exp_len assert len(ts.between_time(stime, etime, axis=0)) == exp_len if axis in ["columns", 1]: ts.columns = rng selected = ts.between_time(stime, etime, axis=1).columns assert len(selected) == exp_len
def make_timeslot_oneday(data: pd.DataFrame,
                         sta_order_start=1,
                         sta_order_end=23,
                         slot_length=15) -> pd.DataFrame:
    """Sum ``pf`` per station for every time slot of one day.

    The day is cut into consecutive slots of ``slot_length`` minutes starting
    at 00:00:00. Rows of ``data`` whose index time falls in a slot (start
    included, end excluded) are grouped by their zero-padded ``sta_order``
    and their ``pf`` values are summed. Slots without rows are skipped.

    :param data: frame with a DatetimeIndex and the columns ``sum_time``,
        ``sta_order`` and ``pf``; the date label is read from the first
        row's ``sum_time``.
    :param sta_order_start: not used by this function (kept for callers).
    :param sta_order_end: not used by this function (kept for callers).
    :param slot_length: slot width in minutes.
    :return: one row per non-empty slot, ``start_time`` as the first column
        followed by one column per station; an empty frame with only a
        ``start_time`` column when no slot has data.
    """
    slot = pd.Timedelta(minutes=slot_length)
    day_end = pd.Timedelta(days=1)
    # The first 11 characters of str(Timestamp) are the date plus one space.
    date_day = str(data.iloc[0]['sum_time'])[:11]

    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas 2.0.
    rows = []
    start = pd.Timedelta(minutes=0)
    while start < day_end:
        # The last 8 characters of str(Timedelta) are 'HH:MM:SS'.
        start_str = str(start)[-8:]
        start = start + slot
        end_str = str(start)[-8:]
        # inclusive='left' replaces the removed include_end=False flag.
        oneslot = data.between_time(start_str, end_str, inclusive='left')

        sta_sum = {}
        for _, row in oneslot.iterrows():
            so = '%02d' % int(row['sta_order'])
            sta_sum[so] = sta_sum.get(so, 0) + row['pf']

        if sta_sum:
            sta_sum['start_time'] = date_day + start_str
            rows.append(sta_sum)
        print('\r\twork done:', date_day + start_str, end='')

    if not rows:
        # Previously this case raised KeyError when popping 'start_time'.
        return pd.DataFrame(columns=['start_time'])

    oneday = pd.DataFrame(rows)
    # Move the slot label to the front.
    start_time = oneday.pop('start_time')
    oneday.insert(0, 'start_time', start_time)
    return oneday
def test_all2(self, arithmetic_win_operators): f = arithmetic_win_operators # more sophisticated comparison of integer vs. # time-based windowing df = DataFrame({"B": np.arange(50)}, index=date_range("20130101", periods=50, freq="H")) # in-range data dft = df.between_time("09:00", "16:00") r = dft.rolling(window="5H") result = getattr(r, f)() # we need to roll the days separately # to compare with a time-based roll # finally groupby-apply will return a multi-index # so we need to drop the day def agg_by_day(x): x = x.between_time("09:00", "16:00") return getattr(x.rolling(5, min_periods=1), f)() expected = (df.groupby(df.index.day).apply(agg_by_day).reset_index( level=0, drop=True)) tm.assert_frame_equal(result, expected)
def test_between_time_axis(self, axis): # issue 8839 rng = date_range('1/1/2000', periods=100, freq='10min') ts = DataFrame(np.random.randn(len(rng), len(rng))) stime, etime = ('08:00:00', '09:00:00') exp_len = 7 if axis in ['index', 0]: ts.index = rng assert len(ts.between_time(stime, etime)) == exp_len assert len(ts.between_time(stime, etime, axis=0)) == exp_len if axis in ['columns', 1]: ts.columns = rng selected = ts.between_time(stime, etime, axis=1).columns assert len(selected) == exp_len
def make_timeslot_oneday(data: pd.DataFrame, slot_length=15) -> pd.DataFrame:
    """Average ``sta_time`` per station for every time slot of one day.

    The day is cut into consecutive slots of ``slot_length`` minutes starting
    at 00:00:00. Rows of ``data`` whose index time falls in a slot (start
    included, end excluded) are grouped by their zero-padded ``sta_order``
    and the mean ``sta_time`` of each group, rounded to two decimals, is
    stored. Slots without rows are skipped.

    :param data: frame with a DatetimeIndex and the columns ``arrival``,
        ``sta_order`` and ``sta_time``; the date label is read from the
        first row's ``arrival``.
    :param slot_length: slot width in minutes.
    :return: one row per non-empty slot with one column per station plus a
        ``start_time`` label; an empty frame when no slot has data.
    """
    slot = pd.Timedelta(minutes=slot_length)  # length of one time slot
    day_end = pd.Timedelta(days=1)
    # Stringify and keep the first 11 characters: the date plus one space.
    date_day = str(data.iloc[0]['arrival'])[:11]

    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas 2.0.
    rows = []
    start = pd.Timedelta(minutes=0)
    while start < day_end:
        # Slots run from 00:00:00 and the final one ends at 00:00:00 again.
        start_str = str(start)[-8:]
        start = start + slot
        end_str = str(start)[-8:]
        # Obtain one slot of data; inclusive='left' replaces the removed
        # include_end=False flag.
        oneslot = data.between_time(start_str, end_str, inclusive='left')

        sta_sum = {}
        sta_count = {}
        for _, row in oneslot.iterrows():
            so = '%02d' % int(row['sta_order'])
            sta_sum[so] = sta_sum.get(so, 0) + row['sta_time']
            sta_count[so] = sta_count.get(so, 0) + 1

        # Mean station time within the slot; round() fixes the precision.
        slot_row = {
            so: round(total * 1.0 / sta_count[so], 2)
            for so, total in sta_sum.items()
        }
        if slot_row:  # no keys means the slot had no data
            slot_row['start_time'] = date_day + start_str  # slot label
            rows.append(slot_row)
        print('\twork done:', date_day + start_str, '\r', end='')

    return pd.DataFrame(rows)
def test_between_time_types(self):
    """Full ``datetime`` bounds raise a descriptive ``ValueError`` everywhere."""
    # GH11818
    grid = date_range('1/1/2000', '1/5/2000', freq='5min')
    bounds = (datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
    msg = (r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\]"
           " to a time")

    with pytest.raises(ValueError, match=msg):
        grid.indexer_between_time(*bounds)

    with pytest.raises(ValueError, match=msg):
        DataFrame({'A': 0}, index=grid).between_time(*bounds)

    with pytest.raises(ValueError, match=msg):
        Series(0, index=grid).between_time(*bounds)
def make_mask(sc: Scenario, df: pd.DataFrame):
    """Return a boolean array flagging the rows of ``df`` hit by the time rules.

    Every entry of ``sc.home_mask_rules`` whose ``'type'`` is ``'time'``
    marks the rows whose index time lies between its ``'begin'`` (default
    ``'23:55'``) and ``'end'`` (default ``'00:00'``). Other rule types are
    ignored.
    """
    flags = pd.Series(np.zeros(df.values.shape[0], dtype=np.bool_), index=df.index)

    time_rules = (rule for rule in sc.home_mask_rules if rule.get('type') == 'time')
    for rule in time_rules:
        window_start = rule.get('begin', '23:55')
        window_end = rule.get('end', '00:00')
        hits = df.between_time(window_start, window_end)
        flags[hits.index] = True

    return flags.values
def test_between_time_datetimeindex(self): index = date_range("2012-01-01", "2012-01-05", freq="30min") df = DataFrame(np.random.randn(len(index), 5), index=index) bkey = slice(time(13, 0, 0), time(14, 0, 0)) binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172] result = df.between_time(bkey.start, bkey.stop) expected = df.loc[bkey] expected2 = df.iloc[binds] tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result, expected2) assert len(result) == 12
def test_between_time_formats(self): # GH11818 rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = DataFrame(np.random.randn(len(rng), 2), index=rng) strings = [("2:00", "2:30"), ("0200", "0230"), ("2:00am", "2:30am"), ("0200am", "0230am"), ("2:00:00", "2:30:00"), ("020000", "023000"), ("2:00:00am", "2:30:00am"), ("020000am", "023000am")] expected_length = 28 for time_string in strings: assert len(ts.between_time(*time_string)) == expected_length
def _get_power_spent_by_type(power_data: pd.DataFrame, times_peak_types: pd.DataFrame):
    """Accumulate scaled power sums per tariff type.

    For each row of ``times_peak_types`` the rows of ``power_data`` between
    the period's start and end times are selected. When any rows match, the
    sum of the first column divided by ``4 * 1000`` is added to the running
    total of that period's tariff type.

    NOTE(review): the ``4 * 1000`` divisor presumably converts quarter-hour
    readings in W to kWh -- confirm against the data source.

    :return: dict mapping tariff type to its accumulated value; tariff types
        whose periods matched no rows are absent.
    """
    totals = {}
    for _, period in times_peak_types.iterrows():
        window = power_data.between_time(
            period[TariffPeriods.COLUMN_NAME_START_PERIOD],
            period[TariffPeriods.COLUMN_NAME_END_PERIOD],
        )
        if window.shape[0] <= 0:
            continue

        spent = window.sum().values[0] / (4 * 1000)
        tariff = period[TariffPeriods.COLUMN_NAME_TARIFF_TYPE]
        totals[tariff] = totals.get(tariff, 0) + spent
    return totals
def test_between_time_same_functionality_old_and_new_args(self):
    """The deprecated boolean flags select the same rows as the matching ``inclusive`` value.

    Each deprecated call must also emit the ``FutureWarning`` announcing the
    deprecation.
    """
    # GH40245
    index = date_range("1/1/2000", "1/5/2000", freq="5min")
    frame = DataFrame(np.random.randn(len(index), 2), index=index)
    lower, upper = time(0, 0), time(1, 0)
    match = ("`include_start` and `include_end` "
             "are deprecated in favour of `inclusive`.")

    # Without any flag the default equals inclusive="both" and must not warn.
    default_rows = frame.between_time(lower, upper)
    both_rows = frame.between_time(lower, upper, inclusive="both")
    tm.assert_frame_equal(default_rows, both_rows)

    legacy_cases = [
        ({"include_start": False}, "right"),
        ({"include_end": False}, "left"),
        ({"include_start": False, "include_end": False}, "neither"),
        ({"include_start": True, "include_end": True}, "both"),
    ]
    for legacy_kwargs, inclusive in legacy_cases:
        with tm.assert_produces_warning(FutureWarning, match=match):
            legacy_rows = frame.between_time(lower, upper, **legacy_kwargs)
            modern_rows = frame.between_time(lower, upper, inclusive=inclusive)
            tm.assert_frame_equal(legacy_rows, modern_rows)
def test_between_time_warn(self, include_start, include_end, frame_or_series):
    """Passing the deprecated boolean flags positionally emits a ``FutureWarning``."""
    # GH40245
    index = date_range("1/1/2000", "1/5/2000", freq="5min")
    obj = tm.get_obj(
        DataFrame(np.random.randn(len(index), 2), index=index), frame_or_series
    )
    lower = time(0, 0)
    upper = time(1, 0)

    match = ("`include_start` and `include_end` "
             "are deprecated in favour of `inclusive`.")
    with tm.assert_produces_warning(FutureWarning, match=match):
        _ = obj.between_time(lower, upper, include_start, include_end)
# Inclusive (first day, last day) ranges that must have a row for every slot.
_DATASET_PERIODS = (
    ('2018-12-01', '2018-12-30'),
    ('2019-01-11', '2019-01-31'),
    ('2019-03-01', '2019-03-20'),
)
# Offsets from midnight of the first and last slot of a day.
_FIRST_SLOT_OFFSET = pd.Timedelta(hours=5, minutes=15)
_LAST_SLOT_OFFSET = pd.Timedelta(hours=23)


def _expected_slots(first_day, last_day, slot_length):
    """Return every slot start from 05:15 to 23:00 for each day in the range."""
    step = pd.Timedelta(minutes=slot_length)
    slots = []
    for day in pd.date_range(first_day, last_day, freq='D'):
        slots.extend(
            pd.date_range(day + _FIRST_SLOT_OFFSET, day + _LAST_SLOT_OFFSET, freq=step)
        )
    return slots


def prepare_dataset(data: pd.DataFrame, slot_length=15) -> pd.DataFrame:
    """Restrict ``data`` to 05:15-23:00, fill missing slots and write it to CSV.

    Steps:

    1. index ``data`` by ``start_time`` and drop the columns ``'01'``,
       ``'02'`` and ``'03'`` (both done in place, so the caller's frame is
       modified);
    2. keep only rows whose time lies between 05:15:00 and 23:00:00;
    3. for every day of the three fixed periods (2018-12-01..2018-12-30,
       2019-01-11..2019-01-31, 2019-03-01..2019-03-20) add a row for each
       slot start that is not already present;
    4. sort by ``start_time``, replace missing values with 0 and write the
       result to ``pf_dataset.csv`` in the working directory.

    :param data: frame with a datetime ``start_time`` column and the columns
        ``'01'``, ``'02'``, ``'03'``.
    :param slot_length: slot width in minutes.
    :return: the completed frame with a fresh integer index.
    """
    data.set_index(['start_time'], drop=False, inplace=True)
    # Columns 01-03 have been dropped in the tt data, so drop them here too.
    data.drop(columns=['01', '02', '03'], inplace=True)
    data = data.between_time('05:15:00', '23:00:00')

    # Fill up possible time slot loss. All gaps are gathered first and added
    # with a single concat: the former per-row DataFrame.append copied the
    # frame each time and no longer exists in pandas 2.0.
    present = set(data.index)
    missing = [
        slot
        for first_day, last_day in _DATASET_PERIODS
        for slot in _expected_slots(first_day, last_day, slot_length)
        if slot not in present
    ]

    if missing:
        filler = pd.DataFrame({'start_time': missing})
        data = pd.concat([data, filler], ignore_index=True)
    else:
        # Drop the start_time index so that sorting by the column of the
        # same name is not ambiguous.
        data = data.reset_index(drop=True)

    data = data.sort_values(['start_time'], ignore_index=True)
    data = data.fillna(value=0)
    data.to_csv('pf_dataset.csv', index=False)
    return data
def select_hour(d: pd.DataFrame) -> DataFrame:
    """Keep only the 15:00:00 rows of ``d`` and re-index them by calendar date.

    The head of the frame is printed before filtering, after filtering and
    after re-indexing.

    NOTE(review): the original inline notes suggest the input is indexed by
    ``t_date_hour`` and the output by ``t_date``, each with an ``rtn``
    column -- confirm against the caller.

    :param d: frame with a DatetimeIndex.
    :return: the rows stamped exactly 15:00:00, indexed by ``datetime.date``.
    """
    print(d.head(2))

    target = datetime.time(15, 0, 0)
    # Both endpoints are included by default, so the include_start and
    # include_end keywords (removed in pandas 2.0) are not needed.
    d = d.between_time(target, target)
    print(d.head())

    d.index = d.index.date
    print(d.head())
    return d
def test_between_time_formats(self):
    """Every supported spelling of 02:00-02:30 selects the same number of rows."""
    # GH11818
    _skip_if_has_locale()

    stamps = date_range('1/1/2000', '1/5/2000', freq='5min')
    frame = DataFrame(np.random.randn(len(stamps), 2), index=stamps)

    spellings = [
        ("2:00", "2:30"),
        ("0200", "0230"),
        ("2:00am", "2:30am"),
        ("0200am", "0230am"),
        ("2:00:00", "2:30:00"),
        ("020000", "023000"),
        ("2:00:00am", "2:30:00am"),
        ("020000am", "023000am"),
    ]
    n_rows = 28

    for pair in spellings:
        selected = frame.between_time(*pair)
        # The failing pair is reported as the assertion message.
        self.assertEqual(len(selected), n_rows, "%s - %s" % pair)
def clean_non_trading_times(df: pd.DataFrame) -> pd.DataFrame:
    """
    Drop weekend, out-of-hours and holiday rows, then forward-fill gaps.

    :param df: Data with weekends and holidays; must contain a datetime
        ``DateTime`` column.
    :return trading_data: rows on weekdays between 9:00 and 16:00 that do not
        fall on a calendar holiday, with missing values forward-filled.
        ``DateTime`` is the first column.
    """
    # Weekends go out
    weekdays = df[df['DateTime'].dt.weekday < 5].reset_index(drop=True)

    # Remove non trading hours
    in_hours = (
        weekdays.set_index('DateTime')
        .between_time('9:00', '16:00')
        .reset_index()
    )

    # Holiday days we want to delete from data_slice.
    # NOTE(review): the holiday range is fixed to 2000-2020; rows outside it
    # are never treated as holidays -- confirm this is intended.
    holidays = Calendar().holidays(start='2000-01-01', end='2020-12-31')
    # Holidays are midnight timestamps while the remaining rows are intraday,
    # so compare on the calendar day; a direct isin() never matched.
    on_holiday = in_hours['DateTime'].dt.normalize().isin(holidays)
    clean_df = in_hours[~on_holiday].copy()

    # ffill() replaces the deprecated fillna(method='ffill').
    trading_data = clean_df.ffill()
    return trading_data
def test_between_time_formats(self, frame_or_series): # GH#11818 rng = date_range("1/1/2000", "1/5/2000", freq="5min") ts = DataFrame(np.random.randn(len(rng), 2), index=rng) ts = tm.get_obj(ts, frame_or_series) strings = [ ("2:00", "2:30"), ("0200", "0230"), ("2:00am", "2:30am"), ("0200am", "0230am"), ("2:00:00", "2:30:00"), ("020000", "023000"), ("2:00:00am", "2:30:00am"), ("020000am", "023000am"), ] expected_length = 28 for time_string in strings: assert len(ts.between_time(*time_string)) == expected_length
def remove_faulty_data(df: pd.DataFrame, start: str, end: str,
                       filename: str) -> pd.DataFrame:
    """Removes faulty data at a known time interval specified through a
    starting time and ending time. The input time values can either be in a
    known string format or as datetime.time() objects.

    Rows whose index time lies in the interval from ``start`` to ``end``
    (both endpoints included) are dropped on every day, and the number of
    dropped rows is printed.

    :param df: frame with a DatetimeIndex.
    :param start: start of the faulty interval (string or ``datetime.time``).
    :param end: end of the faulty interval (string or ``datetime.time``).
    :param filename: name used only in the printed summary.
    :return: ``df`` without the faulty rows.
    """
    # between_time accepts datetime.time but rejects full timestamps, so
    # parsed strings are reduced to their time of day.
    if isinstance(start, str):
        start = to_datetime(start).time()
    if isinstance(end, str):
        end = to_datetime(end).time()

    # Remove faulty data from df:
    # (Using end time before start time excludes the interval in-between)
    # inclusive="neither" replaces the removed include_start/include_end
    # flags.
    df_filtered = df.between_time(start_time=end, end_time=start,
                                  inclusive="neither")

    # Number of deleted rows due to faulty data:
    n_deleted_rows = df.shape[0] - df_filtered.shape[0]
    print(f"{n_deleted_rows} rows of faulty data between {start} and {end} "
          f"succesfully removed from '{filename}'.")

    return df_filtered
def test_between_time_axis_raises(self, axis):
    """``between_time`` raises ``TypeError`` when the chosen axis is not datetime-like."""
    # issue 8839
    stamps = date_range("1/1/2000", periods=100, freq="10min")
    plain_labels = np.arange(0, len(stamps))
    values = np.random.randn(len(stamps), len(stamps))
    frame = DataFrame(values, index=stamps, columns=stamps)
    window = ("08:00:00", "09:00:00")
    msg = "Index must be DatetimeIndex"

    if axis in ["columns", 1]:
        # Break the row index, then filter rows implicitly and explicitly.
        frame.index = plain_labels
        for extra in ({}, {"axis": 0}):
            with pytest.raises(TypeError, match=msg):
                frame.between_time(*window, **extra)

    if axis in ["index", 0]:
        # Break the column index, then filter along the columns.
        frame.columns = plain_labels
        with pytest.raises(TypeError, match=msg):
            frame.between_time(*window, axis=1)
def test_between_time_axis_raises(self, axis):
    """Filtering along an axis without a DatetimeIndex must raise ``TypeError``."""
    # issue 8839
    labels = date_range('1/1/2000', periods=100, freq='10min')
    n_labels = len(labels)
    integer_labels = np.arange(0, n_labels)
    table = DataFrame(np.random.randn(n_labels, n_labels),
                      index=labels, columns=labels)
    first, last = '08:00:00', '09:00:00'
    msg = "Index must be DatetimeIndex"

    rows_broken = axis in ['columns', 1]
    columns_broken = axis in ['index', 0]

    if rows_broken:
        table.index = integer_labels
        with pytest.raises(TypeError, match=msg):
            table.between_time(first, last)
        with pytest.raises(TypeError, match=msg):
            table.between_time(first, last, axis=0)

    if columns_broken:
        table.columns = integer_labels
        with pytest.raises(TypeError, match=msg):
            table.between_time(first, last, axis=1)
def get_mask(self, df: pd.DataFrame):
    """Return ``HomeMask.empty(df)`` with the rows between ``self.begin`` and ``self.end`` set to True."""
    result = HomeMask.empty(df)
    in_window = df.between_time(self.begin, self.end).index
    result[in_window] = True
    return result