def GetEndDateList(data, freq, trim_end=False):
    '''
    Return the sorted, de-duplicated period-end dates covering ``data.index``.

    Every index entry is mapped to the end of the period that contains it;
    an entry that already sits on a period end is kept unchanged.

    :param data: object whose ``index`` supports adding pandas offsets
        (expected to be a ``DatetimeIndex``).
    :param freq: 'M' (calendar month end), 'W' (week ending on weekday 5)
        or 'Y' (calendar year end).
    :param trim_end: when True, the last period end is dropped from the
        result; mainly used when resampling daily data to a lower frequency.
    :return: list of period-end timestamps in ascending order.
    :raises ValueError: if ``freq`` is not one of 'M', 'W', 'Y'.
    '''
    if freq == 'M':
        forward, back = MonthEnd(), MonthEnd()
    elif freq == 'W':
        week_day = 5  # 0-6 correspond to Monday through Sunday
        forward, back = Week(weekday=week_day), Week()
    elif freq == 'Y':
        forward, back = YearEnd(), YearEnd()
    else:
        # Used to fall through and fail later with UnboundLocalError.
        raise ValueError(
            "freq must be one of 'M', 'W', 'Y'; got %r" % (freq,))

    rolled = data.index + forward
    # An entry is already a period end exactly when stepping one period
    # forward and one period back lands on the entry itself.
    date_list = data.index.where(data.index == (rolled - back), rolled)

    ends = sorted(set(date_list))
    return ends[:-1] if trim_end else ends
def GetEndDateList(data, freq, trim_end=False):
    '''
    List the distinct period-end dates that the entries of ``data.index``
    fall into, in ascending order.

    :param data: object whose ``index`` supports adding pandas offsets
        (expected to be a ``DatetimeIndex``).
    :param freq: 'M' for month ends, 'W' for week ends (weekday 5) or
        'Y' for year ends.
    :param trim_end: drop the last period end when True; mainly used for
        daily data.
    :return: sorted list of period-end timestamps.
    :raises ValueError: if ``freq`` is not one of 'M', 'W', 'Y'.
    '''
    week_day = 5  # 0-6 correspond to Monday through Sunday
    # freq -> (offset that moves to the next period end,
    #          offset that steps one whole period back)
    offsets_by_freq = {
        'M': (MonthEnd(), MonthEnd()),
        'W': (Week(weekday=week_day), Week()),
        'Y': (YearEnd(), YearEnd()),
    }
    if freq not in offsets_by_freq:
        # Without this guard the function crashed with UnboundLocalError.
        raise ValueError(
            "freq must be one of 'M', 'W', 'Y'; got %r" % (freq,))
    step_forward, step_back = offsets_by_freq[freq]

    next_end = data.index + step_forward
    # Keep entries that already are a period end, move the others forward.
    already_end = data.index == (next_end - step_back)
    date_list = data.index.where(already_end, next_end)

    unique_ends = sorted(set(date_list))
    if trim_end:
        return unique_ends[:-1]
    return unique_ends
def resample_index(dat, to_freq):
    '''
    Return a copy of ``dat`` whose index entries are moved to period ends.

    Example (to_freq='M'):
        20180808 -> 20180831
        20180809 -> 20180831

    Notes:
        1. The index of the result is rewritten. The work is done on
           ``dat.copy()``, so the object passed in keeps its own index.
        2. Real trading dates are hidden: every entry becomes the natural
           (calendar) period end.

    :param dat: pandas object with a datetime index.
    :param to_freq: 'M', 'W' (weeks ending on weekday 5) or 'Y'; any other
        value returns the copy with its index untouched.
    :return: the re-indexed copy.
    '''
    data = dat.copy()

    if to_freq == 'M':
        step_forward, step_back = MonthEnd(), MonthEnd()
    elif to_freq == 'W':
        # Deriving the week end from year*100+week gives wrong dates, so the
        # Week offset is used instead.
        week_day = 5  # 0-6 correspond to Monday through Sunday
        step_forward, step_back = Week(weekday=week_day), Week()
    elif to_freq == 'Y':
        step_forward, step_back = YearEnd(), YearEnd()
    else:
        return data

    current = data.index
    shifted = current + step_forward
    # Entries already on a period end survive the forward/back round trip.
    data.index = current.where(current == (shifted - step_back), shifted)
    return data
def get_data(self):
    """
    Compute cash-based operating profitability, deflated by the "ME" factor.

    cash-based profitability = operating profit
                               + decrease in accounts receivable
                               + decrease in inventory
                               + increase in accounts payable

    NOTE(review): the original description also listed "increase in accrued
    liabilities", but no such item is requested or used here.

    Balance-sheet items are taken at two consecutive year ends so that the
    year-on-year change can be formed.

    :return: list with the deflated value for each requested stock.
    :raises Exception: if any Wind request reports a non-zero ErrorCode.
    """
    date = self.date
    date_1 = datetime.strptime(date, "%Y-%m-%d")

    # From May on use the previous year end, otherwise go one year further
    # back (presumably because the latest annual report is not out before
    # May - confirm).
    years_back = 1 if date_1.month >= 5 else 2
    rpt_date = (date_1 - YearEnd(years_back)).strftime("%Y-%m-%d")
    rpt_date_prev = (date_1 - YearEnd(years_back + 1)).strftime("%Y-%m-%d")

    def fetch(field, report_date, extra=""):
        # One Wind snapshot request for `field` at `report_date`.
        return w.wss(
            self.stockcodes, field,
            "unit=1;rptDate=" + report_date + ";rptType=1" + extra)

    # Operating profit
    ebit_oper = fetch("wgsd_ebit_oper", rpt_date, ";currencyType=")
    # Receivables, inventory and payables for the two year ends
    acct_rcv = fetch("acct_rcv", rpt_date)
    acct_rcv1 = fetch("acct_rcv", rpt_date_prev)
    inventory = fetch("inventories", rpt_date)
    inventory1 = fetch("inventories", rpt_date_prev)
    acc_payable = fetch("acct_payable", rpt_date)
    acc_payable1 = fetch("acct_payable", rpt_date_prev)

    results = [
        ebit_oper, acct_rcv, acct_rcv1, inventory, inventory1,
        acc_payable, acc_payable1
    ]
    if any(result.ErrorCode for result in results):
        raise Exception("数据提取异常")

    def values(result):
        # The figures live in result.Data[0] (as read by the sibling factor
        # methods); converting the result object itself yields a 0-d object
        # array that cannot be subtracted. dtype=float maps None to NaN.
        return np.array(result.Data[0], dtype=float)

    cashbasedoperprofit = (
        values(ebit_oper)
        - (values(acct_rcv) - values(acct_rcv1))
        - (values(inventory) - values(inventory1))
        + (values(acc_payable) - values(acc_payable1)))

    deflate_f = FactorsZoo.deflate_factor(date, self.stockcodes, "ME")
    cashprofit = cashbasedoperprofit / np.array(deflate_f, dtype=float)
    return cashprofit.tolist()
def generate_calendar(year, drop_index=False):
    '''
    Build a daily calendar for ``year`` that flags weekdays, US federal
    holidays and days lying within a holiday week.

    Columns: ``month``, ``year``, ``weekday`` (names), ``is_weekday``,
    ``is_holiday`` (0/1) and ``is_holiday_week`` (number of holidays inside
    the 7-day window centred on the day).

    :param year: calendar year (anything ``str()`` turns into a year).
    :param drop_index: when False (default) the ``date`` column becomes the
        index; when True it stays a regular column.
    :return: DataFrame with one row per day of the year.
    '''
    from pandas.tseries.holiday import USFederalHolidayCalendar
    from pandas.tseries.offsets import YearEnd

    first_day = pd.to_datetime('1/1/' + str(year))
    last_day = first_day + YearEnd()
    days = pd.date_range(str(first_day), str(last_day), freq='D')
    month_names = [d.strftime('%B') for d in days]
    federal_holidays = USFederalHolidayCalendar().holidays(
        start=first_day, end=last_day)

    cal_df = pd.DataFrame({'date': days, 'month': month_names})
    cal_df['year'] = [format(d, '%Y') for d in days]
    cal_df['weekday'] = [format(d, '%A') for d in days]

    working_day_names = [
        'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
    cal_df['is_weekday'] = (
        cal_df['weekday'].isin(working_day_names).astype(int))
    cal_df['is_holiday'] = cal_df['date'].isin(federal_holidays).astype(int)

    # Centred 7-day window: holidays within three days either side.
    holiday_window = cal_df['is_holiday'].rolling(
        window=7, center=True, min_periods=1)
    cal_df['is_holiday_week'] = holiday_window.sum().astype(int)

    if drop_index:
        return cal_df
    cal_df.set_index('date', inplace=True)
    return cal_df
def mktimerange(
        time_resolution: TimeResolution,
        date_from: Union[datetime, str],
        date_to: Union[datetime, str] = None) -> Tuple[Timestamp, Timestamp]:
    """
    Compute appropriate time ranges for monthly and annual time resolutions.

    ``date_from`` is floored to the begin of its month/year and ``date_to``
    is ceiled to the end of its month/year. A value that already sits on
    the respective boundary is returned unchanged.

    Args:
        time_resolution: time resolution as enumeration
        date_from: datetime string or object
        date_to: datetime string or object; defaults to ``date_from``

    Returns:
        Tuple of two Timestamps: "date_from" and "date_to"

    Raises:
        NotImplementedError: for any resolution other than annual or monthly
    """
    if date_to is None:
        date_to = date_from

    if time_resolution == TimeResolution.ANNUAL:
        floor_offset, ceil_offset = YearBegin(), YearEnd()
    elif time_resolution == TimeResolution.MONTHLY:
        floor_offset, ceil_offset = MonthBegin(), MonthEnd()
    else:
        raise NotImplementedError(
            "mktimerange only implemented for annual and monthly time ranges")

    # rollback/rollforward keep a timestamp that is already on the boundary.
    # Subtracting/adding the offset instead (e.g. "- YearBegin(1)") moves
    # such a timestamp a whole period away, widening the range by one
    # month/year.
    date_from = floor_offset.rollback(pd.to_datetime(date_from))
    date_to = ceil_offset.rollforward(pd.to_datetime(date_to))

    return date_from, date_to
def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
             holidays=None, calendar=None, **kwds):
    """
    Set up the offset together with its helper business-day offset.

    Attributes are written through ``object.__setattr__`` (presumably
    because the class blocks ordinary attribute assignment - confirm).

    :param n: stored as ``n``.
    :param normalize: stored as ``normalized`` and forwarded to the helper
        ``CustomBusinessDay``.
    :param weekmask: forwarded to ``CustomBusinessDay``.
    :param holidays: forwarded to ``CustomBusinessDay``.
    :param calendar: forwarded to ``CustomBusinessDay``.
    :param kwds: merged into ``self.kwds``; everything except ``month`` is
        also forwarded to ``CustomBusinessDay``.
    """
    set_attr = object.__setattr__

    set_attr(self, "n", n)
    set_attr(self, "normalized", normalize)
    self.kwds.update(kwds)
    set_attr(self, "offset", timedelta(0))
    set_attr(self, "month", self._default_month)

    # 'month' stays in self.kwds but must not be passed on to the helper.
    kwds.pop('month', None)
    business_day = CustomBusinessDay(n=1,
                                     normalize=normalize,
                                     weekmask=weekmask,
                                     holidays=holidays,
                                     calendar=calendar,
                                     **kwds)
    set_attr(self, "cbday", business_day)
    self.kwds['calendar'] = self.cbday.calendar
    set_attr(self, "y_offset", YearEnd(1))
def merge_ccm_comp(ccm, comp):
    """
    Attach link-table rows to fundamentals and keep one row per
    (permno, jdate).

    Steps:
      1. left-join ``comp`` (gvkey, datadate, be, count) with ``ccm`` on
         ``gvkey``;
      2. derive ``yearend`` (datadate rolled to 31 Dec) and ``jdate``
         (``yearend`` plus six month ends);
      3. keep rows whose ``jdate`` lies within [linkdt, linkenddt];
      4. per (gvkey, permno, jdate) keep the row with the latest datadate;
      5. per (permno, jdate) keep the row with the highest ``be``.

    :param ccm: link table with gvkey, permno, linkdt and linkenddt.
    :param comp: fundamentals with gvkey, datadate, be and count.
    :return: DataFrame with columns gvkey, permno, datadate, yearend,
        jdate, be, count.
    """
    linked = comp[['gvkey', 'datadate', 'be', 'count']].merge(
        ccm, how='left', on=['gvkey'])
    linked['yearend'] = linked['datadate'] + YearEnd(0)
    linked['jdate'] = linked['yearend'] + MonthEnd(6)

    # Restrict to the validity window of each link.
    in_link_window = ((linked['linkdt'] <= linked['jdate'])
                      & (linked['jdate'] <= linked['linkenddt']))
    kept_columns = [
        'gvkey', 'permno', 'datadate', 'yearend', 'jdate', 'be', 'count'
    ]
    valid = linked.loc[in_link_window, kept_columns]

    # Latest datadate per gvkey / permno / jdate.
    newest = (valid.groupby(['gvkey', 'permno', 'jdate'])['datadate']
              .max().reset_index())
    valid = valid.merge(
        newest, on=['gvkey', 'permno', 'jdate', 'datadate'], how='inner')

    # Several gvkeys may map to one permno: keep the one with the largest BE.
    largest_be = valid.groupby(['permno', 'jdate'])['be'].max().reset_index()
    return valid.merge(largest_be, on=['permno', 'jdate', 'be'], how='inner')
def generate_calendar(year):
    '''
    Build a daily planning calendar for ``year``.

    Columns: Date, WeekNumber ('%U' week of year as a 2-digit string),
    Month, Year, Weekday, DOTM (day of the month), IsWeekday (Mon-Fri),
    IsProductionDay (Tue-Fri), LastSellingDayOfMonth (last business day of
    the month), Season (derived from WeekNumber), Holiday (US federal
    holiday), HolidayWeek (holidays within the centred 7-day window) and
    ShipWeek ('A' for even week numbers, 'B' for odd ones).

    :param year: calendar year (anything ``str()`` turns into a year).
    :return: DataFrame with one row per day and a fresh integer index.
    '''
    from pandas.tseries.holiday import USFederalHolidayCalendar
    from pandas.tseries.offsets import BMonthEnd, YearEnd

    start_date = pd.to_datetime('1/1/' + str(year))
    end_date = start_date + YearEnd()
    DAT = pd.date_range(str(start_date), str(end_date), freq='D')
    WK = [d.strftime('%U') for d in DAT]
    MO = [d.strftime('%B') for d in DAT]
    holidays = USFederalHolidayCalendar().holidays(start=start_date,
                                                   end=end_date)

    DAYZ = pd.DataFrame({'Date': DAT, 'WeekNumber': WK, 'Month': MO})
    DAYZ['Year'] = [format(d, '%Y') for d in DAT]
    DAYZ['Weekday'] = [format(d, '%A') for d in DAT]
    DAYZ['DOTM'] = [format(d, '%d') for d in DAT]
    DAYZ['IsWeekday'] = DAYZ['Weekday'].isin(
        ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'])
    DAYZ['IsProductionDay'] = DAYZ['Weekday'].isin(
        ['Tuesday', 'Wednesday', 'Thursday', 'Friday'])

    # Pass the offset object instead of the 'BM' alias string: the alias is
    # deprecated in recent pandas releases, the object works everywhere.
    last_biz_days = pd.date_range(start_date, end_date, freq=BMonthEnd())
    DAYZ['LastSellingDayOfMonth'] = DAYZ['Date'].isin(last_biz_days)

    # Season by '%U' week number: 00-09 and 50-53 Winter, 10-22 Spring,
    # 23-35 Summer, 36-49 Autumn.
    season_weeks = [
        ('Winter', list(range(0, 10)) + list(range(50, 54))),
        ('Spring', list(range(10, 23))),
        ('Summer', list(range(23, 36))),
        ('Autumn', list(range(36, 50))),
    ]
    for season, week_numbers in season_weeks:
        labels = ['%02d' % wk for wk in week_numbers]
        DAYZ.loc[DAYZ['WeekNumber'].isin(labels), 'Season'] = season

    DAYZ['Holiday'] = DAYZ['Date'].isin(holidays)
    DAYZ['HolidayWeek'] = DAYZ['Holiday'].rolling(
        window=7, center=True, min_periods=1).sum()
    DAYZ['ShipWeek'] = ['A' if int(wk) % 2 == 0 else 'B' for wk in WK]
    DAYZ.reset_index(drop=True, inplace=True)
    return DAYZ
class TestYearEnd(Base):
    """Checks for the YearEnd offset with its default month."""

    _offset = YearEnd

    def test_misspecified(self):
        """A month outside 1..12 is rejected with a ValueError."""
        with pytest.raises(ValueError, match="Month must go from 1 to 12"):
            YearEnd(month=13)

    # Each entry: (offset, {start datetime: expected datetime after adding}).
    offset_cases = [
        (YearEnd(), {
            datetime(2008, 1, 1): datetime(2008, 12, 31),
            datetime(2008, 6, 30): datetime(2008, 12, 31),
            datetime(2008, 12, 31): datetime(2009, 12, 31),
            datetime(2005, 12, 30): datetime(2005, 12, 31),
            datetime(2005, 12, 31): datetime(2006, 12, 31),
        }),
        (YearEnd(0), {
            datetime(2008, 1, 1): datetime(2008, 12, 31),
            datetime(2008, 6, 30): datetime(2008, 12, 31),
            datetime(2008, 12, 31): datetime(2008, 12, 31),
            datetime(2005, 12, 30): datetime(2005, 12, 31),
        }),
        (YearEnd(-1), {
            datetime(2007, 1, 1): datetime(2006, 12, 31),
            datetime(2008, 6, 30): datetime(2007, 12, 31),
            datetime(2008, 12, 31): datetime(2007, 12, 31),
            datetime(2006, 12, 29): datetime(2005, 12, 31),
            datetime(2006, 12, 30): datetime(2005, 12, 31),
        }),
        (YearEnd(-2), {
            datetime(2007, 1, 1): datetime(2005, 12, 31),
            datetime(2008, 6, 30): datetime(2006, 12, 31),
            datetime(2008, 12, 31): datetime(2006, 12, 31),
        }),
    ]

    @pytest.mark.parametrize('case', offset_cases)
    def test_offset(self, case):
        """Adding the offset to each start date gives the expected date."""
        offset, expectations = case
        for start, wanted in expectations.items():
            assert_offset_equal(offset, start, wanted)

    # Each entry: (offset, datetime, whether the datetime is on the offset).
    on_offset_cases = [
        (YearEnd(), datetime(2007, 12, 31), True),
        (YearEnd(), datetime(2008, 1, 1), False),
        (YearEnd(), datetime(2006, 12, 31), True),
        (YearEnd(), datetime(2006, 12, 29), False),
    ]

    @pytest.mark.parametrize('case', on_offset_cases)
    def test_onOffset(self, case):
        """The on-offset check agrees with the expected flag."""
        offset, moment, wanted = case
        assert_onOffset(offset, moment, wanted)
def _time_format(self, start: str, end: str, freq='d'): '''轉換日期格式''' if freq == 'd': pass elif freq == 'm': start = pd.to_datetime(start, format='%Y%m') + MonthEnd(1) end = pd.to_datetime(end, format='%Y%m') + MonthEnd(1) elif freq == 'q': start = start[0:4] + start[4:6].replace('0', 'Q') start = pd.to_datetime(start) + QuarterEnd(1) end = end[0:4] + end[4:6].replace('0', 'Q') end = pd.to_datetime(end) + QuarterEnd(1) elif freq == 'y': start = pd.to_datetime(start) + YearEnd(1) end = pd.to_datetime(end) + YearEnd(1) return start, end
def _split_by_year(tile, time_dim='time'):
    """
    Yield one ``(year_string, Tile)`` pair per calendar year covered by
    ``tile``.

    The first and last entries of ``tile.sources[time_dim]`` bound the
    range; iteration starts at the year begin on or before the first entry.

    :param tile: object exposing ``sources`` (indexable by ``time_dim`` and
        sliceable through ``.loc``) and ``geobox``.
    :param time_dim: name of the time dimension in ``tile.sources``.
    :return: generator of ``(year_str, Tile)`` with ``year_str`` formatted
        as '%Y'.
    """
    start_range = tile.sources[time_dim][0].data
    end_range = tile.sources[time_dim][-1].data

    first_year_start = YearBegin(normalize=True).rollback(start_range)
    to_year_end = YearEnd(normalize=True)

    # YearBegin() is the offset behind the 'AS' alias; passing the object
    # avoids the alias string, which recent pandas releases deprecate.
    year_starts = pd.date_range(start=first_year_start, end=end_range,
                                freq=YearBegin(), normalize=True)
    for date in year_starts:
        # NOTE(review): the upper bound is 31 Dec at midnight (normalized);
        # confirm whether sources later on 31 Dec should be included.
        year_slice = slice(date, to_year_end.rollforward(date))
        sources_slice = tile.sources.loc[{time_dim: year_slice}]
        year_str = '{0:%Y}'.format(date)
        yield year_str, Tile(sources=sources_slice, geobox=tile.geobox)
def resample_index(dat, to_freq):
    '''
    Copy ``dat`` and snap each index entry to the end of its period.

    The index of the result differs from the input's. Only the copy made
    here is modified, so the caller's object keeps its index.

    :param dat: pandas object with a datetime index.
    :param to_freq: 'M' (month end), 'W' (week ending on weekday 5) or
        'Y' (year end); other values leave the index as it is.
    :return: the copy with the adjusted index.
    '''
    data = dat.copy()
    idx = data.index

    if to_freq == 'M':
        month_end = MonthEnd()
        moved = idx + month_end
        data.index = idx.where(idx == (moved - month_end), moved)
    elif to_freq == 'Y':
        year_end = YearEnd()
        moved = idx + year_end
        data.index = idx.where(idx == (moved - year_end), moved)
    elif to_freq == 'W':
        # Building the week end from year*100+week yields wrong dates, hence
        # the Week offset.
        week_day = 5  # 0-6 correspond to Monday through Sunday
        moved = idx + Week(weekday=week_day)
        data.index = idx.where(idx == (moved - Week()), moved)
    return data
def get_data(self):
    """
    Fetch the Wind field "div_divpct_3yearaccu" for ``self.stockcodes``.

    The reference year is that of the previous year end when ``self.date``
    falls in May or later, otherwise that of the year end before it.

    :return: ``FactorsZoo.check_data`` applied to the first data row.
    :raises Exception: if the Wind request reports a non-zero ErrorCode.
    """
    as_of = datetime.strptime(self.date, "%Y-%m-%d")
    years_back = 1 if as_of.month >= 5 else 2
    reference_year = (as_of - YearEnd(years_back)).strftime("%Y")

    dividend_data = w.wss(self.stockcodes, "div_divpct_3yearaccu",
                          "year=" + reference_year)
    if dividend_data.ErrorCode != 0:
        print("数据提取异常")
        raise Exception("数据提取异常")

    return FactorsZoo.check_data(dividend_data.Data[0])
def eoy(d, offset=0):
    """Unmodified end-of-year.

    Return the last date of the year that ``d`` falls in after ``d`` has
    been shifted by ``offset`` whole years (forward if positive, backward
    if negative). ``offset=-1`` gives the end of the previous year.

    :param d: anything ``to_datetime`` accepts.
    :param offset: number of years to add before rolling to 31 December.
    :return: the resulting end-of-year timestamp.
    """
    # The year shift is handled by DateOffset; YearEnd(0) then only rolls
    # forward to 31 December and leaves a date already there unchanged.
    shifted = to_datetime(d) + DateOffset(years=offset)
    return shifted + YearEnd(0)
def select_stock(self):
    """
    Build the stock pool for ``self.date``.

    Filters applied, in order:
      1. start from the constituents of Wind sector "a001010f00000000"
         (NOTE(review): the original comment says this step removes *ST/ST
         stocks - confirm what the sector id contains);
      2. keep stocks whose trade status equals u'交易' (trading);
      3. keep stocks with ``maxupordown`` < 1 (not limit-up);
      4. keep stocks listed for more than 4 * 365 days;
      5. keep stocks with roe > 5, pe > 0 and net income > 0, all taken at
         the reference year end (previous year end from May on, otherwise
         one year earlier).

    NOTE(review): the original description also listed cash-flow,
    operating-profit and share-count criteria; none of them is implemented.

    :return: list of the stock codes that pass every filter.
    :raises Exception: if any Wind request reports a non-zero ErrorCode.
    """
    date = self.date

    def checked(result):
        # Fail early with the same error the sibling factor methods raise,
        # instead of tripping over a malformed ``Data`` payload later.
        if result.ErrorCode != 0:
            raise Exception("数据提取异常")
        return result

    # Starting universe
    stock = checked(w.wset("sectorconstituent",
                           "date=" + date + ";sectorid=a001010f00000000"))
    codes = stock.Data[1]

    # Drop stocks that are suspended on the date
    status = checked(w.wss(codes, "trade_status", "tradeDate=" + date))
    df = DataFrame({'Codes': codes, 'status': status.Data[0]})
    # .copy() so the column assignments below act on an independent frame.
    df = df[df['status'] == u'交易'].copy()

    # Drop limit-up stocks
    maxud = checked(w.wss(df['Codes'].values.tolist(), "maxupordown",
                          "tradeDate=" + date))
    df['maxud'] = maxud.Data[0]
    df = df[df['maxud'] < 1].copy()

    # Drop stocks listed for four years or less
    ipo_days = checked(w.wss(df['Codes'].values.tolist(), "ipo_listdays",
                             "tradeDate=" + date))
    df['ipo_days'] = ipo_days.Data[0]
    df = df[df['ipo_days'] > 4 * 365].copy()

    # Reference year end for the fundamental screens
    date_1 = datetime.strptime(date, "%Y-%m-%d")
    pre_year_date = date_1 - YearEnd(1 if date_1.month >= 5 else 2)
    reference = pre_year_date.strftime("%Y-%m-%d")

    # roe > 5, pe > 0, inc > 0
    remaining = df['Codes'].values.tolist()
    roe = checked(w.wss(remaining, "roe", "rptDate=" + reference))
    df['roe'] = roe.Data[0]
    pe = checked(w.wss(remaining, "pe",
                       "tradeDate=" + reference + ";ruleType=10"))
    df['pe'] = pe.Data[0]
    inc = checked(w.wss(
        remaining, "wgsd_net_inc",
        "unit=1;rptDate=" + reference + ";rptType=1;currencyType="))
    df['net_inc'] = inc.Data[0]

    df = df[df['roe'] > 5]
    df = df[df['pe'] > 0]
    df = df[df['net_inc'] > 0]

    stockcodes = df['Codes'].values.tolist()
    return stockcodes
def get_data(self):
    """
    Fetch the Wind field "deductedprofit" for ``self.stockcodes``.

    The report date is the previous year end when ``self.date`` falls in
    May or later, otherwise the year end before it.

    :return: ``FactorsZoo.check_data`` applied to the first data row.
    :raises Exception: if the Wind request reports a non-zero ErrorCode.
    """
    as_of = datetime.strptime(self.date, "%Y-%m-%d")
    years_back = 1 if as_of.month >= 5 else 2
    report_date = (as_of - YearEnd(years_back)).strftime("%Y-%m-%d")

    deductedprofit_data = w.wss(self.stockcodes, "deductedprofit",
                                "unit=1;rptDate=" + report_date)
    if deductedprofit_data.ErrorCode != 0:
        print("数据提取异常")
        raise Exception("数据提取异常")

    return FactorsZoo.check_data(deductedprofit_data.Data[0])
def calculateTTMValue(in_df, code):
    """
    Fill the ``mgsy_ttm`` and ``mgjyxjl_ttm`` columns for rows where
    ``mgjyxjl_ttm`` is None or an empty string.

    Rows stored in ``hexun_finance_basic`` for ``code`` are combined with
    ``in_df`` and indexed by ``report_type`` parsed as a date. For a report
    outside the fourth quarter the value is

        value at the latest year end
        - value of the same report one year earlier
        + value of the current report

    and for a fourth-quarter report it is the current value itself. If a
    required row is missing or not numeric, the current value is used.
    Results are rounded to two decimals and an exact 0 is replaced by 0.01.

    NOTE(review): the visible body neither returns nor stores ``df3`` -
    confirm whether the remainder of this function was lost.

    :param in_df: new report rows; needs a ``report_type`` column.
    :param code: stock code used to select the stored rows.
    """
    # Whitespace between SQL clauses is not significant; the continuation
    # backslashes of the original literal are replaced by adjacent strings.
    stored = pd.read_sql_query(
        "select code,report_type,yysr,jlr,lrze,kjlr,zzc,gdqy,jyjxjl,mgsy,"
        "roe,mgjyxjl,mgjzc,mgsy_ttm,mgjyxjl_ttm "
        "from hexun_finance_basic "
        "where code=%(name)s",
        db.engine,
        params={'name': code})
    # DataFrame.append no longer exists in current pandas; concat is the
    # equivalent call.
    df = pd.concat([stored, in_df])
    report_dates = df['report_type'].map(lambda x: pd.to_datetime(x))
    df3 = df.set_index(report_dates)

    for index, row in df3.iterrows():
        if row.mgjyxjl_ttm is None or row.mgjyxjl_ttm == '':
            # Latest year end on or before this report
            lastYearEnd = YearEnd().rollback(index)
            # Same report one year earlier
            lastYearQuart = index - pd.DateOffset(months=12)
            app.logger.debug(
                index.strftime('%Y-%m-%d') + ':' +
                lastYearEnd.strftime('%Y-%m-%d') + ':' +
                lastYearQuart.strftime('%Y-%m-%d'))
            try:
                if index.quarter != 4:
                    n_mgsy = float(df3.loc[lastYearEnd].mgsy) - float(
                        df3.loc[lastYearQuart].mgsy) + float(row.mgsy)
                    n_mgjyxjl = float(df3.loc[lastYearEnd].mgjyxjl) - float(
                        df3.loc[lastYearQuart].mgjyxjl) + float(row.mgjyxjl)
                else:
                    n_mgsy = float(row.mgsy)
                    n_mgjyxjl = float(row.mgjyxjl)
            except Exception:
                # Best effort: log and fall back to the current values.
                app.logger.warning(traceback.format_exc())
                n_mgsy = float(row.mgsy)
                n_mgjyxjl = float(row.mgjyxjl)

            # Truncate to two decimals
            v_mgsy_ttm = round(n_mgsy, 2)
            v_mgjyxjl_ttm = round(n_mgjyxjl, 2)
            # Replace exact zeros
            v_mgsy_ttm = 0.01 if v_mgsy_ttm == 0 else v_mgsy_ttm
            v_mgjyxjl_ttm = 0.01 if v_mgjyxjl_ttm == 0 else v_mgjyxjl_ttm

            # Single .loc write; the chained "df3.col.loc[index] = ..." form
            # may assign to a temporary copy instead of df3.
            df3.loc[index, 'mgsy_ttm'] = v_mgsy_ttm
            df3.loc[index, 'mgjyxjl_ttm'] = v_mgjyxjl_ttm
class TestYearEndDiffMonth(Base):
    """Checks for the YearEnd offset anchored on March (``month=3``)."""

    # Each entry: (offset, {start datetime: expected datetime after adding}).
    offset_cases = []
    offset_cases.append((YearEnd(month=3), {
        datetime(2008, 1, 1): datetime(2008, 3, 31),
        datetime(2008, 2, 15): datetime(2008, 3, 31),
        datetime(2008, 3, 31): datetime(2009, 3, 31),
        datetime(2008, 3, 30): datetime(2008, 3, 31),
        datetime(2005, 3, 31): datetime(2006, 3, 31),
        datetime(2006, 7, 30): datetime(2007, 3, 31)
    }))

    offset_cases.append((YearEnd(0, month=3), {
        datetime(2008, 1, 1): datetime(2008, 3, 31),
        datetime(2008, 2, 28): datetime(2008, 3, 31),
        datetime(2008, 3, 31): datetime(2008, 3, 31),
        datetime(2005, 3, 30): datetime(2005, 3, 31)
    }))

    offset_cases.append((YearEnd(-1, month=3), {
        datetime(2007, 1, 1): datetime(2006, 3, 31),
        datetime(2008, 2, 28): datetime(2007, 3, 31),
        datetime(2008, 3, 31): datetime(2007, 3, 31),
        datetime(2006, 3, 29): datetime(2005, 3, 31),
        datetime(2006, 3, 30): datetime(2005, 3, 31),
        datetime(2007, 3, 1): datetime(2006, 3, 31)
    }))

    offset_cases.append((YearEnd(-2, month=3), {
        datetime(2007, 1, 1): datetime(2005, 3, 31),
        datetime(2008, 6, 30): datetime(2007, 3, 31),
        datetime(2008, 3, 31): datetime(2006, 3, 31)
    }))

    @pytest.mark.parametrize('case', offset_cases)
    def test_offset(self, case):
        """Adding the offset to each start date gives the expected date."""
        offset, cases = case
        # dict.items() matches TestYearEnd.test_offset and needs no
        # Python 2 compatibility shim.
        for base, expected in cases.items():
            assert_offset_equal(offset, base, expected)

    # Each entry: (offset, datetime, whether the datetime is on the offset).
    on_offset_cases = [(YearEnd(month=3), datetime(2007, 3, 31), True),
                       (YearEnd(month=3), datetime(2008, 1, 1), False),
                       (YearEnd(month=3), datetime(2006, 3, 31), True),
                       (YearEnd(month=3), datetime(2006, 3, 29), False)]

    @pytest.mark.parametrize('case', on_offset_cases)
    def test_onOffset(self, case):
        """The on-offset check agrees with the expected flag."""
        offset, dt, expected = case
        assert_onOffset(offset, dt, expected)
def get_data(self):
    """
    Fetch the Wind field "net_cash_flows_oper_act" for ``self.stockcodes``.

    The report date is the previous year end when ``self.date`` falls in
    May or later, otherwise the year end before it.

    :return: ``FactorsZoo.check_data`` applied to the first data row.
    :raises Exception: if the Wind request reports a non-zero ErrorCode.
    """
    as_of = datetime.strptime(self.date, "%Y-%m-%d")
    years_back = 1 if as_of.month >= 5 else 2
    report_date = (as_of - YearEnd(years_back)).strftime("%Y-%m-%d")

    options = "unit=1;rptDate=" + report_date + ";rptType=1"
    cash_data = w.wss(self.stockcodes, "net_cash_flows_oper_act", options)
    if cash_data.ErrorCode != 0:
        print("数据提取异常")
        raise Exception("数据提取异常")

    return FactorsZoo.check_data(cash_data.Data[0])
def get_data(self):
    """
    Fetch the Wind field "wgsd_net_inc" for ``self.stockcodes``.

    The report date is the previous year end when ``self.date`` falls in
    May or later, otherwise the year end before it.

    :return: ``FactorsZoo.check_data`` applied to the first data row.
    :raises Exception: if the Wind request reports a non-zero ErrorCode.
    """
    as_of = datetime.strptime(self.date, "%Y-%m-%d")
    years_back = 1 if as_of.month >= 5 else 2
    report_date = (as_of - YearEnd(years_back)).strftime("%Y-%m-%d")

    options = "unit=1;rptDate=" + report_date + ";rptType=1;currencyType="
    net_inc_data = w.wss(self.stockcodes, "wgsd_net_inc", options)
    if net_inc_data.ErrorCode != 0:
        print("数据提取异常")
        raise Exception("数据提取异常")

    return FactorsZoo.check_data(net_inc_data.Data[0])
def deflate_factor(date, stockcodes, label=None):
    """
    Fetch the values used to deflate (scale) a factor.

    BE: book value of equity (Wind field "tot_equity")
    ME: total market value (Wind field "mkt_cap_ard", taken at ``date``)
    AT: total assets (Wind field "wgsd_assets")

    BE and AT are taken at the previous year end when ``date`` falls in May
    or later, otherwise at the year end before it.

    :param date: date string in "%Y-%m-%d" format.
    :param stockcodes: stock codes passed to the Wind request.
    :param label: which deflator to fetch: "BE", "ME" or "AT".
    :return: first data row of the Wind result, or None when ``label`` is
        None.
    :raises ValueError: if ``label`` is not None, "BE", "ME" or "AT".
    :raises Exception: if the Wind request reports a non-zero ErrorCode.
    """
    date_1 = datetime.strptime(date, "%Y-%m-%d")
    if label is None:
        return None
    if label not in ("BE", "ME", "AT"):
        # Previously an unknown label reached the final return with
        # ``deflate_f`` unbound and failed with UnboundLocalError.
        raise ValueError(
            "label must be one of 'BE', 'ME', 'AT'; got %r" % (label,))

    years_back = 1 if date_1.month >= 5 else 2
    pre_year_date = (date_1 - YearEnd(years_back)).strftime("%Y-%m-%d")

    if label == "BE":
        # Book value of equity
        result = w.wss(stockcodes, "tot_equity",
                       "unit=1;rptDate=" + pre_year_date + ";rptType=1")
    elif label == "ME":
        # Total market value
        result = w.wss(stockcodes, "mkt_cap_ard",
                       "unit=1;tradeDate=" + date)
    else:
        # Total assets
        result = w.wss(
            stockcodes, "wgsd_assets",
            "unit=1;rptDate=" + pre_year_date + ";rptType=1;currencyType=")

    if result.ErrorCode != 0:
        raise Exception("提取数据异常")
    return result.Data[0]
def set_current(self):
    """
    Refresh ``self.newest_date`` with the latest completed period ends
    relative to today.

    Keys written:
      'M', 'Q', 'Y' - today rolled back to a month / quarter / year end
                      (today itself if it already is one);
      'D'           - yesterday;
      'H'           - latest half-year end (30 Jun or 31 Dec) on or before
                      today.
    """
    today = datetime.date.today()

    self.newest_date['M'] = MonthEnd().rollback(today)
    self.newest_date['Q'] = QuarterEnd().rollback(today)
    self.newest_date['D'] = today - timedelta(days=1)
    self.newest_date['Y'] = YearEnd().rollback(today)

    mid_year = datetime.date(today.year, 6, 30)
    year_close = datetime.date(today.year, 12, 31)
    if today >= year_close:
        latest_half = year_close
    elif today >= mid_year:
        latest_half = mid_year
    else:
        latest_half = datetime.date(today.year - 1, 12, 31)
    self.newest_date['H'] = latest_half
def ecos(code='021Y125',
         item1='?',
         item2='?',
         item3='?',
         freq='Q',
         first='1900',
         last='2100',
         N='10000',
         detail=True,
         col=None):
    '''Retrieve a daily, monthly, quarterly or annual time series from ecos.

    Run 'open_ecosapi()' to explore ecos api codes.

    :param code: statistic code placed in the request URL.
    :param item1: first item code placed in the request URL.
    :param item2: second item code placed in the request URL.
    :param item3: third item code placed in the request URL.
    :param freq: 'Y', 'Q', 'M' or 'D'; doubled ('YY', 'QQ', ...) before it
        is sent.
    :param first: first period requested.
    :param last: last period requested.
    :param N: upper row bound placed in the request URL.
    :param detail: when True, print statistic name, unit, period range and
        item name of the result.
    :param col: column name of the returned frame.
    :return: single-column DataFrame of floats indexed by 'DATE' (period
        ends for Y/Q/M), or None if ``freq`` is not supported.
    '''
    # NOTE(review): the API key is hard-coded in this URL; consider reading
    # it from configuration instead.
    ecos_key = "http://ecos.bok.or.kr/api/StatisticSearch/390S6FIOF95M7MHASMEA"
    # Separator inserted after the year so pandas can parse the TIME labels.
    freq_str = {'QQ': 'Q', 'MM': '-'}
    freq += freq  # Y, Q, M, D -> YY, QQ, MM, DD
    url = f"{ecos_key}/json/kr/1/{N}/{code}/{freq}/{first}/{last}/{item1}/{item2}/{item3}/"

    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(url) as result:
        data = json.loads(result.read())
    data = data["StatisticSearch"]["row"]
    df = pd.DataFrame(data)

    if detail:
        print(
            f"통계: {df.loc[0, 'STAT_NAME']}",
            f"단위: {df.loc[0, 'UNIT_NAME']}",
            f"기간: {df.loc[0, 'TIME']} - {df.loc[df.index[-1], 'TIME']}",
            f"항목: {df.loc[0, 'ITEM_NAME1']}",
        )

    df = df.set_index("TIME")
    df.index.names = ['DATE']

    if freq == 'MM':
        df.index = pd.DatetimeIndex(df.index.str[:4] + freq_str[freq] +
                                    df.index.str[4:])
        df.index = df.index + MonthEnd()
    elif freq == 'QQ':
        df.index = pd.DatetimeIndex(df.index.str[:4] + freq_str[freq] +
                                    df.index.str[4:])
        df.index = df.index + QuarterEnd()
    elif freq == 'YY':
        df.index = pd.DatetimeIndex(df.index)
        df.index = df.index + YearEnd()
    elif freq == 'DD':
        df.index = pd.DatetimeIndex(df.index)
    else:
        # The accepted annual code is 'Y'; the old message named 'A'.
        print('frequency is not one of D, M, Q, Y.')
        return

    df["DATA_VALUE"] = df["DATA_VALUE"].astype("float")
    return df['DATA_VALUE'].to_frame(col)
def generate_weeks(year):
    """
    Build a daily table for ``year`` with week number, month and season.

    Columns: Date, WeekNumber ('%U' week of year as a 2-digit string),
    Month (full month name) and Season, which is derived from WeekNumber:
    00-09 and 50-53 Winter, 10-22 Spring, 23-35 Summer, 36-49 Autumn.

    :param year: calendar year (anything ``str()`` turns into a year).
    :return: DataFrame with one row per day and a fresh integer index.
    """
    from pandas.tseries.offsets import YearEnd

    first_day = pd.to_datetime('1/1/' + str(year))
    last_day = first_day + YearEnd()
    days = pd.date_range(str(first_day), str(last_day), freq='D')

    DAYZ = pd.DataFrame({
        'Date': days,
        'WeekNumber': [d.strftime('%U') for d in days],
        'Month': [d.strftime('%B') for d in days],
    })

    # Applied in this order: Winter, Spring, Summer, Autumn.
    season_weeks = [
        ('Winter', list(range(0, 10)) + list(range(50, 54))),
        ('Spring', list(range(10, 23))),
        ('Summer', list(range(23, 36))),
        ('Autumn', list(range(36, 50))),
    ]
    for season, week_numbers in season_weeks:
        labels = ['%02d' % wk for wk in week_numbers]
        DAYZ.loc[DAYZ['WeekNumber'].isin(labels), 'Season'] = season

    DAYZ.reset_index(drop=True, inplace=True)
    return DAYZ
def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
             holidays=None, calendar=None, **kwds):
    """
    Set up the offset together with its helper business-day offset.

    :param n: stored as ``n``.
    :param normalize: stored as ``normalize`` and forwarded to the helper
        ``CustomBusinessDay``.
    :param weekmask: forwarded to ``CustomBusinessDay``.
    :param holidays: forwarded to ``CustomBusinessDay``.
    :param calendar: forwarded to ``CustomBusinessDay``.
    :param kwds: merged into ``self.kwds``. ``offset`` (default
        ``timedelta(0)``) and ``month`` (default ``self._default_month``)
        are read from it; everything except ``month`` is also forwarded to
        ``CustomBusinessDay``.
    """
    self.n = n
    self.normalize = normalize
    self.kwds.update(kwds)
    self.offset = kwds.get('offset', timedelta(0))
    # Read 'month' and remove it in one step: it stays in self.kwds but
    # must not be passed on to CustomBusinessDay.
    self.month = kwds.pop('month', self._default_month)
    self.cbday = CustomBusinessDay(n=1,
                                   normalize=normalize,
                                   weekmask=weekmask,
                                   holidays=holidays,
                                   calendar=calendar,
                                   **kwds)
    self.kwds['calendar'] = self.cbday.calendar
    self.y_offset = YearEnd(1)
def get_dividends(start, end, **kwargs):
    """
    Download cash-dividend records and store them as 'cash_div' under
    '/dividends/'.

    The requested window runs from ``start`` minus one year begin to
    ``end`` plus one year end. Rows containing missing values are dropped,
    the date columns and the ticker are converted to integers, the rows are
    sorted by ticker / endDate / publishDate and the columns are renamed to
    IDs, date, ann_dt and dividend.

    :param start: first date of interest (anything ``pd.to_datetime``
        accepts).
    :param end: last date of interest.
    :param kwargs: accepted but not used.
    """
    window_begin = pd.to_datetime(start) - YearBegin(1)
    window_end = pd.to_datetime(end) + YearEnd(1)
    start = window_begin.strftime("%Y%m%d")
    end = window_end.strftime("%Y%m%d")

    raw_dividends = uqer_db.run_api(
        "EquDivGet",
        beginDate=start,
        endDate=end,
        field=["endDate", "ticker", "publishDate", "perCashDiv"])
    raw_dividends.dropna(inplace=True)

    # 'YYYY-MM-DD' strings -> YYYYMMDD integers
    for date_column in ('endDate', 'publishDate'):
        digits_only = raw_dividends[date_column].str.replace('-', '')
        raw_dividends[date_column] = digits_only.astype('int')
    raw_dividends['ticker'] = raw_dividends['ticker'].astype('int')

    raw_dividends.sort_values(['ticker', 'endDate', 'publishDate'],
                              inplace=True)
    new_names = {
        'ticker': 'IDs',
        'endDate': 'date',
        'publishDate': 'ann_dt',
        'perCashDiv': 'dividend'
    }
    raw_dividends.rename(columns=new_names, inplace=True)
    h5db.save_h5file(raw_dividends, 'cash_div', '/dividends/')
'T' : Minute(), 'S' : Second(), 'L' : Milli(), 'U' : Micro(), None : None, # Monthly - Calendar 'M' : MonthEnd(), 'MS' : MonthBegin(), # Monthly - Business 'BM' : BMonthEnd(), 'BMS' : BMonthBegin(), # Annual - Calendar 'A-JAN' : YearEnd(month=1), 'A-FEB' : YearEnd(month=2), 'A-MAR' : YearEnd(month=3), 'A-APR' : YearEnd(month=4), 'A-MAY' : YearEnd(month=5), 'A-JUN' : YearEnd(month=6), 'A-JUL' : YearEnd(month=7), 'A-AUG' : YearEnd(month=8), 'A-SEP' : YearEnd(month=9), 'A-OCT' : YearEnd(month=10), 'A-NOV' : YearEnd(month=11), 'A-DEC' : YearEnd(month=12), 'A' : YearEnd(month=12), # Annual - Calendar (start) 'AS-JAN' : YearBegin(month=1),
def create_data():
    """
    Create the pickle/msgpack data.

    Builds a set of pandas objects of many kinds and returns them grouped
    by kind.

    :return: dict with the keys ``series``, ``frame``, ``index``,
        ``scalars``, ``mi``, ``sp_series``, ``sp_frame``, ``cat``,
        ``timestamp`` and ``offsets``; each value is a dict mapping a short
        name to an object.
    """
    # Raw columns shared by several of the series and frames below.
    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.]
    }

    scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M'))

    index = dict(int=Index(np.arange(10)),
                 date=date_range('20130101', periods=10),
                 period=period_range('2013-01-01', freq='M', periods=10),
                 float=Index(np.arange(10, dtype=np.float64)),
                 uint=Index(np.arange(10, dtype=np.uint64)),
                 timedelta=timedelta_range('00:00:00', freq='30T', periods=10))

    # These index types are only added when the version check passes.
    if _loose_version >= LooseVersion('0.18'):
        from pandas import RangeIndex
        index['range'] = RangeIndex(10)

    if _loose_version >= LooseVersion('0.21'):
        from pandas import interval_range
        index['interval'] = interval_range(0, periods=10)

    mi = dict(reg2=MultiIndex.from_tuples(tuple(
        zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
              ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])),
        names=['first', 'second']))

    series = dict(float=Series(data['A']),
                  int=Series(data['B']),
                  mixed=Series(data['E']),
                  ts=Series(np.arange(10).astype(np.int64),
                            index=date_range('20130101', periods=10)),
                  mi=Series(np.arange(5).astype(np.float64),
                            index=MultiIndex.from_tuples(tuple(
                                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                                names=['one', 'two'])),
                  dup=Series(np.arange(5).astype(np.float64),
                             index=['A', 'B', 'C', 'D', 'A']),
                  cat=Series(Categorical(['foo', 'bar', 'baz'])),
                  dt=Series(date_range('20130101', periods=5)),
                  dt_tz=Series(
                      date_range('20130101', periods=5, tz='US/Eastern')),
                  period=Series([Period('2000Q1')] * 5))

    # Frame whose column labels contain a duplicate ('A' appears twice).
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(float=DataFrame({
        'A': series['float'],
        'B': series['float'] + 1
    }),
                 int=DataFrame({
                     'A': series['int'],
                     'B': series['int'] + 1
                 }),
                 mixed=DataFrame({k: data[k] for k in ['A', 'B', 'C', 'D']}),
                 mi=DataFrame(
                     {
                         'A': np.arange(5).astype(np.float64),
                         'B': np.arange(5).astype(np.int64)
                     },
                     index=MultiIndex.from_tuples(tuple(
                         zip(*[['bar', 'bar', 'baz', 'baz', 'baz'],
                               ['one', 'two', 'one', 'two', 'three']])),
                         names=['first', 'second'])),
                 dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                               columns=['A', 'B', 'A']),
                 cat_onecol=DataFrame({'A': Categorical(['foo', 'bar'])}),
                 cat_and_float=DataFrame({
                     'A': Categorical(['foo', 'bar', 'baz']),
                     'B': np.arange(3).astype(np.int64)
                 }),
                 mixed_dup=mixed_dup_df,
                 dt_mixed_tzs=DataFrame(
                     {
                         'A': Timestamp('20130102', tz='US/Eastern'),
                         'B': Timestamp('20130603', tz='CET')
                     },
                     index=range(5)),
                 dt_mixed2_tzs=DataFrame(
                     {
                         'A': Timestamp('20130102', tz='US/Eastern'),
                         'B': Timestamp('20130603', tz='CET'),
                         'C': Timestamp('20130603', tz='UTC')
                     },
                     index=range(5)))

    cat = dict(int8=Categorical(list('abcdefg')),
               int16=Categorical(np.arange(1000)),
               int32=Categorical(np.arange(10000)))

    timestamp = dict(normal=Timestamp('2011-01-01'),
                     nat=NaT,
                     tz=Timestamp('2011-01-01', tz='US/Eastern'))

    # The frequency is passed as ``offset=`` for versions below 0.19.2 and
    # as ``freq=`` otherwise.
    if _loose_version < LooseVersion('0.19.2'):
        timestamp['freq'] = Timestamp('2011-01-01', offset='D')
        timestamp['both'] = Timestamp('2011-01-01',
                                      tz='Asia/Tokyo',
                                      offset='M')
    else:
        timestamp['freq'] = Timestamp('2011-01-01', freq='D')
        timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M')

    # One instance of each date offset class listed here.
    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1)
    }

    # The sparse objects come from the module's _create_sp_* helpers.
    return dict(series=series,
                frame=frame,
                index=index,
                scalars=scalars,
                mi=mi,
                sp_series=dict(float=_create_sp_series(),
                               ts=_create_sp_tsseries()),
                sp_frame=dict(float=_create_sp_frame()),
                cat=cat,
                timestamp=timestamp,
                offsets=off)
def test_misspecified(self):
    """YearEnd rejects a month outside 1..12 with a ValueError."""
    expected_message = "Month must go from 1 to 12"
    with pytest.raises(ValueError, match=expected_message):
        YearEnd(month=13)