def plus_low_to_high(self, file, file_l, columns_l, columns, dir_path, transform_path):
    """Add the first row of a secondary CSV onto the first row of a primary CSV.

    ``file_l`` is read from ``dir_path``, reduced to ``columns_l`` and
    relabelled as ``columns``.  Its first-row values are then added, cell by
    cell and by position, to the first ``len(columns)`` cells of the first
    row of ``file``.  The updated primary table is written to
    ``transform_path`` under the same file name.

    Args:
        file: Name of the primary CSV inside ``dir_path``.
        file_l: Name of the secondary CSV inside ``dir_path``.
        columns_l: Column labels to select from the secondary CSV.
        columns: Labels assigned to the selected secondary columns; its
            length is the number of leading cells that get summed.
        dir_path: Directory both inputs are read from.
        transform_path: Directory the result is written to.
    """
    low_path = "{}/{}".format(dir_path, file_l)
    low = pd.read_csv(low_path, encoding='gbk')
    low = ut.check_dataframe(low).loc[:, columns_l].fillna(0)
    low.columns = columns

    high_path = "{}/{}".format(dir_path, file)
    high = pd.read_csv(high_path, encoding='gbk')
    high = ut.check_dataframe(high).fillna(0)

    # ``DataFrame.ix`` was deprecated in pandas 0.20 and removed in 1.0.
    # The access here is purely positional (first row, i-th column), which
    # is what ``.iloc`` expresses explicitly.
    # NOTE(review): assumes ut.check_dataframe leaves the first data row at
    # position 0 -- confirm against its implementation.
    for position in range(len(columns)):
        high.iloc[0, position] = high.iloc[0, position] + low.iloc[0, position]

    high.to_csv("{}/{}".format(transform_path, file), encoding='gbk', index=False)
def concat_low_to_high(self, type, file, file_l, columns_l, columns, dir_path, transform_path):
    """Append the rows of a secondary CSV to a primary CSV and save the result.

    The secondary file ``file_l`` is reduced to ``columns_l`` and relabelled
    as ``columns`` before being concatenated below the rows of ``file``.
    When ``type`` equals 1, a ``stock`` flag column is added (1 when the
    first two characters of ``Id`` are ``IF``, ``IH`` or ``IC``, else 0) and
    only flagged rows are kept.  Output goes to ``transform_path`` under the
    primary file's name.
    """
    def _read_checked(name):
        # Load one GBK-encoded CSV from dir_path and run the shared check.
        frame = pd.read_csv("{}/{}".format(dir_path, name), encoding='gbk')
        return ut.check_dataframe(frame)

    low = _read_checked(file_l).loc[:, columns_l].fillna(0)
    low.columns = columns

    high = _read_checked(file).fillna(0)

    merged = pd.concat([high, low])
    if type == 1:
        merged['stock'] = merged['Id'].apply(
            lambda code: 1 if code[:2] in ('IF', 'IH', 'IC') else 0
        )
        keep = merged['stock'] == 1
        merged = merged[keep]

    output_path = "{}/{}".format(transform_path, file)
    merged.to_csv(output_path, encoding='gbk', index=False)
def transform_normal(self, input_file='trade.xls',
                     trade_columns=[u"证券代码", u"成交价格", u"成交数量", u"手续费", u"买卖标志"]):
    """Convert a tab-separated trade export into ``trade.csv``.

    The file is read from ``self.manual_path``, reduced to ``trade_columns``
    and relabelled with ``self.column_name['trade']``.  Missing values become
    0 and, when a ``Direction`` column is present, each value is passed
    through ``ut.standard_csv_direction``.  The result is written to
    ``self.manual_transform_path``.
    """
    source = "{}/{}".format(self.manual_path, input_file)
    raw = pd.read_csv(source, sep='\t', encoding='gbk')
    checked = ut.check_dataframe(raw)

    trade = checked.loc[:, trade_columns].fillna(0)
    trade.columns = self.column_name['trade']

    if 'Direction' in trade.columns:
        normalised = trade['Direction'].apply(ut.standard_csv_direction)
        trade['Direction'] = normalised

    target = "{}/trade.csv".format(self.manual_transform_path)
    trade.to_csv(target, encoding='gbk', index=False)
def holding_zhongxin_security_normal(self, input_file='holding.xls', sep='\t',
                                     asset_columns=[u'冻结金额', u'可用', u'参考市值', u'资产'],
                                     holding_columns=[u'证券代码', u'证券数量', u'当前价', u'成本价', u'浮动盈亏']):
    """Split one delimited holding export into ``asset.csv`` and ``holding.csv``.

    The asset part is the first data row of the file; the holding part is the
    table that starts after skipping three lines.  Both are reduced to the
    requested columns, relabelled from ``self.column_name`` and written to
    ``self.manual_transform_path``.  Holding rows are kept only when
    ``ut.find_type`` returns 1 for their ``Id``, which is then cast to int.
    """
    source = "{}/{}".format(self.manual_path, input_file)

    # Asset summary: only the first data row is read.
    asset_raw = pd.read_csv(source, sep=sep, encoding='gbk', nrows=1)
    asset = ut.check_dataframe(asset_raw).loc[:, asset_columns].fillna(0)
    asset.columns = self.column_name['asset']
    asset.to_csv("{}/asset.csv".format(self.manual_transform_path), encoding='gbk', index=False)

    # Holding table: begins after the first three lines of the same file.
    holding_raw = pd.read_csv(source, sep=sep, encoding='gbk', skiprows=3)
    holding = ut.check_dataframe(holding_raw)
    holding = holding.loc[:, holding_columns].fillna(0)
    holding.columns = self.column_name['holding']

    # Temporary marker column, dropped again once the rows are filtered.
    holding['int_type'] = holding['Id'].apply(ut.find_type)
    holding = holding[holding['int_type'] == 1].drop('int_type', axis=1)
    holding['Id'] = holding['Id'].apply(int)

    holding.to_csv("{}/holding.csv".format(self.manual_transform_path), encoding='gbk', index=False)
def transform_trade_fut(self, input_file, trade_columns, l=0):
    """Convert a futures trade CSV into ``trade_fut.csv`` or ``trade_fut_l.csv``.

    The file is read from ``self.manual_path``, reduced to ``trade_columns``
    and relabelled with ``self.column_name['trade_fut']``.  A ``stock`` flag
    column marks rows whose ``Id`` starts with ``IF``, ``IH`` or ``IC``; only
    those rows are written.  ``l == 0`` selects ``trade_fut.csv``, any other
    value selects ``trade_fut_l.csv``.
    """
    source = "{}/{}".format(self.manual_path, input_file)
    trade = ut.check_dataframe(pd.read_csv(source, encoding='gbk'))
    trade = trade.loc[:, trade_columns].fillna(0)
    trade.columns = self.column_name['trade_fut']

    if 'Direction' in trade.columns:
        trade['Direction'] = trade['Direction'].apply(ut.standard_csv_direction)

    trade['stock'] = trade['Id'].apply(
        lambda code: 1 if code[:2] in ('IF', 'IH', 'IC') else 0
    )
    trade = trade[trade['stock'] == 1]

    template = "{}/trade_fut.csv" if l == 0 else "{}/trade_fut_l.csv"
    trade.to_csv(template.format(self.manual_transform_path), encoding='gbk', index=False)
def holding_zhongxin_security_17(self, input_file='holding.xls',
                                 asset_columns=[u'冻结金额', u'可用', u'参考市值', u'总资产'],
                                 holding_columns=[u'证券代码', u'拥股数量', u'最新价', u'盈亏成本', u'浮动盈亏']):
    """Split an Excel holding export into ``asset.csv`` and ``holding.csv``.

    The asset part is the first row of the sheet read with ``skiprows=5``;
    the holding part is the sheet read with ``skiprows=8`` minus its last
    row.  Both are reduced to the requested columns, relabelled from
    ``self.column_name`` and written to ``self.manual_transform_path``.
    Holding rows are kept only when ``ut.find_type`` returns 1 for their
    ``Id``, which is then cast to int.

    Args:
        input_file: Workbook name inside ``self.manual_path``.
        asset_columns: Source column labels for the asset row.
        holding_columns: Source column labels for the holding table.
    """
    input_path = "{}/{}".format(self.manual_path, input_file)

    asset = pd.read_excel(input_path, skiprows=5).iloc[[0], :]
    asset = ut.check_dataframe(asset).loc[:, asset_columns].fillna(0)
    asset.columns = self.column_name['asset']
    # Strip comma separators from the second asset column.  ``.ix`` was
    # removed in pandas 1.0, so the single-cell access is positional.
    asset.iloc[0, 1] = str(asset.iloc[0, 1]).replace(',', '')
    asset.to_csv("{}/asset.csv".format(self.manual_transform_path), encoding='gbk', index=False)

    # Reading the sheet without skiprows was disabled by the original author
    # because the holding.xls downloaded for No. 17 has an unstable format.
    holding = pd.read_excel(input_path, skiprows=8)
    # Drop the row labelled with the last position of the default index.
    last_label = len(holding) - 1
    holding = holding.drop(last_label, axis=0)
    holding = ut.check_dataframe(holding)
    holding = holding.loc[:, holding_columns].fillna(0)
    holding.columns = self.column_name['holding']

    # Temporary marker column, removed again once the rows are filtered.
    holding['int_type'] = holding['Id'].apply(ut.find_type)
    holding = holding[holding['int_type'] == 1].drop('int_type', axis=1)
    holding['Id'] = holding['Id'].apply(int)
    holding.to_csv("{}/holding.csv".format(self.manual_transform_path), encoding='gbk', index=False)
def transform_holding_fut(self, input_file, holding_columns, l=0):
    """Convert a futures holding CSV into ``holding_fut.csv`` or ``holding_fut_l.csv``.

    The file is read from ``self.manual_path``, reduced to
    ``holding_columns`` and relabelled with
    ``self.column_name['holding_fut']``.  A ``stock`` flag column marks rows
    whose ``Id`` starts with ``IF``, ``IH`` or ``IC``; only those rows are
    written.  ``l == 0`` selects ``holding_fut.csv``, any other value selects
    ``holding_fut_l.csv``.
    """
    source = "{}/{}".format(self.manual_path, input_file)
    holding = ut.check_dataframe(pd.read_csv(source, encoding='gbk'))
    holding = holding.loc[:, holding_columns].fillna(0)
    holding.columns = self.column_name['holding_fut']

    if 'Direction' in holding.columns:
        holding['Direction'] = holding['Direction'].apply(ut.standard_csv_direction)

    # Exclude commodity futures; keep only stock index futures.
    holding['stock'] = holding['Id'].apply(
        lambda code: 1 if code[:2] in ('IF', 'IH', 'IC') else 0
    )
    holding = holding[holding['stock'] == 1]

    template = "{}/holding_fut.csv" if l == 0 else "{}/holding_fut_l.csv"
    holding.to_csv(template.format(self.manual_transform_path), encoding='gbk', index=False)
def transform_trade_17(self, input_file='trade_l.xls', sep='\t',
                       columns=[u"证券代码", u"成交价格", u"成交数量", u"手续费", u"买卖标志"]):
    """Copy the selected columns of a delimited trade export to ``trade_l.csv``.

    The file is read from ``self.manual_path``, reduced to ``columns`` with
    missing values replaced by 0, and written unchanged otherwise (the
    source column labels are kept) to ``self.manual_transform_path``.
    """
    source = "{}/{}".format(self.manual_path, input_file)
    raw = pd.read_csv(source, sep=sep, encoding='gbk')
    selected = ut.check_dataframe(raw).loc[:, columns].fillna(0)

    target = "{}/trade_l.csv".format(self.manual_transform_path)
    selected.to_csv(target, encoding='gbk', index=False)
def T(self, input_file='trade.csv', sep=',', usecols=None, dir_path=None, process=1):
    """Normalise one raw table (or several) and write it to ``self.path``.

    A list or tuple of file names is processed one file at a time with the
    same options.  For a single file the table name is the part of the file
    name before the first dot (``trade1`` maps to ``trade``); the result is
    written to ``<self.path>/<table_name>.csv``.

    Fix over the previous version: the holding-specific steps were guarded
    by ``input_file == 'holding.csv'`` *after* ``input_file`` had been
    replaced by a full path, so they never ran, and the id range check used
    a chained comparison on a Series, which raises ``ValueError``.  The bare
    file name is now remembered and the range check uses ``&``; it runs
    after the ids are converted to int so the comparison is numeric.

    NOTE(review): block nesting was reconstructed from a flattened source
    line -- confirm against the original layout.

    Args:
        input_file: File name, or list/tuple of file names.
        sep: Delimiter for ``pd.read_csv``; a falsy value switches to
            ``pd.read_excel``.
        usecols: Optional column labels to keep before renaming.
        dir_path: Directory to read from; defaults to ``self.manual_path``.
        process: 1 renames columns by position to
            ``self.column_name[table_name]``; any other value reindexes the
            columns to those labels when the column count differs.
    """
    if isinstance(input_file, (tuple, list)):
        for single_file in input_file:
            self.T(input_file=single_file, sep=sep, usecols=usecols,
                   dir_path=dir_path, process=process)
        return

    file_name = input_file
    base_name = file_name.split('.')[0]
    table_name = 'trade' if base_name == 'trade1' else base_name
    # Decide this on the bare name, before the directory is prepended.
    is_holding_file = file_name == 'holding.csv'

    input_path = "{}/{}".format(dir_path if dir_path else self.manual_path, file_name)

    if sep:
        df = pd.read_csv(input_path, sep=sep, index_col=False, encoding='gbk')
    else:
        # NOTE(review): newer pandas releases reject ``encoding`` here --
        # confirm against the pandas version this project pins.
        df = pd.read_excel(input_path, encoding='gbk')
    df = ut.check_dataframe(df)

    # Drop rows whose status column carries the cancelled-order marker.
    if u'成交状态' in df.columns:
        df = df[df[u'成交状态'] != u'撤单成交']
    if usecols:
        df = df.loc[:, usecols]
    df = df.fillna(0)  # missing values default to 0

    expected_columns = self.column_name[table_name]
    if process == 1:
        if len(df.columns) != len(expected_columns):
            # Keep only as many leading columns as there are target labels.
            df = df.iloc[:, list(range(len(expected_columns)))]
        df.columns = expected_columns
        # Normalise the buy/sell or long/short marker where the table has one.
        if 'Direction' in df.columns:
            df['Direction'] = df['Direction'].apply(ut.standard_csv_direction)
    else:
        if len(df.columns) != len(expected_columns):
            df = df.reindex(columns=expected_columns)

    if 'Id' in df.columns:
        df = df[df['Id'] != 0]

    if 'Holding' in df.columns:
        df['Holding'] = df['Holding'].astype(int)

    if is_holding_file:
        # Same filter the dedicated holding converters apply: keep rows for
        # which ut.find_type returns 1, then store the id as an int.
        df = df[df['Id'].apply(ut.find_type) == 1].copy()
        df['Id'] = df['Id'].apply(int)
        df = df[(df['Id'] > 0) & (df['Id'] < 1000000)]

    if table_name.endswith('fut') and table_name != 'asset_fut':
        # Exclude commodity futures; keep only stock index futures.
        df['stock'] = df['Id'].apply(
            lambda code: 1 if code[:2] in ('IF', 'IH', 'IC') else 0
        )
        df = df[df['stock'] == 1]

    df.to_csv("{}/{}.csv".format(self.path, table_name), encoding='gbk', index=False)