def process_5447231412():
    """Patch the IPO placeholder rows of ``all.xlsx`` and export the result.

    A row is patched when its security name is the subscription placeholder
    and its operation is one of the two short markers.  For such a row:

    * the security name is replaced by whatever
      ``SinaQuote.getNewStockName`` returns for the row's security code
      (an empty result is reported on stdout but is still written back);
    * the operation is replaced by the long-form label: the debit label
      for the first marker, the refund label for the other one.

    The whole frame is finally written out with ``to_excel`` using GBK
    encoding.
    """
    frame = normalize.parse_file(u'all.xlsx')
    for idx, record in frame.iterrows():
        action = record[u'操作']
        name = record[u'证券名称']
        code = record[u'证券代码']

        # Guard clause: everything below only concerns placeholder rows.
        if not (name == u'申购款' and action in [u'申', u'卖']):
            continue

        resolved_name = SinaQuote.getNewStockName(code)
        if resolved_name == '':
            print('error, new stock name not found:%s' % code)
        frame.loc[idx, u'证券名称'] = resolved_name

        # Only two markers can reach this point, so anything that is not
        # the debit marker gets the refund label.
        frame.loc[idx, u'操作'] = u'申购扣款' if action == u'申' else u'申购还款'

    frame.to_excel(u'调整后.xls', encoding='gbk')
def process_1488061441():
    """Relabel IPO debit rows in one specific statement workbook.

    Only rows whose entrust category is the "direct repayment" label are
    touched.  Among those:

    * if the deal number column already carries the debit label, only the
      entrust category is rewritten to that label;
    * otherwise, if the security name is the subscription placeholder, the
      name is replaced by ``SinaQuote.getNewStockName`` for the row's code
      (an empty result is reported on stdout but still written) and the
      entrust category is rewritten to the debit label as well.

    The adjusted frame is exported with ``to_excel`` using GBK encoding.
    """
    frame = normalize.parse_file(u'卓建华2014交割单1.201503050951.xlsx')
    for idx, record in frame.iterrows():
        category = record[u'委托类别']
        deal_no = record[u'成交编号']
        name = record[u'证券名称']
        code = record[u'证券代码']

        # Both rewrite rules require the "direct repayment" category.
        if not (category == u'直接还款'):
            continue

        if not (deal_no == u'申购扣款'):
            # Second rule: placeholder name that must be resolved first.
            if not (name == u'申购款'):
                continue
            resolved_name = SinaQuote.getNewStockName(code)
            if resolved_name == '':
                print('error, new stock name not found:%s' % code)
            frame.loc[idx, u'证券名称'] = resolved_name

        # Shared tail of both rules.
        frame.loc[idx, u'委托类别'] = u'申购扣款'

    frame.to_excel(u'调整后.xls', encoding='gbk')
def pre_process(df):
    """Normalize the ``operation`` column of a statement frame.

    Steps, in order:

    1. ``change_column_names(df)`` and ``merge_zhaiyao_and_operation(df)``
       are called on the frame.
    2. Every row is checked against a series of independent rules.  Each
       rule reads from ``row`` and writes its result to ``df.loc[i, ...]``;
       when several rules fire for one row the last write wins.
       NOTE(review): ``iterrows`` normally yields a copy of the row, so a
       rule presumably never sees a rewrite made by an earlier rule in the
       same iteration -- confirm before reordering anything.
    3. The output file name starts as the ordinary-account workbook and is
       switched to the margin-account workbook when any value in the
       ``operation`` column is one of three financing labels.

    Args:
        df: frame to normalize; it is modified in place by the steps above.

    Returns:
        A 2-tuple ``(df.apply(process_operation, axis=1), out_file)``.
    """
    change_column_names(df)
    merge_zhaiyao_and_operation(df)
    #df = df.apply(remove_space, axis=0)
    # only for account 2089526857 (Zhang Jie),
    for i, row in df.iterrows():
        # Fund transfer rows whose code is a unicode string starting with
        # AA0007: stock_number becomes the absolute actual_amount.
        if row[u'operation'] in [u'基金申购拨出', u'基金赎回拨入'] and \
           type(row[u'stock_code']) is unicode and row[u'stock_code'].startswith(u'AA0007'):
            df.loc[i, u'stock_number'] = abs(row[u'actual_amount'])
        # Fund subscription / redemption rows without a code (float NaN).
        if row[u'operation'] in [u'基金申购', u'基金赎回']:
            if type(row[u'stock_code']) is float and \
               math.isnan(row[u'stock_code']): # no stock code
                df.loc[i, u'operation'] = u'基金其他'
        # Fund split rows: the label depends on whether the name ends with
        # A or B.  stock_name is assumed to be a string here.
        if row[u'operation'] == u'基金分拆':
            if row[u'stock_name'].endswith((u'A', u'B')):
                df.loc[i, u'operation'] = u'分级基金分拆'
            else:
                df.loc[i, u'operation'] = u'基金分拆-母鸡'
        # Custody transfer in / out rows.
        if row[u'operation'] in [u'托管转入', u'托管转出']:
            #print row[u'stock_code']
            # Two hard-coded fund codes get their own label.
            if row[u'stock_code'] in [u'163113', u'165521']:
                df.loc[i, u'operation'] = u'分级基金-母基操作'
            #print row[u'operation'], row[u'stock_name'], type(row[u'stock_name'])
            # A non-empty answer from the new-stock lookup marks the row as
            # new-stock related.  Note the lookup is keyed by stock_name.
            stock_name = SinaQuote.getNewStockName(unicode(row[u'stock_name']))
            if stock_name != '':
                df.loc[i, u'operation'] = u'新股相关'
        # NOTE(review): nesting level of this check is assumed to be the
        # loop body -- confirm against version control.
        if row[u'stock_code'] == u'799999':
            df.loc[i, u'operation'] = u'登记指定相关'
        # Debug dump of rows with a numeric operation or an int stock_name.
        # The .startswith call right below fails on a non-string operation.
        if type(row[u'operation']) in [float, int] or type(row[u'stock_name']) in [int]:
            print row[u'operation'],row
        # Transfer-in rows whose unicode name ends with A or B.
        if row[u'operation'].startswith(u'托管转入') and type(row[u'stock_name']) is unicode and row[u'stock_name'].endswith((u'A', u'B')):
            df.loc[i, u'operation'] = u'分级基金分拆'
        # Transfer-in rows whose unicode name starts with DR.
        if row[u'operation'] == u'托管转入' and type(row[u'stock_name']) is unicode and row[u'stock_name'].startswith(u'DR'):
            df.loc[i, u'operation'] = u'红股入帐'
        # Rules that only apply when the frame has a deal-number column.
        if u'成交编号' in row.index:
            if row[u'operation'] == u'其他' and row[u'成交编号'] == u'股息差别税':
                df.loc[i, u'operation'] = u'股息差别税'
            if row[u'operation'] == u'转托' and row[u'成交编号'] == u'转托管转入':
                df.loc[i, u'operation'] = u'指定'
            if row[u'operation'] == u'托管转入' and row[u'成交编号'] == u'上市流通':
                df.loc[i, u'operation'] = u'新股相关'
            if row[u'operation'] == u'托管转出' and row[u'成交编号'] == u'上市转出':
                df.loc[i, u'operation'] = u'新股相关'
        # Rule that only applies when the frame has a 'beizhu' column.
        if 'beizhu' in row.index:
            # int(...) assumes stock_code is numeric and not NaN here.
            if row[u'operation'] == u'托管转出' and row['beizhu'] == u'托管转出' \
               and (str(int(row[u'stock_code'])) not in [u'163113', u'165521']):
                print row[u'stock_code'], row[u'operation'], row[u'beizhu']
                df.loc[i, u'operation'] = u'担保转出'
        # NOTE(review): nesting level of this check is assumed to be the
        # loop body -- confirm against version control.
        if row['operation'] == u'撤指' and row['stock_number'] == 0:
            df.loc[i, u'operation'] = u'转存管转出'
        # handle special case: integer code 82046 is stored as 2046
        if row[u'stock_code'] == 82046:
            df.loc[i, u'stock_code'] = 2046
    # Default to the ordinary-account workbook; any financing label in the
    # (already rewritten) operation column selects the margin-account one.
    out_file = '普通账户.xls'
    for o in df['operation']:
        if o in [u'融资利息扣款', u'融资买入', u'融资利息']:
            out_file = '融资融券账户.xls'
    return ( df.apply(process_operation, axis=1), out_file )
def _store_trade_fields(df, i, operation, stock_code, stock_name,
                        stock_number, stock_price):
    """Write the five trade columns parsed from a summary into row ``i``."""
    df.loc[i, u'买卖标志'] = operation
    df.loc[i, u'证券代码'] = stock_code
    df.loc[i, u'证券名称'] = stock_name
    df.loc[i, u'成交价格'] = stock_price
    df.loc[i, u'成交数量'] = stock_number


def _report_unexpected_operation(operation, expected, row):
    """Print a diagnostic when ``operation`` is not one of ``expected``."""
    if operation not in expected:
        print('!!!error operation %s row:\n%s' % (operation, row))


def parse_operation(i, row, df):
    """Classify one statement row from its summary text and update ``df``.

    The summary (column ``u'摘要'`` of ``row``) is tested against the
    module-level patterns in a fixed order; the first pattern that hits
    decides what is written and no later pattern is tried.  Simple hits
    only set the buy/sell flag column.  Hits on the trade patterns also
    write security code, security name, price and quantity taken from the
    pattern's capture groups (for new-stock rows the name comes from
    ``SinaQuote.getNewStockName``).  When a captured operation is not in
    the list expected for its pattern, a diagnostic is printed after the
    write.  A summary that matches nothing is printed as unknown.

    Each pattern is evaluated once per row and its match object is reused
    for the capture groups.

    Args:
        i: index label of the row inside ``df`` (used with ``df.loc``).
        row: the row being parsed; only ``u'摘要'`` is read from it.
        df: frame that receives the parsed values; modified in place.

    Returns:
        None.

    Raises:
        ValueError: if a captured quantity or price cannot be converted
            with ``float``; nothing has been written for the row then.
    """
    zhaiyao = row[u'摘要']

    # --- summaries that only determine the flag -------------------------
    if re.match(match_borrow_cash, zhaiyao):
        df.loc[i, u'买卖标志'] = u'融资借款'
        return
    if re.match(match_interest_in, zhaiyao):
        df.loc[i, u'买卖标志'] = u'结息入账'
        return
    if re.search(search_bank_in_money, zhaiyao):
        df.loc[i, u'买卖标志'] = u'银行转入'
        return
    if re.search(search_bank_out_money, zhaiyao):
        df.loc[i, u'买卖标志'] = u'银行转出'
        return
    if (re.match(match_hongli, zhaiyao)
            or re.match(match_hongli1, zhaiyao)
            or re.search(search_shenshu, zhaiyao)):
        df.loc[i, u'买卖标志'] = u'红利'
        return
    if re.search(search_tax, zhaiyao):
        df.loc[i, u'买卖标志'] = u'股息差别税'
        return

    # --- margin trade: operation, name, code, quantity, price -----------
    result = re.match(match_rzrq_in, zhaiyao)
    if result:
        operation = result.group(2)
        stock_name = result.group(3)
        stock_code = result.group(4)
        # Convert before writing so a bad number leaves the row untouched.
        stock_number = float(result.group(5))
        stock_price = float(result.group(6))
        _store_trade_fields(df, i, operation, stock_code, stock_name,
                            stock_number, stock_price)
        _report_unexpected_operation(
            operation,
            [u'融资买入', u'信用买入', u'卖券还款', u'信用卖出', u'融券卖出', u'买券还券'],
            row)
        return

    # --- new-stock subscription: name is looked up from the code --------
    result = re.match(match_new_stock, zhaiyao)
    if result:
        operation = result.group(2)
        stock_code = result.group(4)
        stock_number = float(result.group(5))
        stock_price = float(result.group(6))
        stock_name = SinaQuote.getNewStockName(stock_code)
        _store_trade_fields(df, i, operation, stock_code, stock_name,
                            stock_number, stock_price)
        _report_unexpected_operation(
            operation,
            [u'申购扣款', u'申购还款', u'申购中签款'],
            row)
        return

    # --- margin repayment by selling: groups are shifted by one ---------
    result = re.match(match_rzrq_out, zhaiyao)
    if result:
        operation = result.group(1)
        stock_name = result.group(4)
        stock_code = result.group(5)
        stock_number = float(result.group(6))
        stock_price = float(result.group(7))
        _store_trade_fields(df, i, operation, stock_code, stock_name,
                            stock_number, stock_price)
        _report_unexpected_operation(
            operation,
            [u'卖券偿还本金', u'卖出偿还本金', u'卖券偿还融资利息',
             u'卖券偿还融券费用', u'卖出偿还利息', u'偿还融资其它费用'],
            row)
        return

    # --- direct repayment: only the flag is taken from the summary ------
    result = re.match(match_rzrq_return, zhaiyao)
    if result:
        operation = result.group(1)
        df.loc[i, u'买卖标志'] = operation
        _report_unexpected_operation(
            operation,
            [u'偿还本金', u'偿还融资利息', u'偿还融券费用'],
            row)
        return

    if re.match(match_rzrq_pay_interest, zhaiyao):
        df.loc[i, u'买卖标志'] = u'偿还融资利息'
        return

    print('!!!! unknown row %s' % row)