def processDBTrades(self, trades):
    '''
    Run the methods to create the new DataFrame and fill in the data for the new
    trade-centric (as opposed to transaction-centric; trades may contain multiple
    transactions) DataFrame.

    :params trades: DataFrame of transactions to be organized into trades.
    :return: (dframe, ldf) where dframe is the trade-centric DataFrame padded
        with extra rows and ldf is the list of single-trade DataFrames.
    '''
    rc = self._frc

    # Process the output file DataFrame
    trades = self.addFinReqCol(trades)
    rccolumns = rc.columns.copy()
    rccolumns = self.appendCols(rccolumns)

    # Take an explicit copy so later column assignments do not hit a slice of
    # `trades`. (The original called .copy() and discarded the result -- a bug.)
    newTrades = trades[rccolumns].copy()
    nt = newTrades.sort_values([rc.ticker, rc.acct, rc.date])
    nt = self.addStartTimeDB(nt)
    nt = nt.sort_values([rc.start, rc.ticker, rc.acct, rc.date, rc.time],
                        ascending=True)
    nt = self.addTradeIndex(nt)
    nt = self.addTradePL(nt)
    nt = self.addTradeDurationDB(nt)
    nt = self.addTradeNameDB(nt)
    ldf, nt = self.postProcessingDB(self.getTradeList(nt))
    nt = DataFrameUtil.addRows(nt, 2)
    nt = self.addSummaryPL(nt)

    # Pad the final frame with two blank rows for presentation.
    dframe = DataFrameUtil.addRows(nt, 2)
    return dframe, ldf
def loadTradeSummaries(loc, trades):
    '''
    Load up each trade summary in the excel doc into a 1 row DataFrame, return a
    list of these DataFrames. The addresses are supplied by srf plus the loc
    object that has the anchors.

    :params loc: A list of the rows within the excel doc on which to find the
        trade summaries.
    :params trades: The excel source read per-cell via trades[cell].value --
        presumably an openpyxl worksheet; verify against caller.
    :return: (ldf, ts) -- a list of 1-row DataFrames, plus a dict of the same
        frames keyed by '<ordinal> <trade name>'. Each trade is on one row from
        each of the trade summary forms.
    '''
    ldf = list()
    ts = dict()
    srf = SumReqFields()
    reqCol = srf.rc
    newdf = pd.DataFrame(columns=reqCol.values())
    colFormat = srf.tfcolumns
    for i, rowNum in enumerate(loc):
        # Fresh single-row frame for this summary form.
        newdf = DataFrameUtil.createDf(newdf, 1)
        for key in reqCol.keys():
            # These keys have no cell on the excel form; skip them.
            if key in ['date', 'clean', 'id']:
                continue
            cell = colFormat[reqCol[key]][0]
            # Ranged entries store a list of cells; use the first (top-left).
            if isinstance(cell, list):
                cell = cell[0]
            # Translate the form-relative address to this form's absolute cell.
            cell = tcell(cell, anchor=(1, rowNum))
            newdf.iloc[-1][reqCol[key]] = trades[cell].value
        # Key like '1 AAPL Long' -- ordinal plus the trade's Name field.
        tradekey = str(i + 1) + ' ' + newdf.iloc[0].Name
        ts[tradekey] = newdf
        ldf.append(newdf)
    return ldf, ts
def combinePartialsFlexCSV(self, t):
    '''
    In flex Statements, the TRNT (Trades) table input might be in transactions
    instead of tickets, identified by LevelOfDetail=EXECUTION without the summary
    rows identified by LevelOfDetail=ORDERS. This is fixable (in both Activity
    statements and Trade statements) by changing Options to include Orders. If we
    have Executions only, we need to recombine the partials as identified by
    IBOrderID. If we also lack that column, blitz the sucker. Its not that hard
    to get a new statement.

    New wrinkle. There are some orders that have the same datetime making any
    sort by time void and leaving the balance up to chance which is first. While
    these might be different orders by IB, the trader ordered them as a single
    ticket -- and we will combine them.

    :t: Is a TRNT DataFrame. That is a Trades table from a CSV multi table doc in
        which TRNT is the tableid.
    :assert: Tickets written at the exact same time are partials, identified by
        Notes/Codes == P (change name to Codes) and by having a single Symbol.
    :prerequisite: Must have the columns
        ['Price', 'Commission', 'Quantity', 'LevelOfDetail', 'Codes']
    :raise ValueError: If the table mixes levels of detail or is not at
        execution level.
    '''
    lod = t['LevelOfDetail'].unique()
    # The original used `assert ValueError(...)`, which never fires because the
    # exception instance is truthy -- raise the error as intended.
    if len(lod) > 1:
        raise ValueError('I need to see this')
    if lod[0].lower() != 'execution':
        raise ValueError('I need to see this')
    t = t[t['LevelOfDetail'].str.lower() == 'execution']
    newdf = pd.DataFrame()
    for tickerKey in t['Symbol'].unique():
        ticker = t[t['Symbol'] == tickerKey]
        codes = ticker['Codes'].unique()
        for code in codes:
            # NaN (float) codes carry no partial information; skip them.
            if isinstance(code, float):
                continue
            parts = ticker[ticker['Codes'] == code]
            ticketKeys = parts['IBOrderID'].unique()
            for ticketKey in ticketKeys:
                ticket = parts[parts['IBOrderID'] == ticketKey]
                if len(ticket) > 1:
                    # Combine the partial executions into a single ticket row.
                    thisticket = DataFrameUtil.createDf(ticket.columns, 1)
                    net = 0.0
                    # Need to figure the average price of the transactions and
                    # sum of quantity and commission
                    for i, row in ticket.iterrows():
                        net = net + (float(row['Price']) * int(row['Quantity']))
                    for col in list(thisticket.columns):
                        if col not in ['Quantity', 'Price', 'Commission']:
                            thisticket[col] = ticket[col].unique()[0]
                    thisticket['Quantity'] = ticket['Quantity'].map(int).sum()
                    thisticket['Commission'] = ticket['Commission'].map(float).sum()
                    thisticket['Price'] = net / ticket['Quantity'].map(int).sum()
                    newdf = newdf.append(thisticket)
                else:
                    newdf = newdf.append(ticket)
    return newdf
def imageData(self, df, ldf, ft="png"):
    '''
    Gather the image names and determine the locations in the Excel doc to place
    them. Excel has a few things at top followed by trade summaries, charts and
    tables for each trade. Return with the image name/location data structure.
    The structure can be used for the Excel DataFrame -- to navigate summary form
    locations and just for the names.

    :params df: The DataFrame representing the input file plus some stuff added
        in processOutputFile.
    :params ldf: A list of dataFrames. Each encapsulates a trade.
    :params ft: Image filetype extension. (NOT USED)
    :return (imageLocation, df): imageLocation contains information about the
        excel document locations of trade summaries and image locations. The
        dataFrame df is the outline used to create the workbook; imageLocation
        will be used to style it and fill in the stuff.
    '''
    frq = FinReqCol()

    # Push the input frame down by topMargin blank rows.
    margin = DataFrameUtil.createDf(df, self.topMargin)
    df = margin.append(df, ignore_index=True)

    imageLocation = list()
    count = 0
    for trade in ldf:
        tix = trade[frq.tix].unique()[-1]
        name = trade[frq.name].unique()[-1]
        start = trade[frq.start].unique()[-1]
        dur = trade[frq.dur].unique()[-1]
        imageName = f"{tix.replace(' ', '')}_{name.replace(' ', '-')}_{start}_{dur}.{ft}"

        # [row location, deprecated name, image name, trade start, duration delta]
        imageLocation.append([
            len(trade) + len(df) + self.spacing,
            trade.Tindex.unique()[0].replace(' ', '') + '.' + ft,
            imageName,
            trade.Start.unique()[-1],
            trade.Duration.unique()[-1]
        ])
        count += 1

        # Append the mini trade table then add rows to fit the tradeSummary form.
        df = df.append(trade, ignore_index=True)
        df = DataFrameUtil.addRows(df, self.summarySize)
    return imageLocation, df
def testCheckReqColumnsWithReqColSuccess(self):
    '''Test return values of DataFrameUtil.checkRequiredInputFields'''
    reqCol = ReqCol()
    finReqCol = FinReqCol()
    frame = pd.DataFrame(columns=finReqCol.columns)

    passedFin = passedReq = False
    try:
        # A frame with the full FinReqCol columns satisfies both column sets.
        passedFin = DataFrameUtil.checkRequiredInputFields(frame, finReqCol.columns)
        passedReq = DataFrameUtil.checkRequiredInputFields(frame, reqCol.columns)
    except ValueError as ex:
        print(ex)

    self.assertTrue(passedFin)
    self.assertTrue(passedReq)
def test_dfUtil_addRow(self):
    '''Test method DataFrameUtil.addRows'''
    headers = ['Its', 'the', 'end', 'of', 'the', 'world',
               'as', 'we', 'know', 'it']
    rows = 9
    firstFill = 'something silly'
    secondFill = 'sillier'

    frame = DataFrameUtil.createDf(headers, rows, fill=firstFill)
    frame = DataFrameUtil.addRows(frame, rows, fill=secondFill)
    self.assertEqual(len(frame), rows * 2)

    # First half keeps the original fill; the appended half has the new fill.
    for rownum in range(rows):
        for value in frame.iloc[rownum]:
            self.assertEqual(value, firstFill)
    for rownum in range(rows, rows * 2):
        for value in frame.iloc[rownum]:
            self.assertEqual(value, secondFill)
def testCheckRequiredColumnsThrow(self):
    '''Test DataFrameUtil.checkRequiredInputFields for raising exceptions'''
    vals = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']]
    apd = pd.DataFrame(vals)
    # NOTE(review): the nested list makes pandas build a MultiIndex here --
    # presumably a plain column list was intended; verify.
    apd.columns = [['Its', 'the', 'end', 'of', 'the', 'world',
                    'as', 'we', 'know', 'it']]
    columns = ['Its', 'the', 'end', 'of', 'the', 'world',
               'as', 'we', 'know', 'it', 'sofuckit']

    # The extra required column 'sofuckit' must trigger a ValueError.
    try:
        DataFrameUtil.checkRequiredInputFields(apd, columns)
    except ValueError:
        pass
    except Exception as ex:
        msg = "{0}{1}".format("Unexpected exception. ", ex)
        self.fail(msg)
    else:
        self.fail("Failed to throw expected exception")

    vals = [[1, 2, 3, 4, 5, 6, 7, 8, 9],
            ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']]
    apd = pd.DataFrame(vals, columns=['Its', 'the', 'end', 'of', 'world',
                                      'as', 'we', 'know', 'it'])

    gotve = False
    try:
        DataFrameUtil.checkRequiredInputFields(apd, columns)
    except ValueError:
        gotve = True
    except Exception as ex:
        # Fixed: the original concatenated str + Exception, which raises
        # TypeError itself and masked the real failure message.
        self.fail("Wrong exception was thrown: {}".format(ex))
    finally:
        self.assertTrue(gotve, "Failed to throw a Value Error Exception")
def test_dfUtil_createDf(self):
    '''Test method DataFrameUtil.createDf'''
    headers = ['Its', 'the', 'end', 'of', 'the', 'world',
               'as', 'we', 'know', 'it']
    colsFrame = pd.DataFrame(columns=headers)
    rows = 9
    fill = ''

    # createDf should accept either a DataFrame or a plain column list.
    fromFrame = DataFrameUtil.createDf(colsFrame, rows, fill)
    fromList = DataFrameUtil.createDf(headers, rows, fill)

    self.assertEqual(list(fromFrame.columns), list(fromList.columns))
    self.assertEqual(len(fromFrame), len(fromList))

    for left, right in zip(fromFrame.iloc[1], fromList.iloc[1]):
        self.assertEqual(left, right)
        self.assertEqual(left, fill)

    # With a None fill the values must differ from the ''-filled frame.
    fill = None
    fromList = DataFrameUtil.createDf(headers, rows, fill)
    for left, right in zip(fromFrame.iloc[1], fromList.iloc[1]):
        self.assertTrue(left != right)
        self.assertEqual(right, fill)
def testCheckrequiredColumnsWithReqColFail(self):
    '''Test method DataFrameUtil.checkRequiredInputFields'''
    reqCol = ReqCol()
    finReqCol = FinReqCol()
    fail = pd.DataFrame(
        columns=['Time', 'Symb', 'Side', 'Price', 'Qty', 'Account'])
    rc = pd.DataFrame(columns=reqCol.columns)

    # Both frames lack required columns, so each check must raise ValueError.
    cases = (
        (fail, reqCol.columns, "Failed to throw value error"),
        (rc, finReqCol.columns, "Failed to throw a ValueError"),
    )
    for frame, required, failmsg in cases:
        gotve = False
        try:
            DataFrameUtil.checkRequiredInputFields(frame, required)
        except ValueError:
            gotve = True
        finally:
            self.assertTrue(gotve, failmsg)
def combinePartialsFlexTrade(self, t):
    '''
    The necessity of a new method to handle this is annoying... BUT, the
    Open/Close info is not in any of the available fields. Instead, a less
    rigorous system is used based on OrderID.

    :t: A trades DataFrame at execution LevelOfDetail.
    :return: A new DataFrame with partial executions combined into single
        tickets keyed by OrderID.
    :raise ValueError: If the table mixes levels of detail or is not at
        execution level.
    '''
    lod = t['LevelOfDetail'].unique()
    # The original used `assert ValueError(...)`, which never fires because the
    # exception instance is truthy -- raise the error as intended.
    if len(lod) > 1:
        raise ValueError('I need to see this')
    if lod[0].lower() != 'execution':
        raise ValueError('I need to see this')
    t = t[t['LevelOfDetail'].str.lower() == 'execution']
    newdf = pd.DataFrame()
    for tickerKey in t['Symbol'].unique():
        ticker = t[t['Symbol'] == tickerKey]
        ticketKeys = ticker['OrderID'].unique()
        for ticketKey in ticketKeys:
            ticket = ticker[ticker['OrderID'] == ticketKey]
            if len(ticket) > 1:
                # Multi-row orders must all be flagged as partials ('P' code).
                codes = ticket['Codes']
                for code in codes:
                    assert code.find('P') > -1
                thisticket = DataFrameUtil.createDf(ticket.columns, 1)
                net = 0.0
                # Need to figure the average price of the transactions and sum
                # of quantity and commission
                for i, row in ticket.iterrows():
                    net = net + (float(row['Price']) * int(row['Quantity']))
                for col in list(thisticket.columns):
                    if col not in ['Quantity', 'Price', 'Commission']:
                        thisticket[col] = ticket[col].unique()[0]
                thisticket['Quantity'] = ticket['Quantity'].map(int).sum()
                thisticket['Commission'] = ticket['Commission'].map(float).sum()
                thisticket['Price'] = net / ticket['Quantity'].map(int).sum()
                newdf = newdf.append(thisticket)
            else:
                newdf = newdf.append(ticket)
    return newdf
def getStatementType(infile):
    '''
    Determine if infile is a statement. If it is, return a tuple (data, type).

    TODO: Not doing what I said... If it is a DAS statement, determine if it
    matches the current date. As DAS statements do not include dates, the date in
    structjour and the directory structure date must match. If they don't match,
    the program, at a higher level, will pop a query to get the date of the
    statement.

    :params infile: Path of the file to classify.
    :return: (data, type) where type is one of 'IB_CSV', 'DAS' or 'IB_HTML';
        (None, None) if the file is not recognized. (None, 'DAS') signals a DAS
        statement whose directory date did not match.
    '''
    # The original bound the basename to `file` (shadowing the builtin) and
    # never used it.
    _, ext = os.path.splitext(infile)
    ext = ext.lower()
    if not os.path.exists(infile) or (ext != '.csv' and not ext.startswith('.htm')):
        return None, None
    if ext == '.csv':
        # IB flex/activity CSVs are multi-table docs; read with generous columns.
        df = pd.read_csv(infile, names=[x for x in range(0, 100)])
        firstcell = df.iloc[0][0]
        if firstcell in ('BOF', 'HEADER', 'ClientAccountID', 'Statement'):
            return df, "IB_CSV"

        df = pd.read_csv(infile)
        if not df.empty:
            requiredFields = list(ReqCol().columns)
            requiredFields.remove('Date')
            # A small hack to allow tradesByTickets to pass as a DAS export
            if 'PnL' not in df.columns:
                requiredFields.remove('PnL')
                requiredFields.append('P / L')
            try:
                if DataFrameUtil.checkRequiredInputFields(df, requiredFields):
                    if not checkDateDir(infile):
                        # Date mismatch: return type only so the caller can
                        # query the user for the statement date.
                        return None, 'DAS'
                    return df, 'DAS'
            except ValueError:
                pass
    elif ext.startswith('.htm'):
        soup = BeautifulSoup(readit(infile), 'html.parser')
        tbldivs = soup.find_all("div", id=lambda x: x and x.startswith('tbl'))
        if tbldivs:
            return tbldivs, 'IB_HTML'
    return None, None
def __init__(self, df, interview, sf):
    '''
    Create a dataframe that includes all the summary material for review. Some of
    this data comes from the program and some of it comes from the user. The user
    will determine which parts to fill out from a couple of options.

    :params df: A DataFrame that includes the transactions, or tickets, from a
        single trade.
    :params interview: Stored on the instance; presumably controls whether the
        user is interviewed for summary data -- verify against callers.
    :params sf: A summary-fields object supplying tfcolumns (the summary form
        layout).
    '''
    self.interview = interview
    col = list(sf.tfcolumns.keys())
    col.append('Date')
    TheTrade = pd.DataFrame(columns=col)
    # One empty row to hold this trade's summary values.
    TheTrade = DataFrameUtil.addRows(TheTrade, 1)
    self.sf = sf

    # First and last transaction indices of this trade.
    ix = df.index[-1]
    ix0 = df.index[0]

    # TODO This list should be retrieved from TheStrategyObject
    strats = [
        'ORB', 'ABCD', 'VWAP Reversal', 'Bull Flag', 'Fallen Angel',
        'VWAP False Breakout', 'VWAP Reversal', '15 Minute Reversal',
        'VWAP MA trend', 'Other', 'Skip'
    ]
    # NOTE(review): `frc` is not defined in this method -- presumably a
    # module-level FinReqCol instance; verify.
    side = df.loc[ix0][frc.side]
    self.df = df
    self.TheTrade = TheTrade
    self.ix = ix
    self.ix0 = ix0
    self.strats = strats
    self.side = side
    self.shares = 0
    # Up to three chart slots, filled in later.
    self.chartSlot1 = None
    self.chartSlot2 = None
    self.chartSlot3 = None
    self.settings = QSettings('zero_substance', 'structjour')
def layoutExcelData(self, df, ldf, imageNames):
    '''
    1) Determine the locations in the Excel doc to place trade summaries, trade
       tables and images.
    2) Create the empty rows and place the trade tables in the df according to
       the locations.

    :params df: We require the df as a whole because we are adding rows to it.
    :params ldf: A list of dataframes, each a trade, each one is placed into our
        new skeletal layout for excel.
    :params imageNames: Mapping of trade key (Tindex value) to its list of image
        names -- up to 3 per trade.
    :return (imageLocation, df): imageLocation contains
        [[list of image locations],   # up to 3 per trade
         [list of image names],       # up to 3 per trade
         Start time,
         trade dur]
    '''
    imageLocation = list()
    srf = SumReqFields()
    sumSize = srf.maxrow() + 5
    summarySize = sumSize
    spacing = 3

    # Image column locations for the first, second and third chart.
    c1col = 13
    c2col = 1
    c3col = 9
    frq = FinReqCol()

    # Push the input frame down by topMargin blank rows.
    newdf = DataFrameUtil.createDf(df, self.topMargin)
    df = newdf.append(df, ignore_index=True)

    deleteme = []
    for i, tdf in enumerate(ldf):
        theKey = tdf[frq.tix].unique()[-1]
        if len(theKey) == 0:
            # Empty key: mark this trade for removal from ldf after the loop.
            deleteme.append(i)
            continue
        imageName = imageNames[theKey]
        xtraimage = 0
        # Add space for second/third image
        if len(imageName) > 1:
            xtraimage = 21
        ilocs = []
        # Need 1 entry even if there are no images
        ilocs.append((c1col, len(tdf) + len(df) + spacing))
        # Distinct loop variable: the original reused `i`, shadowing the
        # enumerate index.
        for imgnum in range(0, len(imageName)):
            if imgnum == 1:
                ilocs.append((c2col, len(tdf) + len(df) + spacing + 20))
            elif imgnum == 2:
                ilocs.append((c3col, len(tdf) + len(df) + spacing + 20))

        # Holds image locations, image name, trade start time, trade duration
        imageLocation.append([
            ilocs,
            imageName,
            tdf.Start.unique()[-1],
            tdf.Duration.unique()[-1]
        ])

        # Append the mini trade table then add rows to fit the tradeSummary form
        df = df.append(tdf, ignore_index=True)
        df = DataFrameUtil.addRows(df, summarySize + xtraimage)

    # Pop in reverse so earlier removals don't shift the remaining indices (the
    # original popped in ascending order, removing the wrong elements whenever
    # more than one trade was marked).
    for d in reversed(deleteme):
        ldf.pop(d)
    return imageLocation, df