def processDBTrades(self, trades):
    '''
    Run the methods to create the new DataFrame and fill in the data for the new
    trade-centric (as opposed to transaction-centric; trades may contain multiple
    transactions) DataFrame.

    :params trades: DataFrame of transactions to be organized into trades.
    :return: (dframe, ldf) where dframe is the trade-centric DataFrame padded
        with extra rows and ldf is the list of single-trade DataFrames.
    '''
    rc = self._frc

    # Process the output file DataFrame
    trades = self.addFinReqCol(trades)
    rccolumns = rc.columns.copy()
    rccolumns = self.appendCols(rccolumns)

    # Take an explicit copy so later column assignments do not hit a slice of
    # `trades`. (The original called .copy() and discarded the result -- a bug.)
    newTrades = trades[rccolumns].copy()
    nt = newTrades.sort_values([rc.ticker, rc.acct, rc.date])
    nt = self.addStartTimeDB(nt)
    nt = nt.sort_values([rc.start, rc.ticker, rc.acct, rc.date, rc.time],
                        ascending=True)
    nt = self.addTradeIndex(nt)
    nt = self.addTradePL(nt)
    nt = self.addTradeDurationDB(nt)
    nt = self.addTradeNameDB(nt)
    ldf, nt = self.postProcessingDB(self.getTradeList(nt))
    nt = DataFrameUtil.addRows(nt, 2)
    nt = self.addSummaryPL(nt)

    # Pad the final frame with two blank rows for presentation.
    dframe = DataFrameUtil.addRows(nt, 2)
    return dframe, ldf
def loadTradeSummaries(loc, trades):
    '''
    Load up each trade summary in the excel doc into a 1 row DataFrame, return a
    list of these DataFrames. The addresses are supplied by srf plus the loc
    object that has the anchors.

    :params loc: A list of the rows within the excel doc on which to find the
        trade summaries.
    :params trades: The excel source read per-cell via trades[cell].value --
        presumably an openpyxl worksheet; verify against caller.
    :return: (ldf, ts) -- a list of 1-row DataFrames, plus a dict of the same
        frames keyed by '<ordinal> <trade name>'. Each trade is on one row from
        each of the trade summary forms.
    '''
    ldf = list()
    ts = dict()
    srf = SumReqFields()
    reqCol = srf.rc
    newdf = pd.DataFrame(columns=reqCol.values())
    colFormat = srf.tfcolumns
    for i, rowNum in enumerate(loc):
        # Fresh single-row frame for this summary form.
        newdf = DataFrameUtil.createDf(newdf, 1)
        for key in reqCol.keys():
            # These keys have no cell on the excel form; skip them.
            if key in ['date', 'clean', 'id']:
                continue
            cell = colFormat[reqCol[key]][0]
            # Ranged entries store a list of cells; use the first (top-left).
            if isinstance(cell, list):
                cell = cell[0]
            # Translate the form-relative address to this form's absolute cell.
            cell = tcell(cell, anchor=(1, rowNum))
            newdf.iloc[-1][reqCol[key]] = trades[cell].value
        # Key like '1 AAPL Long' -- ordinal plus the trade's Name field.
        tradekey = str(i + 1) + ' ' + newdf.iloc[0].Name
        ts[tradekey] = newdf
        ldf.append(newdf)
    return ldf, ts
def combinePartialsFlexCSV(self, t):
    '''
    In flex Statements, the TRNT (Trades) table input might be in transactions
    instead of tickets, identified by LevelOfDetail=EXECUTION without the summary
    rows identified by LevelOfDetail=ORDERS. This is fixable (in both Activity
    statements and Trade statements) by changing Options to include Orders. If we
    have Executions only, we need to recombine the partials as identified by
    IBOrderID. If we also lack that column, blitz the sucker. Its not that hard
    to get a new statement.

    New wrinkle. There are some orders that have the same datetime making any
    sort by time void and leaving the balance up to chance which is first. While
    these might be different orders by IB, the trader ordered them as a single
    ticket -- and we will combine them.

    :t: Is a TRNT DataFrame. That is a Trades table from a CSV multi table doc in
        which TRNT is the tableid.
    :assert: Tickets written at the exact same time are partials, identified by
        Notes/Codes == P (change name to Codes) and by having a single Symbol.
    :prerequisite: Must have the columns
        ['Price', 'Commission', 'Quantity', 'LevelOfDetail', 'Codes']
    :raise ValueError: If the table mixes levels of detail or is not at
        execution level.
    '''
    lod = t['LevelOfDetail'].unique()
    # The original used `assert ValueError(...)`, which never fires because the
    # exception instance is truthy -- raise the error as intended.
    if len(lod) > 1:
        raise ValueError('I need to see this')
    if lod[0].lower() != 'execution':
        raise ValueError('I need to see this')
    t = t[t['LevelOfDetail'].str.lower() == 'execution']
    newdf = pd.DataFrame()
    for tickerKey in t['Symbol'].unique():
        ticker = t[t['Symbol'] == tickerKey]
        codes = ticker['Codes'].unique()
        for code in codes:
            # NaN (float) codes carry no partial information; skip them.
            if isinstance(code, float):
                continue
            parts = ticker[ticker['Codes'] == code]
            ticketKeys = parts['IBOrderID'].unique()
            for ticketKey in ticketKeys:
                ticket = parts[parts['IBOrderID'] == ticketKey]
                if len(ticket) > 1:
                    # Combine the partial executions into a single ticket row.
                    thisticket = DataFrameUtil.createDf(ticket.columns, 1)
                    net = 0.0
                    # Need to figure the average price of the transactions and
                    # sum of quantity and commission
                    for i, row in ticket.iterrows():
                        net = net + (float(row['Price']) * int(row['Quantity']))
                    for col in list(thisticket.columns):
                        if col not in ['Quantity', 'Price', 'Commission']:
                            thisticket[col] = ticket[col].unique()[0]
                    thisticket['Quantity'] = ticket['Quantity'].map(int).sum()
                    thisticket['Commission'] = ticket['Commission'].map(float).sum()
                    thisticket['Price'] = net / ticket['Quantity'].map(int).sum()
                    newdf = newdf.append(thisticket)
                else:
                    newdf = newdf.append(ticket)
    return newdf
def imageData(self, df, ldf, ft="png"):
    '''
    Gather the image names and determine the locations in the Excel doc to place
    them. Excel has a few things at top followed by trade summaries, charts and
    tables for each trade. Return with the image name/location data structure.
    The structure can be used for the Excel DataFrame -- to navigate summary form
    locations and just for the names.

    :params df: The DataFrame representing the input file plus some stuff added
        in processOutputFile.
    :params ldf: A list of dataFrames. Each encapsulates a trade.
    :params ft: Image filetype extension. (NOT USED)
    :return (imageLocation, df): imageLocation contains information about the
        excel document locations of trade summaries and image locations. The
        dataFrame df is the outline used to create the workbook; imageLocation
        will be used to style it and fill in the stuff.
    '''
    frq = FinReqCol()

    # Push the input frame down by topMargin blank rows.
    margin = DataFrameUtil.createDf(df, self.topMargin)
    df = margin.append(df, ignore_index=True)

    imageLocation = list()
    count = 0
    for trade in ldf:
        tix = trade[frq.tix].unique()[-1]
        name = trade[frq.name].unique()[-1]
        start = trade[frq.start].unique()[-1]
        dur = trade[frq.dur].unique()[-1]
        imageName = f"{tix.replace(' ', '')}_{name.replace(' ', '-')}_{start}_{dur}.{ft}"

        # [row location, deprecated name, image name, trade start, duration delta]
        imageLocation.append([
            len(trade) + len(df) + self.spacing,
            trade.Tindex.unique()[0].replace(' ', '') + '.' + ft,
            imageName,
            trade.Start.unique()[-1],
            trade.Duration.unique()[-1]
        ])
        count += 1

        # Append the mini trade table then add rows to fit the tradeSummary form.
        df = df.append(trade, ignore_index=True)
        df = DataFrameUtil.addRows(df, self.summarySize)
    return imageLocation, df
def testCheckReqColumnsWithReqColSuccess(self):
    '''Test return values of DataFrameUtil.checkRequiredInputFields'''
    reqCol = ReqCol()
    finReqCol = FinReqCol()
    frame = pd.DataFrame(columns=finReqCol.columns)

    passedFin = passedReq = False
    try:
        # A frame with the full FinReqCol columns satisfies both column sets.
        passedFin = DataFrameUtil.checkRequiredInputFields(frame, finReqCol.columns)
        passedReq = DataFrameUtil.checkRequiredInputFields(frame, reqCol.columns)
    except ValueError as ex:
        print(ex)

    self.assertTrue(passedFin)
    self.assertTrue(passedReq)
def test_dfUtil_addRow(self):
    '''Test method DataFrameUtil.addRows'''
    headers = ['Its', 'the', 'end', 'of', 'the', 'world',
               'as', 'we', 'know', 'it']
    rows = 9
    firstFill = 'something silly'
    secondFill = 'sillier'

    frame = DataFrameUtil.createDf(headers, rows, fill=firstFill)
    frame = DataFrameUtil.addRows(frame, rows, fill=secondFill)
    self.assertEqual(len(frame), rows * 2)

    # First half keeps the original fill; the appended half has the new fill.
    for rownum in range(rows):
        for value in frame.iloc[rownum]:
            self.assertEqual(value, firstFill)
    for rownum in range(rows, rows * 2):
        for value in frame.iloc[rownum]:
            self.assertEqual(value, secondFill)
def testCheckRequiredColumnsThrow(self):
    '''Test DataFrameUtil.checkRequiredInputFields for raising exceptions'''
    vals = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']]
    apd = pd.DataFrame(vals)
    # NOTE(review): the nested list makes pandas build a MultiIndex here --
    # presumably a plain column list was intended; verify.
    apd.columns = [['Its', 'the', 'end', 'of', 'the', 'world',
                    'as', 'we', 'know', 'it']]
    columns = ['Its', 'the', 'end', 'of', 'the', 'world',
               'as', 'we', 'know', 'it', 'sofuckit']

    # The extra required column 'sofuckit' must trigger a ValueError.
    try:
        DataFrameUtil.checkRequiredInputFields(apd, columns)
    except ValueError:
        pass
    except Exception as ex:
        msg = "{0}{1}".format("Unexpected exception. ", ex)
        self.fail(msg)
    else:
        self.fail("Failed to throw expected exception")

    vals = [[1, 2, 3, 4, 5, 6, 7, 8, 9],
            ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']]
    apd = pd.DataFrame(vals, columns=['Its', 'the', 'end', 'of', 'world',
                                      'as', 'we', 'know', 'it'])

    gotve = False
    try:
        DataFrameUtil.checkRequiredInputFields(apd, columns)
    except ValueError:
        gotve = True
    except Exception as ex:
        # Fixed: the original concatenated str + Exception, which raises
        # TypeError itself and masked the real failure message.
        self.fail("Wrong exception was thrown: {}".format(ex))
    finally:
        self.assertTrue(gotve, "Failed to throw a Value Error Exception")
def test_dfUtil_createDf(self):
    '''Test method DataFrameUtil.createDf'''
    headers = ['Its', 'the', 'end', 'of', 'the', 'world',
               'as', 'we', 'know', 'it']
    colsFrame = pd.DataFrame(columns=headers)
    rows = 9
    fill = ''

    # createDf should accept either a DataFrame or a plain column list.
    fromFrame = DataFrameUtil.createDf(colsFrame, rows, fill)
    fromList = DataFrameUtil.createDf(headers, rows, fill)

    self.assertEqual(list(fromFrame.columns), list(fromList.columns))
    self.assertEqual(len(fromFrame), len(fromList))

    for left, right in zip(fromFrame.iloc[1], fromList.iloc[1]):
        self.assertEqual(left, right)
        self.assertEqual(left, fill)

    # With a None fill the values must differ from the ''-filled frame.
    fill = None
    fromList = DataFrameUtil.createDf(headers, rows, fill)
    for left, right in zip(fromFrame.iloc[1], fromList.iloc[1]):
        self.assertTrue(left != right)
        self.assertEqual(right, fill)
def testCheckrequiredColumnsWithReqColFail(self):
    '''Test method DataFrameUtil.checkRequiredInputFields'''
    reqCol = ReqCol()
    finReqCol = FinReqCol()
    fail = pd.DataFrame(
        columns=['Time', 'Symb', 'Side', 'Price', 'Qty', 'Account'])
    rc = pd.DataFrame(columns=reqCol.columns)

    # Both frames lack required columns, so each check must raise ValueError.
    cases = (
        (fail, reqCol.columns, "Failed to throw value error"),
        (rc, finReqCol.columns, "Failed to throw a ValueError"),
    )
    for frame, required, failmsg in cases:
        gotve = False
        try:
            DataFrameUtil.checkRequiredInputFields(frame, required)
        except ValueError:
            gotve = True
        finally:
            self.assertTrue(gotve, failmsg)
def combinePartialsFlexTrade(self, t):
    '''
    The necessity of a new method to handle this is annoying... BUT, the
    Open/Close info is not in any of the available fields. Instead, a less
    rigorous system is used based on OrderID.

    :t: A trades DataFrame at execution LevelOfDetail.
    :return: A new DataFrame with partial executions combined into single
        tickets keyed by OrderID.
    :raise ValueError: If the table mixes levels of detail or is not at
        execution level.
    '''
    lod = t['LevelOfDetail'].unique()
    # The original used `assert ValueError(...)`, which never fires because the
    # exception instance is truthy -- raise the error as intended.
    if len(lod) > 1:
        raise ValueError('I need to see this')
    if lod[0].lower() != 'execution':
        raise ValueError('I need to see this')
    t = t[t['LevelOfDetail'].str.lower() == 'execution']
    newdf = pd.DataFrame()
    for tickerKey in t['Symbol'].unique():
        ticker = t[t['Symbol'] == tickerKey]
        ticketKeys = ticker['OrderID'].unique()
        for ticketKey in ticketKeys:
            ticket = ticker[ticker['OrderID'] == ticketKey]
            if len(ticket) > 1:
                # Multi-row orders must all be flagged as partials ('P' code).
                codes = ticket['Codes']
                for code in codes:
                    assert code.find('P') > -1
                thisticket = DataFrameUtil.createDf(ticket.columns, 1)
                net = 0.0
                # Need to figure the average price of the transactions and sum
                # of quantity and commission
                for i, row in ticket.iterrows():
                    net = net + (float(row['Price']) * int(row['Quantity']))
                for col in list(thisticket.columns):
                    if col not in ['Quantity', 'Price', 'Commission']:
                        thisticket[col] = ticket[col].unique()[0]
                thisticket['Quantity'] = ticket['Quantity'].map(int).sum()
                thisticket['Commission'] = ticket['Commission'].map(float).sum()
                thisticket['Price'] = net / ticket['Quantity'].map(int).sum()
                newdf = newdf.append(thisticket)
            else:
                newdf = newdf.append(ticket)
    return newdf
def getStatementType(infile):
    '''
    Determine if infile is a statement. If it is, return a tuple (data, type).

    TODO: Not doing what I said... If it is a DAS statement, determine if it
    matches the current date. As DAS statements do not include dates, the date in
    structjour and the directory structure date must match. If they don't match,
    the program, at a higher level, will pop a query to get the date of the
    statement.

    :params infile: Path of the file to classify.
    :return: (data, type) where type is one of 'IB_CSV', 'DAS' or 'IB_HTML';
        (None, None) if the file is not recognized. (None, 'DAS') signals a DAS
        statement whose directory date did not match.
    '''
    # The original bound the basename to `file` (shadowing the builtin) and
    # never used it.
    _, ext = os.path.splitext(infile)
    ext = ext.lower()
    if not os.path.exists(infile) or (ext != '.csv' and not ext.startswith('.htm')):
        return None, None
    if ext == '.csv':
        # IB flex/activity CSVs are multi-table docs; read with generous columns.
        df = pd.read_csv(infile, names=[x for x in range(0, 100)])
        firstcell = df.iloc[0][0]
        if firstcell in ('BOF', 'HEADER', 'ClientAccountID', 'Statement'):
            return df, "IB_CSV"

        df = pd.read_csv(infile)
        if not df.empty:
            requiredFields = list(ReqCol().columns)
            requiredFields.remove('Date')
            # A small hack to allow tradesByTickets to pass as a DAS export
            if 'PnL' not in df.columns:
                requiredFields.remove('PnL')
                requiredFields.append('P / L')
            try:
                if DataFrameUtil.checkRequiredInputFields(df, requiredFields):
                    if not checkDateDir(infile):
                        # Date mismatch: return type only so the caller can
                        # query the user for the statement date.
                        return None, 'DAS'
                    return df, 'DAS'
            except ValueError:
                pass
    elif ext.startswith('.htm'):
        soup = BeautifulSoup(readit(infile), 'html.parser')
        tbldivs = soup.find_all("div", id=lambda x: x and x.startswith('tbl'))
        if tbldivs:
            return tbldivs, 'IB_HTML'
    return None, None
def __init__(self, df, interview, sf):
    '''
    Create a dataframe that includes all the summary material for review. Some of
    this data comes from the program and some of it comes from the user. The user
    will determine which parts to fill out from a couple of options.

    :params df: A DataFrame that includes the transactions, or tickets, from a
        single trade.
    :params interview: Stored on the instance; presumably controls whether the
        user is interviewed for summary data -- verify against callers.
    :params sf: A summary-fields object supplying tfcolumns (the summary form
        layout).
    '''
    self.interview = interview
    col = list(sf.tfcolumns.keys())
    col.append('Date')
    TheTrade = pd.DataFrame(columns=col)
    # One empty row to hold this trade's summary values.
    TheTrade = DataFrameUtil.addRows(TheTrade, 1)
    self.sf = sf

    # First and last transaction indices of this trade.
    ix = df.index[-1]
    ix0 = df.index[0]

    # TODO This list should be retrieved from TheStrategyObject
    strats = [
        'ORB', 'ABCD', 'VWAP Reversal', 'Bull Flag', 'Fallen Angel',
        'VWAP False Breakout', 'VWAP Reversal', '15 Minute Reversal',
        'VWAP MA trend', 'Other', 'Skip'
    ]
    # NOTE(review): `frc` is not defined in this method -- presumably a
    # module-level FinReqCol instance; verify.
    side = df.loc[ix0][frc.side]
    self.df = df
    self.TheTrade = TheTrade
    self.ix = ix
    self.ix0 = ix0
    self.strats = strats
    self.side = side
    self.shares = 0
    # Up to three chart slots, filled in later.
    self.chartSlot1 = None
    self.chartSlot2 = None
    self.chartSlot3 = None
    self.settings = QSettings('zero_substance', 'structjour')
def layoutExcelData(self, df, ldf, imageNames):
    '''
    1) Determine the locations in the Excel doc to place trade summaries, trade
       tables and images.
    2) Create the empty rows and place the trade tables in the df according to
       the locations.

    :params df: We require the df as a whole because we are adding rows to it.
    :params ldf: A list of dataframes, each a trade, each one is placed into our
        new skeletal layout for excel.
    :params imageNames: Mapping of trade key (Tindex value) to its list of image
        names -- up to 3 per trade.
    :return (imageLocation, df): imageLocation contains
        [[list of image locations],   # up to 3 per trade
         [list of image names],       # up to 3 per trade
         Start time,
         trade dur]
    '''
    imageLocation = list()
    srf = SumReqFields()
    sumSize = srf.maxrow() + 5
    summarySize = sumSize
    spacing = 3

    # Image column locations for the first, second and third chart.
    c1col = 13
    c2col = 1
    c3col = 9
    frq = FinReqCol()

    # Push the input frame down by topMargin blank rows.
    newdf = DataFrameUtil.createDf(df, self.topMargin)
    df = newdf.append(df, ignore_index=True)

    deleteme = []
    for i, tdf in enumerate(ldf):
        theKey = tdf[frq.tix].unique()[-1]
        if len(theKey) == 0:
            # Empty key: mark this trade for removal from ldf after the loop.
            deleteme.append(i)
            continue
        imageName = imageNames[theKey]
        xtraimage = 0
        # Add space for second/third image
        if len(imageName) > 1:
            xtraimage = 21
        ilocs = []
        # Need 1 entry even if there are no images
        ilocs.append((c1col, len(tdf) + len(df) + spacing))
        # Distinct loop variable: the original reused `i`, shadowing the
        # enumerate index.
        for imgnum in range(0, len(imageName)):
            if imgnum == 1:
                ilocs.append((c2col, len(tdf) + len(df) + spacing + 20))
            elif imgnum == 2:
                ilocs.append((c3col, len(tdf) + len(df) + spacing + 20))

        # Holds image locations, image name, trade start time, trade duration
        imageLocation.append([
            ilocs,
            imageName,
            tdf.Start.unique()[-1],
            tdf.Duration.unique()[-1]
        ])

        # Append the mini trade table then add rows to fit the tradeSummary form
        df = df.append(tdf, ignore_index=True)
        df = DataFrameUtil.addRows(df, summarySize + xtraimage)

    # Pop in reverse so earlier removals don't shift the remaining indices (the
    # original popped in ascending order, removing the wrong elements whenever
    # more than one trade was marked).
    for d in reversed(deleteme):
        ldf.pop(d)
    return imageLocation, df