def readCash(ws):
    """
    [Worksheet] ws => [Iterable] (currency, amount) tuples

    Scan the lines of the worksheet and convert every line that contains
    a cell starting with 'Closing Balance' into a (currency, amount)
    tuple. The result is a lazy iterator; once materialized it looks
    like [('HKD', 1234.67), ('USD', 89.88)].

    A ValueError is raised (when the offending line is consumed) if a
    cash line lacks either a float amount or a currency string.
    """
    def isClosingBalance(s):
        return isinstance(s, str) and s.startswith('Closing Balance')


    def isCashLine(L):
        return any(isClosingBalance(x) for x in L)


    def cashEntry(lineItems):
        """
        [List] lineItems => [Tuple] (currency, amount)

        The amount is the first float in the line. The currency comes
        from the first string that has '(' at index 0 and ')' at index
        6; the code itself is taken from characters 2..4 of that string.
        """
        def isFloat(x):
            return isinstance(x, float)


        def isCurrencyString(x):
            return isinstance(x, str) \
                and len(x) > 6 \
                and x[0] == '(' \
                and x[6] == ')'


        amount = firstOf(isFloat, lineItems)
        currencyString = firstOf(isCurrencyString, lineItems)

        # presumably firstOf() gives None when nothing matches; compare by
        # identity rather than with '=='
        if amount is None or currencyString is None:
            raise ValueError('cashEntry(): cannot parse cash entry: {0}'.format(lineItems))

        return (currencyString.strip()[2:5], amount)


    return map(cashEntry, filter(isCashLine, worksheetToLines(ws)))
def toCsv(inputFile, outputDir, prefix):
    """
    [String] inputFile, [String] outputDir, [String] prefix
        => [List] [holdingFile, cashFile]

    Read the first sheet of the input Excel file through readJPM(), then
    write the holdings and the cash entries to two separate files whose
    names come from getOutputFilename(). Both files are written with
    writeCsv(), passing '|' as its third argument.

    Side effect: create the two output csv files.
    """
    logger.info('toCsv(): {0}'.format(inputFile))

    def rowsOf(columns, records):
        # the header row first, then one row per record holding the
        # record's values in column order
        valuesOf = lambda record: map(partial(getitem, record), columns)
        return chain([columns], map(valuesOf, records))


    sheet = open_workbook(inputFile).sheet_by_index(0)
    dateString, holdings, cashEntries = readJPM(worksheetToLines(sheet))
    holdingFile, cashFile = getOutputFilename(dateString, prefix, outputDir)

    holdingColumns = [ 'portfolio', 'custodian', 'date', 'geneva_investment_id'
                     , 'ISIN', 'bloomberg_figi', 'name', 'currency', 'quantity']
    writeCsv(holdingFile, rowsOf(holdingColumns, holdings), '|')

    cashColumns = [ 'portfolio', 'custodian', 'date', 'currency', 'balance']
    writeCsv(cashFile, rowsOf(cashColumns, cashEntries), '|')

    return [holdingFile, cashFile]
def fileToLines(file):
    """
    [String] file => [Iterable] lines

    Open the Excel file, take its first sheet (index 0) and hand it to
    worksheetToLines() to obtain the lines.
    """
    workbook = open_workbook(file)
    firstSheet = workbook.sheet_by_index(0)
    return worksheetToLines(firstSheet)
def testHolding(self):
    """
    Lines 7 to 200 of the sample statement are passed to account();
    36 of the resulting positions must be holdings.
    """
    samplePath = join(getCurrentDirectory(), 'samples', 'statement01.xls')
    sheet = open_workbook(samplePath).sheet_by_index(0)
    section = list(islice(worksheetToLines(sheet), 7, 201))

    # the account code is not needed in this test
    _, positions = account(section)
    holdings = [p for p in positions if isHolding(p)]

    self.assertEqual(36, len(holdings))
    self.verifyHolding1(holdings[0])
    self.verifyHolding2(holdings[35])
def testReadJPM(self):
    """
    readJPM() on the whole sample statement must give 52 holdings,
    10 cash entries and the date 2016-07-06.
    """
    samplePath = join(getCurrentDirectory(), 'samples', 'statement01.xls')
    sheet = open_workbook(samplePath).sheet_by_index(0)
    result = readJPM(worksheetToLines(sheet))
    dateString, holdings, cashEntries = result

    self.assertEqual(len(holdings), 52)
    self.assertEqual(len(cashEntries), 10)
    self.assertEqual('2016-07-06', dateString)

    # check the last holding and the last cash entry
    self.verifyGenevaHolding2(holdings[51], dateString)
    self.verifyGenevaCash2(cashEntries[9], dateString)
def testGenevaPosition(self):
    """
    genevaPosition() applied to the account built from lines 7 to 200
    of the sample statement must give 39 positions.
    """
    dateString = '2016-07-06'
    samplePath = join(getCurrentDirectory(), 'samples', 'statement01.xls')
    sheet = open_workbook(samplePath).sheet_by_index(0)
    section = list(islice(worksheetToLines(sheet), 7, 201))

    positions = list(genevaPosition(dateString, account(section)))

    self.assertEqual(39, len(positions))
    self.verifyGenevaHolding1(positions[0], dateString)
    self.verifyGenevaCash1(positions[38], dateString)
def testCash(self):
    """
    Of the positions built from lines 7 to 200 of the sample statement,
    3 are not holdings; the second of them is the HKD cash entry.
    """
    samplePath = join(getCurrentDirectory(), 'samples', 'statement01.xls')
    sheet = open_workbook(samplePath).sheet_by_index(0)
    section = list(islice(worksheetToLines(sheet), 7, 201))

    # the account code is not needed in this test
    _, positions = account(section)
    cash = [p for p in positions if not isHolding(p)]

    self.assertEqual(3, len(cash))

    hkdEntry = cash[1]
    self.assertEqual('HKD', hkdEntry['Local CCY'])
    self.assertAlmostEqual(1208208427.86, hkdEntry['Opening Cash Balance'])
    self.assertAlmostEqual(1115935826.52, hkdEntry['Closing Cash Balance'])
def readHolding(ws, startRow):
    """
    [Worksheet] ws, [Int] startRow => [Iterable] rows

    Read the Excel worksheet containing the holdings, return an iterable
    object on the list of holding positions. Each position is a
    dictionary that maps the headers read at startRow to the non-empty
    cells of one data line.

    Data lines are taken from row startRow + 2 onwards and stop at the
    first line for which firstCellNotEmpty() is false.
    """
    headers = readHeaders(ws, startRow)

    def toPosition(values):
        return dict(zip(headers, values))


    def nonEmptyCells(line):
        return filterfalse(lambda s: s == '', line)


    # Use the startRow argument here as well, so that headers and data
    # lines are located relative to the same row. (This used to call
    # getStartRow(), silently ignoring the argument.)
    dataLines = takewhile( firstCellNotEmpty
                         , worksheetToLines(ws, startRow + 2))

    return map(toPosition, map(nonEmptyCells, dataLines))
def readHolding(file):
    """
    [String] file => ([String] date, [Iterable] positions)

    Convert the first sheet of the Excel file to lines, then derive the
    date and the positions from those lines.
    """
    lines = worksheetToLines(open_workbook(file).sheet_by_index(0))

    # getDate() must see the lines before getPosition() does, as in the
    # tuple expression this replaces
    date = getDate(lines)
    positions = getPosition(lines)
    return (date, positions)
headers = [ 'portfolio', 'custodian', 'date', 'currency', 'balance'] writeCsv(cashFile , chain([headers] , map(partial(dictToValues, headers), cashEntries)) , '|') return [holdingFile, cashFile] if __name__ == '__main__': import logging.config logging.config.fileConfig('logging.config', disable_existing_loggers=False) inputFile = join(getCurrentDirectory(), 'samples', 'statement01.xls') lines = worksheetToLines(open_workbook(inputFile).sheet_by_index(0)) # accountCode, positions = account(list(islice(lines, 7, 201))) # print(accountCode) # for x in positions: # print(x) # for x in genevaPosition('2016-06-07', account(list(islice(lines, 7, 201)))): # print(x) # for x in readJPM(lines): # print(x) print(toCsv(inputFile, getCurrentDirectory(), 'listco_equity_jpm_'))
Assume the first line is column headers """ getRawPositions = lambda lines: \ (lambda headers, lines: \ map( lambda line: dict(zip(headers, line))\ , takewhile( lambda line: not line[0].startswith('Record Count')\ , lines)) )(*getHeadersnLines(lines)) """ [String] file => [Iterable] lines Read an Excel file, convert its first sheet into lines, each line is a list of the columns in the row. """ fileToLines = lambda file: \ worksheetToLines(open_workbook(file).sheet_by_index(0)) """ [List] line => [String] date (yyyy-mm-dd) First item in the line is the date. Most of the time the date is read a float number, but sometimes it is a string (dd/mm/yyyy) """ dateFromLine = lambda line: \ (lambda x: \ fromExcelOrdinal(x).strftime('%Y-%m-%d') \ if isinstance(x, float) else \ (lambda items: \ items[2] + '-' + items[1] + '-' + items[0] )(x.split('/')) )(line[0])