def parse(filename: str) -> Table:
    """Parse the American Funds download."""
    with open(filename) as infile:
        reader = csv.reader(infile)
        header = next(reader)
        rows = list(reader)
    tbl = Table(header, [str] * len(header), rows)

    # Compute fraction.
    tbl = utils.create_fraction_from_market_value(tbl, 'market_value')

    # Add asset class.
    cls = {'Equity': 'Equity',
           'Fixed Income': 'FixedIncome',
           'Short Term': 'ShortTerm'}
    tbl = (tbl.map('asset_type', cls.__getitem__)
           .rename(('asset_type', 'asstype')))

    # Set name column.
    tbl = tbl.rename(('security_name', 'name'))

    # Cull the final set of produced columns.
    return tbl.select(['fraction', 'asstype', 'name'])

def normalize_holdings_table(tbl: Table) -> Table:
    """The assets don't actually sum to 100%; normalize them."""
    total = sum(row.fraction for row in tbl)
    if not 0.98 < total < 1.02:
        logging.error("Total weight seems invalid: %s", total)
    scale = 1. / total
    return tbl.map('fraction', lambda f: f * scale)

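# Example (a sketch, assuming the Table API used throughout this codebase):
# fractions summing to 0.99 each get scaled by 1/0.99, so the result sums
# to exactly 1.0. A total outside (0.98, 1.02) is logged as an error but
# still normalized:
#
#   tbl = Table(['fraction'], [float], [[0.50], [0.49]])
#   normalize_holdings_table(tbl).values('fraction')
#   # -> [0.50505..., 0.49494...]
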
def parse(filename: str) -> Table:
    """Load the tables from the CSV file and produce a single holdings table."""
    with open(filename) as infile:
        reader = csv.reader(infile)
        rows = list(reader)
    sections = csv_utils.csv_split_sections_with_titles(rows)
    table_map = {title: Table(srows[0], [str] * len(srows[0]), srows[1:])
                 for title, srows in sections.items()}

    parsers = {
        'Equity': parse_equity,
        'Fixed income': parse_fixed_income,
        'Short-term reserves': parse_shortterm_reserves,
    }
    tables = []
    for title, tbl in table_map.items():
        parser = parsers[title]
        subtbl = parser(tbl)
        subtbl.checkall(VALUES_COLUMNS)
        tables.append(subtbl)
    values_table = table.concat(*tables)

    return (utils.create_fraction_from_market_value(values_table, 'market_value')
            .map('ticker', lambda ticker: ticker if ticker != '-' else '')
            .rename(('holdings', 'name'))
            .map('sedol', utils.empty_dashes)
            .select(['fraction', 'asstype', 'name', 'ticker', 'sedol']))

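# A sketch of the expected input shape (the section titles come from the
# parsers dict above; the column names are assumptions for illustration).
# csv_utils.csv_split_sections_with_titles maps each section title to its
# rows, and the first row of each section is used as the header:
#
#   sections = {
#       'Equity': [['holdings', 'ticker', 'sedol', 'market_value'], ...],
#       'Fixed income': [['holdings', 'sedol', 'market_value'], ...],
#       'Short-term reserves': [['holdings', 'face_amount'], ...],
#   }
#
# Each section is dispatched to its parser by exact title match; a section
# with an unknown title would raise a KeyError from parsers[title].
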
def parse(filename: str) -> Table:
    """Parse the iShares holdings file."""
    header, outrows = find_table(filename)
    tbl = Table(header, [str] * len(header), outrows)

    # Create fraction column.
    tbl = utils.create_fraction_from_market_value(tbl, 'market_value')

    # Add asset type and ticker columns. Equity files carry a 'Ticker'
    # column; fixed-income files don't.
    if 'Ticker' in header:
        tbl = (tbl.create('asstype', lambda _: 'Equity')
               .map('ticker', str.strip))
    else:
        tbl = (tbl.create('asstype', lambda _: 'FixedIncome')
               .create('ticker', lambda _: ''))

    return (tbl.map('ticker', utils.empty_dashes)
            .map('sedol', utils.empty_dashes)
            .map('isin', utils.empty_dashes)
            .select(['fraction', 'asstype', 'name', 'ticker', 'sedol', 'isin']))

def get_chain_table(dateMap):
    """Flatten the nested date -> price -> options mapping into one table."""
    # Derive the columns from the keys of the first option dict found.
    columns = sorted(first(first(dateMap.values()).values())[0].keys())
    rows = []
    for dateDays, priceMap in sorted(dateMap.items()):
        # 'days' is parsed but not currently stored in the rows.
        _, __, days = dateDays.partition(':')
        for price, optionlist in sorted(priceMap.items()):
            for option in optionlist:
                rows.append([option[col] for col in columns])
    return Table(columns, [str] * len(columns), rows)

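# A minimal sketch of the expected input shape (hypothetical keys and
# values; the real contents come from the options chain download):
#
#   dateMap = {
#       '2021-01-15:30': {                        # '<date>:<days-to-expiry>'
#           350.0: [                              # strike price
#               {'symbol': 'XYZ210115C00350000',  # one dict per option
#                'bid': '1.20', 'ask': '1.35'},
#           ],
#       },
#   }
#
# get_chain_table() flattens this nesting into one row per option; the
# sorted() calls on dates and strikes fix the row ordering.
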
def parse_shortterm_reserves(tbl: Table) -> Table:
    """Parse the Short-term reserves table."""
    # Find which of the face-amount column variants this file uses.
    fname = None
    for candidate in 'face_amount', 'face_amount_local_currency':
        if candidate in tbl.columns:
            fname = candidate
            break
    assert fname is not None, "No face amount column found."
    return (tbl.create('asstype', lambda _: 'ShortTerm')
            .rename((fname, 'market_value'))
            .create('ticker', lambda _: '')
            .update('sedol', lambda row: row.sedol if row.sedol != '-' else '')
            .select(VALUES_COLUMNS))

def parse(filename: str) -> Table:
    """Parse the PowerShares holdings file."""
    with open(filename) as infile:
        reader = csv.reader(infile)
        header = next(reader)
        rows = list(reader)
    tbl = Table(header, [str] * len(header), rows)

    # Compute fraction from market value.
    tbl = utils.create_fraction_from_market_value(tbl, 'marketvalue')

    # Create asset type column.
    tbl = tbl.create('asstype', lambda _: 'Equity')

    # Create identifier columns.
    # TODO: Confirm what 'securitynum' contains; it is assumed to be a CUSIP.
    tbl = (tbl.check(['name'])
           .rename(('holdingsticker', 'ticker'))
           .map('ticker', str.strip)
           .rename(('securitynum', 'cusip')))

    return tbl.select(['fraction', 'asstype', 'name', 'ticker', 'cusip'])

def check_holdings(holdings: Table):
    """Check that the holdings Table has the required columns."""
    actual = set(holdings.columns)

    allowed = {'asstype', 'fraction'} | set(IDCOLUMNS)
    other = actual - allowed
    assert not other, "Extra columns found: {}".format(other)

    required = {'asstype', 'fraction'}
    assert required.issubset(actual), (
        "Required columns missing: {}".format(required - actual))

    assert set(IDCOLUMNS) & actual, "No id columns found: {}".format(actual)

    assert all(cls in ASSTYPES for cls in holdings.values('asstype'))

    # Check that '-' doesn't appear in identifier columns.
    for column in IDCOLUMNS:
        if column not in holdings.columns:
            continue
        values = holdings.values(column)
        if '-' in values:
            raise ValueError("Invalid value '-' in column '{}'".format(column))

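# For example, assuming IDCOLUMNS contains 'ticker': a table with columns
# ['fraction', 'asstype', 'ticker'] passes, a table with an extra 'price'
# column fails the first assertion, and a '-' placeholder left in any
# identifier column raises ValueError. Issuer parsers are expected to have
# mapped '-' to empty strings already, e.g. via utils.empty_dashes.
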
def parse(filename: str) -> Table:
    """Parse the SPDRs holdings file."""
    header, rows = read_table(filename)
    tbl = Table(header, [str] * len(header), rows)

    # Use the weight column as the fraction directly; the weights are
    # percentages, so they should sum to roughly 100.
    tbl = tbl.map('weight', float).rename(('weight', 'fraction'))
    total_value = sum(tbl.itervalues('fraction'))
    if not 99 <= total_value <= 101:
        logging.error("Total value is invalid: %s", total_value)
    tbl = tbl.map('fraction', lambda f: f / total_value)

    # Create asset type column.
    tbl = tbl.create('asstype', lambda _: 'Equity')

    # Add identifiers.
    tbl = (tbl.check(['name'])
           .rename(('identifier', 'ticker')))

    return tbl.select(['fraction', 'asstype', 'name', 'ticker'])

def main():
    """Collect all the assets and holdings and disaggregate."""
    logging.basicConfig(level=logging.INFO,
                        format='%(levelname)-8s: %(message)s')
    parser = argparse.ArgumentParser(description=__doc__.strip())

    parser.add_argument('portfolio',
                        help=('A CSV file which contains the tickers of assets '
                              'and number of units'))
    parser.add_argument('--dbdir', default=database.DEFAULT_DIR,
                        help="Database directory to write all the downloaded files.")
    parser.add_argument('-i', '--ignore-missing-issuer', action='store_true',
                        help="Ignore positions where the issuer implementation is missing")
    parser.add_argument('-o', '--ignore-options', action='store_true',
                        help=("Ignore options positions "
                              "(only works with Beancount export file)"))
    parser.add_argument('-l', '--ignore-shorts', action='store_true',
                        help="Ignore short positions")
    parser.add_argument('-t', '--threshold', action='store', type=float, default=0,
                        help="Remove holdings whose value is under a threshold")
    parser.add_argument('-F', '--full-table', action='store',
                        help="Path to write the full table to.")
    parser.add_argument('-A', '--agg-table', action='store',
                        help="Path to write the aggregated table to.")
    parser.add_argument('-D', '--debug-output', action='store',
                        help="Path to debugging output of grouping algorithm.")
    args = parser.parse_args()
    db = database.Database(args.dbdir)

    # Load up the list of assets from the exported Beancount file.
    assets = beansupport.read_portfolio(args.portfolio, args.ignore_options)
    assets.checkall(['ticker', 'account', 'issuer', 'price', 'quantity'])
    assets = assets.order(lambda row: (row.issuer, row.ticker))

    # Fetch baskets for each of those.
    alltables = []
    for row in assets:
        if row.quantity < 0 and args.ignore_shorts:
            continue

        if not row.issuer:
            # A position without an issuer is a direct holding of a single
            # equity; it is its own basket with a fraction of 1.
            holdings = Table(['fraction', 'asstype', 'ticker'],
                             [float, str, str],
                             [[1.0, 'Equity', row.ticker]])
        else:
            downloader = issuers.get(row.issuer)
            if downloader is None:
                message = "Missing issuer: {}".format(row.issuer)
                if args.ignore_missing_issuer:
                    logging.error(message)
                    continue
                else:
                    raise SystemExit(message)

            filename = database.getlatest(db, row.ticker)
            if filename is None:
                logging.error("Missing file for %s", row.ticker)
                continue
            logging.info("Parsing file '%s' with '%s'", filename, row.issuer)

            if not hasattr(downloader, 'parse'):
                logging.error("Parser for %s is not implemented", row.ticker)
                continue

            # Parse the file.
            holdings = downloader.parse(filename)
            check_holdings(holdings)

        # Add parent ETF and fixup columns.
        holdings = add_missing_columns(holdings)
        holdings = holdings.create('etf', lambda _, row=row: row.ticker)
        holdings = holdings.create('account', lambda _, row=row: row.account)
        holdings = holdings.select(COLUMNS)

        # Convert fraction to dollar amount.
        dollar_amount = row.quantity * row.price
        holdings = (holdings
                    .create('amount', lambda row, a=dollar_amount: row.fraction * a)
                    .delete(['fraction']))

        alltables.append(holdings)
    fulltable = table.concat(*alltables)

    # Aggregate the holdings.
    aggtable, annotable = graph.group(fulltable, args.debug_output)
    if args.agg_table:
        with open(args.agg_table, 'w') as outfile:
            table.write_csv(aggtable, outfile)

    # Remove the holdings whose aggregate sum is under a threshold.
    if args.threshold:
        filt_annotable = annotable.filter(
            lambda row: aggtable.rows[row.group].amount > args.threshold)
    else:
        filt_annotable = annotable
    # Write out the full table.
    logging.info("Total amount from full holdings table: {:.2f}".format(
        numpy.sum(fulltable.array('amount'))))
    logging.info("Total amount from annotated holdings table: {:.2f}".format(
        numpy.sum(filt_annotable.array('amount'))))
    if args.full_table:
        with open(args.full_table, 'w') as outfile:
            table.write_csv(filt_annotable, outfile)

    # Cull out the tail of holdings for printing.
    tail = 0.90
    amount = aggtable.array('amount')
    total_amount = numpy.sum(amount)
    logging.info('Total: {:.2f}'.format(total_amount))
    cum_amount = numpy.cumsum(amount)
    headsize = len(amount[cum_amount < total_amount * tail])
    print(aggtable.head(headsize))

def add_missing_columns(tbl: Table) -> Table:
    """Add empty identifier columns to the table."""
    for column in IDCOLUMNS:
        if column not in tbl.columns:
            tbl = tbl.create(column, lambda _: '')
    return tbl

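# For example, if IDCOLUMNS were ['ticker', 'sedol', 'isin', 'cusip'] (an
# assumption for illustration; see its definition in this package), a table
# that only has 'ticker' comes back with empty 'sedol', 'isin' and 'cusip'
# columns, so downstream code can select COLUMNS uniformly from any issuer.
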
def create_fraction_from_market_value(tbl: Table, column: str) -> Table:
    """Create a 'fraction' column computed from the market value column."""
    tbl = tbl.map(column, convert_dollar_amount)
    # Clamp negative market values (e.g., short positions) to zero, both in
    # the total and in each row's fraction.
    total_value = sum(max(0, value) for value in tbl.itervalues(column))
    return tbl.create('fraction',
                      lambda row: max(0, getattr(row, column)) / total_value)

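# A worked example (hypothetical values): market values of 60, 40 and -10
# yield a clamped total of 100, producing fractions of 0.6, 0.4 and 0.0.
# Because fractions are computed against the clamped total, they sum to
# 1.0 over the non-negative positions.
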
def HoldingsTable(rows):
    """Normalized extracted contents of a holdings file download."""
    return Table(['ticker', 'fraction', 'description'],
                 [str, float, str],
                 rows)

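# Example row (hypothetical values):
#
#   HoldingsTable([['VTI', 0.25, 'Vanguard Total Stock Market ETF']])
#
# with the ticker as str, the fraction of the portfolio as float, and a
# free-form description as str.
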
def parse_fixed_income(tbl: Table) -> Table:
    """Parse the Fixed income table."""
    return (tbl.create('asstype', lambda _: 'FixedIncome')
            .create('ticker', lambda _: '')
            .update('sedol', lambda row: row.sedol if row.sedol != '-' else '')
            .select(VALUES_COLUMNS))

def parse_equity(tbl: Table) -> Table:
    """Parse the Equity table."""
    return (tbl.create('asstype', lambda _: 'Equity')
            .map('ticker', str.strip)
            .select(VALUES_COLUMNS))

def group(holdings: Table, debug_filename: str = None) -> Tuple[Table, Table]:
    """Group assets by similarity."""
    # Compute the connected components.
    g = build_graph(holdings)
    cc = nx.connected_components(g)
    logging.info('Num connected components: %s',
                 nx.number_connected_components(g))

    # Process each component.
    counts = collections.defaultdict(int)
    debugfile = open(debug_filename, 'w') if debug_filename else None
    groups = []
    for component in cc:
        # Separate out the rows and links.
        rows = []
        links = []
        for c in component:
            # pylint: disable=unidiomatic-typecheck
            (links if type(c) is tuple else rows).append(c)
        counts[len(rows)] += 1
        groups.append(rows)

        # Print all groups to the debug file.
        if debugfile:
            print_group(rows, links, debugfile)

        # if ('ticker', 'GOOG') in links or ('ticker', 'GOOGL') in links:
        #     print_detailed_debug_info(c, g)

        # if 0:
        #     # Print groups with mixed asset types.
        #     if len(set(row.asstype for row in rows)) > 1:
        #         print_group(rows, links)

        # if 0:
        #     # Print groups without any ticker.
        #     if len(rows) != 1:
        #         continue
        #     linkdict = dict(links)
        #     if linkdict.get('ticker', None):
        #         continue
        #     print_group(rows, links)

    if debugfile is not None:
        debugfile.close()

    logging.info('Matched: {:%}'.format(1 - counts[1] / sum(counts.values())))
    logging.info('Items distribution (log-floored):')

    # Convert the group-size counts to a log-floored histogram.
    logcounts = collections.defaultdict(int)
    for numitems, count in sorted(counts.items()):
        lognumitems = int(math.pow(2, int(math.log2(numitems))))
        logcounts[lognumitems] += count
    for numitems, count in sorted(logcounts.items()):
        logging.info(' {:>3}~{:>3} items: {:10}'.format(
            numitems - 1, numitems, count))

    # Reduce the rows and produce an aggregated table.
    aggrows = []
    sorted_groups = sorted(groups,
                           key=lambda grows: -sum(row.amount for row in grows))
    for rows in sorted_groups:
        assert rows
        amount = sum(row.amount for row in rows)
        # Select the longest name. It seems to nearly always be the best variant.
        names = sorted(set(row.name for row in rows), key=len, reverse=True)
        name = names[0]
        symbol = ','.join(sorted(set(row.ticker for row in rows if row.ticker)))
        asstype = ','.join(sorted(set(row.asstype for row in rows)))
        aggrows.append((symbol, asstype, name, amount))
    columns = ['symbol', 'asstype', 'name', 'amount']
    aggtable = (Table(columns, [str, str, str, float], aggrows)
                .order(lambda row: row.amount, asc=False))

    # Reproduce the original table, but with the row groups annotated this time.
    annotation_map = {}
    for index, rows in enumerate(sorted_groups):
        for row in rows:
            annotation_map[row] = index
    annotable = (holdings
                 .create('group', annotation_map.__getitem__)
                 .order(lambda row: (row.group, -row.amount)))
    assert len(holdings) == len(annotable)

    return aggtable, annotable

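# A sketch of the component structure assumed above (hypothetical data;
# build_graph is defined elsewhere in this module). Nodes are either
# holding rows or identifier links, where a link is a (column, value)
# tuple such as ('ticker', 'GOOG'):
#
#   row_a.ticker == 'GOOG'    # from one ETF's holdings
#   row_b.ticker == 'GOOG'    # from another ETF's holdings
#   component == {row_a, row_b, ('ticker', 'GOOG')}
#
# Rows sharing any identifier land in the same connected component, so
# each component is one group of rows describing the same underlying
# asset, plus the link tuples that tied them together; that is why the
# loop above separates tuples (links) from rows.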