class Dimensioned(LabelledData):
    """
    Dimensioned is a base class that allows the data contents of a
    class to be associated with dimensions. The contents associated
    with dimensions may be partitioned into one of three types

    * key dimensions: These are the dimensions that can be indexed via
                      the __getitem__ method. Dimension objects
                      supporting key dimensions must support indexing
                      over these dimensions and may also support
                      slicing. This list ordering of dimensions
                      describes the positional components of each
                      multi-dimensional indexing operation.

                      For instance, if the key dimension names are
                      'weight' followed by 'height' for Dimensioned
                      object 'obj', then obj[80,175] indexes a weight
                      of 80 and height of 175.

                      Accessed using either kdims or key_dimensions.

    * value dimensions: These dimensions correspond to any data held
                        on the Dimensioned object not in the key
                        dimensions. Indexing by value dimension is
                        supported by dimension name (when there are
                        multiple possible value dimensions); no
                        slicing semantics is supported and all the
                        data associated with that dimension will be
                        returned at once. Note that it is not possible
                        to mix value dimensions and deep dimensions.

                        Accessed using either vdims or value_dimensions.

    * deep dimensions: These are dynamically computed dimensions that
                       belong to other Dimensioned objects that are
                       nested in the data. Objects that support this
                       should enable the _deep_indexable flag. Note
                       that it is not possible to mix value dimensions
                       and deep dimensions.

                       Accessed using either ddims or deep_dimensions.

    Dimensioned class support generalized methods for finding the
    range and type of values along a particular Dimension. The range
    method relies on the appropriate implementation of the
    dimension_values methods on subclasses.

    The index of an arbitrary dimension is its positional index in the
    list of all dimensions, starting with the key dimensions, followed
    by the value dimensions and ending with the deep dimensions.
    """

    cdims = param.Dict(default=OrderedDict(), doc="""
       The constant dimensions defined as a dictionary of
       Dimension:value pairs providing additional dimension
       information about the object.

       Aliased with constant_dimensions.""")

    kdims = param.List(bounds=(0, None), constant=True, doc="""
       The key dimensions defined as list of dimensions that may be
       used in indexing (and potential slicing) semantics. The order
       of the dimensions listed here determines the semantics of each
       component of a multi-dimensional indexing operation.

       Aliased with key_dimensions.""")

    vdims = param.List(bounds=(0, None), constant=True, doc="""
       The value dimensions defined as the list of dimensions used to
       describe the components of the data. If multiple value
       dimensions are supplied, a particular value dimension may be
       indexed by name after the key dimensions.

       Aliased with value_dimensions.""")

    group = param.String(default='Dimensioned', constant=True, doc="""
       A string describing the data wrapped by the object.""")

    __abstract = True
    _sorted = False
    # Attribute names of the dimension groups, in positional-index order
    # as used by dimensions('all').
    _dim_groups = ['kdims', 'vdims', 'cdims', 'ddims']
    # Maps each long-name alias to the short attribute it stands for.
    _dim_aliases = dict(key_dimensions='kdims', value_dimensions='vdims',
                        constant_dimensions='cdims', deep_dimensions='ddims')

    # Long-name aliases

    @property
    def key_dimensions(self):
        "Long-name alias for kdims"
        return self.kdims

    @property
    def value_dimensions(self):
        "Long-name alias for vdims"
        return self.vdims

    @property
    def constant_dimensions(self):
        "Long-name alias for cdims"
        return self.cdims

    @property
    def deep_dimensions(self):
        "Long-name alias for ddims"
        return self.ddims

    def __init__(self, data, **params):
        """
        Normalizes any dimension keywords before delegating to the
        parent constructor: long-name aliases are renamed to their
        short form and any entry that is not already a Dimension is
        wrapped in one. Deep dimensions are computed dynamically and
        are therefore never processed here.
        """
        for group in self._dim_groups + list(self._dim_aliases.keys()):
            if group in ['deep_dimensions', 'ddims']:
                continue
            if group not in params:
                continue
            if group in self._dim_aliases:
                # Re-key the aliased keyword under its short name
                params[self._dim_aliases[group]] = params.pop(group)
                group = self._dim_aliases[group]
            if group == 'cdims':
                dimensions = {d if isinstance(d, Dimension) else Dimension(d): val
                              for d, val in params.pop(group).items()}
            else:
                dimensions = [d if isinstance(d, Dimension) else Dimension(d)
                              for d in params.pop(group)]
            params[group] = dimensions
        super(Dimensioned, self).__init__(data, **params)
        self.ndims = len(self.kdims)
        cdims = [(d.name, val) for d, val in self.cdims.items()]
        self._cached_constants = OrderedDict(cdims)
        self._cached_index_names = [d.name for d in self.kdims]
        self._cached_value_names = [d.name for d in self.vdims]
        self._settings = None

    def _valid_dimensions(self, dimensions):
        "Validates key dimension input"
        if not dimensions:
            return dimensions
        elif not isinstance(dimensions, list):
            dimensions = [dimensions]

        for dim in dimensions:
            if dim not in self._cached_index_names:
                raise Exception("Supplied dimensions %s not found." % dim)
        return dimensions

    @property
    def ddims(self):
        "The list of deep dimensions"
        if self._deep_indexable and len(self):
            return self.values()[0].dimensions()
        else:
            return []

    def dimensions(self, selection='all', label=False):
        """
        Provides convenient access to Dimensions on nested
        Dimensioned objects. Dimensions can be selected
        by their type, i.e. 'key' or 'value' dimensions.
        By default 'all' dimensions are returned.

        If label is True the dimension names are returned instead of
        the Dimension objects. An unrecognised selection raises a
        KeyError.
        """
        lambdas = {'k': (lambda x: x.kdims, {'full_breadth': False}),
                   'v': (lambda x: x.vdims, {}),
                   'c': (lambda x: x.cdims, {})}
        aliases = {'key': 'k', 'value': 'v', 'constant': 'c'}
        if selection == 'all':
            dims = [dim for group in self._dim_groups
                    for dim in getattr(self, group)]
        elif isinstance(selection, list):
            dims = [dim for group in selection
                    for dim in getattr(self, '%sdims' % aliases.get(group))]
        elif aliases.get(selection) in lambdas:
            selection = aliases.get(selection, selection)
            lmbd, kwargs = lambdas[selection]
            key_traversal = self.traverse(lmbd, **kwargs)
            dims = [dim for keydims in key_traversal for dim in keydims]
        else:
            # The selection is wrapped in a 1-tuple so that %r is applied
            # exactly once, even when the selection is itself a tuple.
            raise KeyError("Invalid selection %r, valid selections include "
                           "'all', 'value' and 'key' dimensions" % (selection,))
        return [dim.name if label else dim for dim in dims]

    def get_dimension(self, dimension, default=None):
        """
        Access a Dimension object by name or index. The supplied
        default is returned when no dimension has the given name.
        """
        all_dims = self.dimensions()
        if isinstance(dimension, int):
            return all_dims[dimension]
        else:
            return {dim.name: dim for dim in all_dims}.get(dimension, default)

    def get_dimension_index(self, dim):
        """
        Returns the index of the requested dimension.

        An integer is validated against the number of dimensions and
        returned unchanged; an IndexError is raised if it is out of
        bounds. A name is looked up among all dimensions and an
        Exception is raised if it cannot be found.
        """
        if isinstance(dim, int):
            if dim < len(self.dimensions()):
                return dim
            # Raise rather than return the exception object so that
            # callers never receive an exception instance as an index.
            raise IndexError('Dimension index out of bounds')
        try:
            return [d.name for d in self.dimensions()].index(dim)
        except ValueError:
            raise Exception("Dimension %s not found in %s." %
                            (dim, self.__class__.__name__))

    def get_dimension_type(self, dim):
        """
        Returns the specified Dimension type if specified or
        if the dimension_values types are consistent otherwise
        None is returned.
        """
        dim_obj = self.get_dimension(dim)
        if dim_obj and dim_obj.type is not None:
            return dim_obj.type
        dim_vals = [type(v) for v in self.dimension_values(dim)]
        if len(set(dim_vals)) == 1:
            return dim_vals[0]
        else:
            return None

    def __getitem__(self, key):
        """
        Multi-dimensional indexing semantics is determined by the list
        of key dimensions. For instance, the first indexing component
        will index the first key dimension.

        After the key dimensions are given, *either* a value dimension
        name may follow (if there are multiple value dimensions) *or*
        deep dimensions may then be listed (for applicable deep
        dimensions).
        """
        return self

    def select(self, selection_specs=None, **kwargs):
        """
        Allows slicing or indexing into the Dimensioned object
        by supplying the dimension and index/slice as key
        value pairs. Select descends recursively through the
        data structure applying the key dimension selection.
        The 'value' keyword allows selecting the
        value dimensions on objects which have any declared.

        The selection may also be selectively applied to
        specific objects by supplying the selection_specs
        as an iterable of type.group.label specs, types or
        functions.
        """

        # Apply all indexes applying on this object
        val_dim = ['value'] if self.vdims else []
        sanitized = {sanitize_identifier(kd): kd
                     for kd in self._cached_index_names}
        local_dims = (self._cached_index_names
                      + list(sanitized.keys()) + val_dim)
        local_kwargs = {k: v for k, v in kwargs.items()
                        if k in local_dims}

        # Check selection_spec applies
        if selection_specs is not None:
            matches = any(self.matches(spec)
                          for spec in selection_specs)
        else:
            matches = True

        if local_kwargs and matches:
            select = [slice(None) for i in range(self.ndims)]
            for dim, val in local_kwargs.items():
                if dim == 'value':
                    select += [val]
                else:
                    # A tuple is shorthand for the arguments of a slice
                    if isinstance(val, tuple):
                        val = slice(*val)
                    dim = sanitized.get(dim, dim)
                    select[self.get_dimension_index(dim)] = val
            if self._deep_indexable:
                selection = self.get(tuple(select),
                                     self.clone(shared_data=False))
            else:
                selection = self[tuple(select)]
        else:
            selection = self

        if type(selection) is not type(self):
            # Apply the selection on the selected object of a different type
            val_dim = ['value'] if selection.vdims else []
            key_dims = selection.dimensions('key', label=True) + val_dim
            if any(kw in key_dims for kw in kwargs):
                selection = selection.select(selection_specs, **kwargs)
        elif selection._deep_indexable:
            # Apply the deep selection on each item in local selection
            items = []
            for k, v in selection.items():
                val_dim = ['value'] if v.vdims else []
                dims = list(zip(*[(sanitize_identifier(kd), kd)
                                  for kd in v.dimensions('key', label=True)]))
                kdims, skdims = dims if dims else ([], [])
                key_dims = list(kdims) + list(skdims) + val_dim
                if any(kw in key_dims for kw in kwargs):
                    items.append((k, v.select(selection_specs, **kwargs)))
                else:
                    items.append((k, v))
            selection = selection.clone(items)
        return selection

    def dimension_values(self, dimension):
        """
        Returns the values along the specified dimension. This method
        must be implemented for all Dimensioned type.

        This base implementation only knows about the constant
        dimensions and raises an Exception for any other name.
        """
        val = self._cached_constants.get(dimension, None)
        # Compare against None explicitly so that falsy constants such
        # as 0 or an empty string are still returned.
        if val is not None:
            return val
        else:
            raise Exception("Dimension %s not found in %s." %
                            (dimension, self.__class__.__name__))

    def range(self, dimension, data_range=True):
        """
        Returns the range of values along the specified dimension.

        If data_range is True, the data may be used to try and infer
        the appropriate range. Otherwise, (None,None) is returned to
        indicate that no range is defined.
        """
        dimension = self.get_dimension(dimension)
        if dimension is None:
            return (None, None)
        if dimension.range != (None, None):
            return dimension.range
        elif not data_range:
            return (None, None)
        soft_range = [r for r in dimension.soft_range
                      if r is not None]
        if dimension in self.kdims or dimension in self.vdims:
            dim_vals = self.dimension_values(dimension.name)
            return find_range(dim_vals, soft_range)
        # Otherwise combine the ranges of every traversed object that
        # declares this dimension as a key or value dimension.
        dname = dimension.name
        match_fn = lambda x: dname in x.dimensions(['key', 'value'], True)
        range_fn = lambda x: x.range(dname)
        ranges = self.traverse(range_fn, [match_fn])
        drange = max_range(ranges)
        return drange

    def __repr__(self):
        return PrettyPrinter.pprint(self)

    def __call__(self, options=None, **kwargs):
        """
        Apply the supplied options to a clone of the object which is
        then returned.
        """
        groups = set(Store.options().groups.keys())
        if kwargs and set(kwargs) <= groups:
            if not all(isinstance(v, dict) for v in kwargs.values()):
                raise Exception("The %s options must be specified using dictionary groups" %
                                ','.join(repr(k) for k in kwargs.keys()))

            # Build the most specific type[.group[.label]] identifier
            # available for this object.
            sanitized_group = sanitize_identifier(self.group)
            if self.label:
                identifier = ('%s.%s.%s' % (self.__class__.__name__,
                                            sanitized_group,
                                            sanitize_identifier(self.label)))
            elif sanitized_group != self.__class__.__name__:
                identifier = '%s.%s' % (self.__class__.__name__, sanitized_group)
            else:
                identifier = self.__class__.__name__

            kwargs = {k: {identifier: v} for k, v in kwargs.items()}
        deep_clone = self.map(lambda x: x.clone(id=x.id))
        StoreOptions.set_options(deep_clone, options, **kwargs)
        return deep_clone
return info except KeyError: return driver = webdriver.PhantomJS() jieba.set_dictionary('dict.txt.big.txt') jieba.enable_parallel(4) big_dict = OrderedDict() for each_link in wechat_links(driver): print(each_link) article = get_article(each_link) if article is not None: for each_word_cut in word_cuts(article): if len(each_word_cut) > 1: if big_dict.get(each_word_cut) is None: big_dict[each_word_cut] = 1 else: big_dict[each_word_cut] += 1 driver.quit() big_dict = sorted(big_dict.items(), key=lambda d: d[1], reverse=True) now = datetime.datetime.now() today = now.strftime('%Y%m%d%H%M%S') pfile = open("wechat_word_cut"+today+".pkl", "wb", buffering=1024) pfile.write(dumps(big_dict)) pfile.close() f = open("wechat_word_cut"+today+".csv", "wb", buffering=1024) for each_word_cut, word_count in big_dict: line = each_word_cut + "," + str(word_count) + chr(10)
class PositionTracker(object):
    """
    Tracks the positions held per sid together with ordered per-sid
    mappings (amounts, last sale prices and multipliers) that are used
    to compute position values and exposures, the dividends that have
    been earned but not yet paid, and the dates on which positions are
    to be auto-closed.
    """

    def __init__(self, asset_finder):
        self.asset_finder = asset_finder

        # sid => position object
        self.positions = positiondict()
        # Arrays for quick calculations of positions value
        self._position_amounts = OrderedDict()
        self._position_last_sale_prices = OrderedDict()
        self._position_value_multipliers = OrderedDict()
        self._position_exposure_multipliers = OrderedDict()
        self._position_payout_multipliers = OrderedDict()
        self._unpaid_dividends = pd.DataFrame(
            columns=zp.DIVIDEND_PAYMENT_FIELDS,
        )
        self._positions_store = zp.Positions()

        # Dict, keyed on dates, that contains lists of close position events
        # for any Assets in this tracker's positions
        self._auto_close_position_sids = {}

    def _update_asset(self, sid):
        """
        Ensure the value, exposure and payout multipliers are recorded
        for the given sid, looking the asset up through the asset
        finder the first time the sid is seen. Futures additionally
        get an auto-close date registered.

        Raises PositionTrackerMissingAssetFinder when the multipliers
        are missing and no asset finder is available.
        """
        try:
            self._position_value_multipliers[sid]
            self._position_exposure_multipliers[sid]
            self._position_payout_multipliers[sid]
        except KeyError:
            # Check if there is an AssetFinder
            if self.asset_finder is None:
                raise PositionTrackerMissingAssetFinder()

            # Collect the value multipliers from applicable sids
            asset = self.asset_finder.retrieve_asset(sid)
            if isinstance(asset, Equity):
                self._position_value_multipliers[sid] = 1
                self._position_exposure_multipliers[sid] = 1
                self._position_payout_multipliers[sid] = 0
            if isinstance(asset, Future):
                self._position_value_multipliers[sid] = 0
                self._position_exposure_multipliers[sid] = \
                    asset.contract_multiplier
                self._position_payout_multipliers[sid] = \
                    asset.contract_multiplier
                # Futures are closed on their notice_date
                if asset.notice_date:
                    self._insert_auto_close_position_date(
                        dt=asset.notice_date,
                        sid=sid
                    )
                # If the Future does not have a notice_date, it will be closed
                # on its expiration_date
                elif asset.expiration_date:
                    self._insert_auto_close_position_date(
                        dt=asset.expiration_date,
                        sid=sid
                    )

    def _insert_auto_close_position_date(self, dt, sid):
        """
        Inserts the given SID in to the list of positions to be auto-closed by
        the given dt.

        Parameters
        ----------
        dt : pandas.Timestamp
            The date before-which the given SID will be auto-closed
        sid : int
            The SID of the Asset to be auto-closed
        """
        self._auto_close_position_sids.setdefault(dt, set()).add(sid)

    def auto_close_position_events(self, next_trading_day):
        """
        Generates CLOSE_POSITION events for any SIDs whose auto-close date is
        before or equal to the given date.

        Parameters
        ----------
        next_trading_day : pandas.Timestamp
            The time before-which certain Assets need to be closed

        Yields
        ------
        Event
            A close position event for any sids that should be closed before
            the next_trading_day parameter
        """
        past_asset_end_dates = set()

        # Check the auto_close_position_dates dict for SIDs to close
        for date, sids in self._auto_close_position_sids.items():
            if date > next_trading_day:
                continue
            past_asset_end_dates.add(date)

            for sid in sids:
                # Yield a CLOSE_POSITION event
                event = Event({
                    'dt': date,
                    'type': DATASOURCE_TYPE.CLOSE_POSITION,
                    'sid': sid,
                })
                yield event

        # Clear out past dates
        while past_asset_end_dates:
            self._auto_close_position_sids.pop(past_asset_end_dates.pop())

    def update_last_sale(self, event):
        """
        Record the event's price and dt as the last sale of the event's
        sid and return the resulting cash adjustment. Returns 0 when no
        position is held for the sid or when the price is null.
        """
        # NOTE, PerformanceTracker already vetted as TRADE type
        sid = event.sid
        if sid not in self.positions:
            return 0

        price = event.price

        if checknull(price):
            return 0

        pos = self.positions[sid]
        old_price = pos.last_sale_price
        pos.last_sale_date = event.dt
        pos.last_sale_price = price
        self._position_last_sale_prices[sid] = price

        # Calculate cash adjustment on assets with multipliers
        return ((price - old_price)
                * self._position_payout_multipliers[sid]
                * pos.amount)

    def update_positions(self, positions):
        """
        Merge a sid => position mapping into the tracked positions and
        refresh the per-sid amounts, last sale prices and multipliers.
        """
        # update positions in batch
        self.positions.update(positions)
        for sid, pos in iteritems(positions):
            self._position_amounts[sid] = pos.amount
            self._position_last_sale_prices[sid] = pos.last_sale_price
            self._update_asset(sid)

    def update_position(self, sid, amount=None, last_sale_price=None,
                        last_sale_date=None, cost_basis=None):
        """
        Update the position held for sid with each keyword argument
        that is not None, leaving the other fields untouched.
        """
        pos = self.positions[sid]

        if amount is not None:
            pos.amount = amount
            self._position_amounts[sid] = amount
            self._position_values = None  # invalidate cache
            self._update_asset(sid=sid)
        if last_sale_price is not None:
            pos.last_sale_price = last_sale_price
            self._position_last_sale_prices[sid] = last_sale_price
            self._position_values = None  # invalidate cache
        if last_sale_date is not None:
            pos.last_sale_date = last_sale_date
        if cost_basis is not None:
            pos.cost_basis = cost_basis

    def execute_transaction(self, txn):
        """
        Apply the transaction to the position held for txn.sid and
        refresh the cached amount, last sale price and multipliers.
        """
        # Update Position
        # ----------------
        sid = txn.sid
        position = self.positions[sid]
        position.update(txn)
        self._position_amounts[sid] = position.amount
        self._position_last_sale_prices[sid] = position.last_sale_price
        self._update_asset(sid)

    def handle_commission(self, commission):
        # Adjust the cost basis of the stock if we own it
        if commission.sid in self.positions:
            self.positions[commission.sid].\
                adjust_commission_cost_basis(commission)

    @property
    def position_values(self):
        """
        List of amount * last sale price * value multiplier, one entry
        per tracked sid. The three mappings are zipped positionally, so
        the result relies on them sharing the same insertion order.
        """
        iter_amount_price_multiplier = zip(
            itervalues(self._position_amounts),
            itervalues(self._position_last_sale_prices),
            itervalues(self._position_value_multipliers),
        )
        return [
            price * amount * multiplier for
            price, amount, multiplier in iter_amount_price_multiplier
        ]

    @property
    def position_exposures(self):
        """
        List of amount * last sale price * exposure multiplier, one
        entry per tracked sid. The three mappings are zipped
        positionally, so the result relies on them sharing the same
        insertion order.
        """
        iter_amount_price_multiplier = zip(
            itervalues(self._position_amounts),
            itervalues(self._position_last_sale_prices),
            itervalues(self._position_exposure_multipliers),
        )
        return [
            price * amount * multiplier for
            price, amount, multiplier in iter_amount_price_multiplier
        ]

    def calculate_positions_value(self):
        "Sum of all position values, np.float64(0) when there are none."
        # Evaluate the property once; every access rebuilds the list.
        values = self.position_values
        if len(values) == 0:
            return np.float64(0)

        return sum(values)

    def calculate_positions_exposure(self):
        "Sum of all position exposures, np.float64(0) when there are none."
        # Evaluate the property once; every access rebuilds the list.
        exposures = self.position_exposures
        if len(exposures) == 0:
            return np.float64(0)

        return sum(exposures)

    def _longs_count(self):
        return sum(1 for i in self.position_exposures if i > 0)

    def _long_exposure(self):
        return sum(i for i in self.position_exposures if i > 0)

    def _long_value(self):
        return sum(i for i in self.position_values if i > 0)

    def _shorts_count(self):
        return sum(1 for i in self.position_exposures if i < 0)

    def _short_exposure(self):
        return sum(i for i in self.position_exposures if i < 0)

    def _short_value(self):
        return sum(i for i in self.position_values if i < 0)

    def _gross_exposure(self):
        return self._long_exposure() + abs(self._short_exposure())

    def _gross_value(self):
        return self._long_value() + abs(self._short_value())

    def _net_exposure(self):
        return self.calculate_positions_exposure()

    def _net_value(self):
        return self.calculate_positions_value()

    def handle_split(self, split):
        """
        Apply the split to the position held for split.sid and return
        the leftover cash. Returns None when no position is held.
        """
        if split.sid in self.positions:
            # Make the position object handle the split. It returns the
            # leftover cash from a fractional share, if there is any.
            position = self.positions[split.sid]
            leftover_cash = position.handle_split(split)
            self._position_amounts[split.sid] = position.amount
            self._position_last_sale_prices[split.sid] = \
                position.last_sale_price
            self._update_asset(split.sid)
            return leftover_cash

    def _maybe_earn_dividend(self, dividend):
        """
        Take a historical dividend record and return a Series with fields in
        zipline.protocol.DIVIDEND_FIELDS (plus an 'id' field) representing
        the cash/stock amount we are owed when the dividend is paid.
        """
        if dividend['sid'] in self.positions:
            return self.positions[dividend['sid']].earn_dividend(dividend)
        else:
            return zp.dividend_payment()

    def earn_dividends(self, dividend_frame):
        """
        Given a frame of dividends whose ex_dates are all the next trading day,
        calculate and store the cash and/or stock payments to be paid on each
        dividend's pay date.
        """
        earned = dividend_frame.apply(self._maybe_earn_dividend, axis=1)\
                               .dropna(how='all')
        if len(earned) > 0:
            # Store the earned dividends so that they can be paid on the
            # dividends' pay_dates.
            self._unpaid_dividends = pd.concat(
                [self._unpaid_dividends, earned],
            )

    def _maybe_pay_dividend(self, dividend):
        """
        Take a historical dividend record, look up any stored record of
        cash/stock we are owed for that dividend, and return a Series
        with fields drawn from zipline.protocol.DIVIDEND_PAYMENT_FIELDS.
        """
        try:
            unpaid_dividend = self._unpaid_dividends.loc[dividend['id']]
            return unpaid_dividend
        except KeyError:
            return zp.dividend_payment()

    def pay_dividends(self, dividend_frame):
        """
        Given a frame of dividends whose pay_dates are all the next trading
        day, grant the cash and/or stock payments that were calculated on the
        given dividends' ex dates.

        Returns the net cash payment summed over all paid dividends.
        """
        payments = dividend_frame.apply(self._maybe_pay_dividend, axis=1)\
                                 .dropna(how='all')

        # Mark these dividends as paid by dropping them from our unpaid
        # table. DataFrame.drop returns a new frame rather than modifying
        # in place, so the result has to be assigned back.
        self._unpaid_dividends = self._unpaid_dividends.drop(payments.index)

        # Add stock for any stock dividends paid.  Again, the values here may
        # be negative in the case of short positions.
        stock_payments = payments[payments['payment_sid'].notnull()]
        for _, row in stock_payments.iterrows():
            stock = row['payment_sid']
            share_count = row['share_count']
            # note we create a Position for stock dividend if we don't
            # already own the asset
            position = self.positions[stock]

            position.amount += share_count
            self._position_amounts[stock] = position.amount
            self._position_last_sale_prices[stock] = position.last_sale_price
            self._update_asset(stock)

        # Add cash equal to the net cash payed from all dividends.  Note that
        # "negative cash" is effectively paid if we're short an asset,
        # representing the fact that we're required to reimburse the owner of
        # the stock for any dividends paid while borrowing.
        net_cash_payment = payments['cash_amount'].fillna(0).sum()
        return net_cash_payment

    def maybe_create_close_position_transaction(self, event):
        """
        Build a Transaction that offsets the whole amount held for
        event.sid, priced at the event's price when it has one and at
        the last sale price otherwise. Returns None when no non-zero
        amount is recorded for the sid.
        """
        if not self._position_amounts.get(event.sid):
            return None
        if 'price' in event:
            price = event.price
        else:
            price = self._position_last_sale_prices[event.sid]
        txn = Transaction(
            sid=event.sid,
            amount=(-1 * self._position_amounts[event.sid]),
            dt=event.dt,
            price=price,
            commission=0,
            order_id=0
        )
        return txn

    def get_positions(self):
        """
        Synchronise the positions store with the tracked positions,
        removing entries whose amount is zero, and return the store.
        """
        positions = self._positions_store

        for sid, pos in iteritems(self.positions):

            if pos.amount == 0:
                # Clear out the position if it has become empty since the last
                # time get_positions was called.  Catching the KeyError is
                # faster than checking `if sid in positions`, and this can be
                # potentially called in a tight inner loop.
                try:
                    del positions[sid]
                except KeyError:
                    pass
                continue

            # Note that this will create a position if we don't currently have
            # an entry
            position = positions[sid]
            position.amount = pos.amount
            position.cost_basis = pos.cost_basis
            position.last_sale_price = pos.last_sale_price
        return positions

    def get_positions_list(self):
        "Return the to_dict() form of every position with a non-zero amount."
        positions = []
        for sid, pos in iteritems(self.positions):
            if pos.amount != 0:
                positions.append(pos.to_dict())
        return positions

    def __getstate__(self):
        state_dict = {}

        state_dict['asset_finder'] = self.asset_finder
        state_dict['positions'] = dict(self.positions)
        state_dict['unpaid_dividends'] = self._unpaid_dividends
        state_dict['auto_close_position_sids'] = self._auto_close_position_sids

        STATE_VERSION = 3
        state_dict[VERSION_LABEL] = STATE_VERSION
        return state_dict

    def __setstate__(self, state):
        OLDEST_SUPPORTED_STATE = 3
        version = state.pop(VERSION_LABEL)

        if version < OLDEST_SUPPORTED_STATE:
            raise BaseException("PositionTracker saved state is too old.")

        self.asset_finder = state['asset_finder']
        self.positions = positiondict()
        # note that positions_store is temporary and gets regened from
        # .positions
        self._positions_store = zp.Positions()

        self._unpaid_dividends = state['unpaid_dividends']
        self._auto_close_position_sids = state['auto_close_position_sids']

        # Arrays for quick calculations of positions value
        self._position_amounts = OrderedDict()
        self._position_last_sale_prices = OrderedDict()
        self._position_value_multipliers = OrderedDict()
        self._position_exposure_multipliers = OrderedDict()
        self._position_payout_multipliers = OrderedDict()

        # Update positions is called without a finder
        self.update_positions(state['positions'])
class PositionTracker(object):
    """
    Tracks the positions held per sid together with ordered per-sid
    mappings (amounts, last sale prices and multipliers) that are used
    to compute position values and exposures, and the dividends that
    have been earned but not yet paid.
    """

    def __init__(self):
        # sid => position object
        self.positions = positiondict()
        # Arrays for quick calculations of positions value
        self._position_amounts = OrderedDict()
        self._position_last_sale_prices = OrderedDict()
        self._position_value_multipliers = OrderedDict()
        self._position_exposure_multipliers = OrderedDict()
        self._position_payout_multipliers = OrderedDict()
        self._unpaid_dividends = pd.DataFrame(
            columns=zp.DIVIDEND_PAYMENT_FIELDS,
        )
        self._positions_store = zp.Positions()

    @with_environment()
    def _retrieve_asset(self, sid, env=None):
        "Look the sid up through the asset finder of the supplied env."
        return env.asset_finder.retrieve_asset(sid)

    def _update_multipliers(self, sid):
        """
        Ensure the value, exposure and payout multipliers are recorded
        for the given sid, retrieving the asset the first time the sid
        is seen.
        """
        try:
            self._position_value_multipliers[sid]
            self._position_exposure_multipliers[sid]
            self._position_payout_multipliers[sid]
        except KeyError:
            # Collect the value multipliers from applicable sids
            asset = self._retrieve_asset(sid)
            if isinstance(asset, Equity):
                self._position_value_multipliers[sid] = 1
                self._position_exposure_multipliers[sid] = 1
                self._position_payout_multipliers[sid] = 0
            if isinstance(asset, Future):
                self._position_value_multipliers[sid] = 0
                self._position_exposure_multipliers[sid] = \
                    asset.contract_multiplier
                self._position_payout_multipliers[sid] = \
                    asset.contract_multiplier

    def update_last_sale(self, event):
        """
        Record the event's price and dt as the last sale of the event's
        sid and return the resulting cash adjustment. Returns 0 when no
        position is held for the sid or when the price is null.
        """
        # NOTE, PerformanceTracker already vetted as TRADE type
        sid = event.sid
        if sid not in self.positions:
            return 0

        price = event.price

        if checknull(price):
            return 0

        pos = self.positions[sid]
        old_price = pos.last_sale_price
        pos.last_sale_date = event.dt
        pos.last_sale_price = price
        self._position_last_sale_prices[sid] = price

        # Calculate cash adjustment on assets with multipliers
        return ((price - old_price)
                * self._position_payout_multipliers[sid]
                * pos.amount)

    def update_positions(self, positions):
        """
        Merge a sid => position mapping into the tracked positions and
        refresh the per-sid amounts, last sale prices and multipliers.
        """
        # update positions in batch
        self.positions.update(positions)
        for sid, pos in iteritems(positions):
            self._position_amounts[sid] = pos.amount
            self._position_last_sale_prices[sid] = pos.last_sale_price
            self._update_multipliers(sid)

    def update_position(self, sid, amount=None, last_sale_price=None,
                        last_sale_date=None, cost_basis=None):
        """
        Update the position held for sid with each keyword argument
        that is not None, leaving the other fields untouched.
        """
        pos = self.positions[sid]

        if amount is not None:
            pos.amount = amount
            self._position_amounts[sid] = amount
            self._position_values = None  # invalidate cache
            self._update_multipliers(sid=sid)
        if last_sale_price is not None:
            pos.last_sale_price = last_sale_price
            self._position_last_sale_prices[sid] = last_sale_price
            self._position_values = None  # invalidate cache
        if last_sale_date is not None:
            pos.last_sale_date = last_sale_date
        if cost_basis is not None:
            pos.cost_basis = cost_basis

    def execute_transaction(self, txn):
        """
        Apply the transaction to the position held for txn.sid and
        refresh the cached amount, last sale price and multipliers.
        """
        # Update Position
        # ----------------
        sid = txn.sid
        position = self.positions[sid]
        position.update(txn)
        self._position_amounts[sid] = position.amount
        self._position_last_sale_prices[sid] = position.last_sale_price
        self._update_multipliers(sid)

    def handle_commission(self, commission):
        # Adjust the cost basis of the stock if we own it
        if commission.sid in self.positions:
            self.positions[commission.sid].\
                adjust_commission_cost_basis(commission)

    @property
    def position_values(self):
        """
        List of amount * last sale price * value multiplier, one entry
        per tracked sid. The three mappings are zipped positionally, so
        the result relies on them sharing the same insertion order.
        """
        iter_amount_price_multiplier = zip(
            itervalues(self._position_amounts),
            itervalues(self._position_last_sale_prices),
            itervalues(self._position_value_multipliers),
        )
        return [
            price * amount * multiplier for
            price, amount, multiplier in iter_amount_price_multiplier
        ]

    @property
    def position_exposures(self):
        """
        List of amount * last sale price * exposure multiplier, one
        entry per tracked sid. The three mappings are zipped
        positionally, so the result relies on them sharing the same
        insertion order.
        """
        iter_amount_price_multiplier = zip(
            itervalues(self._position_amounts),
            itervalues(self._position_last_sale_prices),
            itervalues(self._position_exposure_multipliers),
        )
        return [
            price * amount * multiplier for
            price, amount, multiplier in iter_amount_price_multiplier
        ]

    def calculate_positions_value(self):
        "Sum of all position values, np.float64(0) when there are none."
        # Evaluate the property once; every access rebuilds the list.
        values = self.position_values
        if len(values) == 0:
            return np.float64(0)

        return sum(values)

    def calculate_positions_exposure(self):
        "Sum of all position exposures, np.float64(0) when there are none."
        # Evaluate the property once; every access rebuilds the list.
        exposures = self.position_exposures
        if len(exposures) == 0:
            return np.float64(0)

        return sum(exposures)

    def _longs_count(self):
        return sum(1 for i in self.position_exposures if i > 0)

    def _long_exposure(self):
        return sum(i for i in self.position_exposures if i > 0)

    def _long_value(self):
        return sum(i for i in self.position_values if i > 0)

    def _shorts_count(self):
        return sum(1 for i in self.position_exposures if i < 0)

    def _short_exposure(self):
        return sum(i for i in self.position_exposures if i < 0)

    def _short_value(self):
        return sum(i for i in self.position_values if i < 0)

    def _gross_exposure(self):
        return self._long_exposure() + abs(self._short_exposure())

    def _gross_value(self):
        return self._long_value() + abs(self._short_value())

    def _net_exposure(self):
        return self.calculate_positions_exposure()

    def _net_value(self):
        return self.calculate_positions_value()

    def handle_split(self, split):
        """
        Apply the split to the position held for split.sid and return
        the leftover cash. Returns None when no position is held.
        """
        if split.sid in self.positions:
            # Make the position object handle the split. It returns the
            # leftover cash from a fractional share, if there is any.
            position = self.positions[split.sid]
            leftover_cash = position.handle_split(split)
            self._position_amounts[split.sid] = position.amount
            self._position_last_sale_prices[split.sid] = \
                position.last_sale_price
            self._update_multipliers(split.sid)
            return leftover_cash

    def _maybe_earn_dividend(self, dividend):
        """
        Take a historical dividend record and return a Series with fields in
        zipline.protocol.DIVIDEND_FIELDS (plus an 'id' field) representing
        the cash/stock amount we are owed when the dividend is paid.
        """
        if dividend['sid'] in self.positions:
            return self.positions[dividend['sid']].earn_dividend(dividend)
        else:
            return zp.dividend_payment()

    def earn_dividends(self, dividend_frame):
        """
        Given a frame of dividends whose ex_dates are all the next trading day,
        calculate and store the cash and/or stock payments to be paid on each
        dividend's pay date.
        """
        earned = dividend_frame.apply(self._maybe_earn_dividend, axis=1)\
                               .dropna(how='all')
        if len(earned) > 0:
            # Store the earned dividends so that they can be paid on the
            # dividends' pay_dates.
            self._unpaid_dividends = pd.concat(
                [self._unpaid_dividends, earned],
            )

    def _maybe_pay_dividend(self, dividend):
        """
        Take a historical dividend record, look up any stored record of
        cash/stock we are owed for that dividend, and return a Series
        with fields drawn from zipline.protocol.DIVIDEND_PAYMENT_FIELDS.
        """
        try:
            unpaid_dividend = self._unpaid_dividends.loc[dividend['id']]
            return unpaid_dividend
        except KeyError:
            return zp.dividend_payment()

    def pay_dividends(self, dividend_frame):
        """
        Given a frame of dividends whose pay_dates are all the next trading
        day, grant the cash and/or stock payments that were calculated on the
        given dividends' ex dates.

        Returns the net cash payment summed over all paid dividends.
        """
        payments = dividend_frame.apply(self._maybe_pay_dividend, axis=1)\
                                 .dropna(how='all')

        # Mark these dividends as paid by dropping them from our unpaid
        # table. DataFrame.drop returns a new frame rather than modifying
        # in place, so the result has to be assigned back.
        self._unpaid_dividends = self._unpaid_dividends.drop(payments.index)

        # Add stock for any stock dividends paid.  Again, the values here may
        # be negative in the case of short positions.
        stock_payments = payments[payments['payment_sid'].notnull()]
        for _, row in stock_payments.iterrows():
            stock = row['payment_sid']
            share_count = row['share_count']
            # note we create a Position for stock dividend if we don't
            # already own the asset
            position = self.positions[stock]

            position.amount += share_count
            self._position_amounts[stock] = position.amount
            self._position_last_sale_prices[stock] = position.last_sale_price
            self._update_multipliers(stock)

        # Add cash equal to the net cash payed from all dividends.  Note that
        # "negative cash" is effectively paid if we're short an asset,
        # representing the fact that we're required to reimburse the owner of
        # the stock for any dividends paid while borrowing.
        net_cash_payment = payments['cash_amount'].fillna(0).sum()
        return net_cash_payment

    def create_close_position_transaction(self, event):
        """
        Build a Transaction at the event's price that offsets the whole
        amount held for event.sid. Returns None when no non-zero amount
        is recorded for the sid.
        """
        if not self._position_amounts.get(event.sid):
            return None
        txn = Transaction(
            sid=event.sid,
            amount=(-1 * self._position_amounts[event.sid]),
            dt=event.dt,
            price=event.price,
            commission=0,
            order_id=0
        )
        return txn

    def get_positions(self):
        """
        Synchronise the positions store with the tracked positions,
        removing entries whose amount is zero, and return the store.
        """
        positions = self._positions_store

        for sid, pos in iteritems(self.positions):

            if pos.amount == 0:
                # Clear out the position if it has become empty since the last
                # time get_positions was called.  Catching the KeyError is
                # faster than checking `if sid in positions`, and this can be
                # potentially called in a tight inner loop.
                try:
                    del positions[sid]
                except KeyError:
                    pass
                continue

            # Note that this will create a position if we don't currently have
            # an entry
            position = positions[sid]
            position.amount = pos.amount
            position.cost_basis = pos.cost_basis
            position.last_sale_price = pos.last_sale_price
        return positions

    def get_positions_list(self):
        "Return the to_dict() form of every position with a non-zero amount."
        positions = []
        for sid, pos in iteritems(self.positions):
            if pos.amount != 0:
                positions.append(pos.to_dict())
        return positions

    def __getstate__(self):
        state_dict = {}

        state_dict['positions'] = dict(self.positions)
        state_dict['unpaid_dividends'] = self._unpaid_dividends

        STATE_VERSION = 1
        state_dict[VERSION_LABEL] = STATE_VERSION
        return state_dict

    def __setstate__(self, state):
        OLDEST_SUPPORTED_STATE = 1
        version = state.pop(VERSION_LABEL)

        if version < OLDEST_SUPPORTED_STATE:
            raise BaseException("PositionTracker saved state is too old.")

        self.positions = positiondict()
        # note that positions_store is temporary and gets regened from
        # .positions
        self._positions_store = zp.Positions()

        self._unpaid_dividends = state['unpaid_dividends']

        # Arrays for quick calculations of positions value
        self._position_amounts = OrderedDict()
        self._position_last_sale_prices = OrderedDict()
        self._position_value_multipliers = OrderedDict()
        self._position_exposure_multipliers = OrderedDict()
        self._position_payout_multipliers = OrderedDict()

        self.update_positions(state['positions'])
class Dimensioned(LabelledData):
    """
    Dimensioned is a base class that allows the data contents of a
    class to be associated with dimensions. The contents associated
    with dimensions may be partitioned into one of three types

    * key_dimensions: These are the dimensions that can be indexed via
      the __getitem__ method. Dimension objects supporting key
      dimensions must support indexing over these dimensions and may
      also support slicing. This list ordering of dimensions describes
      the positional components of each multi-dimensional indexing
      operation. For instance, if the key dimension names are 'weight'
      followed by 'height' for Dimensioned object 'obj', then
      obj[80,175] indexes a weight of 80 and height of 175.

    * value_dimensions: These dimensions correspond to any data held
      on the Dimensioned object not in the key dimensions. Indexing by
      value dimension is supported by dimension name (when there are
      multiple possible value dimensions); no slicing semantics is
      supported and all the data associated with that dimension will
      be returned at once. Note that it is not possible to mix
      value_dimensions and deep_dimensions.

    * deep_dimensions: These are dynamically computed dimensions that
      belong to other Dimensioned objects that are nested in the data.
      Objects that support this should enable the _deep_indexable
      flag. Note that it is not possible to mix value_dimensions and
      deep_dimensions.

    Dimensioned class support generalized methods for finding the
    range and type of values along a particular Dimension. The range
    method relies on the appropriate implementation of the
    dimension_values methods on subclasses.

    The index of an arbitrary dimension is its positional index in the
    list of all dimensions, starting with the key dimensions, followed
    by the value dimensions and ending with the deep dimensions.
    """

    constant_dimensions = param.Dict(default=OrderedDict(), doc="""
       A dictionary of Dimension:value pairs providing additional
       dimension information about the object.""")

    key_dimensions = param.List(bounds=(0, None), constant=True, doc="""
       The list of dimensions that may be used in indexing (and
       potential slicing) semantics. The order of the dimensions
       listed here determines the semantics of each component of a
       multi-dimensional indexing operation.""")

    value_dimensions = param.List(bounds=(0, None), constant=True, doc="""
       The list of dimensions used to describe the components of the
       data. If multiple value dimensions are supplied, a particular
       value dimension may be indexed by name after the key
       dimensions.""")

    group = param.String(default='Dimensioned', constant=True, doc="""
       A string describing the data wrapped by the object.""")

    __abstract = True
    _sorted = False
    _dim_groups = ['key_dimensions', 'value_dimensions', 'deep_dimensions']

    def __init__(self, data, **params):
        """
        Wrap any key, value and constant dimensions supplied as
        non-Dimension objects in Dimension before handing the
        parameters to the superclass, then cache the dimension names
        used by the lookup methods.
        """
        # Key and value dimensions are supplied as lists.
        for group in self._dim_groups[0:2]:
            if group in params:
                params[group] = [d if isinstance(d, Dimension) else Dimension(d)
                                 for d in params.pop(group)]

        # Constant dimensions are a mapping of dimension -> value. They
        # were previously never converted (the conversion branch was
        # unreachable), so non-Dimension keys failed on `d.name` below.
        if 'constant_dimensions' in params:
            constants = params.pop('constant_dimensions')
            params['constant_dimensions'] = OrderedDict(
                (d if isinstance(d, Dimension) else Dimension(d), val)
                for d, val in constants.items())

        super(Dimensioned, self).__init__(data, **params)
        self.ndims = len(self.key_dimensions)
        constant_dimensions = [(d.name, val) for d, val
                               in self.constant_dimensions.items()]
        self._cached_constants = OrderedDict(constant_dimensions)
        self._cached_index_names = [d.name for d in self.key_dimensions]
        self._cached_value_names = [d.name for d in self.value_dimensions]
        self._settings = None

    def _valid_dimensions(self, dimensions):
        """
        Validates key dimension input. A falsy input is returned
        unchanged, a single dimension is wrapped in a list, and an
        Exception is raised for any name that is not a key dimension.
        """
        if not dimensions:
            return dimensions
        elif not isinstance(dimensions, list):
            dimensions = [dimensions]

        for dim in dimensions:
            if dim not in self._cached_index_names:
                raise Exception("Supplied dimensions %s not found." % dim)
        return dimensions

    @property
    def deep_dimensions(self):
        "The list of deep dimensions"
        if self._deep_indexable and len(self):
            # Taken from the first nested item only.
            return self.values()[0].dimensions()
        else:
            return []

    def dimensions(self, selection='all', label=False):
        """
        Provides convenient access to Dimensions on nested
        Dimensioned objects. Dimensions can be selected by their
        type, i.e. 'key' or 'value' dimensions. By default 'all'
        dimensions are returned. If label is True the dimension
        names are returned instead of the Dimension objects.
        """
        lambdas = {'key': (lambda x: x.key_dimensions, {'full_breadth': False}),
                   'value': (lambda x: x.value_dimensions, {}),
                   'constant': (lambda x: x.constant_dimensions, {})}
        if selection == 'all':
            dims = [dim for group in self._dim_groups
                    for dim in getattr(self, group)]
        elif selection in ['key', 'value', 'constant']:
            lmbd, kwargs = lambdas[selection]
            key_traversal = self.traverse(lmbd, **kwargs)
            dims = [dim for keydims in key_traversal for dim in keydims]
        else:
            raise KeyError("Invalid selection %r, valid selections include"
                           "'all', 'value' and 'key' dimensions" % repr(selection))
        return [dim.name if label else dim for dim in dims]

    def get_dimension(self, dimension, default=None):
        "Access a Dimension object by name or index."
        all_dims = self.dimensions()
        if isinstance(dimension, int):
            return all_dims[dimension]
        else:
            return {dim.name: dim for dim in all_dims}.get(dimension, default)

    def get_dimension_index(self, dim):
        """
        Returns the index of the requested dimension. An integer is
        returned as-is when it is below the number of dimensions and
        raises IndexError otherwise; a name that is not found raises
        an Exception.
        """
        if isinstance(dim, int):
            if dim < len(self.dimensions()):
                return dim
            else:
                # Previously the exception was returned rather than
                # raised, handing callers an exception object as index.
                raise IndexError('Dimension index out of bounds')
        try:
            return [d.name for d in self.dimensions()].index(dim)
        except ValueError:
            raise Exception("Dimension %s not found in %s." %
                            (dim, self.__class__.__name__))

    def get_dimension_type(self, dim):
        """
        Returns the specified Dimension type if specified or
        if the dimension_values types are consistent otherwise
        None is returned.
        """
        dim_obj = self.get_dimension(dim)
        if dim_obj and dim_obj.type is not None:
            return dim_obj.type
        dim_vals = [type(v) for v in self.dimension_values(dim)]
        if len(set(dim_vals)) == 1:
            return dim_vals[0]
        else:
            return None

    def __getitem__(self, key):
        """
        Multi-dimensional indexing semantics is determined by the list
        of key_dimensions. For instance, the first indexing component
        will index the first key dimension.

        After the key dimensions are given, *either* a value dimension
        name may follow (if there are multiple value dimensions) *or*
        deep dimensions may then be listed (for applicable deep
        dimensions).

        This base implementation ignores the key and returns self.
        """
        return self

    def select(self, ignore_invalid=False, **kwargs):
        """
        Allows slicing or indexing into the Dimensioned object
        by supplying the dimension and index/slice as key
        value pairs. Keywords that are not dimension names raise a
        KeyError unless ignore_invalid is True, in which case they
        are dropped. Tuple values are converted to slices.
        """
        # Invariant across the comprehension, so compute it once.
        dimension_names = self.dimensions(label=True)
        valid_kwargs = {k: v for k, v in kwargs.items()
                        if k in dimension_names}
        if not len(valid_kwargs) == len(kwargs) and not ignore_invalid:
            raise KeyError("Invalid Dimension supplied.")
        kwargs = {k: kwargs[k] for k in valid_kwargs.keys()}
        deep_select = any([kw for kw in kwargs.keys()
                           if (kw in self.deep_dimensions)
                           and (kw not in self._cached_index_names)])
        selection_depth = (len(self.dimensions('key'))
                           if deep_select else self.ndims)
        selection = [slice(None) for i in range(selection_depth)]
        for dim, val in kwargs.items():
            if isinstance(val, tuple):
                val = slice(*val)
            selection[self.get_dimension_index(dim)] = val
        return self.__getitem__(tuple(selection))

    def dimension_values(self, dimension):
        """
        Returns the values along the specified dimension. This method
        must be implemented for all Dimensioned type.

        This base implementation only looks the name up among the
        cached constant dimensions and raises an Exception when no
        value is recorded for it.
        """
        val = self._cached_constants.get(dimension, None)
        # Compare against None so falsy constants (0, '', False) are
        # still returned and array values are not truth-tested.
        if val is not None:
            return val
        else:
            raise Exception("Dimension %s not found in %s." %
                            (dimension, self.__class__.__name__))

    def range(self, dim, data_range=True):
        """
        Returns the range of values along the specified dimension.

        If data_range is True, the data may be used to try and infer
        the appropriate range. Otherwise, (None,None) is returned to
        indicate that no range is defined. (None,None) is also
        returned when the dimension cannot be found.
        """
        dimension = self.get_dimension(dim)
        if dimension is None:
            # Unknown dimension: avoid an AttributeError on None.
            return (None, None)
        if dimension.range != (None, None):
            return dimension.range
        elif not data_range:
            return (None, None)
        soft_range = [r for r in dimension.soft_range
                      if r is not None]
        dim_vals = self.dimension_values(dimension.name)
        try:
            dim_vals = np.concatenate([dim_vals, soft_range])
            return np.min(dim_vals), np.max(dim_vals)
        except Exception:
            # Fall back to ordering the values directly; still
            # best-effort, so any failure here yields (None, None).
            try:
                if dim in self.dimensions() and len(dim_vals):
                    if not self._sorted:
                        dim_vals = sorted(dim_vals)
                    return (dim_vals[0], dim_vals[-1])
            except Exception:
                pass
            return (None, None)

    def __repr__(self):
        return PrettyPrinter.pprint(self)

    def __call__(self, options=None, **kwargs):
        """
        Apply the supplied options to a clone of the object which is
        then returned.
        """
        deep_clone = self.map(lambda x: x.clone(id=x.id))
        StoreOptions.set_options(deep_clone, options, **kwargs)
        return deep_clone
class Dimensioned(LabelledData):
    """
    Dimensioned is a base class that allows the data contents of a
    class to be associated with dimensions. The contents associated
    with dimensions may be partitioned into one of three types

    * key_dimensions: These are the dimensions that can be indexed via
      the __getitem__ method. Dimension objects supporting key
      dimensions must support indexing over these dimensions and may
      also support slicing. This list ordering of dimensions describes
      the positional components of each multi-dimensional indexing
      operation. For instance, if the key dimension names are 'weight'
      followed by 'height' for Dimensioned object 'obj', then
      obj[80,175] indexes a weight of 80 and height of 175.

    * value_dimensions: These dimensions correspond to any data held
      on the Dimensioned object not in the key dimensions. Indexing by
      value dimension is supported by dimension name (when there are
      multiple possible value dimensions); no slicing semantics is
      supported and all the data associated with that dimension will
      be returned at once. Note that it is not possible to mix
      value_dimensions and deep_dimensions.

    * deep_dimensions: These are dynamically computed dimensions that
      belong to other Dimensioned objects that are nested in the data.
      Objects that support this should enable the _deep_indexable
      flag. Note that it is not possible to mix value_dimensions and
      deep_dimensions.

    Dimensioned class support generalized methods for finding the
    range and type of values along a particular Dimension. The range
    method relies on the appropriate implementation of the
    dimension_values methods on subclasses.

    The index of an arbitrary dimension is its positional index in the
    list of all dimensions, starting with the key dimensions, followed
    by the value dimensions and ending with the deep dimensions.
    """

    constant_dimensions = param.Dict(default=OrderedDict(), doc="""
       A dictionary of Dimension:value pairs providing additional
       dimension information about the object.""")

    key_dimensions = param.List(bounds=(0, None), constant=True, doc="""
       The list of dimensions that may be used in indexing (and
       potential slicing) semantics. The order of the dimensions
       listed here determines the semantics of each component of a
       multi-dimensional indexing operation.""")

    value_dimensions = param.List(bounds=(0, None), constant=True, doc="""
       The list of dimensions used to describe the components of the
       data. If multiple value dimensions are supplied, a particular
       value dimension may be indexed by name after the key
       dimensions.""")

    group = param.String(default='Dimensioned', constant=True, doc="""
       A string describing the data wrapped by the object.""")

    __abstract = True
    _sorted = False
    _dim_groups = ['key_dimensions', 'value_dimensions', 'deep_dimensions']

    def __init__(self, data, **params):
        """
        Convert key, value and constant dimensions given as
        non-Dimension objects into Dimension instances, initialise
        the superclass with the result and cache the dimension names.
        """
        # List-valued dimension groups (key and value dimensions).
        for group in self._dim_groups[0:2]:
            if group not in params:
                continue
            params[group] = [d if isinstance(d, Dimension) else Dimension(d)
                             for d in params.pop(group)]

        # The mapping-valued constant dimensions were never reached by
        # the original loop; convert their keys here so that the
        # `d.name` lookup below works for non-Dimension keys as well.
        if 'constant_dimensions' in params:
            supplied = params.pop('constant_dimensions')
            params['constant_dimensions'] = OrderedDict(
                (d if isinstance(d, Dimension) else Dimension(d), val)
                for d, val in supplied.items())

        super(Dimensioned, self).__init__(data, **params)
        self.ndims = len(self.key_dimensions)
        constant_dimensions = [(d.name, val) for d, val
                               in self.constant_dimensions.items()]
        self._cached_constants = OrderedDict(constant_dimensions)
        self._cached_index_names = [d.name for d in self.key_dimensions]
        self._cached_value_names = [d.name for d in self.value_dimensions]
        self._settings = None

    def _valid_dimensions(self, dimensions):
        """
        Validates key dimension input. Falsy input is passed through,
        a non-list input is wrapped in a list, and any entry that is
        not a key dimension name raises an Exception.
        """
        if not dimensions:
            return dimensions
        elif not isinstance(dimensions, list):
            dimensions = [dimensions]

        for dim in dimensions:
            if dim not in self._cached_index_names:
                raise Exception("Supplied dimensions %s not found." % dim)
        return dimensions

    @property
    def deep_dimensions(self):
        "The list of deep dimensions"
        if self._deep_indexable and len(self):
            # Only the first nested item is consulted.
            return self.values()[0].dimensions()
        else:
            return []

    def dimensions(self, selection='all', label=False):
        """
        Provides convenient access to Dimensions on nested
        Dimensioned objects. Dimensions can be selected by their
        type, i.e. 'key' or 'value' dimensions. By default 'all'
        dimensions are returned. With label=True the names of the
        dimensions are returned rather than the objects.
        """
        lambdas = {'key': (lambda x: x.key_dimensions, {'full_breadth': False}),
                   'value': (lambda x: x.value_dimensions, {}),
                   'constant': (lambda x: x.constant_dimensions, {})}
        if selection == 'all':
            dims = [dim for group in self._dim_groups
                    for dim in getattr(self, group)]
        elif selection in ['key', 'value', 'constant']:
            lmbd, kwargs = lambdas[selection]
            key_traversal = self.traverse(lmbd, **kwargs)
            dims = [dim for keydims in key_traversal for dim in keydims]
        else:
            raise KeyError("Invalid selection %r, valid selections include"
                           "'all', 'value' and 'key' dimensions" % repr(selection))
        return [dim.name if label else dim for dim in dims]

    def get_dimension(self, dimension, default=None):
        "Access a Dimension object by name or index."
        all_dims = self.dimensions()
        if isinstance(dimension, int):
            return all_dims[dimension]
        else:
            return {dim.name: dim for dim in all_dims}.get(dimension, default)

    def get_dimension_index(self, dim):
        """
        Returns the index of the requested dimension. Integer input
        below the number of dimensions is returned unchanged and
        raises IndexError otherwise; an unknown name raises an
        Exception.
        """
        if isinstance(dim, int):
            if dim < len(self.dimensions()):
                return dim
            else:
                # The exception used to be returned instead of raised.
                raise IndexError('Dimension index out of bounds')
        try:
            return [d.name for d in self.dimensions()].index(dim)
        except ValueError:
            raise Exception("Dimension %s not found in %s." %
                            (dim, self.__class__.__name__))

    def get_dimension_type(self, dim):
        """
        Returns the specified Dimension type if specified or
        if the dimension_values types are consistent otherwise
        None is returned.
        """
        dim_obj = self.get_dimension(dim)
        if dim_obj and dim_obj.type is not None:
            return dim_obj.type
        dim_vals = [type(v) for v in self.dimension_values(dim)]
        if len(set(dim_vals)) == 1:
            return dim_vals[0]
        else:
            return None

    def __getitem__(self, key):
        """
        Multi-dimensional indexing semantics is determined by the list
        of key_dimensions. For instance, the first indexing component
        will index the first key dimension.

        After the key dimensions are given, *either* a value dimension
        name may follow (if there are multiple value dimensions) *or*
        deep dimensions may then be listed (for applicable deep
        dimensions).

        This base implementation ignores the key and returns self.
        """
        return self

    def select(self, ignore_invalid=False, **kwargs):
        """
        Allows slicing or indexing into the Dimensioned object
        by supplying the dimension and index/slice as key
        value pairs. A KeyError is raised for keywords that are not
        dimension names unless ignore_invalid is True, in which case
        they are discarded. Tuple values are turned into slices.
        """
        # Loop-invariant, so look the names up a single time.
        dimension_names = self.dimensions(label=True)
        valid_kwargs = {k: v for k, v in kwargs.items()
                        if k in dimension_names}
        if not len(valid_kwargs) == len(kwargs) and not ignore_invalid:
            raise KeyError("Invalid Dimension supplied.")
        kwargs = {k: kwargs[k] for k in valid_kwargs.keys()}
        deep_select = any([kw for kw in kwargs.keys()
                           if (kw in self.deep_dimensions)
                           and (kw not in self._cached_index_names)])
        selection_depth = (len(self.dimensions('key'))
                           if deep_select else self.ndims)
        selection = [slice(None) for i in range(selection_depth)]
        for dim, val in kwargs.items():
            if isinstance(val, tuple):
                val = slice(*val)
            selection[self.get_dimension_index(dim)] = val
        return self.__getitem__(tuple(selection))

    def dimension_values(self, dimension):
        """
        Returns the values along the specified dimension. This method
        must be implemented for all Dimensioned type.

        The base implementation consults only the cached constant
        dimensions and raises an Exception if the name has no value
        recorded there.
        """
        val = self._cached_constants.get(dimension, None)
        # An explicit None test keeps falsy constants such as 0 valid
        # and avoids truth-testing array values.
        if val is not None:
            return val
        else:
            raise Exception("Dimension %s not found in %s." %
                            (dimension, self.__class__.__name__))

    def range(self, dim, data_range=True):
        """
        Returns the range of values along the specified dimension.

        If data_range is True, the data may be used to try and infer
        the appropriate range. Otherwise, (None,None) is returned to
        indicate that no range is defined. (None,None) is also
        returned when the dimension cannot be found.
        """
        dimension = self.get_dimension(dim)
        if dimension is None:
            return (None, None)
        if dimension.range != (None, None):
            return dimension.range
        elif not data_range:
            return (None, None)
        soft_range = [r for r in dimension.soft_range
                      if r is not None]
        dim_vals = self.dimension_values(dimension.name)
        try:
            dim_vals = np.concatenate([dim_vals, soft_range])
            return np.min(dim_vals), np.max(dim_vals)
        except Exception:
            # Best-effort fallback on plain ordering of the values;
            # a failure here deliberately degrades to (None, None).
            try:
                if dim in self.dimensions() and len(dim_vals):
                    if not self._sorted:
                        dim_vals = sorted(dim_vals)
                    return (dim_vals[0], dim_vals[-1])
            except Exception:
                pass
            return (None, None)

    def __repr__(self):
        return PrettyPrinter.pprint(self)

    def __call__(self, options=None, **kwargs):
        """
        Apply the supplied options to a clone of the object which is
        then returned.
        """
        deep_clone = self.map(lambda x: x.clone(id=x.id))
        StoreOptions.set_options(deep_clone, options, **kwargs)
        return deep_clone
class Dimensioned(LabelledData):
    """
    Dimensioned is a base class that allows the data contents of a
    class to be associated with dimensions. The contents associated
    with dimensions may be partitioned into one of three types

    * key dimensions: These are the dimensions that can be indexed via
      the __getitem__ method. Dimension objects supporting key
      dimensions must support indexing over these dimensions and may
      also support slicing. This list ordering of dimensions describes
      the positional components of each multi-dimensional indexing
      operation. For instance, if the key dimension names are 'weight'
      followed by 'height' for Dimensioned object 'obj', then
      obj[80,175] indexes a weight of 80 and height of 175.

      Accessed using either kdims or key_dimensions.

    * value dimensions: These dimensions correspond to any data held
      on the Dimensioned object not in the key dimensions. Indexing by
      value dimension is supported by dimension name (when there are
      multiple possible value dimensions); no slicing semantics is
      supported and all the data associated with that dimension will
      be returned at once. Note that it is not possible to mix value
      dimensions and deep dimensions.

      Accessed using either vdims or value_dimensions.

    * deep dimensions: These are dynamically computed dimensions that
      belong to other Dimensioned objects that are nested in the data.
      Objects that support this should enable the _deep_indexable
      flag. Note that it is not possible to mix value dimensions and
      deep dimensions.

      Accessed using either ddims or deep_dimensions.

    Dimensioned class support generalized methods for finding the
    range and type of values along a particular Dimension. The range
    method relies on the appropriate implementation of the
    dimension_values methods on subclasses.

    The index of an arbitrary dimension is its positional index in the
    list of all dimensions, starting with the key dimensions, followed
    by the value dimensions and ending with the deep dimensions.
    """

    cdims = param.Dict(default=OrderedDict(), doc="""
       The constant dimensions defined as a dictionary of
       Dimension:value pairs providing additional dimension
       information about the object.

       Aliased with constant_dimensions.""")

    kdims = param.List(bounds=(0, None), constant=True, doc="""
       The key dimensions defined as list of dimensions that may be
       used in indexing (and potential slicing) semantics. The order
       of the dimensions listed here determines the semantics of each
       component of a multi-dimensional indexing operation.

       Aliased with key_dimensions.""")

    vdims = param.List(bounds=(0, None), constant=True, doc="""
       The value dimensions defined as the list of dimensions used to
       describe the components of the data. If multiple value
       dimensions are supplied, a particular value dimension may be
       indexed by name after the key dimensions.

       Aliased with value_dimensions.""")

    group = param.String(default='Dimensioned', constant=True, doc="""
       A string describing the data wrapped by the object.""")

    __abstract = True
    _sorted = False
    _dim_groups = ['kdims', 'vdims', 'cdims', 'ddims']
    _dim_aliases = dict(key_dimensions='kdims', value_dimensions='vdims',
                        constant_dimensions='cdims', deep_dimensions='ddims')

    # Long-name aliases

    @property
    def key_dimensions(self):
        return self.kdims

    @property
    def value_dimensions(self):
        return self.vdims

    @property
    def constant_dimensions(self):
        return self.cdims

    @property
    def deep_dimensions(self):
        return self.ddims

    def __init__(self, data, **params):
        """
        Accept dimensions under either their short names or their
        long-name aliases, wrap any non-Dimension entries in
        Dimension, initialise the superclass and cache the dimension
        names used by the lookup methods.
        """
        for group in self._dim_groups + list(self._dim_aliases.keys()):
            # Deep dimensions are computed (see ddims), never supplied.
            if group in ['deep_dimensions', 'ddims']:
                continue
            if group in params:
                if group in self._dim_aliases:
                    # Re-key an alias onto its short parameter name.
                    params[self._dim_aliases[group]] = params.pop(group)
                    group = self._dim_aliases[group]
                if group == 'cdims':
                    dimensions = {d if isinstance(d, Dimension) else Dimension(d): val
                                  for d, val in params.pop(group).items()}
                else:
                    dimensions = [d if isinstance(d, Dimension) else Dimension(d)
                                  for d in params.pop(group)]
                params[group] = dimensions
        super(Dimensioned, self).__init__(data, **params)
        self.ndims = len(self.kdims)
        cdims = [(d.name, val) for d, val in self.cdims.items()]
        self._cached_constants = OrderedDict(cdims)
        self._cached_index_names = [d.name for d in self.kdims]
        self._cached_value_names = [d.name for d in self.vdims]
        self._settings = None

    def _valid_dimensions(self, dimensions):
        """
        Validates key dimension input. Falsy input is returned
        unchanged, a non-list input is wrapped in a list, and an
        Exception is raised for any name that is not a key dimension.
        """
        if not dimensions:
            return dimensions
        elif not isinstance(dimensions, list):
            dimensions = [dimensions]

        for dim in dimensions:
            if dim not in self._cached_index_names:
                raise Exception("Supplied dimensions %s not found." % dim)
        return dimensions

    @property
    def ddims(self):
        "The list of deep dimensions"
        if self._deep_indexable and len(self):
            # Taken from the first nested item only.
            return self.values()[0].dimensions()
        else:
            return []

    def dimensions(self, selection='all', label=False):
        """
        Provides convenient access to Dimensions on nested
        Dimensioned objects. Dimensions can be selected by their
        type, i.e. 'key' or 'value' dimensions. By default 'all'
        dimensions are returned. A list of type names selects the
        corresponding dimensions declared directly on this object.
        With label=True names are returned instead of objects.
        """
        lambdas = {'k': (lambda x: x.kdims, {'full_breadth': False}),
                   'v': (lambda x: x.vdims, {}),
                   'c': (lambda x: x.cdims, {})}
        aliases = {'key': 'k', 'value': 'v', 'constant': 'c'}
        if selection == 'all':
            dims = [dim for group in self._dim_groups
                    for dim in getattr(self, group)]
        elif isinstance(selection, list):
            dims = [dim for group in selection
                    for dim in getattr(self, '%sdims' % aliases.get(group))]
        elif aliases.get(selection) in lambdas:
            selection = aliases.get(selection, selection)
            lmbd, kwargs = lambdas[selection]
            key_traversal = self.traverse(lmbd, **kwargs)
            dims = [dim for keydims in key_traversal for dim in keydims]
        else:
            raise KeyError("Invalid selection %r, valid selections include"
                           "'all', 'value' and 'key' dimensions" % repr(selection))
        return [dim.name if label else dim for dim in dims]

    def get_dimension(self, dimension, default=None):
        "Access a Dimension object by name or index."
        all_dims = self.dimensions()
        if isinstance(dimension, Dimension):
            dimension = dimension.name
        if isinstance(dimension, int) and dimension < len(all_dims):
            return all_dims[dimension]
        else:
            return {dim.name: dim for dim in all_dims}.get(dimension, default)

    def get_dimension_index(self, dim):
        """
        Returns the index of the requested dimension. Integer input
        below the number of dimensions is returned unchanged and
        raises IndexError otherwise. Names are also matched against
        the sanitized forms of the key dimension names; an unknown
        name raises an Exception.
        """
        if isinstance(dim, int):
            if dim < len(self.dimensions()):
                return dim
            else:
                # The exception used to be returned instead of raised.
                raise IndexError('Dimension index out of bounds')
        try:
            sanitized = {sanitize_identifier(kd): kd
                         for kd in self._cached_index_names}
            return [d.name for d in self.dimensions()].index(sanitized.get(dim, dim))
        except ValueError:
            raise Exception("Dimension %s not found in %s." %
                            (dim, self.__class__.__name__))

    def get_dimension_type(self, dim):
        """
        Returns the specified Dimension type if specified or
        if the dimension_values types are consistent otherwise
        None is returned.
        """
        dim_obj = self.get_dimension(dim)
        if dim_obj and dim_obj.type is not None:
            return dim_obj.type
        dim_vals = [type(v) for v in self.dimension_values(dim)]
        if len(set(dim_vals)) == 1:
            return dim_vals[0]
        else:
            return None

    def __getitem__(self, key):
        """
        Multi-dimensional indexing semantics is determined by the list
        of key dimensions. For instance, the first indexing component
        will index the first key dimension.

        After the key dimensions are given, *either* a value dimension
        name may follow (if there are multiple value dimensions) *or*
        deep dimensions may then be listed (for applicable deep
        dimensions).

        This base implementation ignores the key and returns self.
        """
        return self

    def select(self, selection_specs=None, **kwargs):
        """
        Allows slicing or indexing into the Dimensioned object
        by supplying the dimension and index/slice as key
        value pairs. Select descends recursively through the
        data structure applying the key dimension selection.
        The 'value' keyword allows selecting the
        value dimensions on objects which have any declared.

        The selection may also be selectively applied to
        specific objects by supplying the selection_specs
        as an iterable of type.group.label specs, types or
        functions.
        """

        # Apply all indexes applying on this object
        val_dim = ['value'] if self.vdims else []
        sanitized = {sanitize_identifier(kd): kd
                     for kd in self._cached_index_names}
        local_dims = (self._cached_index_names
                      + list(sanitized.keys()) + val_dim)
        local_kwargs = {k: v for k, v in kwargs.items()
                        if k in local_dims}

        # Check selection_spec applies
        if selection_specs is not None:
            matches = any(self.matches(spec)
                          for spec in selection_specs)
        else:
            matches = True

        if local_kwargs and matches:
            select = [slice(None) for i in range(self.ndims)]
            for dim, val in local_kwargs.items():
                if dim == 'value':
                    # The value selection is appended after the key
                    # dimension components.
                    select += [val]
                else:
                    if isinstance(val, tuple):
                        val = slice(*val)
                    dim = sanitized.get(dim, dim)
                    select[self.get_dimension_index(dim)] = val
            if self._deep_indexable:
                selection = self.get(tuple(select),
                                     self.clone(shared_data=False))
            else:
                selection = self[tuple(select)]
        else:
            selection = self

        if type(selection) is not type(self):
            # Apply the selection on the selected object of a different type
            val_dim = ['value'] if selection.vdims else []
            key_dims = selection.dimensions('key', label=True) + val_dim
            if any(kw in key_dims for kw in kwargs):
                selection = selection.select(selection_specs, **kwargs)
        elif selection._deep_indexable:
            # Apply the deep selection on each item in local selection
            items = []
            for k, v in selection.items():
                val_dim = ['value'] if v.vdims else []
                dims = list(zip(*[(sanitize_identifier(kd), kd)
                                  for kd in v.dimensions('key', label=True)]))
                kdims, skdims = dims if dims else ([], [])
                key_dims = list(kdims) + list(skdims) + val_dim
                if any(kw in key_dims for kw in kwargs):
                    items.append((k, v.select(selection_specs, **kwargs)))
                else:
                    items.append((k, v))
            selection = selection.clone(items)
        return selection

    def dimension_values(self, dimension):
        """
        Returns the values along the specified dimension. This method
        must be implemented for all Dimensioned type.

        The base implementation consults only the cached constant
        dimensions and raises an Exception if the name has no value
        recorded there.
        """
        val = self._cached_constants.get(dimension, None)
        # An explicit None test keeps falsy constants such as 0 valid
        # and avoids truth-testing array values.
        if val is not None:
            return val
        else:
            raise Exception("Dimension %s not found in %s." %
                            (dimension, self.__class__.__name__))

    def range(self, dimension, data_range=True):
        """
        Returns the range of values along the specified dimension.

        If data_range is True, the data may be used to try and infer
        the appropriate range. Otherwise, (None,None) is returned to
        indicate that no range is defined. (None,None) is also
        returned when the dimension cannot be found.
        """
        dimension = self.get_dimension(dimension)
        if dimension is None:
            return (None, None)
        if dimension.range != (None, None):
            return dimension.range
        elif not data_range:
            return (None, None)
        soft_range = [r for r in dimension.soft_range
                      if r is not None]
        if dimension in self.kdims or dimension in self.vdims:
            dim_vals = self.dimension_values(dimension.name)
            return find_range(dim_vals, soft_range)
        # Not declared directly on this object: combine the ranges
        # reported by the traversed objects that declare it.
        dname = dimension.name
        match_fn = lambda x: dname in x.dimensions(['key', 'value'], True)
        range_fn = lambda x: x.range(dname)
        ranges = self.traverse(range_fn, [match_fn])
        drange = max_range(ranges)
        return drange

    def __repr__(self):
        return PrettyPrinter.pprint(self)

    def __call__(self, options=None, **kwargs):
        """
        Apply the supplied options to a clone of the object which is
        then returned. Note that if no options are supplied at all,
        all ids are reset.
        """
        groups = set(Store.options().groups.keys())
        if kwargs and set(kwargs) <= groups:
            if not all(isinstance(v, dict) for v in kwargs.values()):
                raise Exception("The %s options must be specified using dictionary groups" %
                                ','.join(repr(k) for k in kwargs.keys()))

            # Check whether the user is specifying targets (such as 'Image.Foo')
            entries = Store.options().children
            targets = [k.split('.')[0] in entries
                       for grp in kwargs.values() for k in grp]
            if any(targets) and not all(targets):
                raise Exception("Cannot mix target specification keys such as 'Image' with non-target keywords.")
            elif not any(targets):
                # Not targets specified - add current object as target
                sanitized_group = sanitize_identifier(self.group)
                if self.label:
                    identifier = ('%s.%s.%s' % (self.__class__.__name__,
                                                sanitized_group,
                                                sanitize_identifier(self.label)))
                elif sanitized_group != self.__class__.__name__:
                    identifier = '%s.%s' % (self.__class__.__name__, sanitized_group)
                else:
                    identifier = self.__class__.__name__

                kwargs = {k: {identifier: v} for k, v in kwargs.items()}

        if options is None and kwargs == {}:
            deep_clone = self.map(lambda x: x.clone(id=None))
        else:
            deep_clone = self.map(lambda x: x.clone(id=x.id))
        StoreOptions.set_options(deep_clone, options, **kwargs)
        return deep_clone
class PositionTracker(object):
    """
    Tracks the position held for each sid, plus the dividends that have
    been earned but not yet paid.

    Alongside the ``positions`` mapping, two ordered mappings
    (sid => amount and sid => last sale price) are kept in sync so that
    the value of every position can be computed quickly. The computed
    values are cached in ``_position_values``; every method that changes
    either mapping must reset that cache to None.
    """

    # Cached result of `position_values`; None means "recompute".
    _position_values = None

    def __init__(self):
        # sid => position object
        self.positions = positiondict()
        # Ordered sid => value mappings for quick calculations of
        # positions value.
        self._position_amounts = OrderedDict()
        self._position_last_sale_prices = OrderedDict()

        # Payments earned on an ex date and waiting for their pay date.
        self._unpaid_dividends = pd.DataFrame(
            columns=zp.DIVIDEND_PAYMENT_FIELDS,
        )
        self._positions_store = zp.Positions()

    def update_last_sale(self, event):
        """
        Record the price and date of `event` on the position for
        `event.sid`.

        Nothing happens if the sid has no position, or if the event's
        price is null according to `checknull`.
        """
        # NOTE, PerformanceTracker already vetted as TRADE type
        sid = event.sid
        if sid not in self.positions:
            return

        price = event.price
        if not checknull(price):
            pos = self.positions[sid]
            pos.last_sale_date = event.dt
            pos.last_sale_price = price
            self._position_last_sale_prices[sid] = price
            self._position_values = None  # invalidate cache

    def update_positions(self, positions):
        """
        Merge a mapping of sid => position into the tracked positions in
        one batch and refresh the amount / last-sale-price mappings.
        """
        self.positions.update(positions)
        for sid, pos in iteritems(positions):
            self._position_amounts[sid] = pos.amount
            self._position_last_sale_prices[sid] = pos.last_sale_price
        self._position_values = None  # invalidate cache

    def update_position(self, sid, amount=None, last_sale_price=None,
                        last_sale_date=None, cost_basis=None):
        """
        Update selected fields of the position for `sid`.

        Only the arguments that are not None are applied; the
        amount / last-sale-price mappings and the value cache are kept
        consistent with the position object.
        """
        pos = self.positions[sid]

        if amount is not None:
            pos.amount = amount
            self._position_amounts[sid] = amount
            self._position_values = None  # invalidate cache
        if last_sale_price is not None:
            pos.last_sale_price = last_sale_price
            self._position_last_sale_prices[sid] = last_sale_price
            self._position_values = None  # invalidate cache
        if last_sale_date is not None:
            pos.last_sale_date = last_sale_date
        if cost_basis is not None:
            pos.cost_basis = cost_basis

    def execute_transaction(self, txn):
        """
        Apply transaction `txn` to the position for `txn.sid` and
        refresh the cached amount and last sale price for that sid.
        """
        # Update Position
        # ----------------
        sid = txn.sid
        position = self.positions[sid]
        position.update(txn)
        self._position_amounts[sid] = position.amount
        self._position_last_sale_prices[sid] = position.last_sale_price
        self._position_values = None  # invalidate cache

    def handle_commission(self, commission):
        """
        Let the position for `commission.sid`, if there is one, adjust
        its cost basis for the commission.
        """
        # Adjust the cost basis of the stock if we own it
        if commission.sid in self.positions:
            self.positions[commission.sid].\
                adjust_commission_cost_basis(commission)

    @property
    def position_values(self):
        """
        List of amount * last sale price, one entry per tracked sid.

        The two ordered mappings are paired positionally, so they are
        expected to hold the same sids in the same order.

        Invalidate any time self._position_amounts or
        self._position_last_sale_prices is changed.
        """
        if self._position_values is None:
            vals = list(map(mul, self._position_amounts.values(),
                            self._position_last_sale_prices.values()))
            self._position_values = vals
        return self._position_values

    def calculate_positions_value(self):
        """
        Return the sum of all position values, or np.float64(0) when no
        position is tracked.
        """
        if len(self.position_values) == 0:
            return np.float64(0)

        return sum(self.position_values)

    def _longs_count(self):
        """Number of positions whose value is strictly positive."""
        return sum(map(lambda x: x > 0, self.position_values))

    def _long_exposure(self):
        """Sum of the strictly positive position values."""
        return sum(filter(lambda x: x > 0, self.position_values))

    def _shorts_count(self):
        """Number of positions whose value is strictly negative."""
        return sum(map(lambda x: x < 0, self.position_values))

    def _short_exposure(self):
        """Sum of the strictly negative position values."""
        return sum(filter(lambda x: x < 0, self.position_values))

    def _gross_exposure(self):
        """Long exposure plus the absolute value of short exposure."""
        return self._long_exposure() + abs(self._short_exposure())

    def _net_exposure(self):
        """Net exposure, i.e. the total value of all positions."""
        return self.calculate_positions_value()

    def handle_split(self, split):
        """
        Apply `split` to the position for `split.sid`, if we hold one.

        Returns the leftover cash reported by the position, or None
        when there is no position for that sid.
        """
        if split.sid in self.positions:
            # Make the position object handle the split. It returns the
            # leftover cash from a fractional share, if there is any.
            position = self.positions[split.sid]
            leftover_cash = position.handle_split(split)
            self._position_amounts[split.sid] = position.amount
            self._position_last_sale_prices[split.sid] = \
                position.last_sale_price
            self._position_values = None  # invalidate cache
            return leftover_cash

    def _maybe_earn_dividend(self, dividend):
        """
        Take a historical dividend record and return a Series with fields in
        zipline.protocol.DIVIDEND_FIELDS (plus an 'id' field) representing
        the cash/stock amount we are owed when the dividend is paid.
        """
        if dividend['sid'] in self.positions:
            return self.positions[dividend['sid']].earn_dividend(dividend)
        else:
            return zp.dividend_payment()

    def earn_dividends(self, dividend_frame):
        """
        Given a frame of dividends whose ex_dates are all the next trading day,
        calculate and store the cash and/or stock payments to be paid on each
        dividend's pay date.
        """
        earned = dividend_frame.apply(self._maybe_earn_dividend, axis=1)\
                               .dropna(how='all')
        if len(earned) > 0:
            # Store the earned dividends so that they can be paid on the
            # dividends' pay_dates.
            self._unpaid_dividends = pd.concat(
                [self._unpaid_dividends, earned],
            )

    def _maybe_pay_dividend(self, dividend):
        """
        Take a historical dividend record, look up any stored record of
        cash/stock we are owed for that dividend, and return a Series
        with fields drawn from zipline.protocol.DIVIDEND_PAYMENT_FIELDS.
        """
        try:
            unpaid_dividend = self._unpaid_dividends.loc[dividend['id']]
            return unpaid_dividend
        except KeyError:
            return zp.dividend_payment()

    def pay_dividends(self, dividend_frame):
        """
        Given a frame of dividends whose pay_dates are all the next trading
        day, grant the cash and/or stock payments that were calculated on the
        given dividends' ex dates.

        Paid dividends are removed from the unpaid table, stock payments
        are added to the corresponding positions, and the net cash
        payment is returned.
        """
        payments = dividend_frame.apply(self._maybe_pay_dividend, axis=1)\
                                 .dropna(how='all')

        # Mark these dividends as paid by dropping them from our unpaid
        # table. DataFrame.drop returns a new frame rather than mutating
        # in place, so the result has to be stored back.
        self._unpaid_dividends = self._unpaid_dividends.drop(payments.index)

        # Add stock for any stock dividends paid.  Again, the values here may
        # be negative in the case of short positions.
        stock_payments = payments[payments['payment_sid'].notnull()]
        for _, row in stock_payments.iterrows():
            stock = row['payment_sid']
            share_count = row['share_count']
            # note we create a Position for stock dividend if we don't
            # already own the security
            position = self.positions[stock]

            position.amount += share_count
            self._position_amounts[stock] = position.amount
            self._position_last_sale_prices[stock] = position.last_sale_price
            self._position_values = None  # invalidate cache

        # Add cash equal to the net cash payed from all dividends.  Note that
        # "negative cash" is effectively paid if we're short a security,
        # representing the fact that we're required to reimburse the owner of
        # the stock for any dividends paid while borrowing.
        net_cash_payment = payments['cash_amount'].fillna(0).sum()
        return net_cash_payment

    def create_close_position_transaction(self, event):
        """
        Build a Transaction that closes out the position for `event.sid`
        at the event's price and date.

        Returns None when no amount is recorded for the sid or the
        recorded amount is zero.
        """
        if not self._position_amounts.get(event.sid):
            return None
        txn = Transaction(
            sid=event.sid,
            amount=(-1 * self._position_amounts[event.sid]),
            dt=event.dt,
            price=event.price,
            commission=0,
            order_id=0
        )
        return txn

    def get_positions(self):
        """
        Synchronise the positions store with the tracked positions and
        return it. Positions whose amount is zero are removed from the
        store.
        """
        positions = self._positions_store

        for sid, pos in iteritems(self.positions):

            if pos.amount == 0:
                # Clear out the position if it has become empty since the last
                # time get_positions was called.  Catching the KeyError is
                # faster than checking `if sid in positions`, and this can be
                # potentially called in a tight inner loop.
                try:
                    del positions[sid]
                except KeyError:
                    pass
                continue

            # Note that this will create a position if we don't currently have
            # an entry
            position = positions[sid]
            position.amount = pos.amount
            position.cost_basis = pos.cost_basis
            position.last_sale_price = pos.last_sale_price
        return positions

    def get_positions_list(self):
        """
        Return `to_dict()` of every tracked position with a non-zero
        amount.
        """
        positions = []
        for sid, pos in iteritems(self.positions):
            if pos.amount != 0:
                positions.append(pos.to_dict())
        return positions

    def __getstate__(self):
        """
        Return the state to serialise: the positions as a plain dict,
        the unpaid dividends, and a state version number.
        """
        state_dict = {}

        state_dict['positions'] = dict(self.positions)
        state_dict['unpaid_dividends'] = self._unpaid_dividends

        STATE_VERSION = 1
        state_dict[VERSION_LABEL] = STATE_VERSION
        return state_dict

    def __setstate__(self, state):
        """
        Restore the tracker from a dict produced by `__getstate__`.

        Raises BaseException if the saved state version is older than
        the oldest supported version.
        """
        OLDEST_SUPPORTED_STATE = 1
        version = state.pop(VERSION_LABEL)

        if version < OLDEST_SUPPORTED_STATE:
            # NOTE(review): BaseException is unusually broad; kept so
            # that existing callers observe the same exception type.
            raise BaseException("PositionTracker saved state is too old.")

        self.positions = positiondict()
        # note that positions_store is temporary and gets regened from
        # .positions
        self._positions_store = zp.Positions()

        self._unpaid_dividends = state['unpaid_dividends']

        # Ordered sid => value mappings for quick calculations of
        # positions value.
        self._position_amounts = OrderedDict()
        self._position_last_sale_prices = OrderedDict()

        self.update_positions(state['positions'])