Пример #1
0
class Dimensioned(LabelledData):
    """
    Dimensioned is a base class that allows the data contents of a
    class to be associated with dimensions. The contents associated
    with dimensions may be partitioned into one of three types

    * key dimensions: These are the dimensions that can be indexed via
                      the __getitem__ method. Dimension objects
                      supporting key dimensions must support indexing
                      over these dimensions and may also support
                      slicing. This list ordering of dimensions
                      describes the positional components of each
                      multi-dimensional indexing operation.

                      For instance, if the key dimension names are
                      'weight' followed by 'height' for Dimensioned
                      object 'obj', then obj[80,175] indexes a weight
                      of 80 and height of 175.

                      Accessed using either kdims or key_dimensions.

    * value dimensions: These dimensions correspond to any data held
                        on the Dimensioned object not in the key
                        dimensions. Indexing by value dimension is
                        supported by dimension name (when there are
                        multiple possible value dimensions); no
                        slicing semantics is supported and all the
                        data associated with that dimension will be
                        returned at once. Note that it is not possible
                        to mix value dimensions and deep dimensions.

                        Accessed using either vdims or value_dimensions.


    * deep dimensions: These are dynamically computed dimensions that
                       belong to other Dimensioned objects that are
                       nested in the data. Objects that support this
                       should enable the _deep_indexable flag. Note
                       that it is not possible to mix value dimensions
                       and deep dimensions.

                       Accessed using either ddims or deep_dimensions.

    In addition, constant dimensions (cdims or constant_dimensions)
    attach fixed Dimension:value pairs to the object.

    Dimensioned class support generalized methods for finding the
    range and type of values along a particular Dimension. The range
    method relies on the appropriate implementation of the
    dimension_values methods on subclasses.

    The index of an arbitrary dimension is its positional index in the
    list returned by the dimensions method, which follows the order of
    the _dim_groups attribute.
    """

    cdims = param.Dict(default=OrderedDict(), doc="""
       The constant dimensions defined as a dictionary of Dimension:value
       pairs providing additional dimension information about the object.

       Aliased with constant_dimensions.""")

    kdims = param.List(bounds=(0, None), constant=True, doc="""
       The key dimensions defined as list of dimensions that may be
       used in indexing (and potential slicing) semantics. The order
       of the dimensions listed here determines the semantics of each
       component of a multi-dimensional indexing operation.

       Aliased with key_dimensions.""")

    vdims = param.List(bounds=(0, None), constant=True, doc="""
       The value dimensions defined as the list of dimensions used to
       describe the components of the data. If multiple value
       dimensions are supplied, a particular value dimension may be
       indexed by name after the key dimensions.

       Aliased with value_dimensions.""")

    group = param.String(default='Dimensioned', constant=True, doc="""
       A string describing the data wrapped by the object.""")

    __abstract = True
    _sorted = False
    # Order in which dimension groups are listed by dimensions('all')
    _dim_groups = ['kdims', 'vdims', 'cdims', 'ddims']
    # Long parameter names accepted by the constructor -> short names
    _dim_aliases = dict(key_dimensions='kdims', value_dimensions='vdims',
                        constant_dimensions='cdims', deep_dimensions='ddims')


    # Long-name aliases

    @property
    def key_dimensions(self): return self.kdims

    @property
    def value_dimensions(self): return self.vdims

    @property
    def constant_dimensions(self): return self.cdims

    @property
    def deep_dimensions(self): return self.ddims

    def __init__(self, data, **params):
        """
        Normalizes the dimension parameters before delegating to the
        parent constructor: long-name aliases are renamed to their
        short form and any entry that is not already a Dimension is
        wrapped in one. Deep dimensions are computed dynamically and
        are therefore never read from params.
        """
        for group in self._dim_groups+list(self._dim_aliases.keys()):
            if group in ['deep_dimensions', 'ddims']: continue
            if group in params:
                if group in self._dim_aliases:
                    params[self._dim_aliases[group]] = params.pop(group)
                    group = self._dim_aliases[group]
                if group == 'cdims':
                    # Keep the caller's ordering, consistent with the
                    # OrderedDict default and _cached_constants.
                    dimensions = OrderedDict(
                        (d if isinstance(d, Dimension) else Dimension(d), val)
                        for d, val in params.pop(group).items())
                else:
                    dimensions = [d if isinstance(d, Dimension) else Dimension(d)
                                  for d in params.pop(group)]
                params[group] = dimensions
        super(Dimensioned, self).__init__(data, **params)
        self.ndims = len(self.kdims)
        cdims = [(d.name, val) for d, val in self.cdims.items()]
        # Name-based lookups cached once at construction time
        self._cached_constants = OrderedDict(cdims)
        self._cached_index_names = [d.name for d in self.kdims]
        self._cached_value_names = [d.name for d in self.vdims]
        self._settings = None


    def _valid_dimensions(self, dimensions):
        """
        Validates key dimension input. A single dimension is wrapped
        in a list; falsy input is returned unchanged. Raises an
        Exception if any name is not a key dimension name.
        """
        if not dimensions:
            return dimensions
        elif not isinstance(dimensions, list):
            dimensions = [dimensions]

        for dim in dimensions:
            if dim not in self._cached_index_names:
                raise Exception("Supplied dimensions %s not found." % dim)
        return dimensions


    @property
    def ddims(self):
        "The list of deep dimensions"
        if self._deep_indexable and len(self):
            # Deep dimensions are taken from the first nested object
            return self.values()[0].dimensions()
        else:
            return []


    def dimensions(self, selection='all', label=False):
        """
        Provides convenient access to Dimensions on nested
        Dimensioned objects. Dimensions can be selected
        by their type, i.e. 'key', 'value' or 'constant'
        dimensions (or the short forms 'k', 'v' and 'c').
        A list of such types selects the corresponding groups
        on this object only. By default 'all' dimensions are
        returned. If label is True the dimension names are
        returned instead of the Dimension objects.

        Raises a KeyError for an unrecognized selection.
        """
        lambdas = {'k': (lambda x: x.kdims, {'full_breadth': False}),
                   'v': (lambda x: x.vdims, {}),
                   'c': (lambda x: x.cdims, {})}
        aliases = {'key': 'k', 'value': 'v', 'constant': 'c'}
        if selection == 'all':
            dims = [dim for group in self._dim_groups
                    for dim in getattr(self, group)]
        elif isinstance(selection, list):
            # Fall back to the group itself so short forms do not
            # resolve to a non-existent 'Nonedims' attribute.
            dims =  [dim for group in selection
                     for dim in getattr(self, '%sdims' % aliases.get(group, group))]
        elif aliases.get(selection, selection) in lambdas:
            selection = aliases.get(selection, selection)
            lmbd, kwargs = lambdas[selection]
            key_traversal = self.traverse(lmbd, **kwargs)
            dims = [dim for keydims in key_traversal for dim in keydims]
        else:
            raise KeyError("Invalid selection %r, valid selections include "
                           "'all', 'value' and 'key' dimensions" % (selection,))
        return [dim.name if label else dim for dim in dims]


    def get_dimension(self, dimension, default=None):
        """
        Access a Dimension object by name or index. Lookup by name
        returns default if no dimension has that name; lookup by
        integer index uses ordinary list indexing.
        """
        all_dims = self.dimensions()
        if isinstance(dimension, int):
            return all_dims[dimension]
        else:
            return {dim.name: dim for dim in all_dims}.get(dimension, default)


    def get_dimension_index(self, dim):
        """
        Returns the index of the requested dimension.

        An integer is returned unchanged if it is below the number
        of dimensions, otherwise an IndexError is raised. A name
        that matches no dimension raises an Exception.
        """
        if isinstance(dim, int):
            if dim < len(self.dimensions()):
                return dim
            else:
                # Raise rather than return the exception instance
                raise IndexError('Dimension index out of bounds')
        try:
            return [d.name for d in self.dimensions()].index(dim)
        except ValueError:
            raise Exception("Dimension %s not found in %s." %
                            (dim, self.__class__.__name__))


    def get_dimension_type(self, dim):
        """
        Returns the specified Dimension type if specified or
        if the dimension_values types are consistent otherwise
        None is returned.
        """
        dim_obj = self.get_dimension(dim)
        if dim_obj and dim_obj.type is not None:
            return dim_obj.type
        dim_vals = [type(v) for v in self.dimension_values(dim)]
        if len(set(dim_vals)) == 1:
            return dim_vals[0]
        else:
            return None

    def __getitem__(self, key):
        """
        Multi-dimensional indexing semantics is determined by the list
        of key dimensions. For instance, the first indexing component
        will index the first key dimension.

        After the key dimensions are given, *either* a value dimension
        name may follow (if there are multiple value dimensions) *or*
        deep dimensions may then be listed (for applicable deep
        dimensions).

        This base implementation ignores the key and returns self.
        """
        return self


    def select(self, selection_specs=None, **kwargs):
        """
        Allows slicing or indexing into the Dimensioned object
        by supplying the dimension and index/slice as key
        value pairs. Select descends recursively through the
        data structure applying the key dimension selection.
        The 'value' keyword allows selecting the
        value dimensions on objects which have any declared.

        The selection may also be selectively applied to
        specific objects by supplying the selection_specs
        as an iterable of type.group.label specs, types or
        functions.
        """

        # Apply all indexes applying on this object
        val_dim = ['value'] if self.vdims else []
        # Map sanitized identifiers back to the real dimension names
        sanitized = {sanitize_identifier(kd): kd
                     for kd in self._cached_index_names}
        local_dims = (self._cached_index_names
                      + list(sanitized.keys()) + val_dim)
        local_kwargs = {k: v for k, v in kwargs.items()
                        if k in local_dims}

        # Check selection_spec applies
        if selection_specs is not None:
            matches = any(self.matches(spec)
                          for spec in selection_specs)
        else:
            matches = True

        if local_kwargs and matches:
            # Start from a full slice on every key dimension
            select = [slice(None) for i in range(self.ndims)]
            for dim, val in local_kwargs.items():
                if dim == 'value':
                    select += [val]
                else:
                    # Tuples are shorthand for slices
                    if isinstance(val, tuple): val = slice(*val)
                    dim = sanitized.get(dim, dim)
                    select[self.get_dimension_index(dim)] = val
            if self._deep_indexable:
                selection = self.get(tuple(select),
                                     self.clone(shared_data=False))
            else:
                selection = self[tuple(select)]
        else:
            selection = self

        if type(selection) is not type(self):
            # Apply the selection on the selected object of a different type
            val_dim = ['value'] if selection.vdims else []
            key_dims = selection.dimensions('key', label=True) + val_dim
            if any(kw in key_dims for kw in kwargs):
                selection = selection.select(selection_specs, **kwargs)
        elif selection._deep_indexable:
            # Apply the deep selection on each item in local selection
            items = []
            for k, v in selection.items():
                val_dim = ['value'] if v.vdims else []
                dims = list(zip(*[(sanitize_identifier(kd), kd)
                                  for kd in v.dimensions('key', label=True)]))
                kdims, skdims = dims if dims else ([], [])
                key_dims = list(kdims) + list(skdims) + val_dim
                if any(kw in key_dims for kw in kwargs):
                    items.append((k, v.select(selection_specs, **kwargs)))
                else:
                    items.append((k, v))
            selection = selection.clone(items)
        return selection


    def dimension_values(self, dimension):
        """
        Returns the values along the specified dimension. This method
        must be implemented for all Dimensioned type.

        This base implementation only knows about constant
        dimensions and raises an Exception for any other name.
        """
        val = self._cached_constants.get(dimension, None)
        # Compare against None so falsy constants (0, '') are returned
        if val is not None:
            return val
        else:
            raise Exception("Dimension %s not found in %s." %
                            (dimension, self.__class__.__name__))


    def range(self, dimension, data_range=True):
        """
        Returns the range of values along the specified dimension.

        If data_range is True, the data may be used to try and infer
        the appropriate range. Otherwise, (None,None) is returned to
        indicate that no range is defined.
        """
        dimension = self.get_dimension(dimension)
        if dimension is None:
            return (None, None)
        # An explicitly declared range always wins
        if dimension.range != (None, None):
            return dimension.range
        elif not data_range:
            return (None, None)
        soft_range = [r for r in dimension.soft_range
                      if r is not None]
        if dimension in self.kdims or dimension in self.vdims:
            dim_vals = self.dimension_values(dimension.name)
            return find_range(dim_vals, soft_range)
        # Otherwise combine the ranges of nested objects declaring
        # this dimension as a key or value dimension
        dname = dimension.name
        match_fn = lambda x: dname in x.dimensions(['key', 'value'], True)
        range_fn = lambda x: x.range(dname)
        ranges = self.traverse(range_fn, [match_fn])
        drange = max_range(ranges)
        return drange


    def __repr__(self):
        return PrettyPrinter.pprint(self)


    def __call__(self, options=None, **kwargs):
        """
        Apply the supplied options to a clone of the object which is
        then returned.
        """
        groups = set(Store.options().groups.keys())
        if kwargs and set(kwargs) <= groups:
            if not all(isinstance(v, dict) for v in kwargs.values()):
                raise Exception("The %s options must be specified using dictionary groups" %
                                ','.join(repr(k) for k in kwargs.keys()))

            # Build the most specific type[.group[.label]] identifier
            sanitized_group = sanitize_identifier(self.group)
            if self.label:
                identifier = ('%s.%s.%s' % (self.__class__.__name__,
                                            sanitized_group,
                                            sanitize_identifier(self.label)))
            elif  sanitized_group != self.__class__.__name__:
                identifier = '%s.%s' % (self.__class__.__name__, sanitized_group)
            else:
                identifier = self.__class__.__name__

            kwargs = {k:{identifier:v} for k,v in kwargs.items()}
        deep_clone = self.map(lambda x: x.clone(id=x.id))
        StoreOptions.set_options(deep_clone, options, **kwargs)
        return deep_clone
Пример #2
0
        return info
    except KeyError:
        return

# Browser driver handed to wechat_links() to enumerate article links.
driver = webdriver.PhantomJS()
jieba.set_dictionary('dict.txt.big.txt')
jieba.enable_parallel(4)

# word -> number of occurrences, in first-seen order
big_dict = OrderedDict()
try:
    for each_link in wechat_links(driver):
        print(each_link)
        article = get_article(each_link)
        if article is None:
            continue
        for each_word_cut in word_cuts(article):
            # Tokens of length one are not counted
            if len(each_word_cut) > 1:
                big_dict[each_word_cut] = big_dict.get(each_word_cut, 0) + 1
finally:
    # Always shut the browser down, even if crawling fails part-way
    driver.quit()

# From here on big_dict is a list of (word, count) pairs, most frequent first
big_dict = sorted(big_dict.items(), key=lambda d: d[1], reverse=True)

now = datetime.datetime.now()
today = now.strftime('%Y%m%d%H%M%S')
# The context manager closes the file even if serialization fails
with open("wechat_word_cut"+today+".pkl", "wb", buffering=1024) as pfile:
    pfile.write(dumps(big_dict))
f = open("wechat_word_cut"+today+".csv", "wb", buffering=1024)
for each_word_cut, word_count in big_dict:
    line = each_word_cut + "," + str(word_count) + chr(10)
class PositionTracker(object):
    """
    Tracks the positions held per sid, together with per-sid lookup
    tables (amounts, last sale prices and multipliers) used to compute
    position values and exposures, the table of dividends earned but
    not yet paid, and the dates on which positions must be auto-closed.
    """

    def __init__(self, asset_finder):
        self.asset_finder = asset_finder

        # sid => position object
        self.positions = positiondict()
        # Arrays for quick calculations of positions value
        self._position_amounts = OrderedDict()
        self._position_last_sale_prices = OrderedDict()
        self._position_value_multipliers = OrderedDict()
        self._position_exposure_multipliers = OrderedDict()
        self._position_payout_multipliers = OrderedDict()
        self._unpaid_dividends = pd.DataFrame(
            columns=zp.DIVIDEND_PAYMENT_FIELDS,
        )
        self._positions_store = zp.Positions()

        # Dict, keyed on dates, that contains lists of close position events
        # for any Assets in this tracker's positions
        self._auto_close_position_sids = {}

    def _update_asset(self, sid):
        """
        Ensure the value, exposure and payout multipliers are recorded
        for ``sid``, looking the asset up through the asset finder when
        any of them is missing. Futures are additionally registered for
        auto-close on their notice_date, or else their expiration_date.

        Raises PositionTrackerMissingAssetFinder if a lookup is needed
        but no asset finder is set.
        """
        try:
            # Bare lookups: a KeyError means a multiplier is missing
            self._position_value_multipliers[sid]
            self._position_exposure_multipliers[sid]
            self._position_payout_multipliers[sid]
        except KeyError:
            # Check if there is an AssetFinder
            if self.asset_finder is None:
                raise PositionTrackerMissingAssetFinder()

            # Collect the value multipliers from applicable sids
            asset = self.asset_finder.retrieve_asset(sid)
            if isinstance(asset, Equity):
                self._position_value_multipliers[sid] = 1
                self._position_exposure_multipliers[sid] = 1
                self._position_payout_multipliers[sid] = 0
            if isinstance(asset, Future):
                self._position_value_multipliers[sid] = 0
                self._position_exposure_multipliers[sid] = \
                    asset.contract_multiplier
                self._position_payout_multipliers[sid] = \
                    asset.contract_multiplier
                # Futures are closed on their notice_date
                if asset.notice_date:
                    self._insert_auto_close_position_date(
                        dt=asset.notice_date,
                        sid=sid
                    )
                # If the Future does not have a notice_date, it will be closed
                # on its expiration_date
                elif asset.expiration_date:
                    self._insert_auto_close_position_date(
                        dt=asset.expiration_date,
                        sid=sid
                    )

    def _insert_auto_close_position_date(self, dt, sid):
        """
        Inserts the given SID in to the list of positions to be auto-closed by
        the given dt.

        Parameters
        ----------
        dt : pandas.Timestamp
            The date before-which the given SID will be auto-closed
        sid : int
            The SID of the Asset to be auto-closed
        """
        self._auto_close_position_sids.setdefault(dt, set()).add(sid)

    def auto_close_position_events(self, next_trading_day):
        """
        Generates CLOSE_POSITION events for any SIDs whose auto-close date is
        before or equal to the given date.

        Parameters
        ----------
        next_trading_day : pandas.Timestamp
            The time before-which certain Assets need to be closed

        Yields
        ------
        Event
            A close position event for any sids that should be closed before
            the next_trading_day parameter
        """
        past_asset_end_dates = set()

        # Check the auto_close_position_dates dict for SIDs to close
        for date, sids in self._auto_close_position_sids.items():
            if date > next_trading_day:
                continue
            past_asset_end_dates.add(date)

            for sid in sids:
                # Yield a CLOSE_POSITION event
                event = Event({
                    'dt': date,
                    'type': DATASOURCE_TYPE.CLOSE_POSITION,
                    'sid': sid,
                })
                yield event

        # Clear out past dates. This only runs once the generator has been
        # fully consumed.
        while past_asset_end_dates:
            self._auto_close_position_sids.pop(past_asset_end_dates.pop())

    def update_last_sale(self, event):
        """
        Record the price and dt of ``event`` as the last sale of its sid.

        Returns the price change times the payout multiplier times the
        position amount, or 0 when the sid is not held or the price is
        null.
        """
        # NOTE, PerformanceTracker already vetted as TRADE type
        sid = event.sid
        if sid not in self.positions:
            return 0

        price = event.price

        if checknull(price):
            return 0

        pos = self.positions[sid]
        old_price = pos.last_sale_price
        pos.last_sale_date = event.dt
        pos.last_sale_price = price
        self._position_last_sale_prices[sid] = price

        # Calculate cash adjustment on assets with multipliers
        return ((price - old_price) * self._position_payout_multipliers[sid]
                * pos.amount)

    def update_positions(self, positions):
        """Merge a mapping of sid => position and refresh the lookup tables."""
        # update positions in batch
        self.positions.update(positions)
        for sid, pos in iteritems(positions):
            self._position_amounts[sid] = pos.amount
            self._position_last_sale_prices[sid] = pos.last_sale_price
            self._update_asset(sid)

    def update_position(self, sid, amount=None, last_sale_price=None,
                        last_sale_date=None, cost_basis=None):
        """
        Update the fields of the position for ``sid``; arguments left as
        None are not touched.
        """
        pos = self.positions[sid]

        if amount is not None:
            pos.amount = amount
            self._position_amounts[sid] = amount
            self._position_values = None  # invalidate cache
            self._update_asset(sid=sid)
        if last_sale_price is not None:
            pos.last_sale_price = last_sale_price
            self._position_last_sale_prices[sid] = last_sale_price
            self._position_values = None  # invalidate cache
        if last_sale_date is not None:
            pos.last_sale_date = last_sale_date
        if cost_basis is not None:
            pos.cost_basis = cost_basis

    def execute_transaction(self, txn):
        """Apply ``txn`` to its position and refresh the lookup tables."""
        # Update Position
        # ----------------
        sid = txn.sid
        position = self.positions[sid]
        position.update(txn)
        self._position_amounts[sid] = position.amount
        self._position_last_sale_prices[sid] = position.last_sale_price
        self._update_asset(sid)

    def handle_commission(self, commission):
        # Adjust the cost basis of the stock if we own it
        if commission.sid in self.positions:
            self.positions[commission.sid].\
                adjust_commission_cost_basis(commission)

    @property
    def position_values(self):
        """List of amount * last sale price * value multiplier per sid."""
        iter_amount_price_multiplier = zip(
            itervalues(self._position_amounts),
            itervalues(self._position_last_sale_prices),
            itervalues(self._position_value_multipliers),
        )
        return [
            amount * price * multiplier for
            amount, price, multiplier in iter_amount_price_multiplier
        ]

    @property
    def position_exposures(self):
        """List of amount * last sale price * exposure multiplier per sid."""
        iter_amount_price_multiplier = zip(
            itervalues(self._position_amounts),
            itervalues(self._position_last_sale_prices),
            itervalues(self._position_exposure_multipliers),
        )
        return [
            amount * price * multiplier for
            amount, price, multiplier in iter_amount_price_multiplier
        ]

    def calculate_positions_value(self):
        """Sum of position_values; np.float64(0) when there are none."""
        # Evaluate the property once; each access rebuilds the list
        values = self.position_values
        if not values:
            return np.float64(0)

        return sum(values)

    def calculate_positions_exposure(self):
        """Sum of position_exposures; np.float64(0) when there are none."""
        # Evaluate the property once; each access rebuilds the list
        exposures = self.position_exposures
        if not exposures:
            return np.float64(0)

        return sum(exposures)

    def _longs_count(self):
        return sum(1 for i in self.position_exposures if i > 0)

    def _long_exposure(self):
        return sum(i for i in self.position_exposures if i > 0)

    def _long_value(self):
        return sum(i for i in self.position_values if i > 0)

    def _shorts_count(self):
        return sum(1 for i in self.position_exposures if i < 0)

    def _short_exposure(self):
        return sum(i for i in self.position_exposures if i < 0)

    def _short_value(self):
        return sum(i for i in self.position_values if i < 0)

    def _gross_exposure(self):
        return self._long_exposure() + abs(self._short_exposure())

    def _gross_value(self):
        return self._long_value() + abs(self._short_value())

    def _net_exposure(self):
        return self.calculate_positions_exposure()

    def _net_value(self):
        return self.calculate_positions_value()

    def handle_split(self, split):
        """
        Apply ``split`` to the matching position, if held, and return the
        leftover cash it reports. Returns None when the sid is not held.
        """
        if split.sid in self.positions:
            # Make the position object handle the split. It returns the
            # leftover cash from a fractional share, if there is any.
            position = self.positions[split.sid]
            leftover_cash = position.handle_split(split)
            self._position_amounts[split.sid] = position.amount
            self._position_last_sale_prices[split.sid] = \
                position.last_sale_price
            self._update_asset(split.sid)
            return leftover_cash

    def _maybe_earn_dividend(self, dividend):
        """
        Take a historical dividend record and return a Series with fields in
        zipline.protocol.DIVIDEND_FIELDS (plus an 'id' field) representing
        the cash/stock amount we are owed when the dividend is paid.
        """
        if dividend['sid'] in self.positions:
            return self.positions[dividend['sid']].earn_dividend(dividend)
        else:
            return zp.dividend_payment()

    def earn_dividends(self, dividend_frame):
        """
        Given a frame of dividends whose ex_dates are all the next trading day,
        calculate and store the cash and/or stock payments to be paid on each
        dividend's pay date.
        """
        earned = dividend_frame.apply(self._maybe_earn_dividend, axis=1)\
                               .dropna(how='all')
        if len(earned) > 0:
            # Store the earned dividends so that they can be paid on the
            # dividends' pay_dates.
            self._unpaid_dividends = pd.concat(
                [self._unpaid_dividends, earned],
            )

    def _maybe_pay_dividend(self, dividend):
        """
        Take a historical dividend record, look up any stored record of
        cash/stock we are owed for that dividend, and return a Series
        with fields drawn from zipline.protocol.DIVIDEND_PAYMENT_FIELDS.
        """
        try:
            unpaid_dividend = self._unpaid_dividends.loc[dividend['id']]
            return unpaid_dividend
        except KeyError:
            return zp.dividend_payment()

    def pay_dividends(self, dividend_frame):
        """
        Given a frame of dividends whose pay_dates are all the next trading
        day, grant the cash and/or stock payments that were calculated on the
        given dividends' ex dates.

        Returns the net cash paid across all of the given dividends.
        """
        payments = dividend_frame.apply(self._maybe_pay_dividend, axis=1)\
                                 .dropna(how='all')

        # Mark these dividends as paid by dropping them from our unpaid
        # table.  DataFrame.drop returns a new frame rather than mutating
        # in place, so the result has to be stored back.
        self._unpaid_dividends = self._unpaid_dividends.drop(payments.index)

        # Add stock for any stock dividends paid.  Again, the values here may
        # be negative in the case of short positions.
        stock_payments = payments[payments['payment_sid'].notnull()]
        for _, row in stock_payments.iterrows():
            stock = row['payment_sid']
            share_count = row['share_count']
            # note we create a Position for stock dividend if we don't
            # already own the asset
            position = self.positions[stock]

            position.amount += share_count
            self._position_amounts[stock] = position.amount
            self._position_last_sale_prices[stock] = position.last_sale_price
            self._update_asset(stock)

        # Add cash equal to the net cash payed from all dividends.  Note that
        # "negative cash" is effectively paid if we're short an asset,
        # representing the fact that we're required to reimburse the owner of
        # the stock for any dividends paid while borrowing.
        net_cash_payment = payments['cash_amount'].fillna(0).sum()
        return net_cash_payment

    def maybe_create_close_position_transaction(self, event):
        """
        Build a Transaction that flattens the position for ``event.sid``,
        priced at the event's price when it has one and at the last sale
        price otherwise. Returns None when no non-zero amount is recorded.
        """
        if not self._position_amounts.get(event.sid):
            return None
        if 'price' in event:
            price = event.price
        else:
            price = self._position_last_sale_prices[event.sid]
        txn = Transaction(
            sid=event.sid,
            amount=(-1 * self._position_amounts[event.sid]),
            dt=event.dt,
            price=price,
            commission=0,
            order_id=0
        )
        return txn

    def get_positions(self):
        """
        Synchronize the positions store with the tracked positions,
        removing entries whose amount is zero, and return the store.
        """

        positions = self._positions_store

        for sid, pos in iteritems(self.positions):

            if pos.amount == 0:
                # Clear out the position if it has become empty since the last
                # time get_positions was called.  Catching the KeyError is
                # faster than checking `if sid in positions`, and this can be
                # potentially called in a tight inner loop.
                try:
                    del positions[sid]
                except KeyError:
                    pass
                continue

            # Note that this will create a position if we don't currently have
            # an entry
            position = positions[sid]
            position.amount = pos.amount
            position.cost_basis = pos.cost_basis
            position.last_sale_price = pos.last_sale_price
        return positions

    def get_positions_list(self):
        """Return ``to_dict()`` of every position with a non-zero amount."""
        positions = []
        for sid, pos in iteritems(self.positions):
            if pos.amount != 0:
                positions.append(pos.to_dict())
        return positions

    def __getstate__(self):
        state_dict = {}

        state_dict['asset_finder'] = self.asset_finder
        state_dict['positions'] = dict(self.positions)
        state_dict['unpaid_dividends'] = self._unpaid_dividends
        state_dict['auto_close_position_sids'] = self._auto_close_position_sids

        STATE_VERSION = 3
        state_dict[VERSION_LABEL] = STATE_VERSION
        return state_dict

    def __setstate__(self, state):
        OLDEST_SUPPORTED_STATE = 3
        version = state.pop(VERSION_LABEL)

        if version < OLDEST_SUPPORTED_STATE:
            # NOTE(review): BaseException is unusually broad to raise; the
            # type is kept so existing handlers behave the same.
            raise BaseException("PositionTracker saved state is too old.")

        self.asset_finder = state['asset_finder']
        self.positions = positiondict()
        # note that positions_store is temporary and gets regened from
        # .positions
        self._positions_store = zp.Positions()

        self._unpaid_dividends = state['unpaid_dividends']
        self._auto_close_position_sids = state['auto_close_position_sids']

        # Arrays for quick calculations of positions value
        self._position_amounts = OrderedDict()
        self._position_last_sale_prices = OrderedDict()
        self._position_value_multipliers = OrderedDict()
        self._position_exposure_multipliers = OrderedDict()
        self._position_payout_multipliers = OrderedDict()

        # Update positions is called without a finder
        self.update_positions(state['positions'])
Пример #4
0
class PositionTracker(object):
    """
    Tracks one position object per sid together with per-sid lookup tables
    (amount, last sale price, and value / exposure / payout multipliers)
    that are zipped together to compute position values and exposures.

    The lookup tables are OrderedDicts keyed by sid; the value/exposure
    properties rely on them being iterated in the same order.
    """

    def __init__(self):
        # sid => position object
        self.positions = positiondict()
        # Arrays for quick calculations of positions value
        self._position_amounts = OrderedDict()
        self._position_last_sale_prices = OrderedDict()
        self._position_value_multipliers = OrderedDict()
        self._position_exposure_multipliers = OrderedDict()
        self._position_payout_multipliers = OrderedDict()
        # Dividends earned on an ex date but not yet paid out.
        self._unpaid_dividends = pd.DataFrame(
            columns=zp.DIVIDEND_PAYMENT_FIELDS,
        )
        # Container handed back (and refreshed) by get_positions().
        self._positions_store = zp.Positions()

    @with_environment()
    def _retrieve_asset(self, sid, env=None):
        """Look up the asset object for ``sid`` via ``env.asset_finder``."""
        # NOTE(review): env is presumably injected by the with_environment
        # decorator -- confirm against its definition.
        return env.asset_finder.retrieve_asset(sid)

    def _update_multipliers(self, sid):
        """
        Make sure the three multiplier tables have an entry for ``sid``.

        If any entry is missing, the asset is retrieved and the multipliers
        are set according to its type. An asset that is neither an Equity
        nor a Future gets no entries.
        """
        try:
            self._position_value_multipliers[sid]
            self._position_exposure_multipliers[sid]
            self._position_payout_multipliers[sid]
        except KeyError:
            # Collect the value multipliers from applicable sids
            asset = self._retrieve_asset(sid)
            if isinstance(asset, Equity):
                self._position_value_multipliers[sid] = 1
                self._position_exposure_multipliers[sid] = 1
                self._position_payout_multipliers[sid] = 0
            if isinstance(asset, Future):
                self._position_value_multipliers[sid] = 0
                self._position_exposure_multipliers[sid] = \
                    asset.contract_multiplier
                self._position_payout_multipliers[sid] = \
                    asset.contract_multiplier

    def update_last_sale(self, event):
        """
        Record ``event.price`` / ``event.dt`` as the last sale of the
        position for ``event.sid``.

        Returns 0 when the sid has no position or the price is null;
        otherwise returns the price change times the payout multiplier
        times the position amount.
        """
        # NOTE, PerformanceTracker already vetted as TRADE type
        sid = event.sid
        if sid not in self.positions:
            return 0

        price = event.price

        if checknull(price):
            return 0

        pos = self.positions[sid]
        old_price = pos.last_sale_price
        pos.last_sale_date = event.dt
        pos.last_sale_price = price
        self._position_last_sale_prices[sid] = price

        # Calculate cash adjustment on assets with multipliers
        return ((price - old_price) * self._position_payout_multipliers[sid]
                * pos.amount)

    def update_positions(self, positions):
        """Merge a ``sid -> position`` mapping and refresh the lookup tables."""
        # update positions in batch
        self.positions.update(positions)
        for sid, pos in iteritems(positions):
            self._position_amounts[sid] = pos.amount
            self._position_last_sale_prices[sid] = pos.last_sale_price
            self._update_multipliers(sid)

    def update_position(self, sid, amount=None, last_sale_price=None,
                        last_sale_date=None, cost_basis=None):
        """
        Overwrite selected fields of the position for ``sid``; arguments
        left as None are not touched.
        """
        pos = self.positions[sid]

        if amount is not None:
            pos.amount = amount
            self._position_amounts[sid] = amount
            # Nothing in this class reads _position_values (the
            # position_values property recomputes on every access); the
            # assignment is kept only so the attribute keeps being set.
            self._position_values = None  # invalidate cache
            self._update_multipliers(sid=sid)
        if last_sale_price is not None:
            pos.last_sale_price = last_sale_price
            self._position_last_sale_prices[sid] = last_sale_price
            self._position_values = None  # invalidate cache
        if last_sale_date is not None:
            pos.last_sale_date = last_sale_date
        if cost_basis is not None:
            pos.cost_basis = cost_basis

    def execute_transaction(self, txn):
        """Apply ``txn`` to the position for ``txn.sid`` and sync the tables."""
        # Update Position
        # ----------------
        sid = txn.sid
        position = self.positions[sid]
        position.update(txn)
        self._position_amounts[sid] = position.amount
        self._position_last_sale_prices[sid] = position.last_sale_price
        self._update_multipliers(sid)

    def handle_commission(self, commission):
        """Pass ``commission`` to the matching position, if one is tracked."""
        # Adjust the cost basis of the stock if we own it
        if commission.sid in self.positions:
            self.positions[commission.sid].\
                adjust_commission_cost_basis(commission)

    @property
    def position_values(self):
        """List of amount * last sale price * value multiplier, per sid."""
        iter_amount_price_multiplier = zip(
            itervalues(self._position_amounts),
            itervalues(self._position_last_sale_prices),
            itervalues(self._position_value_multipliers),
        )
        return [
            amount * price * multiplier for
            amount, price, multiplier in iter_amount_price_multiplier
        ]

    @property
    def position_exposures(self):
        """List of amount * last sale price * exposure multiplier, per sid."""
        iter_amount_price_multiplier = zip(
            itervalues(self._position_amounts),
            itervalues(self._position_last_sale_prices),
            itervalues(self._position_exposure_multipliers),
        )
        return [
            amount * price * multiplier for
            amount, price, multiplier in iter_amount_price_multiplier
        ]

    def calculate_positions_value(self):
        """Sum of ``position_values``; ``np.float64(0)`` when there are none."""
        # The property rebuilds its list on every access, so read it once.
        values = self.position_values
        if len(values) == 0:
            return np.float64(0)

        return sum(values)

    def calculate_positions_exposure(self):
        """Sum of ``position_exposures``; ``np.float64(0)`` when empty."""
        # The property rebuilds its list on every access, so read it once.
        exposures = self.position_exposures
        if len(exposures) == 0:
            return np.float64(0)

        return sum(exposures)

    def _longs_count(self):
        """Number of positions with a positive exposure."""
        return sum(1 for i in self.position_exposures if i > 0)

    def _long_exposure(self):
        """Sum of the positive exposures."""
        return sum(i for i in self.position_exposures if i > 0)

    def _long_value(self):
        """Sum of the positive position values."""
        return sum(i for i in self.position_values if i > 0)

    def _shorts_count(self):
        """Number of positions with a negative exposure."""
        return sum(1 for i in self.position_exposures if i < 0)

    def _short_exposure(self):
        """Sum of the negative exposures (zero or negative)."""
        return sum(i for i in self.position_exposures if i < 0)

    def _short_value(self):
        """Sum of the negative position values (zero or negative)."""
        return sum(i for i in self.position_values if i < 0)

    def _gross_exposure(self):
        """Long exposure plus the absolute value of short exposure."""
        return self._long_exposure() + abs(self._short_exposure())

    def _gross_value(self):
        """Long value plus the absolute value of short value."""
        return self._long_value() + abs(self._short_value())

    def _net_exposure(self):
        """Alias for ``calculate_positions_exposure``."""
        return self.calculate_positions_exposure()

    def _net_value(self):
        """Alias for ``calculate_positions_value``."""
        return self.calculate_positions_value()

    def handle_split(self, split):
        """
        Apply ``split`` to the position for ``split.sid`` if one is tracked.

        Returns whatever the position's ``handle_split`` returns (the
        leftover cash), or None when the sid is not tracked.
        """
        if split.sid in self.positions:
            # Make the position object handle the split. It returns the
            # leftover cash from a fractional share, if there is any.
            position = self.positions[split.sid]
            leftover_cash = position.handle_split(split)
            self._position_amounts[split.sid] = position.amount
            self._position_last_sale_prices[split.sid] = \
                position.last_sale_price
            self._update_multipliers(split.sid)
            return leftover_cash

    def _maybe_earn_dividend(self, dividend):
        """
        Take a historical dividend record and return a Series with fields in
        zipline.protocol.DIVIDEND_FIELDS (plus an 'id' field) representing
        the cash/stock amount we are owed when the dividend is paid.
        """
        if dividend['sid'] in self.positions:
            return self.positions[dividend['sid']].earn_dividend(dividend)
        else:
            return zp.dividend_payment()

    def earn_dividends(self, dividend_frame):
        """
        Given a frame of dividends whose ex_dates are all the next trading day,
        calculate and store the cash and/or stock payments to be paid on each
        dividend's pay date.
        """
        earned = dividend_frame.apply(self._maybe_earn_dividend, axis=1)\
                               .dropna(how='all')
        if len(earned) > 0:
            # Store the earned dividends so that they can be paid on the
            # dividends' pay_dates.
            self._unpaid_dividends = pd.concat(
                [self._unpaid_dividends, earned],
            )

    def _maybe_pay_dividend(self, dividend):
        """
        Take a historical dividend record, look up any stored record of
        cash/stock we are owed for that dividend, and return a Series
        with fields drawn from zipline.protocol.DIVIDEND_PAYMENT_FIELDS.
        """
        try:
            unpaid_dividend = self._unpaid_dividends.loc[dividend['id']]
            return unpaid_dividend
        except KeyError:
            return zp.dividend_payment()

    def pay_dividends(self, dividend_frame):
        """
        Given a frame of dividends whose pay_dates are all the next trading
        day, grant the cash and/or stock payments that were calculated on the
        given dividends' ex dates.

        Returns the net cash payment summed over all paid dividends.
        """
        payments = dividend_frame.apply(self._maybe_pay_dividend, axis=1)\
                                 .dropna(how='all')

        # Mark these dividends as paid by dropping them from our unpaid
        # table.  DataFrame.drop returns a new frame instead of modifying
        # the receiver, so the result has to be re-bound; otherwise paid
        # dividends stay in the unpaid table.
        self._unpaid_dividends = self._unpaid_dividends.drop(payments.index)

        # Add stock for any stock dividends paid.  Again, the values here may
        # be negative in the case of short positions.
        stock_payments = payments[payments['payment_sid'].notnull()]
        for _, row in stock_payments.iterrows():
            stock = row['payment_sid']
            share_count = row['share_count']
            # note we create a Position for stock dividend if we don't
            # already own the asset
            position = self.positions[stock]

            position.amount += share_count
            self._position_amounts[stock] = position.amount
            self._position_last_sale_prices[stock] = position.last_sale_price
            self._update_multipliers(stock)

        # Add cash equal to the net cash payed from all dividends.  Note that
        # "negative cash" is effectively paid if we're short an asset,
        # representing the fact that we're required to reimburse the owner of
        # the stock for any dividends paid while borrowing.
        net_cash_payment = payments['cash_amount'].fillna(0).sum()
        return net_cash_payment

    def create_close_position_transaction(self, event):
        """
        Build a Transaction that offsets the whole amount held for
        ``event.sid`` at ``event.price``; returns None when the tracked
        amount is missing or zero.
        """
        if not self._position_amounts.get(event.sid):
            return None
        txn = Transaction(
            sid=event.sid,
            amount=(-1 * self._position_amounts[event.sid]),
            dt=event.dt,
            price=event.price,
            commission=0,
            order_id=0
        )
        return txn

    def get_positions(self):
        """
        Refresh ``self._positions_store`` from ``self.positions`` and return
        it. Entries whose amount is zero are removed from the store.
        """
        positions = self._positions_store

        for sid, pos in iteritems(self.positions):

            if pos.amount == 0:
                # Clear out the position if it has become empty since the last
                # time get_positions was called.  Catching the KeyError is
                # faster than checking `if sid in positions`, and this can be
                # potentially called in a tight inner loop.
                try:
                    del positions[sid]
                except KeyError:
                    pass
                continue

            # Note that this will create a position if we don't currently have
            # an entry
            position = positions[sid]
            position.amount = pos.amount
            position.cost_basis = pos.cost_basis
            position.last_sale_price = pos.last_sale_price
        return positions

    def get_positions_list(self):
        """Return ``to_dict()`` of every position with a non-zero amount."""
        positions = []
        for sid, pos in iteritems(self.positions):
            if pos.amount != 0:
                positions.append(pos.to_dict())
        return positions

    def __getstate__(self):
        """
        Return the picklable state: a plain-dict copy of the positions, the
        unpaid dividends frame and the state version under VERSION_LABEL.
        """
        state_dict = {}

        state_dict['positions'] = dict(self.positions)
        state_dict['unpaid_dividends'] = self._unpaid_dividends

        STATE_VERSION = 1
        state_dict[VERSION_LABEL] = STATE_VERSION
        return state_dict

    def __setstate__(self, state):
        """
        Restore the tracker from a dict produced by ``__getstate__``.

        The version entry is popped from ``state``; a version older than
        the oldest supported one raises BaseException. The lookup tables
        are rebuilt by replaying the saved positions through
        ``update_positions``.
        """
        OLDEST_SUPPORTED_STATE = 1
        version = state.pop(VERSION_LABEL)

        if version < OLDEST_SUPPORTED_STATE:
            raise BaseException("PositionTracker saved state is too old.")

        self.positions = positiondict()
        # note that positions_store is temporary and gets regened from
        # .positions
        self._positions_store = zp.Positions()

        self._unpaid_dividends = state['unpaid_dividends']

        # Arrays for quick calculations of positions value
        self._position_amounts = OrderedDict()
        self._position_last_sale_prices = OrderedDict()
        self._position_value_multipliers = OrderedDict()
        self._position_exposure_multipliers = OrderedDict()
        self._position_payout_multipliers = OrderedDict()

        self.update_positions(state['positions'])
Пример #5
0
class Dimensioned(LabelledData):
    """
    Dimensioned is a base class that allows the data contents of a
    class to be associated with dimensions. The contents associated
    with dimensions may be partitioned into one of three types

    * key_dimensions: These are the dimensions that can be indexed via
                      the __getitem__ method. Dimension objects
                      supporting key dimensions must support indexing
                      over these dimensions and may also support
                      slicing. This list ordering of dimensions
                      describes the positional components of each
                      multi-dimensional indexing operation.

                      For instance, if the key dimension names are
                      'weight' followed by 'height' for Dimensioned
                      object 'obj', then obj[80,175] indexes a weight
                      of 80 and height of 175.

    * value_dimensions: These dimensions correspond to any data held
                        on the Dimensioned object not in the key
                        dimensions. Indexing by value dimension is
                        supported by dimension name (when there are
                        multiple possible value dimensions); no
                        slicing semantics is supported and all the
                        data associated with that dimension will be
                        returned at once. Note that it is not possible
                        to mix value_dimensions and deep_dimensions.

    * deep_dimensions: These are dynamically computed dimensions that
                       belong to other Dimensioned objects that are
                       nested in the data. Objects that support this
                       should enable the _deep_indexable flag. Note
                       that it is not possible to mix value_dimensions
                       and deep_dimensions.

    The Dimensioned class supports generalized methods for finding the
    range and type of values along a particular Dimension. The range
    method relies on the appropriate implementation of the
    dimension_values methods on subclasses.

    The index of an arbitrary dimension is its positional index in the
    list of all dimensions, starting with the key dimensions, followed
    by the value dimensions and ending with the deep dimensions.
    """

    constant_dimensions = param.Dict(default=OrderedDict(),
                                     doc="""
       A dictionary of Dimension:value pairs providing additional
       dimension information about the object.""")

    key_dimensions = param.List(bounds=(0, None),
                                constant=True,
                                doc="""
       The list of dimensions that may be used in indexing (and
       potential slicing) semantics. The order of the dimensions
       listed here determines the semantics of each component of a
       multi-dimensional indexing operation.""")

    value_dimensions = param.List(bounds=(0, None),
                                  constant=True,
                                  doc="""
       The list of dimensions used to describe the components of the
       data. If multiple value dimensions are supplied, a particular
       value dimension may be indexed by name after the key
       dimensions.""")

    group = param.String(default='Dimensioned',
                         constant=True,
                         doc="""
       A string describing the data wrapped by the object.""")

    __abstract = True
    _sorted = False
    _dim_groups = ['key_dimensions', 'value_dimensions', 'deep_dimensions']

    def __init__(self, data, **params):
        """
        Wrap any non-Dimension entries of the first two dimension groups
        in Dimension objects, initialise the parent class and cache the
        key, value and constant dimension names.
        """
        for group in self._dim_groups[0:2]:
            if group in params:
                # With the default _dim_groups neither of the first two
                # group names contains 'constant', so this branch is only
                # reachable if a subclass changes _dim_groups.
                if 'constant' in group:
                    dimensions = {
                        d if isinstance(d, Dimension) else Dimension(d): val
                        for d, val in params.pop(group)
                    }
                else:
                    dimensions = [
                        d if isinstance(d, Dimension) else Dimension(d)
                        for d in params.pop(group)
                    ]
                params[group] = dimensions
        super(Dimensioned, self).__init__(data, **params)
        self.ndims = len(self.key_dimensions)
        constant_dimensions = [(d.name, val)
                               for d, val in self.constant_dimensions.items()]
        self._cached_constants = OrderedDict(constant_dimensions)
        self._cached_index_names = [d.name for d in self.key_dimensions]
        self._cached_value_names = [d.name for d in self.value_dimensions]
        self._settings = None

    def _valid_dimensions(self, dimensions):
        """
        Validates key dimension input.

        A falsy input is returned unchanged and a single non-list value is
        wrapped in a list. Raises Exception if any entry is not one of the
        cached key dimension names.
        """
        if not dimensions:
            return dimensions
        elif not isinstance(dimensions, list):
            dimensions = [dimensions]

        for dim in dimensions:
            if dim not in self._cached_index_names:
                raise Exception("Supplied dimensions %s not found." % dim)
        return dimensions

    @property
    def deep_dimensions(self):
        "The list of deep dimensions"
        if self._deep_indexable and len(self):
            return self.values()[0].dimensions()
        else:
            return []

    def dimensions(self, selection='all', label=False):
        """
        Provides convenient access to Dimensions on nested
        Dimensioned objects. Dimensions can be selected
        by their type, i.e. 'key', 'value' or 'constant'
        dimensions. By default 'all' dimensions are returned.

        If label is True the dimension names are returned instead
        of the Dimension objects. Any other selection raises KeyError.
        """
        lambdas = {
            'key': (lambda x: x.key_dimensions, {
                'full_breadth': False
            }),
            'value': (lambda x: x.value_dimensions, {}),
            'constant': (lambda x: x.constant_dimensions, {})
        }
        if selection == 'all':
            dims = [
                dim for group in self._dim_groups
                for dim in getattr(self, group)
            ]
        elif selection in ['key', 'value', 'constant']:
            lmbd, kwargs = lambdas[selection]
            key_traversal = self.traverse(lmbd, **kwargs)
            dims = [dim for keydims in key_traversal for dim in keydims]
        else:
            # %r already applies repr(); the value is wrapped in a tuple so
            # that a tuple selection cannot be mistaken for format args.
            raise KeyError("Invalid selection %r, valid selections include "
                           "'all', 'key', 'value' and 'constant' dimensions"
                           % (selection,))
        return [dim.name if label else dim for dim in dims]

    def get_dimension(self, dimension, default=None):
        """
        Access a Dimension object by name or index.

        An integer indexes the list of all dimensions; any other value is
        looked up by name, returning default when there is no match.
        """
        all_dims = self.dimensions()
        if isinstance(dimension, int):
            return all_dims[dimension]
        else:
            return {dim.name: dim for dim in all_dims}.get(dimension, default)

    def get_dimension_index(self, dim):
        """
        Returns the index of the requested dimension.

        An integer is returned unchanged when it is below the number of
        dimensions, otherwise IndexError is raised. A name that matches
        no dimension raises Exception.
        """
        if isinstance(dim, int):
            if dim < len(self.dimensions()):
                return dim
            else:
                # Raise rather than return the exception object, which
                # callers would otherwise try to use as an index.
                raise IndexError('Dimension index out of bounds')
        try:
            return [d.name for d in self.dimensions()].index(dim)
        except ValueError:
            raise Exception("Dimension %s not found in %s." %
                            (dim, self.__class__.__name__))

    def get_dimension_type(self, dim):
        """
        Returns the specified Dimension type if specified or
        if the dimension_values types are consistent otherwise
        None is returned.
        """
        dim_obj = self.get_dimension(dim)
        if dim_obj and dim_obj.type is not None:
            return dim_obj.type
        dim_vals = [type(v) for v in self.dimension_values(dim)]
        if len(set(dim_vals)) == 1:
            return dim_vals[0]
        else:
            return None

    def __getitem__(self, key):
        """
        Multi-dimensional indexing semantics is determined by the list
        of key_dimensions. For instance, the first indexing component
        will index the first key dimension.

        After the key dimensions are given, *either* a value dimension
        name may follow (if there are multiple value dimensions) *or*
        deep dimensions may then be listed (for applicable deep
        dimensions).

        This base implementation ignores the key and returns self.
        """
        return self

    def select(self, ignore_invalid=False, **kwargs):
        """
        Allows slicing or indexing into the Dimensioned object
        by supplying the dimension and index/slice as key
        value pairs. Tuple values are converted to slices.

        Keywords that do not name a dimension raise KeyError unless
        ignore_invalid is True, in which case they are dropped.
        """
        # Look the names up once instead of once per keyword; skipped when
        # there are no keywords, as in that case no lookup is needed.
        valid_names = self.dimensions(label=True) if kwargs else []
        valid_kwargs = {k: v for k, v in kwargs.items() if k in valid_names}
        if len(valid_kwargs) != len(kwargs) and not ignore_invalid:
            raise KeyError("Invalid Dimension supplied.")
        kwargs = valid_kwargs
        deep_select = any([
            kw for kw in kwargs.keys() if (kw in self.deep_dimensions) and (
                kw not in self._cached_index_names)
        ])
        selection_depth = len(
            self.dimensions('key')) if deep_select else self.ndims
        selection = [slice(None) for i in range(selection_depth)]
        for dim, val in kwargs.items():
            if isinstance(val, tuple):
                val = slice(*val)
            selection[self.get_dimension_index(dim)] = val
        return self.__getitem__(tuple(selection))

    def dimension_values(self, dimension):
        """
        Returns the values along the specified dimension. This method
        must be implemented for all Dimensioned type.

        This base implementation only knows the constant dimensions and
        raises Exception for any other name.
        """
        val = self._cached_constants.get(dimension, None)
        # Compare against None so falsy constants (0, '', False) are
        # returned instead of being reported as missing.
        if val is not None:
            return val
        else:
            raise Exception("Dimension %s not found in %s." %
                            (dimension, self.__class__.__name__))

    def range(self, dim, data_range=True):
        """
        Returns the range of values along the specified dimension.

        If data_range is True, the data may be used to try and infer
        the appropriate range. Otherwise, (None,None) is returned to
        indicate that no range is defined. (None, None) is also
        returned when the dimension cannot be found.
        """
        dimension = self.get_dimension(dim)
        if dimension is None:
            return (None, None)
        if dimension.range != (None, None):
            return dimension.range
        elif not data_range:
            return (None, None)
        soft_range = [r for r in dimension.soft_range if r is not None]
        dim_vals = self.dimension_values(dimension.name)
        try:
            dim_vals = np.concatenate([dim_vals, soft_range])
            return np.min(dim_vals), np.max(dim_vals)
        except Exception:
            # Best-effort fallback when the numpy path fails: use the
            # first and last of the (sorted) values.
            try:
                if dim in self.dimensions() and len(dim_vals):
                    if not self._sorted:
                        dim_vals = sorted(dim_vals)
                    return (dim_vals[0], dim_vals[-1])
            except Exception:
                pass
            return (None, None)

    def __repr__(self):
        return PrettyPrinter.pprint(self)

    def __call__(self, options=None, **kwargs):
        """
        Apply the supplied options to a clone of the object which is
        then returned.
        """
        deep_clone = self.map(lambda x: x.clone(id=x.id))
        StoreOptions.set_options(deep_clone, options, **kwargs)
        return deep_clone
Пример #6
0
class Dimensioned(LabelledData):
    """
    Dimensioned is a base class that allows the data contents of a
    class to be associated with dimensions. The contents associated
    with dimensions may be partitioned into one of three types

    * key_dimensions: These are the dimensions that can be indexed via
                      the __getitem__ method. Dimension objects
                      supporting key dimensions must support indexing
                      over these dimensions and may also support
                      slicing. This list ordering of dimensions
                      describes the positional components of each
                      multi-dimensional indexing operation.

                      For instance, if the key dimension names are
                      'weight' followed by 'height' for Dimensioned
                      object 'obj', then obj[80,175] indexes a weight
                      of 80 and height of 175.

    * value_dimensions: These dimensions correspond to any data held
                        on the Dimensioned object not in the key
                        dimensions. Indexing by value dimension is
                        supported by dimension name (when there are
                        multiple possible value dimensions); no
                        slicing semantics is supported and all the
                        data associated with that dimension will be
                        returned at once. Note that it is not possible
                        to mix value_dimensions and deep_dimensions.

    * deep_dimensions: These are dynamically computed dimensions that
                       belong to other Dimensioned objects that are
                       nested in the data. Objects that support this
                       should enable the _deep_indexable flag. Note
                       that it is not possible to mix value_dimensions
                       and deep_dimensions.

    The Dimensioned class supports generalized methods for finding the
    range and type of values along a particular Dimension. The range
    method relies on the appropriate implementation of the
    dimension_values methods on subclasses.

    The index of an arbitrary dimension is its positional index in the
    list of all dimensions, starting with the key dimensions, followed
    by the value dimensions and ending with the deep dimensions.
    """

    constant_dimensions = param.Dict(default=OrderedDict(), doc="""
       A dictionary of Dimension:value pairs providing additional
       dimension information about the object.""")

    key_dimensions = param.List(bounds=(0, None), constant=True, doc="""
       The list of dimensions that may be used in indexing (and
       potential slicing) semantics. The order of the dimensions
       listed here determines the semantics of each component of a
       multi-dimensional indexing operation.""")

    value_dimensions = param.List(bounds=(0, None), constant=True, doc="""
       The list of dimensions used to describe the components of the
       data. If multiple value dimensions are supplied, a particular
       value dimension may be indexed by name after the key
       dimensions.""")

    group = param.String(default='Dimensioned', constant=True, doc="""
       A string describing the data wrapped by the object.""")


    __abstract = True
    _sorted = False
    _dim_groups = ['key_dimensions',
                   'value_dimensions',
                   'deep_dimensions']

    def __init__(self, data, **params):
        """
        Wrap any non-Dimension entries of the first two dimension groups
        in Dimension objects, initialise the parent class and cache the
        key, value and constant dimension names.
        """
        for group in self._dim_groups[0:2]:
            if group in params:
                # With the default _dim_groups neither of the first two
                # group names contains 'constant', so this branch is only
                # reachable if a subclass changes _dim_groups.
                if 'constant' in group:
                    dimensions = {d if isinstance(d, Dimension) else Dimension(d): val
                                  for d, val in params.pop(group)}
                else:
                    dimensions = [d if isinstance(d, Dimension) else Dimension(d)
                                  for d in params.pop(group)]
                params[group] = dimensions
        super(Dimensioned, self).__init__(data, **params)
        self.ndims = len(self.key_dimensions)
        constant_dimensions = [(d.name, val) for d, val in self.constant_dimensions.items()]
        self._cached_constants = OrderedDict(constant_dimensions)
        self._cached_index_names = [d.name for d in self.key_dimensions]
        self._cached_value_names = [d.name for d in self.value_dimensions]
        self._settings = None


    def _valid_dimensions(self, dimensions):
        """
        Validates key dimension input.

        A falsy input is returned unchanged and a single non-list value is
        wrapped in a list. Raises Exception if any entry is not one of the
        cached key dimension names.
        """
        if not dimensions:
            return dimensions
        elif not isinstance(dimensions, list):
            dimensions = [dimensions]

        for dim in dimensions:
            if dim not in self._cached_index_names:
                raise Exception("Supplied dimensions %s not found." % dim)
        return dimensions

    @property
    def deep_dimensions(self):
        "The list of deep dimensions"
        if self._deep_indexable and len(self):
            return self.values()[0].dimensions()
        else:
            return []


    def dimensions(self, selection='all', label=False):
        """
        Provides convenient access to Dimensions on nested
        Dimensioned objects. Dimensions can be selected
        by their type, i.e. 'key', 'value' or 'constant'
        dimensions. By default 'all' dimensions are returned.

        If label is True the dimension names are returned instead
        of the Dimension objects. Any other selection raises KeyError.
        """
        lambdas = {'key': (lambda x: x.key_dimensions, {'full_breadth': False}),
                   'value': (lambda x: x.value_dimensions, {}),
                   'constant': (lambda x: x.constant_dimensions, {})}
        if selection == 'all':
            dims = [dim for group in self._dim_groups
                    for dim in getattr(self, group)]
        elif selection in ['key', 'value', 'constant']:
            lmbd, kwargs = lambdas[selection]
            key_traversal = self.traverse(lmbd, **kwargs)
            dims = [dim for keydims in key_traversal for dim in keydims]
        else:
            # %r already applies repr(); the value is wrapped in a tuple so
            # that a tuple selection cannot be mistaken for format args.
            raise KeyError("Invalid selection %r, valid selections include "
                           "'all', 'key', 'value' and 'constant' dimensions"
                           % (selection,))
        return [dim.name if label else dim for dim in dims]


    def get_dimension(self, dimension, default=None):
        """
        Access a Dimension object by name or index.

        An integer indexes the list of all dimensions; any other value is
        looked up by name, returning default when there is no match.
        """
        all_dims = self.dimensions()
        if isinstance(dimension, int):
            return all_dims[dimension]
        else:
            return {dim.name: dim for dim in all_dims}.get(dimension, default)


    def get_dimension_index(self, dim):
        """
        Returns the index of the requested dimension.

        An integer is returned unchanged when it is below the number of
        dimensions, otherwise IndexError is raised. A name that matches
        no dimension raises Exception.
        """
        if isinstance(dim, int):
            if dim < len(self.dimensions()):
                return dim
            else:
                # Raise rather than return the exception object, which
                # callers would otherwise try to use as an index.
                raise IndexError('Dimension index out of bounds')
        try:
            return [d.name for d in self.dimensions()].index(dim)
        except ValueError:
            raise Exception("Dimension %s not found in %s." %
                            (dim, self.__class__.__name__))


    def get_dimension_type(self, dim):
        """
        Returns the specified Dimension type if specified or
        if the dimension_values types are consistent otherwise
        None is returned.
        """
        dim_obj = self.get_dimension(dim)
        if dim_obj and dim_obj.type is not None:
            return dim_obj.type
        dim_vals = [type(v) for v in self.dimension_values(dim)]
        if len(set(dim_vals)) == 1:
            return dim_vals[0]
        else:
            return None

    def __getitem__(self, key):
        """
        Multi-dimensional indexing semantics is determined by the list
        of key_dimensions. For instance, the first indexing component
        will index the first key dimension.

        After the key dimensions are given, *either* a value dimension
        name may follow (if there are multiple value dimensions) *or*
        deep dimensions may then be listed (for applicable deep
        dimensions).

        This base implementation ignores the key and returns self.
        """
        return self


    def select(self, ignore_invalid=False, **kwargs):
        """
        Allows slicing or indexing into the Dimensioned object
        by supplying the dimension and index/slice as key
        value pairs. Tuple values are converted to slices.

        Keywords that do not name a dimension raise KeyError unless
        ignore_invalid is True, in which case they are dropped.
        """
        # Look the names up once instead of once per keyword; skipped when
        # there are no keywords, as in that case no lookup is needed.
        valid_names = self.dimensions(label=True) if kwargs else []
        valid_kwargs = {k: v for k, v in kwargs.items()
                        if k in valid_names}
        if len(valid_kwargs) != len(kwargs) and not ignore_invalid:
            raise KeyError("Invalid Dimension supplied.")
        kwargs = valid_kwargs
        deep_select = any([kw for kw in kwargs.keys() if (kw in self.deep_dimensions)
                           and (kw not in self._cached_index_names)])
        selection_depth = len(self.dimensions('key')) if deep_select else self.ndims
        selection = [slice(None) for i in range(selection_depth)]
        for dim, val in kwargs.items():
            if isinstance(val, tuple):
                val = slice(*val)
            selection[self.get_dimension_index(dim)] = val
        return self.__getitem__(tuple(selection))


    def dimension_values(self, dimension):
        """
        Returns the values along the specified dimension. This method
        must be implemented for all Dimensioned type.

        This base implementation only knows the constant dimensions and
        raises Exception for any other name.
        """
        val = self._cached_constants.get(dimension, None)
        # Compare against None so falsy constants (0, '', False) are
        # returned instead of being reported as missing.
        if val is not None:
            return val
        else:
            raise Exception("Dimension %s not found in %s." %
                            (dimension, self.__class__.__name__))


    def range(self, dim, data_range=True):
        """
        Returns the range of values along the specified dimension.

        If data_range is True, the data may be used to try and infer
        the appropriate range. Otherwise, (None,None) is returned to
        indicate that no range is defined. (None, None) is also
        returned when the dimension cannot be found.
        """
        dimension = self.get_dimension(dim)
        if dimension is None:
            return (None, None)
        if dimension.range != (None, None):
            return dimension.range
        elif not data_range:
            return (None, None)
        soft_range = [r for r in dimension.soft_range
                      if r is not None]
        dim_vals = self.dimension_values(dimension.name)
        try:
            dim_vals = np.concatenate([dim_vals, soft_range])
            return np.min(dim_vals), np.max(dim_vals)
        except Exception:
            # Best-effort fallback when the numpy path fails: use the
            # first and last of the (sorted) values.
            try:
                if dim in self.dimensions() and len(dim_vals):
                    if not self._sorted:
                        dim_vals = sorted(dim_vals)
                    return (dim_vals[0], dim_vals[-1])
            except Exception:
                pass
            return (None, None)


    def __repr__(self):
        return PrettyPrinter.pprint(self)


    def __call__(self, options=None, **kwargs):
        """
        Apply the supplied options to a clone of the object which is
        then returned.
        """
        deep_clone = self.map(lambda x: x.clone(id=x.id))
        StoreOptions.set_options(deep_clone, options, **kwargs)
        return deep_clone
Пример #7
0
class Dimensioned(LabelledData):
    """
    Dimensioned is a base class that allows the data contents of a
    class to be associated with dimensions. The contents associated
    with dimensions may be partitioned into one of three types

    * key dimensions: These are the dimensions that can be indexed via
                      the __getitem__ method. Dimension objects
                      supporting key dimensions must support indexing
                      over these dimensions and may also support
                      slicing. This list ordering of dimensions
                      describes the positional components of each
                      multi-dimensional indexing operation.

                      For instance, if the key dimension names are
                      'weight' followed by 'height' for Dimensioned
                      object 'obj', then obj[80,175] indexes a weight
                      of 80 and height of 175.

                      Accessed using either kdims or key_dimensions.

    * value dimensions: These dimensions correspond to any data held
                        on the Dimensioned object not in the key
                        dimensions. Indexing by value dimension is
                        supported by dimension name (when there are
                        multiple possible value dimensions); no
                        slicing semantics is supported and all the
                        data associated with that dimension will be
                        returned at once. Note that it is not possible
                        to mix value dimensions and deep dimensions.

                        Accessed using either vdims or value_dimensions.


    * deep dimensions: These are dynamically computed dimensions that
                       belong to other Dimensioned objects that are
                       nested in the data. Objects that support this
                       should enable the _deep_indexable flag. Note
                       that it is not possible to mix value dimensions
                       and deep dimensions.

                       Accessed using either ddims or deep_dimensions.

    In addition, constant dimensions (cdims or constant_dimensions)
    may be declared as a dictionary of Dimension:value pairs.

    The Dimensioned class supports generalized methods for finding the
    range and type of values along a particular Dimension. The range
    method relies on the appropriate implementation of the
    dimension_values method on subclasses.

    The index of an arbitrary dimension is its positional index in the
    list of all dimensions, which is built by concatenating the groups
    listed in _dim_groups (key, value, constant and then deep
    dimensions).
    """

    cdims = param.Dict(default=OrderedDict(), doc="""
       The constant dimensions defined as a dictionary of Dimension:value
       pairs providing additional dimension information about the object.

       Aliased with constant_dimensions.""")

    kdims = param.List(bounds=(0, None), constant=True, doc="""
       The key dimensions defined as list of dimensions that may be
       used in indexing (and potential slicing) semantics. The order
       of the dimensions listed here determines the semantics of each
       component of a multi-dimensional indexing operation.

       Aliased with key_dimensions.""")

    vdims = param.List(bounds=(0, None), constant=True, doc="""
       The value dimensions defined as the list of dimensions used to
       describe the components of the data. If multiple value
       dimensions are supplied, a particular value dimension may be
       indexed by name after the key dimensions.

       Aliased with value_dimensions.""")

    group = param.String(default='Dimensioned', constant=True, doc="""
       A string describing the data wrapped by the object.""")

    # NOTE(review): presumably param's marker for an abstract
    # Parameterized class - confirm against the param documentation.
    __abstract = True
    # Not read by any method visible in this class; presumably tells
    # subclasses whether dimension values are already sorted - TODO confirm.
    _sorted = False
    # Attribute names concatenated (in this order) by dimensions('all').
    _dim_groups = ['kdims', 'vdims', 'cdims', 'ddims']
    # Long-name constructor keywords mapped onto the short group names.
    _dim_aliases = dict(key_dimensions='kdims', value_dimensions='vdims',
                        constant_dimensions='cdims', deep_dimensions='ddims')


    # Long-name aliases

    @property
    def key_dimensions(self):
        "Long-name alias returning the kdims attribute."
        return self.kdims

    @property
    def value_dimensions(self):
        "Long-name alias returning the vdims attribute."
        return self.vdims

    @property
    def constant_dimensions(self):
        "Long-name alias returning the cdims attribute."
        return self.cdims

    @property
    def deep_dimensions(self):
        "Long-name alias returning the ddims attribute."
        return self.ddims

    def __init__(self, data, **params):
        """
        Normalize the supplied dimension declarations and initialize
        the object.

        Dimension groups may be given under their short names (kdims,
        vdims, cdims) or their long aliases (key_dimensions, ...); an
        alias is moved onto its short name. Entries that are not
        already Dimension objects are wrapped in Dimension. Deep
        dimensions are computed dynamically and so are never accepted
        as constructor arguments.
        """
        for group in self._dim_groups+list(self._dim_aliases.keys()):
            if group in ['deep_dimensions', 'ddims']: continue
            if group not in params: continue
            if group in self._dim_aliases:
                # Re-file the value under the short parameter name
                params[self._dim_aliases[group]] = params.pop(group)
                group = self._dim_aliases[group]
            supplied = params.pop(group)
            if group == 'cdims':
                # Use an OrderedDict so that the declared order of the
                # constant dimensions survives the normalization.
                dimensions = OrderedDict(
                    (d if isinstance(d, Dimension) else Dimension(d), val)
                    for d, val in supplied.items())
            else:
                dimensions = [d if isinstance(d, Dimension) else Dimension(d)
                              for d in supplied]
            params[group] = dimensions
        super(Dimensioned, self).__init__(data, **params)
        self.ndims = len(self.kdims)
        # Name-based caches used for lookups by the other methods
        cdims = [(d.name, val) for d, val in self.cdims.items()]
        self._cached_constants = OrderedDict(cdims)
        self._cached_index_names = [d.name for d in self.kdims]
        self._cached_value_names = [d.name for d in self.vdims]
        self._settings = None


    def _valid_dimensions(self, dimensions):
        "Validates key dimension input"
        if not dimensions:
            return dimensions
        elif not isinstance(dimensions, list):
            dimensions = [dimensions]

        for dim in dimensions:
            if dim not in self._cached_index_names:
                raise Exception("Supplied dimensions %s not found." % dim)
        return dimensions


    @property
    def ddims(self):
        """
        The list of deep dimensions: the dimensions of the first value
        held by a non-empty, deep indexable object, otherwise an empty
        list.
        """
        if not (self._deep_indexable and len(self)):
            return []
        first_item = self.values()[0]
        return first_item.dimensions()


    def dimensions(self, selection='all', label=False):
        """
        Provides convenient access to Dimensions on nested
        Dimensioned objects. Dimensions can be selected
        by their type, i.e. 'key', 'value' or 'constant'
        dimensions (or the short forms 'k', 'v' and 'c').
        By default 'all' dimensions are returned.

        A list of types may also be supplied, in which case the
        matching dimension groups of this object are concatenated
        in the listed order without traversing nested objects.

        If label is True the dimension names are returned instead
        of the Dimension objects. A KeyError is raised for an
        unrecognized selection.
        """
        lambdas = {'k': (lambda x: x.kdims, {'full_breadth': False}),
                   'v': (lambda x: x.vdims, {}),
                   'c': (lambda x: x.cdims, {})}
        aliases = {'key': 'k', 'value': 'v', 'constant': 'c'}
        invalid = ("Invalid selection %r, valid selections include "
                   "'all', 'key', 'value' and 'constant' dimensions")
        if selection == 'all':
            dims = [dim for group in self._dim_groups
                    for dim in getattr(self, group)]
        elif isinstance(selection, list):
            dims = []
            for group in selection:
                # Accept both the long and the short spelling
                short = aliases.get(group, group)
                if short not in lambdas:
                    raise KeyError(invalid % (group,))
                dims.extend(getattr(self, '%sdims' % short))
        else:
            short = aliases.get(selection, selection)
            if short not in lambdas:
                raise KeyError(invalid % (selection,))
            lmbd, kwargs = lambdas[short]
            # Collect the requested group from every traversed object
            key_traversal = self.traverse(lmbd, **kwargs)
            dims = [dim for keydims in key_traversal for dim in keydims]
        return [dim.name if label else dim for dim in dims]


    def get_dimension(self, dimension, default=None):
        """
        Access a Dimension object by name or index.

        A Dimension instance is looked up by its name. An integer
        smaller than the number of dimensions is used as a positional
        index; anything else is matched against the dimension names,
        returning default when there is no match.
        """
        candidates = self.dimensions()
        key = dimension.name if isinstance(dimension, Dimension) else dimension
        if isinstance(key, int) and key < len(candidates):
            return candidates[key]
        by_name = dict((dim.name, dim) for dim in candidates)
        return by_name.get(key, default)


    def get_dimension_index(self, dim):
        """
        Returns the index of the requested dimension.

        An integer is validated against the number of dimensions and
        returned as is; an IndexError is raised when it is too large.
        A name (or the sanitized form of a key dimension name) is
        resolved to its position in the list of all dimensions, and
        an Exception is raised if no dimension has that name.
        """
        if isinstance(dim, int):
            if dim < len(self.dimensions()):
                return dim
            # The error must be raised; returning the exception object
            # would hand callers an IndexError instance as the "index".
            raise IndexError('Dimension index out of bounds')
        try:
            # Map sanitized identifiers back to the original names
            sanitized = {sanitize_identifier(kd): kd
                         for kd in self._cached_index_names}
            return [d.name for d in self.dimensions()].index(sanitized.get(dim, dim))
        except ValueError:
            raise Exception("Dimension %s not found in %s." %
                            (dim, self.__class__.__name__))


    def get_dimension_type(self, dim):
        """
        Returns the type declared on the Dimension when there is
        one. Otherwise the type shared by all of the dimension
        values is returned, or None if the values do not all have
        the same type.
        """
        declared = self.get_dimension(dim)
        if declared and declared.type is not None:
            return declared.type
        observed = set(type(value) for value in self.dimension_values(dim))
        return observed.pop() if len(observed) == 1 else None

    def __getitem__(self, key):
        """
        Multi-dimensional indexing semantics is determined by the list
        of key dimensions. For instance, the first indexing component
        will index the first key dimension.

        After the key dimensions are given, *either* a value dimension
        name may follow (if there are multiple value dimensions) *or*
        deep dimensions may then be listed (for applicable deep
        dimensions).
        """
        return self


    def select(self, selection_specs=None, **kwargs):
        """
        Allows slicing or indexing into the Dimensioned object
        by supplying the dimension and index/slice as key
        value pairs. Select descends recursively through the
        data structure applying the key dimension selection.
        The 'value' keyword allows selecting the
        value dimensions on objects which have any declared.

        The selection may also be selectively applied to
        specific objects by supplying the selection_specs
        as an iterable of type.group.label specs, types or
        functions.

        Keywords may use either the key dimension name or its
        sanitized identifier. A tuple value is expanded into a
        slice. Keywords that match nothing on this object are
        ignored here but are still offered to nested objects.
        """

        # Apply all indexes applying on this object
        val_dim = ['value'] if self.vdims else []
        # Maps sanitized identifier -> original key dimension name
        sanitized = {sanitize_identifier(kd): kd
                     for kd in self._cached_index_names}
        local_dims = (self._cached_index_names
                      + list(sanitized.keys()) + val_dim)
        # Only the keywords that refer to this object's own dimensions
        local_kwargs = {k: v for k, v in kwargs.items()
                        if k in local_dims}

        # Check selection_spec applies
        if selection_specs is not None:
            matches = any(self.matches(spec)
                          for spec in selection_specs)
        else:
            matches = True

        if local_kwargs and matches:
            # Start from a full slice over every key dimension and
            # overwrite the positions that were actually requested
            select = [slice(None) for i in range(self.ndims)]
            for dim, val in local_kwargs.items():
                if dim == 'value':
                    # The value selection is appended after the key components
                    select += [val]
                else:
                    if isinstance(val, tuple): val = slice(*val)
                    dim = sanitized.get(dim, dim)
                    select[self.get_dimension_index(dim)] = val
            if self._deep_indexable:
                # Fall back to an empty clone when nothing matches the key
                selection = self.get(tuple(select),
                                     self.clone(shared_data=False))
            else:
                selection = self[tuple(select)]
        else:
            selection = self

        if type(selection) is not type(self):
            # Apply the selection on the selected object of a different type
            val_dim = ['value'] if selection.vdims else []
            key_dims = selection.dimensions('key', label=True) + val_dim
            if any(kw in key_dims for kw in kwargs):
                selection = selection.select(selection_specs, **kwargs)
        elif selection._deep_indexable:
            # Apply the deep selection on each item in local selection
            items = []
            for k, v in selection.items():
                val_dim = ['value'] if v.vdims else []
                # Transpose the (sanitized, original) name pairs into two
                # tuples; note that the first tuple (bound to kdims) holds
                # the sanitized names and the second the original names.
                dims = list(zip(*[(sanitize_identifier(kd), kd)
                                  for kd in v.dimensions('key', label=True)]))
                kdims, skdims = dims if dims else ([], [])
                key_dims = list(kdims) + list(skdims) + val_dim
                if any(kw in key_dims for kw in kwargs):
                    items.append((k, v.select(selection_specs, **kwargs)))
                else:
                    items.append((k, v))
            selection = selection.clone(items)
        return selection


    def dimension_values(self, dimension):
        """
        Returns the values along the specified dimension. This method
        must be implemented for all Dimensioned types.

        This base implementation only looks the dimension up among the
        cached constant dimension values and raises an Exception when
        the dimension is not present there.
        """
        # Test for membership rather than truthiness so that falsy
        # constant values (e.g. 0 or an empty string) are returned
        # instead of being reported as missing.
        if dimension in self._cached_constants:
            return self._cached_constants[dimension]
        raise Exception("Dimension %s not found in %s." %
                        (dimension, self.__class__.__name__))


    def range(self, dimension, data_range=True):
        """
        Returns the range of values along the specified dimension.

        A range declared on the dimension itself is returned as is.
        Otherwise, when data_range is True, the range is computed from
        the data: directly from the dimension values if the dimension
        is a key or value dimension of this object, or else by
        combining the ranges of the traversed objects that declare it.
        (None, None) indicates that no range is defined.
        """
        dim_obj = self.get_dimension(dimension)
        if dim_obj is None:
            return (None, None)

        declared = dim_obj.range
        if declared != (None, None):
            return declared
        if not data_range:
            return (None, None)

        soft_range = [bound for bound in dim_obj.soft_range
                      if bound is not None]
        if dim_obj in self.kdims or dim_obj in self.vdims:
            values = self.dimension_values(dim_obj.name)
            return find_range(values, soft_range)

        name = dim_obj.name

        def _declares_dimension(obj):
            return name in obj.dimensions(['key', 'value'], True)

        def _range_of(obj):
            return obj.range(name)

        nested_ranges = self.traverse(_range_of, [_declares_dimension])
        return max_range(nested_ranges)


    def __repr__(self):
        "Return the representation produced by PrettyPrinter.pprint."
        representation = PrettyPrinter.pprint(self)
        return representation


    def __call__(self, options=None, **kwargs):
        """
        Return a clone of the object with the supplied options
        applied. If no options are supplied at all, the ids of the
        cloned objects are reset instead.

        When every keyword names an option group, each value must be
        a dictionary. If none of the dictionary keys names a target
        (such as 'Image.Foo'), the options are targeted at the current
        object; mixing target and non-target keys is an error.
        """
        known_groups = set(Store.options().groups.keys())
        if kwargs and set(kwargs) <= known_groups:
            if any(not isinstance(spec, dict) for spec in kwargs.values()):
                raise Exception("The %s options must be specified using dictionary groups" %
                                ','.join(repr(k) for k in kwargs.keys()))

            # Determine which keys refer to explicit targets
            entries = Store.options().children
            targets = [key.split('.')[0] in entries
                       for spec in kwargs.values() for key in spec]
            targeted = any(targets)
            if targeted and not all(targets):
                raise Exception("Cannot mix target specification keys such as 'Image' with non-target keywords.")
            elif not targeted:
                # No explicit targets: build the identifier of this object
                cls_name = self.__class__.__name__
                sanitized_group = sanitize_identifier(self.group)
                if self.label:
                    identifier = ('%s.%s.%s' % (cls_name,
                                                sanitized_group,
                                                sanitize_identifier(self.label)))
                elif sanitized_group != cls_name:
                    identifier = '%s.%s' % (cls_name, sanitized_group)
                else:
                    identifier = cls_name

                kwargs = {k: {identifier: v} for k, v in kwargs.items()}

        reset_ids = options is None and not kwargs
        deep_clone = self.map(
            lambda x: x.clone(id=None if reset_ids else x.id))
        StoreOptions.set_options(deep_clone, options, **kwargs)
        return deep_clone
Пример #8
0
class PositionTracker(object):

    def __init__(self):
        """
        Create a tracker with no positions and no unpaid dividends.
        """
        # Maps sid => position object.
        self.positions = positiondict()
        # Per-sid amounts and last sale prices; position_values
        # multiplies these pairwise to get the value of each position.
        self._position_amounts = OrderedDict()
        self._position_last_sale_prices = OrderedDict()
        unpaid_columns = zp.DIVIDEND_PAYMENT_FIELDS
        self._unpaid_dividends = pd.DataFrame(columns=unpaid_columns)
        self._positions_store = zp.Positions()

    def update_last_sale(self, event):
        # NOTE, PerformanceTracker already vetted as TRADE type
        sid = event.sid
        if sid not in self.positions:
            return

        price = event.price
        if not checknull(price):
            pos = self.positions[sid]
            pos.last_sale_date = event.dt
            pos.last_sale_price = price
            self._position_last_sale_prices[sid] = price
            self._position_values = None  # invalidate cache
        sid = event.sid
        price = event.price

    def update_positions(self, positions):
        """
        Merge a mapping of sid => position into the tracker in one
        batch and refresh the per-sid amount and price records.
        """
        self.positions.update(positions)
        amounts = self._position_amounts
        prices = self._position_last_sale_prices
        for sid, pos in iteritems(positions):
            amounts[sid] = pos.amount
            prices[sid] = pos.last_sale_price
            # The cached position values are stale once a record changes.
            self._position_values = None

    def update_position(self, sid, amount=None, last_sale_price=None,
                        last_sale_date=None, cost_basis=None):
        pos = self.positions[sid]

        if amount is not None:
            pos.amount = amount
            self._position_amounts[sid] = amount
            self._position_values = None  # invalidate cache
        if last_sale_price is not None:
            pos.last_sale_price = last_sale_price
            self._position_last_sale_prices[sid] = last_sale_price
            self._position_values = None  # invalidate cache
        if last_sale_date is not None:
            pos.last_sale_date = last_sale_date
        if cost_basis is not None:
            pos.cost_basis = cost_basis

    def execute_transaction(self, txn):
        # Update Position
        # ----------------

        sid = txn.sid
        position = self.positions[sid]
        position.update(txn)
        self._position_amounts[sid] = position.amount
        self._position_last_sale_prices[sid] = position.last_sale_price
        self._position_values = None  # invalidate cache

    def handle_commission(self, commission):
        # Adjust the cost basis of the stock if we own it
        if commission.sid in self.positions:
            self.positions[commission.sid].\
                adjust_commission_cost_basis(commission)

    _position_values = None

    @property
    def position_values(self):
        """
        Invalidate any time self._position_amounts or
        self._position_last_sale_prices is changed.
        """
        if self._position_values is None:
            vals = list(map(mul, self._position_amounts.values(),
                        self._position_last_sale_prices.values()))
            self._position_values = vals
        return self._position_values

    def calculate_positions_value(self):
        if len(self.position_values) == 0:
            return np.float64(0)

        return sum(self.position_values)

    def _longs_count(self):
        return sum(map(lambda x: x > 0, self.position_values))

    def _long_exposure(self):
        return sum(filter(lambda x: x > 0, self.position_values))

    def _shorts_count(self):
        return sum(map(lambda x: x < 0, self.position_values))

    def _short_exposure(self):
        return sum(filter(lambda x: x < 0, self.position_values))

    def _gross_exposure(self):
        return self._long_exposure() + abs(self._short_exposure())

    def _net_exposure(self):
        return self.calculate_positions_value()

    def handle_split(self, split):
        if split.sid in self.positions:
            # Make the position object handle the split. It returns the
            # leftover cash from a fractional share, if there is any.
            position = self.positions[split.sid]
            leftover_cash = position.handle_split(split)
            self._position_amounts[split.sid] = position.amount
            self._position_last_sale_prices[split.sid] = \
                position.last_sale_price
            self._position_values = None  # invalidate cache
            return leftover_cash

    def _maybe_earn_dividend(self, dividend):
        """
        Take a historical dividend record and return a Series with fields in
        zipline.protocol.DIVIDEND_FIELDS (plus an 'id' field) representing
        the cash/stock amount we are owed when the dividend is paid.
        """
        if dividend['sid'] in self.positions:
            return self.positions[dividend['sid']].earn_dividend(dividend)
        else:
            return zp.dividend_payment()

    def earn_dividends(self, dividend_frame):
        """
        Given a frame of dividends whose ex_dates are all the next trading day,
        calculate and store the cash and/or stock payments to be paid on each
        dividend's pay date.
        """
        earned = dividend_frame.apply(self._maybe_earn_dividend, axis=1)\
                               .dropna(how='all')
        if len(earned) > 0:
            # Store the earned dividends so that they can be paid on the
            # dividends' pay_dates.
            self._unpaid_dividends = pd.concat(
                [self._unpaid_dividends, earned],
            )

    def _maybe_pay_dividend(self, dividend):
        """
        Take a historical dividend record, look up any stored record of
        cash/stock we are owed for that dividend, and return a Series
        with fields drawn from zipline.protocol.DIVIDEND_PAYMENT_FIELDS.
        """
        try:
            unpaid_dividend = self._unpaid_dividends.loc[dividend['id']]
            return unpaid_dividend
        except KeyError:
            return zp.dividend_payment()

    def pay_dividends(self, dividend_frame):
        """
        Given a frame of dividends whose pay_dates are all the next trading
        day, grant the cash and/or stock payments that were calculated on the
        given dividends' ex dates.
        """
        payments = dividend_frame.apply(self._maybe_pay_dividend, axis=1)\
                                 .dropna(how='all')

        # Mark these dividends as paid by dropping them from our unpaid
        # table.
        self._unpaid_dividends.drop(payments.index)

        # Add stock for any stock dividends paid.  Again, the values here may
        # be negative in the case of short positions.
        stock_payments = payments[payments['payment_sid'].notnull()]
        for _, row in stock_payments.iterrows():
            stock = row['payment_sid']
            share_count = row['share_count']
            # note we create a Position for stock dividend if we don't
            # already own the security
            position = self.positions[stock]

            position.amount += share_count
            self._position_amounts[stock] = position.amount
            self._position_last_sale_prices[stock] = position.last_sale_price

        # Add cash equal to the net cash payed from all dividends.  Note that
        # "negative cash" is effectively paid if we're short a security,
        # representing the fact that we're required to reimburse the owner of
        # the stock for any dividends paid while borrowing.
        net_cash_payment = payments['cash_amount'].fillna(0).sum()
        return net_cash_payment

    def create_close_position_transaction(self, event):
        if not self._position_amounts.get(event.sid):
            return None
        txn = Transaction(
            sid=event.sid,
            amount=(-1 * self._position_amounts[event.sid]),
            dt=event.dt,
            price=event.price,
            commission=0,
            order_id=0
        )
        return txn

    def get_positions(self):
        """
        Synchronize the positions store with the tracked positions
        and return it.

        Entries for positions whose amount is zero are removed from
        the store; for every other position the amount, cost basis
        and last sale price are copied onto the store's entry.
        """
        store = self._positions_store

        for sid, pos in iteritems(self.positions):
            if pos.amount != 0:
                # Note that this will create a position if we don't
                # currently have an entry
                entry = store[sid]
                entry.amount = pos.amount
                entry.cost_basis = pos.cost_basis
                entry.last_sale_price = pos.last_sale_price
                continue

            # The position has become empty since the last call. Catching
            # the KeyError is faster than checking `if sid in store`, and
            # this can potentially be called in a tight inner loop.
            try:
                del store[sid]
            except KeyError:
                pass
        return store

    def get_positions_list(self):
        """
        Return the to_dict() form of every tracked position whose
        amount is not zero.
        """
        return [pos.to_dict()
                for sid, pos in iteritems(self.positions)
                if pos.amount != 0]

    def __getstate__(self):
        """
        Return the state to serialize: a plain dict copy of the
        positions, the unpaid dividends and the state version.
        """
        STATE_VERSION = 1
        state_dict = {
            'positions': dict(self.positions),
            'unpaid_dividends': self._unpaid_dividends,
        }
        state_dict[VERSION_LABEL] = STATE_VERSION
        return state_dict

    def __setstate__(self, state):
        """
        Restore the tracker from a state dictionary.

        The version stored under VERSION_LABEL is removed from the
        state and checked against the oldest supported version. The
        amount and price records are rebuilt by feeding the saved
        positions through update_positions.
        """
        OLDEST_SUPPORTED_STATE = 1
        saved_version = state.pop(VERSION_LABEL)
        if saved_version < OLDEST_SUPPORTED_STATE:
            raise BaseException("PositionTracker saved state is too old.")

        self.positions = positiondict()
        # The positions store is temporary; it is regenerated from
        # .positions.
        self._positions_store = zp.Positions()

        self._unpaid_dividends = state['unpaid_dividends']

        # Start from empty records for the quick position value
        # calculations before replaying the saved positions.
        self._position_amounts = OrderedDict()
        self._position_last_sale_prices = OrderedDict()

        self.update_positions(state['positions'])