Example #1
class SimpleDataFilterDummyTestCase(unittest.TestCase):
    def setUp(self):
        self.filter = SimpleDataFilterDummy()
        self.fetcher = QuoteFetcher(datetime_index=True, reindex=True)
        self.dates = dateutil.get_startfrom(DATES, '20140801', 20)
        self.startdate, self.enddate = self.dates[0], self.dates[-1]
        self.si, self.ei = map(DATES.index, [self.startdate, self.enddate])

    def tearDown(self):
        self.filter = None

    def test_synth_is_identity(self):
        synth = self.filter.synth
        objs = [None, 1, []]
        self.assertListEqual(objs, [synth(obj) for obj in objs])

    def test_datas_1(self):
        self.assertEqual(len(self.filter.datas), 1)

    def test_datas_2(self):
        self.assertIsInstance(self.filter.datas[0][0], FetcherBase)

    def test_filter_1(self):
        df = self.fetcher.fetch_window('close',
                                       DATES[self.si - window:self.ei + 1])
        df = pd.rolling_sum(df.fillna(0),
                            window) > 2 * pd.rolling_count(df, window)
        df1 = df.shift(1).iloc[window:].astype(bool)
        df2 = self.filter.filter(self.startdate, self.enddate)
        print 'bm', df1.sum(axis=1)
        self.assertTrue(frames_equal(df1, df2))

    def test_filter_2(self):
        df = self.fetcher.fetch_window('close',
                                       DATES[self.si - window:self.ei + 1])
        parent = df.notnull()
        df = df.shift(1)
        df[~parent] = None
        df = pd.rolling_sum(df.fillna(0),
                            window) > 2 * pd.rolling_count(df, window)
        df[~parent] = False
        df = df.iloc[window:]
        self.assertTrue(
            frames_equal(
                df, self.filter.filter(self.startdate, self.enddate, parent)))
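The two tests above rebuild the filter criterion with the old pd.rolling_sum / pd.rolling_count helpers. Below is a minimal standalone sketch of the same criterion with the current pandas rolling API, on made-up prices; the window, dates and column names are illustrative and not taken from the orca test suite:

import numpy as np
import pandas as pd

window = 5
dates = pd.date_range('2014-08-01', periods=20, freq='B')
prices = pd.DataFrame(np.random.rand(20, 3) * 10,
                      index=dates, columns=['s1', 's2', 's3'])
prices.iloc[::7, 1] = np.nan  # sprinkle in some missing quotes

# rolling_sum(close.fillna(0), w) > 2 * rolling_count(close, w), modern spelling
crit = (prices.fillna(0).rolling(window).sum()
        > 2 * prices.rolling(window, min_periods=1).count())
filtered = crit.shift(1).iloc[window:].astype(bool)  # same delay-by-one as test_filter_1
print(filtered.sum(axis=1))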
Example #2
File: tsret.py Project: leeong05/orca
class TSRetUpdater(UpdaterBase):
    """The updater class for collections 'ts_ret'."""

    def __init__(self, timeout=60):
        UpdaterBase.__init__(self, timeout=timeout)
        self.interval = IntervalFetcher('1min')
        self.quote = QuoteFetcher()
        self.indexquote = IndexQuoteFetcher()
        self.times = dateutil.generate_intervals(60)

    def pre_update(self):
        self.__dict__.update({
                'dates': self.db.dates.distinct('date'),
                'collection': self.db['ts_ret'],
                })

    def pro_update(self):
        return

        self.logger.debug('Ensuring index dname_1_date_1 on collection {}', self.collection.name)
        self.collection.ensure_index([('dname', 1), ('date', 1)], background=True)

    def update(self, date):
        """Update TinySoft interval returns data(1min, 5min, 15min, 30min, 60min, 120min) for the **same** day after market close."""
        interval = self.interval.fetch_daily('close', self.times, date)
        interval.ix['093000'] = self.quote.fetch_daily('prev_close', date).reindex(index=interval.columns)
        interval = interval.sort_index()
        for i in (1, 5, 15, 30, 60, 120):
            sub_interval = interval.ix[::i]
            sub_ret = sub_interval.pct_change(1).ix[1:]
            key = {'dname': 'returns'+str(i), 'date': date}
            for time, ser in sub_ret.iterrows():
                key.update({'time': time})
                self.db.ts_ret.update(key, {'$set': {'dvalue': ser.dropna().to_dict()}}, upsert=True)
        self.logger.info('UPSERT documents for {} sids into (c: [{}]) of (d: [{}]) on {}', interval.shape[1], self.collection.name, self.db.name, date)

        indice = self.db.tsindex_1min.distinct('dname')
        for index in indice:
            query = {'dname': index, 'date': date}
            proj = {'_id': 0, 'close': 1}
            try:
                ser = pd.DataFrame(list(self.db.tsindex_1min.find(query, proj)))['close']
            except:
                continue
            ser.index = self.times
            prev_close = self.indexquote.fetch_daily('prev_close', date, index=index)
            ser.ix['093000'] = prev_close
            ser = ser.sort_index()
            for i in (5, 15, 30, 60, 120):
                sub_ser = ser.ix[::i]
                sub_ret = sub_ser.pct_change(1).ix[1:]
                key = {'dname': 'returns'+str(i), 'index': index, 'date': date}
                self.db.tsindex_ret.update(key, {'$set': {'dvalue': sub_ret.to_dict()}}, upsert=True)
        self.logger.info('UPSERT documents for {} indice into (c: [{}]) of (d: [{}]) on {}', len(indice), self.db.tsindex_ret.name, self.db.name, date)
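The per-interval loop above simply keeps every i-th 1-minute close and takes percentage changes. A self-contained pandas sketch of that step on synthetic minute bars (the timestamps and prices are made up; recent pandas has dropped .ix, so .iloc is used):

import numpy as np
import pandas as pd

times = pd.date_range('2014-08-01 09:30', periods=241, freq='min')
close = pd.Series(10 + 0.01 * np.random.randn(241).cumsum(), index=times)

for i in (5, 15, 30, 60, 120):
    sub = close.iloc[::i]              # keep every i-th bar
    ret = sub.pct_change().iloc[1:]    # i-minute returns
    print((i, len(ret)))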
Example #3
class SimpleDataFilterDummyTestCase(unittest.TestCase):

    def setUp(self):
        self.filter = SimpleDataFilterDummy()
        self.fetcher = QuoteFetcher(datetime_index=True, reindex=True)
        self.dates = dateutil.get_startfrom(DATES, '20140801', 20)
        self.startdate, self.enddate = self.dates[0], self.dates[-1]
        self.si, self.ei = map(DATES.index, [self.startdate, self.enddate])

    def tearDown(self):
        self.filter = None

    def test_synth_is_identity(self):
        synth = self.filter.synth
        objs = [None, 1, []]
        self.assertListEqual(objs, [synth(obj) for obj in objs])

    def test_datas_1(self):
        self.assertEqual(len(self.filter.datas), 1)

    def test_datas_2(self):
        self.assertIsInstance(self.filter.datas[0][0], FetcherBase)

    def test_filter_1(self):
        df = self.fetcher.fetch_window('close', DATES[self.si-window: self.ei+1])
        df = pd.rolling_sum(df.fillna(0), window) > 2 * pd.rolling_count(df, window)
        df1 = df.shift(1).iloc[window:].astype(bool)
        df2 = self.filter.filter(self.startdate, self.enddate)
        print 'bm', df1.sum(axis=1)
        self.assertTrue(frames_equal(df1, df2))

    def test_filter_2(self):
        df = self.fetcher.fetch_window('close', DATES[self.si-window: self.ei+1])
        parent = df.notnull()
        df = df.shift(1)
        df[~parent] = None
        df = pd.rolling_sum(df.fillna(0), window) > 2 * pd.rolling_count(df, window)
        df[~parent] = False
        df = df.iloc[window:]
        self.assertTrue(frames_equal(df, self.filter.filter(self.startdate, self.enddate, parent)))
Example #4
    pdobj.index = [ind_name[ind] for ind in pdobj.index]
    return pdobj


def name2industry(pdobj):
    pdobj = pdobj.copy()
    name_ind = {v: k for k, v in industry_fetcher.fetch_info().iteritems()}
    pdobj.index = [name_ind[name] for name in pdobj.index]
    return pdobj


import pandas as pd

from orca import DATES
from orca.mongo.quote import QuoteFetcher
quote_fetcher = QuoteFetcher(datetime_index=True)

import dateutil


def fetch_returns(dt_index, rshift, lshift=-1):
    res = {}
    for dt, date in zip(dt_index, dateutil.to_datestr(dt_index)):
        di, date = dateutil.parse_date(DATES, date, -1)
        if di - lshift < 0 or di + rshift + 1 > len(DATES):
            continue
        r = quote_fetcher.fetch_window('returns',
                                       DATES[di - lshift:di + rshift + 1])
        res[dt] = (1 + r).cumprod().iloc[-1] - 1.
    res = pd.DataFrame(res).T
    return res
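The key line in fetch_returns is the compounding of a window of daily returns into a single number. With toy values (nothing fetched from MongoDB):

import pandas as pd

r = pd.Series([0.01, -0.02, 0.03])       # three daily returns
total = (1 + r).cumprod().iloc[-1] - 1.  # 1.01 * 0.98 * 1.03 - 1 = 0.019494
print(total)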
Example #5
class Performance(object):
    """Class to provide analyser to examine the performance of an alpha from different perspective.

    :param alpha: Alpha to be examined, either a well formatted DataFrame or :py:class:`orca.alpha.base.AlphaBase`
    """

    mongo_lock = Lock()

    quote = QuoteFetcher(datetime_index=True, reindex=True)
    index_quote = IndexQuoteFetcher(datetime_index=True)
    components = ComponentsFetcher(datetime_index=True, reindex=True)

    returns = None
    index_returns = {
        'HS300': None,
    }
    index_components = {'HS300': None, 'CS500': None, 'other': None}

    @classmethod
    def get_returns(cls, startdate):
        if cls.returns is None or startdate < cls.returns.index[0]:
            with cls.mongo_lock:
                cls.returns = cls.quote.fetch(
                    'returns', startdate=startdate.strftime('%Y%m%d'))
        return cls.returns

    @classmethod
    def get_index_returns(cls, startdate, index='HS300'):
        if index not in cls.index_returns or cls.index_returns[
                index] is None or startdate < cls.index_returns[index].index[0]:
            with cls.mongo_lock:
                cls.index_returns[index] = cls.quote.fetch(
                    'returns', startdate=startdate.strftime('%Y%m%d'))
        return cls.index_returns[index]

    @classmethod
    def get_index_components(cls, startdate, index):
        if cls.index_components[
                index] is None or startdate < cls.index_components[
                    index].index[0]:
            with cls.mongo_lock:
                cls.index_components['HS300'] = cls.components.fetch(
                    'HS300', startdate=startdate.strftime('%Y%m%d'))
                cls.index_components['CS500'] = cls.components.fetch(
                    'CS500', startdate=startdate.strftime('%Y%m%d'))
                cls.index_components['other'] = ~(
                    cls.index_components['HS300']
                    | cls.index_components['CS500'])
        return cls.index_components[index]

    @classmethod
    def set_returns(cls, returns):
        """Call this method to set returns so that for future uses, there is no need to interact with MongoDB."""
        with cls.mongo_lock:
            cls.returns = api.format(returns)

    @classmethod
    def set_index_returns(cls, index, returns):
        """Call this method to set index returns so that for future uses, there is no need to interact with MongoDB."""
        with cls.mongo_lock:
            returns.index = pd.to_datetime(returns.index)
            cls.index_returns[index] = returns

    @classmethod
    def set_index_components(cls, index, components):
        """Call this method to set index components data so that for future uses, there is no need to interact with MongoDB."""
        with cls.mongo_lock:
            cls.index_components[index] = api.format(components).fillna(False)

    def __init__(self, alpha):
        if isinstance(alpha, AlphaBase):
            self.alpha = alpha.get_alphas()
        else:
            self.alpha = api.format(alpha)
        self.alpha = self.alpha[np.isfinite(self.alpha)]
        self.startdate = self.alpha.index[0]

    def get_original(self):
        """**Be sure** to use this method when either the alpha is neutralized or you know what you are doing."""
        return Analyser(self.alpha, Performance.get_returns(self.startdate))

    def get_shift(self, n):
        return Analyser(self.alpha.shift(n),
                        Performance.get_returns(self.alpha.index[n]))

    def get_longshort(self):
        """Pretend the alpha can be made into a long/short portfolio."""
        return Analyser(api.neutralize(self.alpha),
                        Performance.get_returns(self.startdate))

    def get_long(self, index=None):
        """Only analyse the long part."""
        return Analyser(self.alpha[self.alpha>0], Performance.get_returns(self.startdate),
                Performance.get_index_returns(self.startdate, index=index)) \
               if index is not None else \
               Analyser(self.alpha[self.alpha>0], Performance.get_returns(self.startdate))

    def get_short(self, index=None):
        """Only analyse the short part."""
        return Analyser(-self.alpha[self.alpha<0], Performance.get_returns(self.startdate),
                Performance.get_index_returns(self.startdate, index=index)) \
               if index is not None else \
               Analyser(-self.alpha[self.alpha<0], Performance.get_returns(self.startdate))

    def get_qtop(self, q, index=None):
        """Only analyse the top quantile as long holding."""
        return Analyser(api.qtop(self.alpha, q), Performance.get_returns(self.startdate),
                Performance.get_index_returns(self.startdate, index=index)) \
               if index is not None else \
               Analyser(api.qtop(self.alpha, q), Performance.get_returns(self.startdate))

    def get_qbottom(self, q, index=None):
        """Only analyse the bottom quantile as long holding."""
        return Analyser(api.qbottom(self.alpha, q), Performance.get_returns(self.startdate),
                Performance.get_index_returns(self.startdate, index=index)) \
               if index is not None else \
               Analyser(api.qbottom(self.alpha, q), Performance.get_returns(self.startdate))

    def get_ntop(self, n, index=None):
        """Only analyse the top n stocks as long holding."""
        return Analyser(api.top(self.alpha, n), Performance.get_returns(self.startdate),
                Performance.get_index_returns(self.startdate, index=index)) \
               if index is not None else \
               Analyser(api.top(self.alpha, n), Performance.get_returns(self.startdate))

    def get_nbottom(self, n, index=None):
        """Only analyse the bottom n stocks as long holding."""
        return Analyser(api.bottom(self.alpha, n), Performance.get_returns(self.startdate),
                Performance.get_index_returns(self.startdate, index=index)) \
               if index is not None else \
               Analyser(api.bottom(self.alpha, n), Performance.get_returns(self.startdate))

    def get_qtail(self, q):
        """Long the top quantile and at the same time short the bottom quantile."""
        return Analyser(
            api.qtop(self.alpha, q).astype(int) -
            api.qbottom(self.alpha, q).astype(int),
            Performance.get_returns(self.startdate))

    def get_ntail(self, n):
        """Long the top n stocks and at the same time short the bottom n stocks."""
        return Analyser(
            api.top(self.alpha, n).astype(int) -
            api.bottom(self.alpha, n).astype(int),
            Performance.get_returns(self.startdate))

    def get_quantiles(self, n):
        """Return a list of analysers for n quantiles."""
        return [Analyser(qt, Performance.get_returns(self.startdate)) \
                for qt in api.quantiles(self.alpha, n)]

    def get_universe(self, univ):
        """Return a performance object for alpha in this universe."""
        return Performance(api.intersect(self.alpha, univ))

    def get_bms(self):
        """Return a list of 3 performance objects for alphas in HS300, CS500 and other."""
        big = Performance.get_index_components(self.startdate,
                                               'HS300').ix[self.alpha.index]
        mid = Performance.get_index_components(self.startdate,
                                               'CS500').ix[self.alpha.index]
        sml = Performance.get_index_components(self.startdate,
                                               'other').ix[self.alpha.index]

        return [self.get_universe(univ) for univ in [big, mid, sml]]
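A hedged usage sketch of the class above; my_alpha_df is a hypothetical date-by-sid DataFrame of alpha values, and running this requires the orca package plus its MongoDB backend:

perf = Performance(my_alpha_df)
ls = perf.get_longshort()                  # long/short analyser
top = perf.get_qtop(0.3, index='HS300')    # top 30% as a long holding, benchmarked to HS300
big, mid, sml = perf.get_bms()             # split by HS300 / CS500 / other membership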
Example #6
File: tsret.py Project: leeong05/orca
def __init__(self, timeout=60):
    UpdaterBase.__init__(self, timeout=timeout)
    self.interval = IntervalFetcher('1min')
    self.quote = QuoteFetcher()
    self.indexquote = IndexQuoteFetcher()
    self.times = dateutil.generate_intervals(60)
Example #7
class QuoteFetcherTestCase(unittest.TestCase):

    def setUp(self):
        self.fetcher = QuoteFetcher()
        self.dates = get_startfrom(DATES, '2014010', 50)

    def tearDown(self):
        self.fetcher = None

    def test_fetch_window_classmethod(self):
        df1 = self.fetcher.fetch_window('close', self.dates)
        df2 = QuoteFetcher.fetch_window('close', self.dates)
        self.assertTrue(frames_equal(df1, df2))

    def test_returns_N_eq1_1(self):
        df1 = self.fetcher.fetch('returns', self.dates[5], self.dates[-1], backdays=5)
        df2 = self.fetcher.fetch('returnsN', 1, self.dates[5], self.dates[-1], backdays=5)
        self.assertTrue(frames_equal(df1, df2))

    def test_returns_N_eq1_2(self):
        df1 = self.fetcher.fetch_window('returns', self.dates)
        df2 = self.fetcher.fetch_window('returnsN', 1, self.dates)
        self.assertTrue(frames_equal(df1, df2))

    def test_returns_N_eq1_3(self):
        df1 = self.fetcher.fetch_history('returns', self.dates[-1], 45, delay=5)
        df2 = self.fetcher.fetch_history('returnsN', 1, self.dates[-1], 45, delay=5)
        self.assertTrue(frames_equal(df1, df2))

    def test_returns_N_eq1_4(self):
        s1 = self.fetcher.fetch_daily('returns', self.dates[-1], offset=49)
        s2 = self.fetcher.fetch_daily('returnsN', 1, self.dates[-1], offset=49)
        self.assertTrue(series_equal(s1, s2))

    def test_returns_N_gt1(self):
        df = self.fetcher.fetch_window('returns', self.dates[:2])
        s1 = (df.iloc[0].fillna(0) + 1) * (df.iloc[1] + 1) - 1
        s1.name = self.dates[1]
        s2 = self.fetcher.fetch_daily('returnsN', 2, self.dates[1])
        self.assertTrue(series_equal(s1, s2))
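test_returns_N_gt1 is simply checking the compounding identity behind 'returnsN'. The same arithmetic on made-up numbers:

import numpy as np
import pandas as pd

r = pd.DataFrame([[0.01, np.nan], [0.02, 0.03]], columns=['s1', 's2'])
two_day = (r.iloc[0].fillna(0) + 1) * (r.iloc[1] + 1) - 1
print(two_day)   # s1: 1.01 * 1.02 - 1 = 0.0302, s2: 1.00 * 1.03 - 1 = 0.03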
Example #8
def test_fetch_window_classmethod(self):
    df1 = self.fetcher.fetch_window('close', self.dates)
    df2 = QuoteFetcher.fetch_window('close', self.dates)
    self.assertTrue(frames_equal(df1, df2))
Example #9
class QuoteFetcherTestCase(unittest.TestCase):
    def setUp(self):
        self.fetcher = QuoteFetcher()
        self.dates = get_startfrom(DATES, '2014010', 50)

    def tearDown(self):
        self.fetcher = None

    def test_fetch_window_classmethod(self):
        df1 = self.fetcher.fetch_window('close', self.dates)
        df2 = QuoteFetcher.fetch_window('close', self.dates)
        self.assertTrue(frames_equal(df1, df2))

    def test_returns_N_eq1_1(self):
        df1 = self.fetcher.fetch('returns',
                                 self.dates[5],
                                 self.dates[-1],
                                 backdays=5)
        df2 = self.fetcher.fetch('returnsN',
                                 1,
                                 self.dates[5],
                                 self.dates[-1],
                                 backdays=5)
        self.assertTrue(frames_equal(df1, df2))

    def test_returns_N_eq1_2(self):
        df1 = self.fetcher.fetch_window('returns', self.dates)
        df2 = self.fetcher.fetch_window('returnsN', 1, self.dates)
        self.assertTrue(frames_equal(df1, df2))

    def test_returns_N_eq1_3(self):
        df1 = self.fetcher.fetch_history('returns',
                                         self.dates[-1],
                                         45,
                                         delay=5)
        df2 = self.fetcher.fetch_history('returnsN',
                                         1,
                                         self.dates[-1],
                                         45,
                                         delay=5)
        self.assertTrue(frames_equal(df1, df2))

    def test_returns_N_eq1_4(self):
        s1 = self.fetcher.fetch_daily('returns', self.dates[-1], offset=49)
        s2 = self.fetcher.fetch_daily('returnsN', 1, self.dates[-1], offset=49)
        self.assertTrue(series_equal(s1, s2))

    def test_returns_N_gt1(self):
        df = self.fetcher.fetch_window('returns', self.dates[:2])
        s1 = (df.iloc[0].fillna(0) + 1) * (df.iloc[1] + 1) - 1
        s1.name = self.dates[1]
        s2 = self.fetcher.fetch_daily('returnsN', 2, self.dates[1])
        self.assertTrue(series_equal(s1, s2))
Example #10
class TSRetUpdater(UpdaterBase):
    """The updater class for collections 'ts_ret'."""
    def __init__(self, timeout=60):
        UpdaterBase.__init__(self, timeout=timeout)
        self.interval = IntervalFetcher('1min')
        self.quote = QuoteFetcher()
        self.indexquote = IndexQuoteFetcher()
        self.times = dateutil.generate_intervals(60)

    def pre_update(self):
        self.__dict__.update({
            'dates': self.db.dates.distinct('date'),
            'collection': self.db['ts_ret'],
        })

    def pro_update(self):
        return

        self.logger.debug('Ensuring index dname_1_date_1 on collection {}',
                          self.collection.name)
        self.collection.ensure_index([('dname', 1), ('date', 1)],
                                     background=True)

    def update(self, date):
        """Update TinySoft interval returns data(1min, 5min, 15min, 30min, 60min, 120min) for the **same** day after market close."""
        interval = self.interval.fetch_daily('close', self.times, date)
        interval.ix['093000'] = self.quote.fetch_daily(
            'prev_close', date).reindex(index=interval.columns)
        interval = interval.sort_index()
        for i in (1, 5, 15, 30, 60, 120):
            sub_interval = interval.ix[::i]
            sub_ret = sub_interval.pct_change(1).ix[1:]
            key = {'dname': 'returns' + str(i), 'date': date}
            for time, ser in sub_ret.iterrows():
                key.update({'time': time})
                self.db.ts_ret.update(
                    key, {'$set': {
                        'dvalue': ser.dropna().to_dict()
                    }},
                    upsert=True)
        self.logger.info(
            'UPSERT documents for {} sids into (c: [{}]) of (d: [{}]) on {}',
            interval.shape[1], self.collection.name, self.db.name, date)

        indice = self.db.tsindex_1min.distinct('dname')
        for index in indice:
            query = {'dname': index, 'date': date}
            proj = {'_id': 0, 'close': 1}
            try:
                ser = pd.DataFrame(list(self.db.tsindex_1min.find(
                    query, proj)))['close']
            except:
                continue
            ser.index = self.times
            prev_close = self.indexquote.fetch_daily('prev_close',
                                                     date,
                                                     index=index)
            ser.ix['093000'] = prev_close
            ser = ser.sort_index()
            for i in (5, 15, 30, 60, 120):
                sub_ser = ser.ix[::i]
                sub_ret = sub_ser.pct_change(1).ix[1:]
                key = {
                    'dname': 'returns' + str(i),
                    'index': index,
                    'date': date
                }
                self.db.tsindex_ret.update(
                    key, {'$set': {
                        'dvalue': sub_ret.to_dict()
                    }}, upsert=True)
        self.logger.info(
            'UPSERT documents for {} indice into (c: [{}]) of (d: [{}]) on {}',
            len(indice), self.db.tsindex_ret.name, self.db.name, date)
Example #11
def setUp(self):
    self.filter = SimpleDataFilterDummy()
    self.fetcher = QuoteFetcher(datetime_index=True, reindex=True)
    self.dates = dateutil.get_startfrom(DATES, '20140801', 20)
    self.startdate, self.enddate = self.dates[0], self.dates[-1]
    self.si, self.ei = map(DATES.index, [self.startdate, self.enddate])
Example #12
def setUp(self):
    self.filter = SimpleDataFilterDummy()
    self.fetcher = QuoteFetcher(datetime_index=True, reindex=True)
    self.dates = dateutil.get_startfrom(DATES, '20140801', 20)
    self.startdate, self.enddate = self.dates[0], self.dates[-1]
    self.si, self.ei = map(DATES.index, [self.startdate, self.enddate])
Example #13
class AlphaCombinerBase(object):
    """Base class to combine alphas.

    :param int periods: Number of days of forward returns used as the predicted variable.

    .. note::

       This is a base class and should not be used directly.
    """

    __metaclass__ = abc.ABCMeta

    LOGGER_NAME = 'combiner'

    mongo_lock = Lock()
    quote = QuoteFetcher(datetime_index=True, reindex=True)
    returns = None

    @classmethod
    def get_returns(cls, n, startdate):
        if cls.returns is None or n not in cls.returns or\
                startdate < cls.returns[n].index[0].strftime('%Y%m%d'):
            with cls.mongo_lock:
                if cls.returns is None:
                    cls.returns = {}
                cls.returns[n] = cls.quote.fetch('returnsN', n, startdate, None, n)
        return cls.returns[n]

    def __init__(self, periods, debug_on=True, **kwargs):
        self.periods = periods
        self.logger = logbook.Logger(AlphaCombinerBase.LOGGER_NAME)
        self.debug_on = debug_on
        self.name_alpha = OrderedDict()
        self.data = None
        self.__dict__.update(kwargs)

    def debug(self, msg):
        """Logs a message with level DEBUG on the alpha logger."""
        if self.debug_on:
            self.logger.debug(msg)

    def info(self, msg):
        """Logs a message with level INFO on the alpha logger."""
        self.logger.info(msg)

    def warning(self, msg):
        """Logs a message with level WARNING on the alpha logger."""
        self.logger.warning(msg)

    def error(self, msg):
        """Logs a message with level ERROR on the alpha logger."""
        self.logger.error(msg)

    def critical(self, msg):
        """Logs a message with level CRITICAL on the alpha logger."""
        self.logger.critical(msg)

    def add_alpha(self, name, alpha, ftype=None, preprocess=False):
        """
        :param DataFrame alpha: Alpha to be added
        """
        alpha.index = pd.to_datetime(alpha.index)
        if preprocess:
            alpha = self.preprocess(alpha)
        self.name_alpha[name] = alpha
        self.info('Added alpha {}'.format(name))

    def __setitem__(self, name, alpha):
        """Convenient method wrapper of :py:meth:`add_alpha`."""
        self.add_alpha(name, alpha)
        self.info('Added alpha {}'.format(name))

    def prepare_data(self):
        """Prepare inputs for regression."""
        X = pd.Panel.from_dict(self.name_alpha, intersect=False)
        X = X.to_frame(filter_observations=False).dropna(how='all')
        X.index.names = ['date', 'sid']
        self.data = X.reset_index()
        self.data.index = X.index

        startdate, enddate = self.data['date'].min().strftime('%Y%m%d'), self.data['date'].max().strftime('%Y%m%d')
        Y = self.get_returns(self.periods, startdate)[:enddate]
        Y = Y.shift(-self.periods).iloc[self.periods:]
        Y = Y.stack().ix[X.index]
        self.data['returns'] = Y

        self.info('Data prepared')

    def get_XY(self, start=None, end=None):
        data = self.data
        if start is not None:
            data = data.query('date >= {!r}'.format(str(start)))
        if end is not None:
            data = data.query('date <= {!r}'.format(str(end)))
        X, Y = data.iloc[:, 2:-1], data.iloc[:, -1]
        return X, Y

    @abc.abstractmethod
    def normalize(self):
        raise NotImplementedError

    @abc.abstractmethod
    def fit(self, X, Y):
        raise NotImplementedError

    @abc.abstractmethod
    def preprocess(self, alpha):
        raise NotImplementedError
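A hypothetical concrete combiner, only to show how the three abstract hooks fit together; the rank-normalization and least-squares fit below are illustrative choices, not part of orca:

import numpy as np


class OLSCombiner(AlphaCombinerBase):

    def preprocess(self, alpha):
        # cross-sectional percentile rank, centered at zero
        return alpha.rank(axis=1, pct=True) - 0.5

    def normalize(self):
        for name, alpha in self.name_alpha.items():
            self.name_alpha[name] = self.preprocess(alpha)

    def fit(self, X, Y):
        # plain least squares on the stacked (date, sid) panel
        mask = Y.notnull() & X.notnull().all(axis=1)
        coef, _, _, _ = np.linalg.lstsq(X[mask].values, Y[mask].values, rcond=None)
        return coef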
Example #14
"""
.. moduleauthor:: Li, Wang <*****@*****.**>
"""

from orca.mongo.quote import QuoteFetcher
from orca.alpha.base import BacktestingAlpha

quote = QuoteFetcher()


class MyAlpha(BacktestingAlpha):
    def generate(self, date):
        self[date] = quote.fetch_daily('close', date, offset=1)


if __name__ == '__main__':
    start, end = '20140103', '20140131'
    alpha = MyAlpha()
    alpha.run(start, end)
    alpha.dump('alpha_mongo.csv')
Example #15
def __init__(self, timeout=60):
    UpdaterBase.__init__(self, timeout=timeout)
    self.interval = IntervalFetcher('1min')
    self.quote = QuoteFetcher()
    self.indexquote = IndexQuoteFetcher()
    self.times = dateutil.generate_intervals(60)
Example #16
class AdjQuoteFetcherTestCase(unittest.TestCase):
    def setUp(self):
        self.adjfetcher = AdjQuoteFetcher()
        self.fetcher = QuoteFetcher()
        self.dates = get_startfrom(DATES, '20140101', 50)

    def tearDown(self):
        self.adjfetcher = None
        self.fetcher = None

    def test_noadjust(self):
        ret1 = self.adjfetcher.fetch_window('adj_returns', self.dates)
        ret2 = self.fetcher.fetch_window('returns', self.dates)
        amt1 = self.adjfetcher.fetch_window('adj_amount', self.dates)
        amt2 = self.fetcher.fetch_window('amount', self.dates)
        self.assertTrue(frames_equal(ret1, ret2) & frames_equal(amt1, amt2))

    def test_price1(self):
        cls1 = self.adjfetcher.fetch_window('adj_close', self.dates)
        cls2 = self.fetcher.fetch_window('close', self.dates)
        self.assertTrue((cls1.index == cls2.index).all())

    def test_price2(self):
        cls1 = self.adjfetcher.fetch_window('adj_close', self.dates)
        cls2 = self.fetcher.fetch_window('close', self.dates)
        self.assertTrue(series_equal(cls1.iloc[-1], cls2.iloc[-1]))

    def test_price3(self):
        self.adjfetcher.mode = AdjQuoteFetcher.FORWARD
        cls1 = self.adjfetcher.fetch_window('adj_close', self.dates)
        cls2 = self.fetcher.fetch_window('close', self.dates)
        self.adjfetcher.mode = AdjQuoteFetcher.BACKWARD
        self.assertTrue(series_equal(cls1.iloc[0], cls2.iloc[0]))

    def test_price4(self):
        cls1 = self.adjfetcher.fetch_window('adj_close', self.dates)
        cls2 = self.fetcher.fetch_window('close', self.dates)
        self.assertTrue(frames_equal(cls1.notnull(), cls2.notnull()))

    def test_price5(self):
        cls1 = self.adjfetcher.fetch_window('adj_close', self.dates).fillna(0)
        cls2 = self.fetcher.fetch_window('close', self.dates).fillna(0)
        print pd.concat(
            [cls1['000002'], cls2['000002'], cls1['000002'] <= cls2['000002']],
            axis=1)
        print self.adjfetcher.cax.fetch_window('adjfactor',
                                               self.dates)['000002']
        self.assertTrue((cls1 <= cls2 + 0.01).all().all())

    def test_volume1(self):
        vol1 = self.adjfetcher.fetch_window('adj_volume', self.dates)
        vol2 = self.fetcher.fetch_window('volume', self.dates)
        self.assertTrue((vol1.index == vol2.index).all())

    def test_volume2(self):
        vol1 = self.adjfetcher.fetch_window('adj_volume', self.dates)
        vol2 = self.fetcher.fetch_window('volume', self.dates)
        self.assertTrue(series_equal(vol1.iloc[-1], vol2.iloc[-1]))

    def test_volume3(self):
        self.adjfetcher.mode = AdjQuoteFetcher.FORWARD
        vol1 = self.adjfetcher.fetch_window('adj_volume', self.dates)
        vol2 = self.fetcher.fetch_window('volume', self.dates)
        self.adjfetcher.mode = AdjQuoteFetcher.BACKWARD
        self.assertTrue(series_equal(vol1.iloc[0], vol2.iloc[0]))

    def test_volume4(self):
        vol1 = self.adjfetcher.fetch_window('adj_volume', self.dates)
        vol2 = self.fetcher.fetch_window('volume', self.dates)
        self.assertTrue(frames_equal(vol1.notnull(), vol2.notnull()))

    def test_volume5(self):
        vol1 = self.adjfetcher.fetch_window('adj_volume', self.dates).fillna(0)
        vol2 = self.fetcher.fetch_window('volume', self.dates).fillna(0)
        self.assertTrue((vol1 + 1 >= vol2).all().all())

    def test_history1(self):
        cls = self.adjfetcher.fetch_history('adj_close', self.dates[-1],
                                            len(self.dates) - 1)
        self.assertListEqual(list(cls.index), self.dates[:-1])

    def test_history2(self):
        cls = self.adjfetcher.fetch_history('adj_close',
                                            self.dates[-1],
                                            len(self.dates),
                                            delay=0)
        self.assertListEqual(list(cls.index), self.dates)

    def test_history3(self):
        cls1 = self.adjfetcher.fetch_history('adj_close', self.dates[-1],
                                             len(self.dates) - 1)
        cls2 = self.adjfetcher.fetch_window('adj_close', self.dates[:-1],
                                            self.dates[-1])
        self.assertTrue(frames_equal(cls1, cls2))

    def test_adjust(self):
        self.assertRaises(ValueError, self.adjfetcher.fetch_window,
                          'adj_close', self.dates, self.dates[2])
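The price tests above pin down what the two adjustment modes mean: in BACKWARD mode the last row matches the raw close, in FORWARD mode the first row does. A toy illustration of that relationship (the formula is inferred from the tests, not copied from AdjQuoteFetcher):

import pandas as pd

close = pd.Series([10.0, 10.2, 5.1], index=['d1', 'd2', 'd3'])   # a 2-for-1 split before d3
factor = pd.Series([1.0, 1.0, 2.0], index=['d1', 'd2', 'd3'])    # cumulative adjustment factor

backward = close * factor / factor.iloc[-1]   # [5.0, 5.1, 5.1]; last row equals the raw close
forward = close * factor / factor.iloc[0]     # [10.0, 10.2, 10.2]; first row equals the raw close
print(backward)
print(forward)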
Example #17
"""
.. moduleauthor:: Li, Wang <*****@*****.**>
"""

from orca.alpha.base import BacktestingAlpha
from orca.mongo.quote import QuoteFetcher
from orca.utils import parallel

close = QuoteFetcher(datetime_index=True,
                     reindex=True).fetch('close', 20140101, 20140131)


class AlphaDummy(BacktestingAlpha):
    def __init__(self, n=None):
        super(AlphaDummy, self).__init__()
        self.n = n

    def generate(self, date):
        self.alphas[date] = close.ix[date]


def generate_params1():
    for i in xrange(10):
        yield {'n': i}


alphas = parallel.run(AlphaDummy, generate_params1(), 20140101, 20140131)

for param, alpha in alphas:
    print param
Example #18
"""
.. moduleauthor:: Li, Wang<*****@*****.**>
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

import magic

from orca.mongo.quote import QuoteFetcher
quote = QuoteFetcher(datetime_index=True, reindex=True)

from orca import DATES
from orca.utils.io import read_frame
from orca.utils import dateutil
from orca.operation import api


class Event(object):
    def __init__(self, alpha, rshifts, lshifts):
        self.alpha = self.rebase_index(alpha)
        unstacked = self.alpha.unstack()
        self.index = unstacked[unstacked].index
        self.rshifts, self.lshifts = rshifts, lshifts
        self.returns = self.fetch_returns()

    @staticmethod
    def rebase_index(alpha):
        res = {}
Example #19
def test_fetch_window_classmethod(self):
    df1 = self.fetcher.fetch_window('close', self.dates)
    df2 = QuoteFetcher.fetch_window('close', self.dates)
    self.assertTrue(frames_equal(df1, df2))
Example #20
"""
.. moduleauthor:: Li, Wang <*****@*****.**>
"""

from string import Template

import numpy as np
import pandas as pd
import logbook
logbook.set_datetime_format('local')
logger = logbook.Logger('plotter')

from orca.mongo.quote import QuoteFetcher
quote_fetcher = QuoteFetcher(datetime_index=True)
from orca.mongo.index import IndexQuoteFetcher
index_quote_fetcher = IndexQuoteFetcher(datetime_index=True)


def generate_path(path_pattern, sid, date):
    return Template(path_pattern).substitute(sid=sid,
                                             YYYYMMDD=date,
                                             YYYYMM=date[:6],
                                             YYYY=date[:4],
                                             MM=date[4:6],
                                             DD=date[6:8])
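# (Illustration, not part of the source file.) With a made-up pattern the
# Template substitution above expands like this:
#   generate_path('/data/${YYYY}/${MM}/${DD}/${sid}.csv', 'HS300', '20140131')
#   -> '/data/2014/01/31/HS300.csv'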


def get_returns(sid, pattern=None):
    try:
        returns = index_quote_fetcher.fetch_window('returns',
                                                   shift_dates,
Example #21
File: plot_pnl.py Project: leeong05/orca
"""
.. moduleauthor:: Li, Wang <*****@*****.**>
"""

from string import Template

import numpy as np
import pandas as pd
import logbook
logbook.set_datetime_format('local')
logger = logbook.Logger('plotter')

from orca.mongo.quote import QuoteFetcher
quote_fetcher = QuoteFetcher(datetime_index=True)
from orca.mongo.index import IndexQuoteFetcher
index_quote_fetcher = IndexQuoteFetcher(datetime_index=True)

def generate_path(path_pattern, sid, date):
    return Template(path_pattern).substitute(sid=sid,
                YYYYMMDD=date, YYYYMM=date[:6], YYYY=date[:4], MM=date[4:6], DD=date[6:8])

def get_returns(sid, pattern=None):
    try:
        returns = index_quote_fetcher.fetch_window('returns', shift_dates, index=sid)
    except:
        assert pattern is not None
        path0 = generate_path(pattern, sid, dates[0])
        path1 = generate_path(pattern, sid, dates[-1])
        if path0 == path1:
            df = read_frame(path0)
            df.index = pd.to_datetime(shift_dates)
Example #22
)
from orca.utils import dateutil
from orca.universe.special import (
    TickerFilter,
    TradingDaysFilter,
    ActiveFilter,
    ComponentsFilter,
    IndustryFilter,
)
from orca.universe.rules import (
    startswith,
    count_gt,
)

window = dateutil.get_startfrom(DATES, '20140104', 50)
close = QuoteFetcher(datetime_index=True,
                     reindex=True).fetch_window('close', window)
hs300 = ComponentsFetcher(datetime_index=True,
                          reindex=True).fetch_window('HS300', window)
sector = IndustryFetcher(datetime_index=True,
                         reindex=True).fetch_window('sector', window)


class SpecialTestCase(unittest.TestCase):
    def test_ticker_filter(self):
        sh = TickerFilter(startswith('60')).filter_daily(window[0])
        dct = {sid: sid[:2] == '60' for sid in SIDS}
        self.assertEqual(sh.to_dict(), dct)

    def test_trading_days_filter(self):
        trd_filter = TradingDaysFilter(50, count_gt(40), delay=0)
        trd1 = trd_filter.filter_daily(window[-1])
Example #23
class AdjQuoteFetcherTestCase(unittest.TestCase):

    def setUp(self):
        self.adjfetcher = AdjQuoteFetcher()
        self.fetcher = QuoteFetcher()
        self.dates = get_startfrom(DATES, '20140101', 50)

    def tearDown(self):
        self.adjfetcher = None
        self.fetcher = None

    def test_noadjust(self):
        ret1 = self.adjfetcher.fetch_window('adj_returns', self.dates)
        ret2 = self.fetcher.fetch_window('returns', self.dates)
        amt1 = self.adjfetcher.fetch_window('adj_amount', self.dates)
        amt2 = self.fetcher.fetch_window('amount', self.dates)
        self.assertTrue(frames_equal(ret1, ret2) & frames_equal(amt1, amt2))

    def test_price1(self):
        cls1 = self.adjfetcher.fetch_window('adj_close', self.dates)
        cls2 = self.fetcher.fetch_window('close', self.dates)
        self.assertTrue((cls1.index == cls2.index).all())

    def test_price2(self):
        cls1 = self.adjfetcher.fetch_window('adj_close', self.dates)
        cls2 = self.fetcher.fetch_window('close', self.dates)
        self.assertTrue(series_equal(cls1.iloc[-1], cls2.iloc[-1]))

    def test_price3(self):
        self.adjfetcher.mode = AdjQuoteFetcher.FORWARD
        cls1 = self.adjfetcher.fetch_window('adj_close', self.dates)
        cls2 = self.fetcher.fetch_window('close', self.dates)
        self.adjfetcher.mode = AdjQuoteFetcher.BACKWARD
        self.assertTrue(series_equal(cls1.iloc[0], cls2.iloc[0]))

    def test_price4(self):
        cls1 = self.adjfetcher.fetch_window('adj_close', self.dates)
        cls2 = self.fetcher.fetch_window('close', self.dates)
        self.assertTrue(frames_equal(cls1.notnull(), cls2.notnull()))

    def test_price5(self):
        cls1 = self.adjfetcher.fetch_window('adj_close', self.dates).fillna(0)
        cls2 = self.fetcher.fetch_window('close', self.dates).fillna(0)
        print pd.concat([cls1['000002'], cls2['000002'], cls1['000002'] <= cls2['000002']], axis=1)
        print self.adjfetcher.cax.fetch_window('adjfactor', self.dates)['000002']
        self.assertTrue((cls1 <= cls2+0.01).all().all())

    def test_volume1(self):
        vol1 = self.adjfetcher.fetch_window('adj_volume', self.dates)
        vol2 = self.fetcher.fetch_window('volume', self.dates)
        self.assertTrue((vol1.index == vol2.index).all())

    def test_volume2(self):
        vol1 = self.adjfetcher.fetch_window('adj_volume', self.dates)
        vol2 = self.fetcher.fetch_window('volume', self.dates)
        self.assertTrue(series_equal(vol1.iloc[-1], vol2.iloc[-1]))

    def test_volume3(self):
        self.adjfetcher.mode = AdjQuoteFetcher.FORWARD
        vol1 = self.adjfetcher.fetch_window('adj_volume', self.dates)
        vol2 = self.fetcher.fetch_window('volume', self.dates)
        self.adjfetcher.mode = AdjQuoteFetcher.BACKWARD
        self.assertTrue(series_equal(vol1.iloc[0], vol2.iloc[0]))

    def test_volume4(self):
        vol1 = self.adjfetcher.fetch_window('adj_volume', self.dates)
        vol2 = self.fetcher.fetch_window('volume', self.dates)
        self.assertTrue(frames_equal(vol1.notnull(), vol2.notnull()))

    def test_volume5(self):
        vol1 = self.adjfetcher.fetch_window('adj_volume', self.dates).fillna(0)
        vol2 = self.fetcher.fetch_window('volume', self.dates).fillna(0)
        self.assertTrue((vol1+1 >= vol2).all().all())

    def test_history1(self):
        cls = self.adjfetcher.fetch_history('adj_close', self.dates[-1], len(self.dates)-1)
        self.assertListEqual(list(cls.index), self.dates[:-1])

    def test_history2(self):
        cls = self.adjfetcher.fetch_history('adj_close', self.dates[-1], len(self.dates), delay=0)
        self.assertListEqual(list(cls.index), self.dates)

    def test_history3(self):
        cls1 = self.adjfetcher.fetch_history('adj_close', self.dates[-1], len(self.dates)-1)
        cls2 = self.adjfetcher.fetch_window('adj_close', self.dates[:-1], self.dates[-1])
        self.assertTrue(frames_equal(cls1, cls2))

    def test_adjust(self):
        self.assertRaises(ValueError, self.adjfetcher.fetch_window, 'adj_close', self.dates, self.dates[2])
Example #24
def setUp(self):
    self.fetcher = QuoteFetcher()
    self.dates = get_startfrom(DATES, '2014010', 50)
Example #25
def setUp(self):
    self.adjfetcher = AdjQuoteFetcher()
    self.fetcher = QuoteFetcher()
    self.dates = get_startfrom(DATES, '20140101', 50)
Example #26
"""
.. moduleauthor:: Li, Wang <*****@*****.**>
"""

from orca.mongo.quote import QuoteFetcher
from orca.data.csv import CSVSaver

quote = QuoteFetcher()
close = quote.fetch('close', '20140101', '20140131')

saver = CSVSaver('cache')
saver['close'] = close
Example #27
"""

import os

import pandas as pd
import warnings
warnings.simplefilter(action='ignore',
                      category=pd.core.common.SettingWithCopyWarning)
from lxml import etree

from orca import DATES
from orca.barra.base import BarraOptimizerBase
from orca.mongo.barra import BarraFetcher
barra_fetcher = BarraFetcher('short')
from orca.mongo.quote import QuoteFetcher
quote_fetcher = QuoteFetcher()
from orca.mongo.index import IndexQuoteFetcher
index_quote_fetcher = IndexQuoteFetcher()
from orca.mongo.components import ComponentsFetcher
components_fetcher = ComponentsFetcher(as_bool=False)

config = etree.XML("""<Optimize>
<Assets><Composite/></Assets>
<InitPortfolio/>
<Universe/>
<RiskModel path="/home/SambaServer/extend_data/Barra/short/${YYYY}/${MM}/${DD}" name="CNE5S"/>
<Case>
  <Utility/>
  <Constraints>
    <HedgeConstraints>
      <Leverage>