Пример #1
0
 def update(self, dates):
     """Update alpha performance metrics for the **same** day after market close.

     For every alpha name found in ``self.db.alpha`` (restricted to
     ``self.options['alphas']`` when that option is non-empty), alpha values
     are loaded from up to 20 entries of ``self.dates`` before ``dates[0]``
     through ``dates[-1]``. The result of ``self.get_metrics`` is then
     upserted into ``self.collection`` for each requested date that has alpha
     values, once per mode: ``longshort``, ``quantile30``, ``BTOP70Q`` and
     ``top30``.

     :param dates: Non-empty sequence of dates; its first and last entries must be present in ``self.dates``
     """
     dnames = self.db.alpha.distinct('dname')
     si, ei = map(self.dates.index, [dates[0], dates[-1]])
     # Clamp the look-back start: a negative index would wrap around to the
     # end of ``self.dates`` and select the wrong (usually empty) window.
     start = max(si - 20, 0)
     BTOP70Q = univ_fetcher.fetch_window('BTOP70Q', self.dates[start: ei+1])
     # (mode label, analyser factory) pairs. The factories are invoked lazily
     # so each analyser is built right before its documents are written.
     modes = [
         ('longshort', lambda perf: perf.get_longshort()),
         ('quantile30', lambda perf: perf.get_qtail(0.3)),
         ('BTOP70Q', lambda perf: perf.get_universe(BTOP70Q).get_longshort()),
         ('top30', lambda perf: perf.get_qtop(0.3)),
     ]
     cnt = 0
     for dname in dnames:
         if self.options['alphas'] and dname not in self.options['alphas']:
             continue
         cursor = self.db.alpha.find(
                 {'dname': dname, 'date': {'$gte': self.dates[start], '$lte': dates[-1]}},
                 {'_id': 0, 'dvalue': 1, 'date': 1})
         alpha = pd.DataFrame({row['date']: row['dvalue'] for row in cursor}).T
         if len(alpha) == 0:
             continue
         perf = Performance(alpha)
         # Only dates that actually carry alpha values get a document.
         present = [date for date in dates if date in alpha.index]
         for mode, make_analyser in modes:
             analyser = make_analyser(perf)
             for date in present:
                 key = {'alpha': dname, 'mode': mode, 'date': date}
                 metrics = self.get_metrics(analyser, date)
                 self.collection.update(key, {'$set': metrics}, upsert=True)
         cnt += 1
     if len(dates) == 1:
         self.logger.info('UPSERT documents for {} alphas into (c: [{}]) of (d: [{}]) on {}',
                 cnt, self.collection.name, self.db.name, dates[0])
     else:
         self.logger.info('UPSERT documents for {} alphas into (c: [{}]) of (d: [{}]) from {} to {}',
                 cnt, self.collection.name, self.db.name, dates[0], dates[-1])
Пример #2
0
def run_hdf(store,
            alpha,
            params,
            startdate,
            enddate,
            predicate=None,
            threads=multiprocessing.cpu_count()):
    """Execute instances of an alpha in parallel and stores DataFrame in HDF5 file. Each item in params should be a ``dict``.

    Any existing file at ``store`` is removed first. Each worker result that
    passes ``predicate`` is saved under the key ``'alpha<i>'`` and its
    parameters are appended to the ``'params'`` table, where ``i`` is the
    position of the parameter set in ``params``.

    :param store: File path of the to-be-created HDFStore
    :param alpha: Passed through unchanged to ``worker_hdf`` for every parameter set
    :param params: Iterable of parameter sets, one per alpha instance
    :param startdate: Passed through unchanged to ``worker_hdf``
    :param enddate: Passed through unchanged to ``worker_hdf``
    :param function predicate: A function with :py:class:`orca.perf.performance.Performance` object as the only parameter; for example: ``lambda x: x.get_original().get_ir() > 0.1``. Default: None
    :param int threads: Number of worker processes in the pool. Default: number of CPUs
    """
    if os.path.exists(store):
        os.remove(store)
    logger = logbook.Logger(store)
    # Keep the path in ``store`` and the open store in its own name so the
    # parameter is not silently rebound to a different type.
    hdf = pd.HDFStore(store)
    try:
        iterobj = ((i, alpha, param, startdate, enddate)
                   for i, param in enumerate(params))
        pool = multiprocessing.Pool(threads)
        res = pool.imap_unordered(worker_hdf, iterobj)
        pool.close()
        pool.join()
        for i, param, frame in res:
            if predicate is not None and not predicate(Performance(frame)):
                continue
            hdf['alpha' + str(i)] = frame
            hdf.append('params', pd.DataFrame({i: param}).T)
            hdf.flush()
            logger.debug('Saving alpha with parameter: {!r}'.format(param))
    finally:
        # Always release the HDF5 file handle, even when a worker result,
        # the predicate or a write raises.
        hdf.close()
Пример #3
0
    def monitor(self, dates):
        """Monitor alpha performance metrics for the **same** day after market close.

        For every alpha name in ``self.db.alpha`` that passes the filters in
        ``self.options`` (``alphas``, ``excludes``, ``pattern``,
        ``exclude_pattern``), this makes sure a row exists in the
        ``alpha_category`` table, loads the alpha values and calls
        ``self.monitor_alpha`` once for each of the ``tradable``, ``TOP70Q``
        and ``CS500`` universes.

        When an alpha has no category row and ``self.options['category']`` is
        None, the category is asked for interactively via ``raw_input``.

        :param dates: Non-empty sequence of dates; its first and last entries must be present in ``self.dates``
        """
        # NOTE(review): no commit is issued on ``self.monitor_connection``
        # here; presumably ``monitor_alpha`` or the caller commits - confirm.
        dnames = self.db.alpha.distinct('dname')
        si, ei = map(self.dates.index, [dates[0], dates[-1]])
        # Clamp the look-back offsets: a negative index would wrap around to
        # the end of ``self.dates`` and select the wrong (usually empty) window.
        window = self.dates[max(si-20, 0): ei+1]
        tradable = misc_fetcher.fetch_window('tradable', window).astype(bool)
        TOP70Q = univ_fetcher.fetch_window('TOP70Q', window)
        CS500 = components_fetcher.fetch_window('CS500', window)
        # The alpha query reaches one entry further back than the universe window.
        first_date = self.dates[max(si-21, 0)]

        cursor = self.monitor_connection.cursor()
        SQL1 = "SELECT * FROM alpha_category WHERE name=%s"
        SQL2 = "INSERT INTO alpha_category (name, category) VALUES (%s, %s)"
        for dname in dnames:
            if self.options['alphas'] and dname not in self.options['alphas']:
                continue
            if self.options['excludes'] and dname in self.options['excludes']:
                continue
            if self.options['pattern'] is not None and not self.options['pattern'].search(dname):
                continue
            if self.options['exclude_pattern'] is not None and self.options['exclude_pattern'].search(dname):
                continue

            self.logger.info('Processing {}', dname)

            cursor.execute(SQL1, (dname,))
            rows = list(cursor)
            if not rows:
                if self.options['category'] is None:
                    category = raw_input('Specify a category for %s: ' % dname)
                else:
                    category = self.options['category']
                cursor.execute(SQL2, (dname, category))
                # Re-read only after an insert, to pick up the new row.
                cursor.execute(SQL1, (dname,))
                rows = list(cursor)
            alpha_id = rows[0][0]

            cur = self.db.alpha.find(
                    {'dname': dname, 'date': {'$gte': first_date, '$lte': dates[-1]}},
                    {'_id': 0, 'dvalue': 1, 'date': 1})
            alpha = pd.DataFrame({row['date']: row['dvalue'] for row in cur}).T
            if len(alpha) == 0:
                continue

            perf = Performance(alpha)
            self.monitor_alpha(perf.get_universe(tradable), 'tradable', dates, alpha_id, cursor)
            self.monitor_alpha(perf.get_universe(TOP70Q), 'TOP70Q', dates, alpha_id, cursor)
            self.monitor_alpha(perf.get_universe(CS500), 'CS500', dates, alpha_id, cursor)
            self.logger.info('MONITOR for {} from {} to {}', dname, dates[0], dates[-1])
Пример #4
0
def run_separate_file(outdir,
                      alpha,
                      params,
                      startdate,
                      enddate,
                      predicate=None,
                      threads=multiprocessing.cpu_count(),
                      ftype='csv'):
    """Execute instances of an alpha in parallel and stores each DataFrame in separate file. Each item in params should be a ``dict``.

    An existing ``outdir`` directory is removed and recreated. Each worker
    result that passes ``predicate`` is written to ``<outdir>/alpha<i>`` and
    the kept parameter sets are dumped to ``<outdir>/params.json`` with the
    serializer matching ``ftype``. For an unrecognised ``ftype`` no alpha
    files are written and ``params.json`` is left empty.

    :param outdir: Directory to store output files
    :param alpha: Passed through unchanged to ``worker_hdf`` for every parameter set
    :param params: Iterable of parameter sets, one per alpha instance
    :param startdate: Passed through unchanged to ``worker_hdf``
    :param enddate: Passed through unchanged to ``worker_hdf``
    :param function predicate: A function with :py:class:`orca.perf.performance.Performance` object as the only parameter; for example: ``lambda x: x.get_original().get_ir() > 0.1``. Default: None
    :param int threads: Number of worker processes in the pool. Default: number of CPUs
    :param str ftype: File format; currently only supports ('csv', 'pickle', 'msgpack')
    """
    if os.path.exists(outdir) and os.path.isdir(outdir):
        shutil.rmtree(outdir)
    logger = logbook.Logger(outdir)
    os.makedirs(outdir)

    iterobj = ((i, alpha, param, startdate, enddate)
               for i, param in enumerate(params))
    pool = multiprocessing.Pool(threads)
    res = pool.imap_unordered(worker_hdf, iterobj)
    pool.close()
    pool.join()

    # Parameter sets that passed the predicate, keyed by their position in
    # ``params``; kept under a new name so the argument is not rebound.
    kept = {}
    for i, param, frame in res:
        if predicate is not None and not predicate(Performance(frame)):
            continue
        kept[i] = param
        logger.debug('Saving alpha with parameter: {!r}'.format(param))
        path = os.path.join(outdir, 'alpha' + str(i))
        if ftype == 'csv':
            frame.to_csv(path)
        elif ftype == 'pickle':
            frame.to_pickle(path)
        elif ftype == 'msgpack':
            frame.to_msgpack(path)

    # pickle and msgpack emit bytes, so they need a binary handle; only the
    # JSON dump is text. The file name is kept for backward compatibility.
    mode = 'w' if ftype == 'csv' else 'wb'
    with open(os.path.join(outdir, 'params.json'), mode) as fobj:
        if ftype == 'csv':
            json.dump(kept, fobj)
        elif ftype == 'pickle':
            cPickle.dump(kept, fobj)
        elif ftype == 'msgpack':
            msgpack.dump(kept, fobj)
Пример #5
0
 def setUp(self):
     """Create the three analysers and the performance object used by the tests."""
     fixtures = (('alpha1', alpha1), ('alpha2', alpha2), ('alpha3', alpha3))
     for attr, frame in fixtures:
         setattr(self, attr, Analyser(frame, data=data))
     perf = Performance(alpha1)
     perf.set_returns(data)
     self.perf = perf
Пример #6
0
class PerfTestCase(unittest.TestCase):
    """Checks on ``Performance`` and on ``Analyser`` built from the module-level fixtures."""

    def setUp(self):
        """Create the three analysers and the performance object used by the tests."""
        fixtures = (('alpha1', alpha1), ('alpha2', alpha2), ('alpha3', alpha3))
        for attr, frame in fixtures:
            setattr(self, attr, Analyser(frame, data=data))
        perf = Performance(alpha1)
        perf.set_returns(data)
        self.perf = perf

    def tearDown(self):
        """Drop the references created in ``setUp``."""
        for attr in ('alpha1', 'alpha2', 'alpha3', 'perf'):
            setattr(self, attr, None)

    def test_get1(self):
        """Long and short legs of ``perf`` match the ``alpha2`` and ``alpha3`` analysers."""
        self.assertTrue(
            frames_equal(self.perf.get_long().alpha, self.alpha2.alpha))
        self.assertTrue(
            frames_equal(self.perf.get_short().alpha, self.alpha3.alpha))

    def test_get_bms(self):
        """Per-row counts of the three ``get_bms`` parts add up to the total count."""
        parts = [part.alpha for part in self.perf.get_bms()]
        big, mid, small = parts
        total = big.count(axis=1) + mid.count(axis=1) + small.count(axis=1)
        self.assertTrue(series_equal(self.perf.alpha.count(axis=1), total))

    def test_init_1(self):
        """Absolute weights of ``alpha1`` sum to 1 on every row."""
        row_sums = np.abs(self.alpha1.alpha).sum(axis=1)
        self.assertTrue(np.allclose(row_sums, 1))

    def test_init_2(self):
        """Absolute weights of ``alpha2`` sum to 1 on every row."""
        row_sums = np.abs(self.alpha2.alpha).sum(axis=1)
        self.assertTrue(np.allclose(row_sums, 1))

    def test_returns1(self):
        """Twice the ``alpha1`` return equals ``alpha2`` return minus ``alpha3`` return."""
        both = self.alpha1.get_returns(cost=0)
        longs = self.alpha2.get_returns(cost=0)
        shorts = self.alpha3.get_returns(cost=0)
        self.assertTrue(np.allclose(both * 2, longs - shorts))

    def test_returns2(self):
        """Returns are indexed by every date except the first."""
        returns = self.alpha1.get_returns(cost=0)
        matches = returns.index == dates[1:]
        self.assertTrue(matches.all())

    def test_turnover(self):
        """Twice the ``alpha1`` turnover equals ``alpha2`` plus ``alpha3`` turnover."""
        # |((l1-s1)-(l2-s2))| = |(l1-l2) - (s1-s2)| <= |l1-l2| + |s1-s2|
        both = self.alpha1.get_turnover()
        longs = self.alpha2.get_turnover()
        shorts = self.alpha3.get_turnover()
        self.assertTrue(np.allclose(both * 2, longs + shorts))

    def test_ac(self):
        """``get_ac`` differs from the plain row-wise lag-1 correlation."""
        weights = self.alpha1.alpha
        computed = self.alpha1.get_ac()
        naive = weights.corrwith(weights.shift(1), axis=1).iloc[1:]
        self.assertFalse(np.allclose(computed, naive))

    def test_ic(self):
        """``get_ic`` equals the row-wise correlation of ``data`` with lagged alpha."""
        computed = self.alpha1.get_ic()
        lagged = self.alpha1.alpha.shift(1)
        expected = data.corrwith(lagged, axis=1).iloc[1:]
        self.assertTrue(series_equal(computed, expected))

    def test_summary_ir1(self):
        """Default IR summary is indexed by days, IR1 and rIR1."""
        summary = self.alpha1.summary_ir()
        self.assertListEqual(['days', 'IR1', 'rIR1'], list(summary.index))

    def test_summary_ir2(self):
        """IR summary with ``by='A'`` has a single column."""
        summary = self.alpha1.summary_ir(by='A')
        self.assertEqual(len(summary.columns), 1)

    def test_summary_turnover(self):
        """Weekly turnover summary exposes the expected index labels."""
        summary = self.alpha1.summary_turnover(freq='weekly')
        expected = ['turnover', 'AC1', 'rAC1', 'AC5', 'rAC5']
        self.assertListEqual(expected, list(summary.index))

    def test_summary_returns(self):
        """``summary_returns`` runs without raising."""
        self.alpha1.summary_returns(cost=0.001)
        self.assertTrue(True)

    def test_summary(self):
        """``summary(group='ir')`` gives the same frame as ``summary_ir``."""
        via_summary = self.alpha1.summary(group='ir', by='A')
        direct = self.alpha1.summary_ir(by='A')
        self.assertTrue(frames_equal(via_summary, direct))
Пример #7
0
    if args.pdf and os.path.exists(args.pdf):
        with magic.Magic() as m:
            ftype = m.id_filename(args.pdf)
            if ftype[:3] != 'PDF':
                print 'The argument --pdf if exists must be a PDF file'
                exit(0)
    if args.png and os.path.exists(args.png):
        with magic.Magic() as m:
            ftype = m.id_filename(args.png)
            if ftype[:3] != 'PNG':
                print 'The argument --png if exists must be a PNG file'
                exit(0)

    alpha = read_frame(args.alpha, args.ftype)
    perf = Performance(alpha)
    plotter = QuantilesPlotter(perf.get_quantiles(args.quantile))

    figs = []
    if 'pnl' in args.plot:
        fig = plotter.plot_pnl(args.startdate, args.enddate)
        figs.append(fig)
    if 'returns' in args.plot:
        fig = plotter.plot_returns(args.by, args.startdate, args.enddate)
        figs.append(fig)

    if args.pdf:
        pdf = os.path.basename(args.alpha) + '-' + str(args.quantile) + '.pdf'
        if os.path.exists(pdf):
            os.remove(pdf)
        pp = PdfPages(pdf)
Пример #8
0
                print '[WARNING] Failed to parse file', alpha
    else:
        for alpha in args.alphas:
            try:
                alphadf = read_frame(alpha, args.ftype)
                if args.atype is None:
                    if len(alphadf.index) == len(np.unique(
                            alphadf.index.date)):
                        args.atype = 'daily'
                    else:
                        args.atype = 'intraday'

                if args.atype == 'intraday':
                    perf = IntPerformance(alphadf)
                else:
                    perf = Performance(alphadf)
                alphas[alpha] = perf
            except:
                print '[WARNING] Failed to parse file', alpha

    if args.univ:
        assert args.univ in univ_fetcher.dnames
        dates = np.unique([dt.strftime('%Y%m%d') for dt in perf.alpha.index])
        univ = univ_fetcher.fetch_window(args.univ, dates)
        alphas = {
            alpha: perf.get_universe(univ)
            for alpha, perf in alphas.iteritems()
        }

    if args.longonly:
        if args.quantile:
Пример #9
0
    if args.pdf and os.path.exists(args.pdf):
        with magic.Magic() as m:
            ftype = m.id_filename(args.pdf)
            if ftype[:3] != 'PDF':
                print 'The argument --pdf if exists must be a PDF file'
                exit(0)
    if args.png and os.path.exists(args.png):
        with magic.Magic() as m:
            ftype = m.id_filename(args.png)
            if ftype[:3] != 'PNG':
                print 'The argument --png if exists must be a PNG file'
                exit(0)

    alpha = read_frame(args.alpha, args.ftype)
    perf = Performance(alpha)
    plotter = QuantilesPlotter(perf.get_quantiles(args.quantile))

    figs = []
    if 'pnl' in args.plot:
        fig = plotter.plot_pnl(args.startdate, args.enddate)
        figs.append(fig)
    if 'returns' in args.plot:
        fig = plotter.plot_returns(args.by, args.startdate, args.enddate)
        figs.append(fig)

    if args.pdf:
        pdf = os.path.basename(args.alpha)+'-'+str(args.quantile)+'.pdf'
        if os.path.exists(pdf):
            os.remove(pdf)
        pp = PdfPages(pdf)
Пример #10
0
 def update(self, dates):
     """Update alpha performance metrics for the **same** day after market close.

     For every alpha name found in ``self.db.alpha`` (restricted to
     ``self.options['alphas']`` when that option is non-empty), alpha values
     are loaded from up to 20 entries of ``self.dates`` before ``dates[0]``
     through ``dates[-1]``. The result of ``self.get_metrics`` is then
     upserted into ``self.collection`` for each requested date that has alpha
     values, once per mode: ``longshort``, ``quantile30``, ``BTOP70Q`` and
     ``top30``.

     :param dates: Non-empty sequence of dates; its first and last entries must be present in ``self.dates``
     """
     dnames = self.db.alpha.distinct('dname')
     si, ei = map(self.dates.index, [dates[0], dates[-1]])
     # Clamp the look-back start: a negative index would wrap around to the
     # end of ``self.dates`` and select the wrong (usually empty) window.
     start = max(si - 20, 0)
     BTOP70Q = univ_fetcher.fetch_window('BTOP70Q',
                                         self.dates[start:ei + 1])
     # (mode label, analyser factory) pairs. The factories are invoked lazily
     # so each analyser is built right before its documents are written.
     modes = [
         ('longshort', lambda perf: perf.get_longshort()),
         ('quantile30', lambda perf: perf.get_qtail(0.3)),
         ('BTOP70Q', lambda perf: perf.get_universe(BTOP70Q).get_longshort()),
         ('top30', lambda perf: perf.get_qtop(0.3)),
     ]
     cnt = 0
     for dname in dnames:
         if self.options['alphas'] and dname not in self.options['alphas']:
             continue
         cursor = self.db.alpha.find(
             {
                 'dname': dname,
                 'date': {
                     '$gte': self.dates[start],
                     '$lte': dates[-1]
                 }
             }, {
                 '_id': 0,
                 'dvalue': 1,
                 'date': 1
             })
         alpha = pd.DataFrame(
             {row['date']: row['dvalue']
              for row in cursor}).T
         if len(alpha) == 0:
             continue
         perf = Performance(alpha)
         # Only dates that actually carry alpha values get a document.
         present = [date for date in dates if date in alpha.index]
         for mode, make_analyser in modes:
             analyser = make_analyser(perf)
             for date in present:
                 key = {'alpha': dname, 'mode': mode, 'date': date}
                 metrics = self.get_metrics(analyser, date)
                 self.collection.update(key, {'$set': metrics}, upsert=True)
         cnt += 1
     if len(dates) == 1:
         self.logger.info(
             'UPSERT documents for {} alphas into (c: [{}]) of (d: [{}]) on {}',
             cnt, self.collection.name, self.db.name, dates[0])
     else:
         self.logger.info(
             'UPSERT documents for {} alphas into (c: [{}]) of (d: [{}]) from {} to {}',
             cnt, self.collection.name, self.db.name, dates[0], dates[-1])
Пример #11
0
 def setUp(self):
     """Create the three analysers and the performance object used by the tests."""
     fixtures = (('alpha1', alpha1), ('alpha2', alpha2), ('alpha3', alpha3))
     for attr, frame in fixtures:
         setattr(self, attr, Analyser(frame, data=data))
     perf = Performance(alpha1)
     perf.set_returns(data)
     self.perf = perf
Пример #12
0
class PerfTestCase(unittest.TestCase):
    """Checks on ``Performance`` and on ``Analyser`` built from the module-level fixtures."""

    def setUp(self):
        """Create the three analysers and the performance object used by the tests."""
        fixtures = (('alpha1', alpha1), ('alpha2', alpha2), ('alpha3', alpha3))
        for attr, frame in fixtures:
            setattr(self, attr, Analyser(frame, data=data))
        perf = Performance(alpha1)
        perf.set_returns(data)
        self.perf = perf

    def tearDown(self):
        """Drop the references created in ``setUp``."""
        for attr in ('alpha1', 'alpha2', 'alpha3', 'perf'):
            setattr(self, attr, None)

    def test_get1(self):
        """Long and short legs of ``perf`` match the ``alpha2`` and ``alpha3`` analysers."""
        self.assertTrue(
            frames_equal(self.perf.get_long().alpha, self.alpha2.alpha))
        self.assertTrue(
            frames_equal(self.perf.get_short().alpha, self.alpha3.alpha))

    def test_get_bms(self):
        """Per-row counts of the three ``get_bms`` parts add up to the total count."""
        parts = [part.alpha for part in self.perf.get_bms()]
        big, mid, small = parts
        total = big.count(axis=1) + mid.count(axis=1) + small.count(axis=1)
        self.assertTrue(series_equal(self.perf.alpha.count(axis=1), total))

    def test_init_1(self):
        """Absolute weights of ``alpha1`` sum to 1 on every row."""
        row_sums = np.abs(self.alpha1.alpha).sum(axis=1)
        self.assertTrue(np.allclose(row_sums, 1))

    def test_init_2(self):
        """Absolute weights of ``alpha2`` sum to 1 on every row."""
        row_sums = np.abs(self.alpha2.alpha).sum(axis=1)
        self.assertTrue(np.allclose(row_sums, 1))

    def test_returns1(self):
        """Twice the ``alpha1`` return equals ``alpha2`` return minus ``alpha3`` return."""
        both = self.alpha1.get_returns(cost=0)
        longs = self.alpha2.get_returns(cost=0)
        shorts = self.alpha3.get_returns(cost=0)
        self.assertTrue(np.allclose(both * 2, longs - shorts))

    def test_returns2(self):
        """Returns are indexed by every date except the first."""
        returns = self.alpha1.get_returns(cost=0)
        matches = returns.index == dates[1:]
        self.assertTrue(matches.all())

    def test_turnover(self):
        """Twice the ``alpha1`` turnover equals ``alpha2`` plus ``alpha3`` turnover."""
        # |((l1-s1)-(l2-s2))| = |(l1-l2) - (s1-s2)| <= |l1-l2| + |s1-s2|
        both = self.alpha1.get_turnover()
        longs = self.alpha2.get_turnover()
        shorts = self.alpha3.get_turnover()
        self.assertTrue(np.allclose(both * 2, longs + shorts))

    def test_ac(self):
        """``get_ac`` differs from the plain row-wise lag-1 correlation."""
        weights = self.alpha1.alpha
        computed = self.alpha1.get_ac()
        naive = weights.corrwith(weights.shift(1), axis=1).iloc[1:]
        self.assertFalse(np.allclose(computed, naive))

    def test_ic(self):
        """``get_ic`` equals the row-wise correlation of ``data`` with lagged alpha."""
        computed = self.alpha1.get_ic()
        lagged = self.alpha1.alpha.shift(1)
        expected = data.corrwith(lagged, axis=1).iloc[1:]
        self.assertTrue(series_equal(computed, expected))

    def test_summary_ir1(self):
        """Default IR summary is indexed by days, IR1 and rIR1."""
        summary = self.alpha1.summary_ir()
        self.assertListEqual(['days', 'IR1', 'rIR1'], list(summary.index))

    def test_summary_ir2(self):
        """IR summary with ``by='A'`` has a single column."""
        summary = self.alpha1.summary_ir(by='A')
        self.assertEqual(len(summary.columns), 1)

    def test_summary_turnover(self):
        """Weekly turnover summary exposes the expected index labels."""
        summary = self.alpha1.summary_turnover(freq='weekly')
        expected = ['turnover', 'AC1', 'rAC1', 'AC5', 'rAC5']
        self.assertListEqual(expected, list(summary.index))

    def test_summary_returns(self):
        """``summary_returns`` runs without raising."""
        self.alpha1.summary_returns(cost=0.001)
        self.assertTrue(True)

    def test_summary(self):
        """``summary(group='ir')`` gives the same frame as ``summary_ir``."""
        via_summary = self.alpha1.summary(group='ir', by='A')
        direct = self.alpha1.summary_ir(by='A')
        self.assertTrue(frames_equal(via_summary, direct))
Пример #13
0
                                     for rank in [True, False]
                                     for n in (1, 5, 20)])
    parser.add_argument('--metric',
                        choices=metrics,
                        default='rIC1',
                        help='What type of correlations is of interest?')
    parser.add_argument('--limit', type=int, default=10)
    parser.add_argument('-n', '--negate', action='store_true')
    args = parser.parse_args()

    alpha_metric, dates = {}, None
    if args.alpha:
        for path in args.alpha:
            for name in glob(path):
                df = read_frame(name, args.ftype)
                perf = Performance(df)
                name = os.path.basename(name)
                alpha_metric[name] = get_metric(perf, args.mode, args.metric)
                if dates is None:
                    dates = alpha_metric[name].index
                else:
                    dates = dates.union(alpha_metric[name].index)

    ext_alphas = {}
    if args.file:
        with open(args.file) as file:
            for line in file:
                name, fpath = line.strip().split()
                ext_alphas[name] = read_frame(fpath, args.ftype)
    if args.dir:
        assert os.path.exists(args.dir)